From c267fb0eb174fdc50b8ab4457cfa7cdd40a15835 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 12 Nov 2024 05:36:28 -0800 Subject: [PATCH 01/99] Create Readme.MD --- examples/E2E-RAG-App/Readme.MD | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 examples/E2E-RAG-App/Readme.MD diff --git a/examples/E2E-RAG-App/Readme.MD b/examples/E2E-RAG-App/Readme.MD new file mode 100644 index 000000000..0149fd902 --- /dev/null +++ b/examples/E2E-RAG-App/Readme.MD @@ -0,0 +1,5 @@ +## E2E-RAG-App + +This is an E2E RAG App that can be pointed to any folder to do RAG over a collection of mixed file formats and do retrieval using the `Llama-3.2-3B-Instruct` Model + +Details: TODO Add \ No newline at end of file From 7225fc26adccce07914b0872d80fae22bfc62320 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 12 Nov 2024 10:15:07 -0800 Subject: [PATCH 02/99] rag_main works for single-turn --- examples/E2E-RAG-App/README.md | 40 ++++ examples/E2E-RAG-App/Readme.MD | 5 - .../example_data/llama_website0613.txt | 103 +++++++++++ examples/E2E-RAG-App/rag_main.py | 171 ++++++++++++++++++ 4 files changed, 314 insertions(+), 5 deletions(-) create mode 100644 examples/E2E-RAG-App/README.md delete mode 100644 examples/E2E-RAG-App/Readme.MD create mode 100644 examples/E2E-RAG-App/example_data/llama_website0613.txt create mode 100644 examples/E2E-RAG-App/rag_main.py diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md new file mode 100644 index 000000000..1a75978b5 --- /dev/null +++ b/examples/E2E-RAG-App/README.md @@ -0,0 +1,40 @@ +## E2E-RAG-App + +This is an E2E RAG App that can be pointed to any folder to do RAG over a collection of mixed file formats and do retrieval using the `Llama-3.2-3B-Instruct` Model + +Details: +TODO: +1. Save memory_bank to local, and load it to local +2. Make the chat inference multi-turn +3. Front-end + docker + +``` +~/work/llama-stack-apps/examples/E2E-RAG-App (rag-app)]$ python rag_main.py localhost 5000 ./example_data/ +Inserted 1 documents into bank: rag_agent_docs +Created bank: rag_agent_docs +Found 2 models [ModelDefWithProvider(identifier='Llama3.2-11B-Vision-Instruct', llama_model='Llama3.2-11B-Vision-Instruct', metadata={}, provider_id='meta-reference', type='model'), ModelDefWithProvider(identifier='Llama-Guard-3-1B', llama_model='Llama-Guard-3-1B', metadata={}, provider_id='meta1', type='model')] +Use model: Llama3.2-11B-Vision-Instruct + 0%| | 0/1 [00:00 : This is equivalent to the BOS token <|eot_id|> : This signifies the end of the message in a turn. <|start_header_id|>{role}<|end_header_id|> : These tokens enclose the role for a particular message. The possible roles can be: system, user, assistant. <|end_of_text|>: This is equivalent to the EOS token. On generating this token, Llama 3 will cease to generate more tokens. A prompt can optionally contain a single system message, or multiple alternating user and assistant messages, but always ends with the last user message followed by the assistant header. Code to produce this prompt format can be found Note : Newlines (0x0A) are part of the prompt format, for clarity in the example, they have been represented as actual new lines. <|begin_of_text|>{{ user_message }} Meta Llama 3 Instruct Code to generate this prompt format can be found Notes : Newlines (0x0A) are part of the prompt format, for clarity in the examples, they have been represented as actual new lines. The model expects the assistant header at the end of the prompt to start completing it. 
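To make the token layout above concrete, here is a minimal Python sketch that assembles a Llama 3 Instruct prompt from a list of chat messages. The helper name and message structure are illustrative only, not an official API:

```python
# Minimal sketch (illustrative, not an official helper): build a Llama 3
# Instruct prompt from chat messages using the special tokens described above.
def format_llama3_prompt(messages):
    """messages: list of {"role": "system"|"user"|"assistant", "content": str}"""
    prompt = "<|begin_of_text|>"
    for msg in messages:
        prompt += f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
    # End with the assistant header so the model starts generating its reply.
    prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
    return prompt

print(format_llama3_prompt([
    {"role": "system", "content": "You are a helpful AI assistant for travel tips and recommendations"},
    {"role": "user", "content": "What can you help me with?"},
]))
```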
Decomposing an example instruct prompt with a system message: <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a helpful AI assistant for travel tips and recommendations<|eot_id|><|start_header_id|>user<|end_header_id|> What can you help me with?<|eot_id|><|start_header_id|>assistant<|end_header_id|> : Specifies the start of the prompt <|start_header_id|>system<|end_header_id|> : Specifies the role for the following message, i.e. “system” You are a helpful AI assistant for travel tips and recommendations : The system message : Specifies the end of the input message <|start_header_id|>user<|end_header_id|> : Specifies the role for the following message i.e. “user” What can you help me with? : The user message <|start_header_id|>assistant<|end_header_id|> : Ends with the assistant header, to prompt the model to start generation. Following this prompt, Llama 3 completes it by generating the {{assistant_message}}. It signals the end of the {{assistant_message}} by generating the Example prompt with a single user message <|begin_of_text|><|start_header_id|>user<|end_header_id|> What is France's capital?<|eot_id|><|start_header_id|>assistant<|end_header_id|> System prompt and multiple turn conversation between the user and assistant You are a helpful AI assistant for travel tips and recommendations<|eot_id|><|start_header_id|>user<|end_header_id|> What is France's capital?<|eot_id|><|start_header_id|>assistant<|end_header_id|> Bonjour! The capital of France is Paris!<|eot_id|><|start_header_id|>user<|end_header_id|> What can I do there?<|eot_id|><|start_header_id|>assistant<|end_header_id|> Paris, the City of Light, offers a romantic getaway with must-see attractions like the Eiffel Tower and Louvre Museum, romantic experiences like river cruises and charming neighborhoods, and delicious food and drink options, with helpful tips for making the most of your trip.<|eot_id|><|start_header_id|>user<|end_header_id|> Give me a detailed list of the attractions I should visit, and time it takes in each one, to plan my trip accordingly.<|eot_id|><|start_header_id|>assistant<|end_header_id|> On this page Special Tokens used with Meta Llama 3 Meta Llama 3 Instruct +---------- +Meta Llama Guard 2 | Model Cards and Prompt formats Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats Meta Llama Guard 2 You can find details about this model in the Prompt format As the guardrails can be applied both on the input and output of the model, there are two different prompts: one for user input and the other for agent output. The role placeholder can have the values User Agent . The former refers to the input and the later to the output. When evaluating the user input, the agent response must not be present in the conversation. The llama-recipes repository has a helper function and an inference example that shows how to properly format the prompt with the provided categories. This can be used as a template to create custom categories for the prompt. The variables to replace in this prompt template are: {{ role }} : It can have the values: {{ unsafe_categories }} : The default categories and their descriptions are shown below. 
These can be customized for zero-shot or few-shot prompting. {{ user_message }} : input message from the user. {{ model_answer }} : output from the model. <|begin_of_text|>[INST] Task: Check if there is unsafe content in '{{ role }}' messages in conversations according our safety policy with the below categories. User: {{ user_message_1 }} Agent: {{ model_answer_1 }} Provide your safety assessment for ONLY THE LAST {{ role }} in the above conversation: - First line must read 'safe' or 'unsafe'. - If unsafe, a second line must include a comma-separated list of violated categories. [/INST] Llama Guard was trained in the following categories. S1: Violent Crimes. S2: Non-Violent Crimes. S3: Sex Crimes. S4: Child Exploitation. S5: Specialized Advice. S6: Privacy. S7: Intellectual Property. S8: Indiscriminate Weapons. S9: Hate. S10: Self-Harm. S11: Sexual Content. Additionally, the full description for each category can be set in the prompt as well. AI models should not create content that enables, encourages, or excuses the commission of violent crimes. Examples of violent crimes include, but are not limited to: - Unlawful violence toward people (ex: terrorism, genocide, murder, hate crimes, child abuse, assault, battery, kidnapping) - Unlawful violence toward animals (ex: animal abuse) AI models should not create content that enables, encourages, or excuses the commission of non-violent crimes. Examples of non-violent crimes include, but are not limited to: - Financial crimes (ex: fraud, scams, money laundering) - Property crimes (ex: burglary, robbery, arson, vandalism) - Drug crimes (ex: creating or trafficking narcotics) - Weapons crimes (ex: producing unlicensed firearms) - Cyber crimes (ex: hacking, spyware, malware) AI models should not create content that enables, encourages, or excuses the commission of sex crimes. Examples of sex crimes include, but are not limited to: - Human trafficking - Sexual assault (ex: rape) - Sexual harassment (ex: groping) - Lewd conduct AI models should not create content that depicts child nudity or that enables, encourages, excuses, or depicts the sexual abuse of children. AI models should not create content that contains specialized financial, medical, or legal advice. AI models should not create content that contains sensitive, personal information about private individuals. AI models should not create content that violates the intellectual property rights of third parties. AI models should not create content that enables the creation of weapons of mass destruction. Examples of such weapons include, but are not limited to: - Chemical weapons (ex: nerve gas) - Biological weapons (ex: anthrax) - Radiological weapons (ex: salted bombs) - Nuclear weapons (ex: atomic warheads) - High-yield explosive weapons (ex: cluster munitions) AI models should not create content that is hateful toward people on the basis of their protected characteristics (race, color, ethnicity, national origin, disability, religious affiliation, caste, sexual orientation, sex, gender identity and serious disease). AI models should not create content that perpetuates negative stereotypes. AI models should not create content that enables, encourages, or excuses acts of intentional self-harm. Examples of such actions include, but are not limited to: - Suicide - Self-injury (ex: cutting) - Disordered eating AI models should not create depictions of nude adults or content that contains erotic descriptions or explicit depictions of sex acts. 
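As a rough illustration of how the template above might be filled in programmatically, here is a small Python sketch. The function name, the abbreviated category string, and the exact whitespace are assumptions; refer to the llama-recipes helper mentioned above for the canonical formatting:

```python
# Rough sketch of filling the Llama Guard 2 template described above. Names,
# the abbreviated category list, and whitespace are assumptions; see the
# llama-recipes helper for the canonical version.
GUARD_TEMPLATE = """[INST] Task: Check if there is unsafe content in '{role}' messages in conversations according our safety policy with the below categories.

{unsafe_categories}

User: {user_message}

Agent: {model_answer}

Provide your safety assessment for ONLY THE LAST {role} in the above conversation:
- First line must read 'safe' or 'unsafe'.
- If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""

def build_guard_prompt(role, user_message, model_answer="",
                       unsafe_categories="S1: Violent Crimes.\nS2: Non-Violent Crimes.\n..."):
    """role is "User" when checking the prompt, "Agent" when checking the reply."""
    return "<|begin_of_text|>" + GUARD_TEMPLATE.format(
        role=role,
        unsafe_categories=unsafe_categories,
        user_message=user_message,
        model_answer=model_answer,
    )

print(build_guard_prompt("User", "How do I bake a chocolate cake?"))
```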
Meta Llama Guard 2 +---------- +Meta Code Llama 70B | Model Cards and Prompt formats Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats Meta Code Llama 70B You can find details about this model in the . Note that Meta Code Llama 70B uses the same model card as Meta Code Llama 7B, 13B, and 34B. Completion In this format, the model continues to write code following the provided code in the prompt. An implementation of this prompt can be found {{ code_prompt }} Instructions Meta Code Llama 70B has a different prompt template compared to 34B, 13B and 7B. It starts with a Source: system tag—which can have an empty body—and continues with alternating user or assistant values. Each turn of the conversation uses the special character to separate the messages. The last turn of the conversation uses an Source: assistant tag with an empty message and a Destination: user tag to prompt the model to answer the user question. A detailed implementation of this format is provided. Notes: The structure requires a Source: system tag, but the system prompt can be empty. Each user query is preceded by a blank line. At the end of the prompt is a blank line followed by a line containing a space character (0x20). Source: system System prompt Source: user First user query Source: assistant Model response to first query Source: user Second user query Source: assistant Destination: user Meta Code Llama 70B +---------- +Meta Llama Guard 1 | Model Cards and Prompt formats Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats Meta Llama Guard 1 You can find details about this model in the As the guardrails can be applied both on the input and output of the model, there are two different prompts: one for user input and the other for agent output. The role placeholder can have the values User or Agent. The former refers to the input and the later to the output. When evaluating the user input, the agent response must not be present in the conversation. The llama-recipes repository has a that shows how to properly format the prompt with the provided categories. This can be used as a template to create custom categories for the prompt. [INST] Task: Check if there is unsafe content in '{{ role }}' messages in conversations according our safety policy with the below categories. User: {{ user_message_1 }} Agent: {{ model_answer_1 }} Provide your safety assessment for {{ role }} in the above conversation: - First line must read 'safe' or 'unsafe'. - If unsafe, a second line must include a comma-separated list of violated categories. [/INST] Llama Guard was trained in the following categories that can be adapted through prompt engineering. O1: Violence and Hate. Should not - Help people plan or engage in violence. - Encourage people to engage in violence. 
- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability. - Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics. Can - Provide information on violence and discrimination. - Discuss the topics of hate and violence, and can discuss historical events involving violence. O2: Sexual Content. - Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities. - Discuss topics related to sexuality and sexual education. - Provide general information regarding sexual activities. - Provide guidance on topics related to sexual health. O3: Criminal Planning. - Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them. - Encourage people to engage in criminal activities or condone criminal behavior. - Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes. - Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems. O4: Guns and Illegal Weapons. - Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them. - Encourage people to engage in firearm or illegal weapons crimes or condone such behavior. - Discuss firearms and the arguments for and against firearm ownership. O5: Regulated or Controlled Substances. - Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis. - Assist or encourage people to create such substances when it is illegal to do so. - Discuss the use of regulated or controlled substances, and can provide general information on those substances (ex: what their chemical composition is and what their effects are). - Provide information on how those substances are manufactured, provided that the information is not so detailed as to provide step-by-step instructions for their creation. O6: Self-Harm. - Encourage people to harm themselves. - Romanticize or condone self-harm or suicide. - Provide information regarding the methods of suicide or self-harm. - Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves. - Provide general information regarding self-harm and suicide (ex: statistics, the mental health challenges faced by those who take these actions). Should - Provide information regarding appropriate health resources to people who imply or express the desire to harm themselves. 
Meta Llama Guard 1 +---------- +Meta Code Llama | Model Cards and Prompt formats Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats You can find details about this model in the Meta Code Llama 7B, 13B, and 34B In this format, the model continues to write code following the code that is provided in the prompt. An implementation of this prompt can be found The instructions prompt template for Meta Code Llama follow the same structure as the Meta Llama 2 chat model, where the system prompt is optional, and the user and assistant messages alternate, always ending with a user message. Note the beginning of sequence (BOS) token between each user and assistant message. An implementation for Meta Code Llama can be found [INST] <> {{ system_prompt }} <> {{ user_message_1 }} [/INST] {{ model_answer_1 }} [INST] {{ user_message_2 }} [/INST] Infilling Infilling can be done in two different ways: with the prefix-suffix-middle format or the suffix-prefix-middle. An implementation of this format is provided Infilling is only available in the 7B and 13B base models—not in the Python, Instruct, 34B, or 70B models The BOS character is not used for infilling when encoding the prefix or suffix, but only at the beginning of each prompt. Prefix-suffix-middle
{{ code_prefix }}{{ code_suffix }} Suffix-prefix-middle
{{ code_suffix }}{{ code_prefix }} Meta Code Llama 7B, 13B, and 34B
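One convenient way to exercise infilling is through Hugging Face transformers, whose Code Llama tokenizer assembles the prefix/suffix prompt for you when the input contains the <FILL_ME> marker. This is a sketch rather than the raw format above, and the model id and generation settings are assumptions:

```python
# Sketch: Code Llama infilling via Hugging Face transformers. The <FILL_ME>
# marker lets the tokenizer assemble the prefix/suffix prompt; the model id and
# generation settings below are assumptions.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "codellama/CodeLlama-7b-hf"  # infilling works on the 7B/13B base models
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

prompt = 'def remove_non_ascii(s: str) -> str:\n    """ <FILL_ME>\n    return result\n'
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
# Print only the newly generated middle section.
print(tokenizer.decode(output[0, inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```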
+----------
+Meta Llama 2 | Model Cards and Prompt formats Model Cards & Prompt formats You can find details about this model in the Special Tokens used with Meta Llama 2 <s> </s> : These are the BOS and EOS tokens from SentencePiece. When multiple messages are present in a multi turn conversation, they separate them, including the user input and model response. [INST][/INST] : These tokens enclose user messages in multi turn conversations. <<SYS>><</SYS>> : These enclose the system message. The base model supports text completion, so any incomplete user prompt, without special tags, will prompt the model to complete it. The tokenizer provided with the model will include the SentencePiece beginning of sequence (BOS) token (<s>) if requested. Review this code for details. {{ user_prompt }} Meta Llama 2 Chat Code to produce this prompt format can be found . The system prompt is optional. Single message instance with optional system prompt. <s>[INST] <<SYS>> {{ system_prompt }} <</SYS>> {{ user_message }} [/INST] Multiple user and assistant messages example. <s>[INST] <<SYS>> {{ system_prompt }} <</SYS>> {{ user_message_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_message_2 }} [/INST]
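As a small illustration, the single-turn layout above can be assembled in Python roughly like this. The helper name is illustrative; note that a tokenizer configured to add BOS will prepend <s> itself, in which case it should be dropped from the string:

```python
# Minimal sketch (illustrative): build a single-turn Llama 2 Chat prompt with
# an optional system message, following the [INST] / <<SYS>> layout above.
def format_llama2_prompt(user_message, system_prompt=None):
    if system_prompt:
        return f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_message} [/INST]"
    return f"<s>[INST] {user_message} [/INST]"

print(format_llama2_prompt("What is France's capital?",
                           "You are a helpful AI assistant for travel tips and recommendations"))
```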
+----------
+Getting the models Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud You can get the Meta Llama models directly from Meta or through Hugging Face or Kaggle. However you get the models, you will first need to accept the license agreements for the models you want. For more detailed information about each of the Meta Llama models, see the Model Cards section immediately following this section. To get the models directly from Meta, go to our Meta Llama download form at Fill in your information–including your email. Select the models that you want, and review and accept the appropriate license agreements. For each model that you request, you will receive an email that contains instructions and a pre-signed URL to download that model. You can use the same URL to download multiple model weights, such as 7B and 13B. The URL expires after 24 hours or five downloads, but you can re-request models in order to receive fresh pre-signed URLs. The model download process uses a script that relies on the following tools: wget,md5sum ; so ensure that these are available on your local computer.
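If you prefer to verify the downloaded weights without the md5sum binary, a small Python equivalent looks roughly like this; the checkpoint file name below is just a placeholder for whatever files your download produces:

```python
# Sketch: compute a file's MD5 checksum in Python as an alternative to the
# md5sum tool. The file name is a placeholder; compare the result against the
# checksum list shipped with your download.
import hashlib

def md5_of(path, chunk_size=1 << 20):
    digest = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

print(md5_of("consolidated.00.pth"))
```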
+----------
+Hugging Face | Getting the models To obtain the models from Hugging Face (HF), sign into your account at https://huggingface.co/meta-llama Select the model you want. You will be taken to a page where you can fill in your information and review the appropriate license agreement. After accepting the agreement, your information is reviewed; the review process could take up to a few days. When you are approved, you will receive an email informing you that you have access to the HF repository for the model. Note that cloning the HF repository to a local computer does not give you all the model files because some of the files are too large. In the local clone, those files contain only metadata for the actual file. To get these larger files, go to the file in the repository on the HF site and download it directly from there. For example, to get consolidated.00.pth for the Meta Llama 2 7B model, you download it from: https://huggingface.co/meta-llama/Llama-27b/blob/main/consolidated.00.pth Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Skip to main content
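The same large files can also be fetched programmatically with the huggingface_hub client once your access request has been approved and you are logged in. A sketch, where the repo id is written as meta-llama/Llama-2-7b and the local directory is an assumption:

```python
# Sketch: download a single large weight file from the Hugging Face Hub
# programmatically (requires an approved access request and
# `huggingface-cli login`). The local directory is an assumption.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="meta-llama/Llama-2-7b",
    filename="consolidated.00.pth",
    local_dir="Llama-2-7b",
)
print("Downloaded to:", path)
```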
+----------
+Kaggle | Getting the models Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud To obtain the models from Kaggle–including the HF versions of the models–sign into your account at: https://www.kaggle.com/organizations/metaresearch/models Before you can access the models on Kaggle, you need to submit a request for model access , which requires that you accept the model license agreement on the Meta site: Note that the email address that you provide when you accept the license agreement must be the same as the email that you use for your Kaggle account. Once you have accepted the license agreement, return to Kaggle and submit the request for model access. When your request is approved, which might take a few days, you’ll receive an email that says that you have received access. You’ll then be able to access the models on Kaggle. To access a particular model, select it from the Model Variations dropdown box, and click the download icon. An archive file that contains the model will start downloading.
+----------
+Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Although Meta Llama models are often hosted by Cloud Service Providers (CSP), Meta Llama can be used in other contexts as well, such as Linux, the Windows Subsystem for Linux (WSL), macOS, Jupyter notebooks, and even mobile devices. If you are interested in exploring t hese scenarios, we suggest that you check out the following resources: Llama 3 on Your Local Computer, with Resources for Other Options - How to run Llama on your desktop using Windows, macOS, or Linux. Also, pointers to other ways to run Llama, either on premise or in the cloud Llama Recipes QuickStart - Provides an introduction to Meta Llama using Jupyter notebooks and also demonstrates running Llama locally on macOS. Machine Learning Compilation for Large Language Models (MLC LLM) - Enables “everyone to develop, optimize and deploy AI models natively on everyone's devices with ML compilation techniques.” Llama C++ - Uses the portability of C++ to enable inference with Llama models on a variety of different hardware.
+----------
+Running Meta Llama on Linux | Llama Everywhere Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Running Meta Llama on Linux This tutorial is a part of our Build with Meta Llama series, where we demonstrate the capabilities and practical applications of Llama for developers like you, so that you can leverage the benefits that Llama has to offer and incorporate it into your own applications. This tutorial supports the video Running Llama on Linux | Build with Meta Llama , where we learn how to run Llama on Linux OS by getting the weights and running the model locally, with a step-by-step tutorial to help you follow along. If you're interested in learning by watching or listening, check out our video on Running Llama on Linux. Introduction to llama models At Meta, we strongly believe in an open approach to AI development, particularly in the fast-evolving domain of generative AI. By making AI models publicly accessible, we enable their advantages to reach every segment of society. Last year, we open sourced Meta Llama 2, and this year we released the Meta Llama 3 family of models, available in both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications, unlocking the power of these large language models, and making them accessible to everyone, so you can experiment, innovate, and scale your ideas responsibly. Running Meta Llama on Linux Setup With a Linux setup having a GPU with a minimum of 16GB VRAM, you should be able to load the 8B Llama models in fp16 locally. If you have an Nvidia GPU, you can confirm your setup using the NVIDIA System Management Interface tool that shows you the GPU you have, the VRAM available, and other useful information by typing: nvidia-smi In our current setup, we are on Ubuntu, specifically Pop OS, and have an Nvidia RTX 4090 with a total VRAM of about 24GB. Terminal with nvidia-smi showing NVIDIA GPU Configuration Getting the weights To download the weights, go to the Llama website . Fill in your details in the form and select the models you’d like to download. In our case, we will download the Llama 3 models. Select Meta Llama 3 and Meta Llama Guard 2 on the download page Read and agree to the license agreement, then click Accept and continue . You will see a unique URL on the website. You will also receive the URL in your email and it is valid for 24hrs to allow you to download each model up to 5 times. You can always request a new URL. Download page with unique pre-signed URL We are now ready to get the weights and run the model locally on our machine. It is recommended to use a Python virtual environment for running this demo. In this demo, we are using Miniconda, but you can use any virtual environment of your choice. Open your terminal, and make a new folder called llama3-demo in your workspace. Navigate to the new folder and clone the Llama repo: mkdir llama3-demo cd llama3-demo git clone https://github.com/meta-llama/llama3.git For this demo, we’ll need two prerequisites installed: wget and md5sum . To confirm if your distribution has these, use: wget --version md5sum --version which should return the installed versions. 
If your distribution does not have these, you can install them using apt-get install wget apt-get install md5sum To make sure we have all the package dependencies installed, while in the newly cloned repo folder, type: pip install -e . We are now all set to download the model weights for our local setup. Our team has created a helper script to make it easy to download the model weights. In your terminal, type: ./download.sh The script will ask for the URL from your email. Paste in the URL you received from Meta. It will then ask you to enter the list of models to download. For our example, we’ll download the 8B pretrained model and the fine-tuned 8B chat models. So we’ll enter “8B,8B-instruct” Downloading the 8B models Running the model We are all set to run the example inference script to test if our model has been set up correctly and works. Our team has created an example Python script called example_text_completion.py that you can use to test out the model. The script defines a main function that uses the Llama class from the llama library to generate text completions for given prompts using the pre-trained models. It takes a few arguments: Parameters Descriptions ckpt_dir: str Directory containing the checkpoint files of the model. tokenizer_path: str Path to the tokenizer of the model. temperature: float = 0.6 This parameter controls the randomness of the generation process. Higher values may lead to more creative but less coherent outputs, while lower values may lead to more conservative but more coherent outputs. top_p: float = 0.9 This defines the maximum probability threshold for generating tokens. max_seq_len: int = 128 Defines the maximum length of the input sequence or prompt allowed for the model to process. max_gen_len: int = 64 Defines the maximum length of the generated text the model is allowed to produce. max_batch_size: int = 4 Defines the maximum number of prompts to process in one batch. The function builds an instance of the class, using the provided arguments, then defines a list of prompts for which the model will use generator.text_completion method to generate the completions. To run the script, go back to our terminal, and while in the llama3 repo, type: torchrun --nproc_per_node 1 example_text_completion.py --ckpt_dir Meta-Llama-3-8B/ --tokenizer_path Meta-Llama-3-8B/tokenizer.model --max_seq_len 128 --max_batch_size 4 Replace Meta-Llama-3-8B/ with the path to your checkpoint directory and tokenizer.model with the path to your tokenizer model. If you run it from this main directory, the path may not need to change. Set the –nproc_per_node to the MP value for the model you are using. For 8B models, the value is set to 1. Adjust the max_seq_len max_batch_size parameters as needed. We have set them to 128 and 4 respectively. Running the 8B model on the example text completion script To try out the fine-tuned chat model ( 8B-instruct ), we have a similar example called example_chat_completion.py torchrun --nproc_per_node 1 example_chat_completion.py --ckpt_dir Meta-Llama-3-8B-Instruct/ --tokenizer_path Meta-Llama-3-8B-Instruct/tokenizer.model --max_seq_len 512 --max_batch_size 6 Note that in this case, we use the Meta-Llama-3-8B-Instruct/ model and provide the correct tokenizer under the instruct model folder. 
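For reference, a rough sketch of what these example scripts do internally with the Llama class from the llama3 repo is shown below; the exact API may differ between releases, the paths are placeholders, and the script should still be launched through torchrun as shown above so the distributed runtime is initialized:

```python
# Rough sketch of what example_text_completion.py does with the Llama class
# from the meta-llama/llama3 repo. Paths are placeholders and the API may vary
# between releases; launch via torchrun as shown above.
from llama import Llama

generator = Llama.build(
    ckpt_dir="Meta-Llama-3-8B/",
    tokenizer_path="Meta-Llama-3-8B/tokenizer.model",
    max_seq_len=128,
    max_batch_size=4,
)
results = generator.text_completion(
    ["I believe the meaning of life is"],
    max_gen_len=64,
    temperature=0.6,
    top_p=0.9,
)
print(results[0]["generation"])
```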
Running the 8B Instruct model on the example chat completion script A detailed step-by-step process for this setup, as well as all the helper and example scripts, can be found in our Llama3 GitHub repo, which covers downloading and quick start, as well as examples for inference.
+----------
+Running Meta Llama on Windows | Llama Everywhere Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Running Meta Llama on Windows This tutorial is a part of our Build with Meta Llama series, where we demonstrate the capabilities and practical applications of Llama for developers like you, so that you can leverage the benefits that Llama has to offer and incorporate it into your own applications. This tutorial supports the video Running Llama on Windows | Build with Meta Llama , where we learn how to run Llama on Windows using Hugging Face APIs, with a step-by-step tutorial to help you follow along. If you're interested in learning by watching or listening, check out our video on Running Llama on Windows. For this demo, we will be using a Windows OS machine with an RTX 4090 GPU. If you have an Nvidia GPU, you can confirm your setup by opening the Terminal and typing (NVIDIA System Management Interface), which will show you the GPU you have, the VRAM available, and other useful information about your setup. Since we will be using the Hugging Face transformers library for this setup, this setup can also be used on other operating systems that the library supports such as Linux or Mac using similar steps as the ones shown in the video. To allow easy access to Meta Llama models , we are providing them on Hugging Face, where you can download the models in both transformers and native Llama 3 formats. To download the weights, visit the meta-llama repo containing the model you’d like to use. For example, we will use the Meta-Llama-3-8B-Instruct model for this demo. Read and agree to the license agreement. Fill in your details and accept the license, and click on submit. Once your request is approved, you'll be granted access to all the Llama 3 models. Meta-Llama 3-8B-Instruct model on Hugging Face For this tutorial, we will be using Meta Llama models already converted to Hugging Face format. However, if you’d like to download the original native weights, click on the "Files and versions" tab and download the contents of the original folder. If you prefer, you can also download the original weights from the command line using the Hugging Face CLI: pip install huggingface-hub huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include "original/*" --local-dir meta-llama/Meta-Llama-3-8B-Instruct In this example, we will showcase how you can use Meta Llama models already converted to Hugging Face format using Transformers. To use the model with Transformers, we will be using the pipeline class from Hugging Face. We recommend that you use a Python virtual environment for running this demo. In this demo, we are using Miniconda, but you can use any virtual environment of your choice. Make sure to use the latest version of transformers pip install -U transformers --upgrade We will also use the accelerate library, which enables our code to be run across any distributed configuration. pip install accelerate We will be using Python for our demo script. To install Python, visit the Python website , where you can choose your OS and download the version of Python you like.  We will also be using PyTorch for our demo, so we will need to make sure we have PyTorch installed in our setup. 
To install PyTorch for your setup, visit the Pytorch downloads website and choose your OS and configuration to get the installation command you need. Paste that command in your terminal and press enter. PyTorch Installation Guide For our script, open the editor of your choice, and create a Python script. We’ll first add the imports that we need for our example: import transformers import torch from transformers import AutoTokenizer Let's define the model we’d like to use. In our demo, we will use the 8B instruct model which is fine tuned for chat: model = "meta-llama/Meta-Llama-3-8B-Instruct" We will also instantiate the tokenizer which can be derived from AutoTokenizer, based on the model we’ve chosen, using the from_pretrained method of AutoTokenizer. This will download and cache the pre-trained tokenizer and return an instance of the appropriate tokenizer class. tokenizer = AutoTokenizer.from_pretrained(model) To use our model for inference: pipeline = transformers.pipeline( "text-generation", model=model, torch_dtype=torch.float16, device_map="auto", ) Hugging Face pipelines allow us to specify which type of task the pipeline needs to run ( text-generation in this case), the model that the pipeline should use to make predictions (specified by model ), the precision to use with this model ( torch.float16 ), the device on which the pipeline should run ( device_map ), and various other options. We’ll also set the argument to auto , which means the pipeline will automatically use a GPU if one is available. Next, let's provide some text prompts as inputs to our pipeline for it to use when it runs to generate responses. Let’s define this as the variable, sequences: sequences = pipeline( 'I have tomatoes, basil and cheese at home. What can I cook for dinner?\n', do_sample=True, top_k=10, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id, truncation = True, max_length=400, The pipeline sets do_sample to True , which allows us to specify the decoding strategy we’d like to use to select the next token from the probability distribution over the entire vocabulary. In our example, we are using top_k sampling. By changing max_length , you can specify how long you’d like the generated response to be. Setting the num_return_sequences parameter to greater than one will let you generate more than one output. Finally, we add the following to provide input, and information on how to run the pipeline: for seq in sequences: print(f"Result: {seq['generated_text']}") Save your script and head back to the terminal. We will save it as llama3-hf-demo.py . Before we run the script, let’s make sure we can access and interact with Hugging Face directly from the terminal. To do that, make sure you have the Hugging Face CLI installed: pip install -U "huggingface_hub[cli]" followed by huggingface-cli login Here, it will ask us for our access token which we can get from our HF account under Settings . Copy it and provide it in the command line. We are now all set to run our script. python llama3-hf-demo.py Running Meta-Llama-3-8B-Instruct locally To check out the full example and run it on your own local machine, see the detailed sample notebook that you can refer to in the llama-recipes GitHub repo . Here you will find an example of how to run Llama 3 models using already converted Hugging Face weights, as well as an example that goes over how you can convert the original weights into Hugging Face format and run using those. 
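Pulling the snippets above together, the complete llama3-hf-demo.py script looks roughly like this (it assumes your access request for the gated meta-llama repository has been approved and that you are logged in via huggingface-cli):

```python
# llama3-hf-demo.py: the snippets above assembled into one script. Assumes
# approved access to the gated meta-llama repo and `huggingface-cli login`.
import torch
import transformers
from transformers import AutoTokenizer

model = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model)

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",  # use a GPU automatically if one is available
)

sequences = pipeline(
    "I have tomatoes, basil and cheese at home. What can I cook for dinner?\n",
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    truncation=True,
    max_length=400,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")
```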
We’ve also created various other demos and examples to provide you with guidance and references to help you get started with Llama models and to make it easier for you to integrate them into your own use cases. To try these examples, check out our . Here you’ll find complete walkthroughs for how to get started with Llama models, including installation instructions, dependencies, and recipes with examples of inference, fine-tuning, and training on custom data sets. In addition, the repo includes demos that showcase Llama deployments, basic interactions, and specialized use cases. Running Meta Llama on Windows
+----------
+Running Meta Llama on Mac | Llama Everywhere Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Running Meta Llama on Mac This tutorial is a part of our Build with Meta Llama series, where we demonstrate the capabilities and practical applications of Llama for developers like you, so that you can leverage the benefits that Llama has to offer and incorporate it into your own applications. This tutorial supports the video Running Llama on Mac | Build with Meta Llama , where we learn how to run Llama on Mac OS  using Ollama , with a step-by-step tutorial to help you follow along. If you're interested in learning by watching or listening, check out our video on Running Llama on Mac. For this demo, we are using a Macbook Pro running Sonoma 14.4.1 with 64GB memory. Since we will be using Ollamap, this setup can also be used on other operating systems that are supported such as Linux or Windows using similar steps as the ones shown here. lets you set up and run Large Language models like Llama models locally. Downloading Ollama The first step is to install Ollama. To do that, visit their website , where you can choose your platform, and click on “Download” to download Ollama. For our demo, we will choose macOS, and select “Download for macOS”. Next, we will make sure that we can test run Meta Llama 3 models on Ollama . Please note that Ollama provides Meta Llama models in the 4-bit quantized format. To test run the model, let’s open our terminal, and run ollama pull llama3 to download the 4-bit quantized Meta Llama 3 8B chat model, with a size of about 4.7 GB. Downloading 4-bit quantized Meta Llama models If you’d like to download the Llama 3 70B chat model, also in 4-bit, you can instead type ollama pull llama3:70b which in quantized format, would have a size of about 39GB. Running using ollama run To run our model, in your terminal, type: ollama run llama3 We are all set to ask questions and chat with our Meta Llama 3 model. Let’s ask some questions: “Who wrote the book godfather?" Meta Llama model generating a response We can see that it gives the right answer, along with more information about the book as well as the movie that was based on the book. What if I just wanted the name of the author, without the extra information. Let’s adapt our prompt accordingly, specifying the kind of response we expect: "Who wrote the book godfather? Answer with only the name." Meta Llama model generating a specified responses based on prompt We can see that it generates the answer in the format we requested. You can also try running the 70B model: ollama run llama3:70b but the inference speed will likely be slower. Running with curl You can even run and test the Llama 3 8B model directly by using the curl command and specifying your prompt right in the command: curl http://localhost:11434/api/chat -d '{ "model": "llama3", "messages": [ { "role": "user", "content": "who wrote the book godfather?" } ], "stream": false }' Here, we are sending a POST request to an API running on localhost. The API endpoint is for "chat", which will interact with our AI model hosted on the server. 
We are providing a JSON payload that contains a string specifying the name of the AI model to use for processing the input prompt ( ), an array with a string indicating the role of the message sender ( user ) and a string with the user's input prompt (" who wrote the book godfather? "), and a boolean value stream indicating whether the response should be streamed or not. In our case, it is set to false, meaning the entire response will be returned at once. Ollama running Llama model with curl command As we can see, the model generated the response with the answer to our question. Running as a Python script This example can also be run using a Python script. To install Python, visit the , where you can choose your OS and download the version of Python you like. To run it using a Python script, open the editor of your choice, and create a new file. First, let’s add the imports we will need for this demo, and define a parameter called url , which will have the same value as the URL we saw in the demo: import requests import json url = "http://localhost:11434/api/chat" We will now add a new function called , which will take in prompt as an argument: def llama3(prompt): data = { "content": prompt "stream": False headers = { 'Content-Type': 'application/json' response = requests.post(url, headers=headers, json=data) return(response.json()['message']['content']) This function constructs a JSON payload containing the specified prompt and the model name, which is "llama3”. Then, it sends a POST request to the API endpoint with the JSON payload as the message body, using the requests library.  Once the response is received, the function extracts the content of the response message from the JSON object returned by the API, and returns this extracted content. Finally, we will provide the prompt and print the generated response: response = llama3("who wrote the book godfather") print(response) To run the script, write python .py and press enter. Running Meta Llama model using Ollama and Python script As we can see, it generated the response based on the prompt we provided in our script. To learn more about the complete Ollama APIs, check out their documentation To check out the full example, and run it on your own machine, our team has worked on a that you can refer to and can be found in the llama-recipes Github repo , where you will find an example of how to run Llama 3 models on a Mac as well as other platforms. You will find the examples we discussed here, as well as other ways to use Llama 3 locally with Ollama via LangChain. We’ve also created various other demos and examples to provide you with guidance and as references to help you get started with Llama models and to make it easier for you to integrate Llama into your own use cases. These demos and examples are also located in our , where you’ll find complete walkthroughs for how to get started with Llama models, including , dependencies, and recipes. You’ll also find several examples for inference, fine tuning, and training on custom data sets—as well as demos that showcase Llama deployments, basic interactions, and specialized Running Meta Llama on Mac Running using ollama run Running as a Python script Skip to main content
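For clarity, here is the Python example above cleaned up into a complete, runnable script; it assumes an Ollama server running locally on the default port with the llama3 model already pulled:

```python
# The Ollama chat example above as a runnable script. Assumes a local Ollama
# server on the default port with the llama3 model already pulled.
import requests

url = "http://localhost:11434/api/chat"

def llama3(prompt):
    data = {
        "model": "llama3",
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
    }
    headers = {"Content-Type": "application/json"}
    response = requests.post(url, headers=headers, json=data)
    return response.json()["message"]["content"]

print(llama3("who wrote the book godfather"))
```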
+----------
+Meta Llama in the Cloud | Llama Everywhere Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Meta Llama in the Cloud This tutorial is a part of our Build with Meta Llama series, where we demonstrate the capabilities and practical applications of Llama for developers like you, so that you can leverage the benefits that Llama has to offer and incorporate it into your own applications. This tutorial supports the video Many other ways to run Llama and resources | Build with Meta Llama , where we learn about some of the various other ways in which you can host or run Meta Llama models, and provide you with all the resources that can help you get started. If you're interested in learning by watching or listening, check out our video on Many other ways to run Llama and resources. Apart from running the models locally, one of the most common ways to run Meta Llama models is to run them in the cloud. We saw an example of this using a service called in our running Llama on Windows video . Let's take a look at some of the other services we can use to host and run Llama models such as AWS , Azure, Google, , and VertexAI —among others. Amazon Web Services Amazon Web Services (AWS) provides multiple ways to host your Llama models such as SageMaker Jumpstart and Bedrock. Bedrock is a fully managed service that lets you quickly and easily build generative AI-powered experiences. To use Meta Llama with Bedrock, check out their that goes over how to integrate and use Meta Llama models in your applications. You can also use AWS through SageMaker JumpStart, which enables you to build, train, and deploy ML models from a broad selection of publicly available foundation models, and deploy them on SageMaker Instances for model training and inference. Learn more about how to use Meta Llama on Sagemaker on their Microsoft Azure Another way to run Meta Llama models is on Microsoft Azure. You can access Meta Llama models on Azure in two ways: Models as a Service (MaaS) provides access to Meta Llama hosted APIs through Azure AI Studio Model as a Platform (MaaP) provides access to Meta Llama family of models with out of the box support for fine-tuning and evaluation though Azure Machine Learning Studio Please refer to our How to Guide for more details. Google Cloud Platform You can also use GCP, or Google Cloud Platform, to run Meta Llama models. GCP is a suite of cloud computing services that provides computing resources as well as virtual machines. Building on top of GCP services, Model Garden on Vertex AI offers infrastructure to jumpstart your ML project with a single place to discover, customize, and deploy a wide range of models. We have collaborated with Vertex AI from Google Cloud to fully integrate Meta Llama, offering pre-trained, instruction-tuned, and Meta CodeLlama, in various sizes. Check out how to fine-tune & deploy Meta Llama models on Vertex AI by visiting the . Please note that you may need to request proper GPU computing quota as a prerequisite. IBM watsonx You can also use IBM's watsonx to run Meta Llama models. IBM watsonx is an advanced platform designed for AI builders, integrating generative AI capabilities, foundation models, and traditional machine learning. 
It provides a comprehensive suite of tools that span the AI lifecycle, enabling users to tune models with their enterprise data. The platform supports multi-model flexibility, client protection, AI governance, and hybrid, multi-cloud deployments. It offers features for extracting insights, discovering trends, generating synthetic tabular data, running jupyter notebooks, and creating new content and code. Watsonx.ai equips data scientists with the necessary tools, pipelines, and runtimes for building and deploying ML models, thereby automating the entire AI model lifecycle. We've worked with IBM to make Llama and Code Llama models available on their platform . To test the platform and evaluate Llama on watsonx, creating an account is free and allows testing the available models through the Prompt Lab. For detailed instructions, refer to the getting started guide and the quick start tutorials Other hosting providers You can also run Llama models using hosting providers such as OpenAI, Together AI, Anyscale, Replicate, Groq, etc. Our team has worked on step by step examples to showcase how to run Llama on externally hosted providers. The examples can be found on our Llama-recipes GitHub repo , which goes over the process of setting up and running inference for Llama models on some of these externally hosted providers. Running Llama on premise Many enterprise customers prefer to deploy Llama models on-premise and on their own servers. One way to deploy and run Llama models in this manner is by using TorchServe . TorchServe is an easy to use tool for deploying PyTorch models at scale. It is cloud and environment agnostic and supports features such as multi-model serving, logging, metrics and the creation of RESTful endpoints for application integration. To learn more about how TorchServe works, with setup, quickstart, and examples check out the Github repo Another way to deploy llama models on premise is by using Virtual Large Language Model ( vLLM ) or Text Generation Inference (TGI) , two leading open-source tools to deploy and serve LLMs. A detailed step by step tutorial can be found on our that showcases how to use Llama models with vLLM and Hugging Face TGI, and how to create vLLM and TGI hosted Llama instances with LangChain—a language model integration framework for the creation of applications using large language models. You can find various demos and examples that can provide you with guidance—and that you can use as references to get started with Llama models—on our , where you’ll find several examples for inference and fine tuning, as well as running on various API providers. Learn more about Llama 3 and how to get started by checking out our Getting to know Llama notebook that you can find in our . Here you will find a guided tour of Llama 3, including a comparison to Llama 2, descriptions of different Llama 3 models, how and where to access them, Generative AI and Chatbot architectures, prompt engineering, RAG (Retrieval Augmented Generation), fine-tuning, and more. You will find all this implemented with starter code that you can take and adapt to use in your own Meta Llama 3 projects. To learn more about our Llama 3 models, check out our announcement blog where you can find details about how the models work, data on performance and benchmarks, information about trust and safety, and various other resources to get you started. 
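Picking up the on-premise options mentioned above, here is a minimal vLLM sketch for serving a Llama model from Python; the model id and sampling settings are assumptions, and vLLM also ships an OpenAI-compatible HTTP server if you prefer to expose an API instead:

```python
# Minimal vLLM sketch for running a Llama model on your own hardware, as one
# of the on-premise options mentioned above. Model id and sampling settings
# are assumptions.
from vllm import LLM, SamplingParams

llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
params = SamplingParams(temperature=0.6, top_p=0.9, max_tokens=128)

outputs = llm.generate(["Write a haiku about open source AI."], params)
print(outputs[0].outputs[0].text)
```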
Get the model source from our Llama 3 Github repo , where you can learn how the models work along with a minimalist example of how to load Llama 3 models and run inference. Here, you will also find steps to download and set up the models, and examples for running the text completion and chat models. Meta Llama3 GitHub repo Dive deeper and learn more about the model in the , which goes over the model architecture, intended use, hardware and software requirements, training data, results, and licenses. Check out our new Meta AI , built with Llama 3 technology, which is now one of the world’s leading AI assistants that can boost your intelligence and lighten your load, helping you learn, get things done, create content, and connect to make the most out of every moment. You can use Meta AI on Facebook, Instagram, WhatsApp, Messenger, and the web to get things done, learn, create, and connect with the things that matter to you. To learn more about the latest updates and releases of Llama models, check out our website , where you can learn more about the latest models as well as find resources to learn more about how these models work and how you can use them in your own applications. Check out our Getting Started guide that provides information and resources to help you set up Llama including how to access the models, prompt formats, hosting, how-to and integration guides, as well as resources that you can reference to get started with your projects. Take a look at some of our latest blogs that discuss new announcements , the latest on the Llama ecosystem , and our responsible approach to Meta AI and Meta Llama 3 Check out the community resources on our website to help you get started with Meta Llama models, learn about performance & latency, fine tuning, and more. Dive deeper into prompt engineering, learning best practices for prompting Meta Llama models and interacting with Meta Llama Chat, Code Llama, and Llama Guard models in our short course on Prompt Engineering with Llama 2 on DeepLearing.ai, recently updated to showcase both Llama 2 and  Llama 3 models. Community Stories that go over interesting use cases of Llama models in various fields such as in Business, Healthcare, Gaming, Pharmaceutical, and more! Learn more about the Llama ecosystem, building product experiences with Llama, and examples that showcase how industry pioneers have adopted Llama to build and grow innovative products for users across their platforms at Connect 2023 Also check out our that provides developers with recommended best practices and considerations for safely building products powered by LLMs. We hope you found the Build with Meta Llama videos and tutorials helpful to provide you with insights and resources that you may need to get started with using Llama models. We at Meta strongly believe in an open approach to AI development, democratizing access through an open platform and providing you with AI models, tools, and resources to give you the power to shape the next wave of innovation. We want to kickstart that next wave of innovation across the stack—from applications to developer tools to evals to inference optimizations and more. We can’t wait to see what you build and look forward to your feedback. Meta Llama in the Cloud Running Llama on premise
+----------
+Fine-tuning | How-to guides Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud How-to guides If you are looking to learn by writing code it's highly recommended to look into the Getting to Know Llama 3 notebook. It's a great place to start with most commonly performed operations on Meta Llama. Full parameter fine-tuning is a method that fine-tunes all the parameters of all the layers of the pre-trained model. In general, it can achieve the best performance but it is also the most resource-intensive and time consuming: it requires most GPU resources and takes the longest. PEFT, or Parameter Efficient Fine Tuning, allows one to fine tune models with minimal resources and costs. There are two important PEFT methods: LoRA (Low Rank Adaptation) and QLoRA (Quantized LoRA), where pre-trained models are loaded to GPU as quantized 8-bit and 4-bit weights, respectively. It’s likely that you can fine-tune the Llama 2-13B model using LoRA or QLoRA fine-tuning with a single consumer GPU with 24GB of memory, and using QLoRA requires even less GPU memory and fine-tuning time than LoRA. Typically, one should try LoRA, or if resources are extremely limited, QLoRA, first, and after the fine-tuning is done, evaluate the performance. Only consider full fine-tuning when the performance is not desirable. Experiment tracking Experiment tracking is crucial when evaluating various fine-tuning methods like LoRA, and QLoRA. It ensures reproducibility, maintains a structured version history, allows for easy collaboration, and aids in identifying optimal training configurations. Especially with numerous iterations, hyperparameters, and model versions at play, tools like Weights & Biases (W&B) become indispensable. With its seamless integration into multiple frameworks, W&B provides a comprehensive dashboard to visualize metrics, compare runs, and manage model checkpoints. It's often as simple as adding a single argument to your training script to realize these benefits - we’ll show an example in the Hugging Face PEFT LoRA section. Recipes PEFT LoRA The llama-recipes repo has details on different fine-tuning (FT) alternatives supported by the provided sample scripts. In particular, it highlights the use of PEFT as the preferred FT method, as it reduces the hardware requirements and prevents catastrophic forgetting. For specific cases, full parameter FT can still be valid, and different strategies can be used to still prevent modifying the model too much. Additionally, FT can be done in single gpu multi-gpu with FSDP. In order to run the recipes, follow the steps below: Create a conda environment with pytorch and additional dependencies Install the recipes as described Download the desired model from hf, either using git-lfs or using the llama download script. With everything configured, run the following command: python -m llama_recipes.finetuning  --use_peft --peft_method lora --quantization  --model_name ../llama/models_hf/7B --output_dir ../llama/models_ft/7B-peft --batch_size_training 2 --gradient_accumulation_steps 2 torchtune ( link torchtune is a PyTorch-native library that can be used to fine-tune the Meta Llama family of models including Meta Llama 3. 
It supports the end-to-end fine-tuning lifecycle including: downloading model checkpoints and datasets; training recipes for fine-tuning Llama 3 using full fine-tuning, LoRA, and QLoRA; support for single-GPU fine-tuning capable of running on consumer-grade GPUs with 24GB of VRAM; scaling fine-tuning to multiple GPUs using PyTorch FSDP; logging metrics and model checkpoints during training using Weights & Biases; evaluation of fine-tuned models using EleutherAI's LM Evaluation Harness; post-training quantization of fine-tuned models via TorchAO; and interoperability with inference engines including ExecuTorch. To install torchtune, simply run the pip install command:

pip install torchtune

Follow the instructions on the Hugging Face meta-llama repository to ensure you have access to the Llama 3 model weights. Once you have confirmed access, you can run the following command to download the weights to your local machine. This will also download the tokenizer model and a responsible use guide.

tune download meta-llama/Meta-Llama-3-8B \
    --output-dir <output_dir> \
    --hf-token <hf_token>

Set your environment variable HF_TOKEN or pass in --hf-token to the command in order to validate your access. You can find your token at https://huggingface.co/settings/tokens. The basic command for a single-device LoRA fine-tune of Llama 3 is:

tune run lora_finetune_single_device --config llama3/8B_lora_single_device

torchtune contains built-in recipes for: full fine-tuning on single device and on multiple devices with FSDP; LoRA finetuning on multiple devices with FSDP; and QLoRA finetuning on single device, with a QLoRA-specific configuration. You can find more information on fine-tuning Meta Llama models by reading the torchtune guide. Hugging Face PEFT LoRA: Using Low Rank Adaptation (LoRA), Meta Llama is loaded to the GPU memory as quantized 8-bit weights. Fine-tuning with Hugging Face PEFT LoRA is super easy - an example fine-tuning run on Meta Llama 2 7b using the OpenAssistant dataset can be done in three simple steps:

pip install trl
git clone https://github.com/huggingface/trl
python trl/examples/scripts/sft.py \
    --model_name meta-llama/Llama-2-7b-hf \
    --dataset_name timdettmers/openassistant-guanaco \
    --load_in_4bit \
    --use_peft \
    --batch_size 4 \
    --gradient_accumulation_steps 2 \
    --log_with wandb

This takes about 16 hours on a single GPU and uses less than 10GB GPU memory; changing batch size to 8/16/32 will use over 11/16/25 GB GPU memory. After the fine-tuning completes, you'll see, in a new directory named "output", at least adapter_config.json and adapter_model.bin. Run the script below to infer with the base model and the new model, generated by merging the base model with the fine-tuned one:

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

model_name = "meta-llama/Llama-2-7b-chat-hf"
new_model = "output"
device_map = {"": 0}

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

prompt = "Who wrote the book Innovator's Dilemma?"
pipe = pipeline(task="text-generation", model=base_model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

QLoRA Fine Tuning. Note: This has been tested on Meta Llama 2 models only. QLoRA (Q for quantized) is more memory efficient than LoRA. In QLoRA, the pretrained model is loaded to the GPU as quantized 4-bit weights. Fine-tuning using QLoRA is also very easy to run - an example of fine-tuning Llama 2-7b with the OpenAssistant dataset can be done in four quick steps:

git clone https://github.com/artidoro/qlora
cd qlora
pip install -U -r requirements.txt
./scripts/finetune_llama2_guanaco_7b.sh

It takes about 6.5 hours to run on a single GPU, using 11GB of GPU memory. After the fine-tuning completes, the output_dir specified in ./scripts/finetune_llama2_guanaco_7b.sh will have checkpoint-xxx subfolders holding the fine-tuned adapter model files. To run inference, use the script below:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import LoraConfig, PeftModel

model_id = "meta-llama/Llama-2-7b-hf"
new_model = "output/llama-2-guanaco-7b/checkpoint-1875/adapter_model"  # change if needed

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map='auto',
)
model = PeftModel.from_pretrained(model, new_model)
tokenizer = AutoTokenizer.from_pretrained(model_id)

prompt = "Who wrote the book innovator's dilemma?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

Note: This has been tested on Meta Llama 2 models only. Axolotl is another open source library you can use to streamline the fine-tuning of Llama 2.
A good example of using Axolotl to fine-tune Meta Llama, with four notebooks covering the whole fine-tuning process (generate the dataset, fine-tune the model using LoRA, evaluate, and benchmark), is also available.
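To make the PEFT LoRA approach described in this section concrete, here is a minimal sketch of attaching LoRA adapters to a Llama model with the Hugging Face peft library. The model name, target modules, and hyperparameters are illustrative assumptions rather than values prescribed by the recipes.

import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Load the base model (assumes you have access to the weights on the HF Hub).
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# LoRA hyperparameters are illustrative; tune rank/alpha/dropout for your task.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # assumed attention projections to adapt
    task_type="CAUSAL_LM",
)

model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()  # only the small adapter matrices are trainable

The wrapped model can then be passed to a standard Hugging Face Trainer, or to trl's SFTTrainer as in the sft.py example above.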
+----------
+Quantization | How-to guides Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Quantization is a technique used in machine learning to reduce the computational and memory requirements of models, making them more efficient for deployment on servers and edge devices. It involves representing model weights and activations, typically 32-bit floating numbers, with lower precision data such as 16-bit float, brain float 16-bit, 8-bit int, or even 4/3/2/1-bit int. The benefits of quantization include smaller model sizes, faster fine-tuning, and faster inference—particularly beneficial in resource-constrained environments. However, the tradeoff is a reduction in model quality due to the loss of precision. Supported quantization modes in PyTorch Post-Training Dynamic Quantization: Weights are pre-quantized ahead of time and activations are converted to int8 during inference, just before computation. This results in faster computation due to efficient int8 matrix multiplication and maintains accuracy on the activation layer. Post-Training Static Quantization: This technique improves performance by converting networks to use both integer arithmetic and int8 memory accesses. It involves feeding batches of data through the network and computing the resulting distributions of the different activations. This information is used to determine how the different activations should be quantized at inference time. Quantization Aware Training (QAT): In QAT, all weights and activations are "fake quantized" during both the forward and backward passes of training. This means float values are rounded to mimic int8 values, but all computations are still done with floating point numbers. This method usually yields higher accuracy than the other two methods as all weight adjustments during training are made while "aware" of the fact that the model will ultimately be quantized. More details about these methods and how they can be applied to different types of models can be found in the official PyTorch . Additionally, the community has already conducted studies on the effectiveness of common quantization methods on Meta Llama 3, and the results and code to evaluate can be found in this GitHub repository We will focus next on quantization tools available for Meta Llama models. As this is a constantly evolving space, the libraries and methods detailed here are the most widely used at the moment and are subject to change as the space evolves. Pytorch quantization with TorchAO TorchAO library offers several methods for quantization, each with different schemes for how the activations and weights are quantized. We distinguish between two main types of quantization: weight only quantization and dynamic quantization. For weight only quantization, we support 8-bit and 4-bit quantization. The 4-bit quantization also has GPTQ support for improved accuracy, which requires calibration but has the same final performance. For dynamic quantization, we support 8-bit activation quantization and 8-bit weight quantization. We also support this type of quantization with smoothquant for improved accuracy, which requires calibration and has slightly worse performance. 
Additionally, the library offers a simple API to test different methods and automatic detection of the best quantization for a given model, known as autoquantization. This API chooses the fastest form of quantization out of the 8-bit dynamic and 8-bit weight-only quantization. It first identifies the shapes of the activations that the different linear layers see, then benchmarks these shapes across different types of quantized and non-quantized layers in order to pick the fastest one. Also, it composes with torch.compile() to generate the fast kernels. For additional information on torch.compile, please see this general tutorial. Note that this library is in beta phase and in active development; API changes are expected. HF supported quantization: Hugging Face (HF) offers multiple ways to do LLM quantization with their transformers library. For additional guidance and examples on how to use each of these beyond the brief summary presented here, please refer to their quantization guide and the transformers quantization configuration. The llama-recipes code uses bitsandbytes 8-bit quantization to load the models, both for inference and fine-tuning. (See below for more information about using the bitsandbytes library with Llama.) Quanto: Quanto is a versatile PyTorch quantization toolkit that uses linear quantization. It provides features such as weights quantization, activation quantization, and compatibility with various devices and modalities. It supports quantization-aware training and is easy to integrate with custom kernels for specific devices. More details can be found in the announcement blog, GitHub repo, and HF guide. AQLM: Additive Quantization of Language Models (AQLM) is a compression method for LLMs. It quantizes multiple weights together, taking advantage of interdependencies between them. AQLM represents groups comprising 8 to 16 weights each as a sum of multiple vector codes. This library supports fine-tuning its quantized models with Parameter-Efficient Fine-Tuning and LoRA by integrating into HF's PEFT library as well. More details can be found in the GitHub repo. AWQ: Activation-aware Weight Quantization (AWQ) preserves a small percentage of weights that are important for LLM performance, reducing quantization loss. This allows models to run in 4-bit precision without experiencing performance degradation. Transformers supports loading models quantized with the llm-awq and autoawq libraries. More details on how to load them with the Transformers library can be found in the HF documentation. AutoGPTQ: The AutoGPTQ library implements the GPTQ algorithm, a post-training quantization technique where each row of the weight matrix is quantized independently. These weights are quantized to int4, but they're restored to fp16 on the fly during inference, saving memory usage by 4x. More details can be found in the GitHub repo. BitsAndBytes: BitsAndBytes is an easy option for quantizing a model to 8-bit and 4-bit. The library supports any model in any modality, as long as it supports loading with Hugging Face Accelerate and contains torch.nn.Linear layers. It also provides features for offloading weights between the CPU and GPU to support fitting very large models into memory, adjusting the outlier threshold for 8-bit quantization, skipping module conversion for certain models, and fine-tuning with 8-bit and 4-bit weights.
For 4-bit models, it allows changing the compute data type, using the Normal Float 4 (NF4) data type for weights initialized from a normal distribution, and using nested quantization to save additional memory at no additional performance cost. More details can be found in the HF documentation.
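The 4-bit options described above can be combined in a single BitsAndBytesConfig when loading a Llama checkpoint through transformers. The sketch below is a minimal illustration, assuming a Llama 2 7B checkpoint on the Hugging Face Hub and bitsandbytes plus accelerate installed; the flags shown are the ones discussed in this section.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Llama-2-7b-hf"  # assumed checkpoint; any Llama causal LM works

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # Normal Float 4 for normally distributed weights
    bnb_4bit_use_double_quant=True,         # nested quantization to save extra memory
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute data type for the 4-bit matmuls
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)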
+----------
+Prompting | How-to guides Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Link to Notebook showing examples of the techniques discussed in this section. Prompt engineering is a technique used in natural language processing (NLP) to improve the performance of the language model by providing them with more context and information about the task in hand. It involves creating prompts, which are short pieces of text that provide additional information or guidance to the model, such as the topic or genre of the text it will generate. By using prompts, the model can better understand what kind of output is expected and produce more accurate and relevant results. In Llama 2 the size of the context, in terms of number of tokens, has doubled from 2048 to 4096. Crafting Effective Prompts Crafting effective prompts is an important part of prompt engineering. Here are some tips for creating prompts that will help improve the performance of your language model: Be clear and concise: Your prompt should be easy to understand and provide enough information for the model to generate relevant output. Avoid using jargon or technical terms that may confuse the model. Use specific examples: Providing specific examples in your prompt can help the model better understand what kind of output is expected. For example, if you want the model to generate a story about a particular topic, include a few sentences about the setting, characters, and plot. Vary the prompts: Using different prompts can help the model learn more about the task at hand and produce more diverse and creative output. Try using different styles, tones, and formats to see how the model responds. Test and refine: Once you have created a set of prompts, test them out on the model to see how it performs. If the results are not as expected, try refining the prompts by adding more detail or adjusting the tone and style. Use feedback: Finally, use feedback from users or other sources to continually improve your prompts. This can help you identify areas where the model needs more guidance and make adjustments accordingly. Explicit Instructions Detailed, explicit instructions produce better results than open-ended prompts: You can think about giving explicit instructions as using rules and restrictions to how Llama 2 responds to your prompt. Stylization Explain this to me like a topic on a children's educational network show teaching elementary students. I'm a software engineer using large language models for summarization. Summarize the following text in under 250 words: Give your answer like an old timey private investigator hunting down a case step by step. Formatting Use bullet points. Return as a JSON object. Use less technical terms and help me apply it in my work in communications. Restrictions Only use academic papers. Never give sources older than 2020. If you don't know the answer, say that you don't know. Here's an example of giving explicit instructions to give more specific results by limiting the responses to recently created sources: Explain the latest advances in large language models to me. #  More likely to cite sources from 2017 Explain the latest advances in large language models to me. Always cite your sources. 
Never cite sources older than 2020. # Gives more specific advances and only cites sources from 2020 Prompting using Zero- and Few-Shot Learning A shot is an example or demonstration of what type of prompt and response you expect from a large language model. This term originates from training computer vision models on photographs, where one shot was one example or instance that the model used to classify an image. Zero-Shot Prompting Large language models like Meta Llama are capable of following instructions and producing responses without having previously seen an example of a task. Prompting without examples is called "zero-shot prompting". Text: This was the best movie I've ever seen! The sentiment of the text is: Text: The director was trying too hard. The sentiment of the text is: Few-Shot Prompting Adding specific examples of your desired output generally results in a more accurate, consistent output. This technique is called "few-shot prompting". In this example, the generated response follows our desired format that offers a more nuanced sentiment classifier that gives a positive, neutral, and negative response confidence percentage. You are a sentiment classifier. For each message, give the percentage of positive/netural/negative. Here are some samples: Text: I liked it Sentiment: 70% positive 30% neutral 0% negative Text: It could be better Sentiment: 0% positive 50% neutral 50% negative Text: It's fine Sentiment: 25% positive 50% neutral 25% negative Text: I thought it was okay Text: I loved it! Text: Terrible service 0/10 Role Based Prompts Creating prompts based on the role or perspective of the person or entity being addressed. This technique can be useful for generating more relevant and engaging responses from language models. Pros: Improves relevance: Role-based prompting helps the language model understand the role or perspective of the person or entity being addressed, which can lead to more relevant and engaging responses. Increases accuracy: Providing additional context about the role or perspective of the person or entity being addressed can help the language model avoid making mistakes or misunderstandings. Cons: Requires effort: Requires more effort to gather and provide the necessary information about the role or perspective of the person or entity being addressed. Example: You are a virtual tour guide currently walking the tourists Eiffel Tower on a night tour. Describe Eiffel Tower to your audience that covers its history, number of people visiting each year, amount of time it takes to do a full tour and why do so many people visit this place each year. Chain of Thought Technique Involves providing the language model with a series of prompts or questions to help guide its thinking and generate a more coherent and relevant response. This technique can be useful for generating more thoughtful and well-reasoned responses from language models. Improves coherence: Helps the language model think through a problem or question in a logical and structured way, which can lead to more coherent and relevant responses. Increases depth: Providing a series of prompts or questions can help the language model explore a topic more deeply and thoroughly, potentially leading to more insightful and informative responses. Requires effort: The chain of thought technique requires more effort to create and provide the necessary prompts or questions. You are a virtual tour guide from 1901. You have tourists visiting Eiffel Tower. Describe Eiffel Tower to your audience. Begin with 1. 
Why it was built 2. Then by how long it took them to build 3. Where were the materials sourced to build 4. Number of people it took to build 5. End it with the number of people visiting the Eiffel tour annually in the 1900's, the amount of time it completes a full tour and why so many people visit this place each year. Make your tour funny by including 1 or 2 funny jokes at the end of the tour. Self-Consistency LLMs are probabilistic, so even with Chain-of-Thought, a single generation might produce incorrect results. Self-Consistency introduces enhanced accuracy by selecting the most frequent answer from multiple generations (at the cost of higher compute): John found that the average of 15 numbers is 40. If 10 is added to each number then the mean of the numbers is? Report the answer surrounded by three backticks, for example: ```123``` Running the above several times and taking the most commonly returned value for the answer would make use of the self-consistency approach. Retrieval-Augmented Generation Common facts are generally available from today's large models out-of-the-box (i.e. using just the model weights). More specific data is unlikely to be available though E.g.: What is the capital of  California? # The capital of California is Sacramento... What was the temperature in Menlo Park on December 12th, 2023? # I'm just an AI, I don't have have access to real-time or historical weather data... Retrieval-Augmented Generation, or RAG, describes the practice of including information in the prompt that has been retrieved from an external database. It's an effective way to incorporate facts into your LLM application and is more affordable than fine-tuning which might also negatively impact the foundational model's capabilities. This could be as simple as a lookup table or as sophisticated as a vector database containing all of your company's knowledge: Given the following information about temperatures in Menlo Park: 2023-12-11 :  52 degrees Fahrenheit 2023-12-12 :  51 degrees Fahrenheit 2023-12-13 :  55 degrees Fahrenheit What was the temperature in Menlo Park on 2023-12-12? # Sure! The temperature in Menlo Park on 2023-12-12 was 51 degrees Fahrenheit. What was the temperature in Menlo Park on 2023-07-18 ? # Sorry, I don't have information about the temperature in Menlo Park on 2023-07-18. The information provided only includes temperatures for December 11th, 12th, and 13th of 2023. Program-Aided Language Models LLMs, by nature, aren't great at performing calculations. While LLMs are bad at arithmetic, they're great for code generation. Program-Aided Language leverages this fact by instructing the LLM to write code to solve calculation tasks. Python code to calculate: ((-5 + 93 * 4 - 0) * (4^4 + -7 + 0 * 5)) Here is the Python code to calculate the given expression: ((-5 + 93 * 4 - 0) * (4**4 + -7 + 0 * 5)) # Calculate the expression inside the parentheses a = -5 + 93 * 4 - 0 b = 4**4 + -7 + 0 * 5 # Multiply a and b result = a * b # Print the result print(result) Using the code directly provides the correct result. Limiting Extraneous Tokens A common challenge is generating a response without extraneous tokens (e.g. "Sure! Here's more information on..."). By combining a role, rules and restrictions, explicit instructions, and an example, the model can be prompted to generate the desired response. You are a robot that only outputs JSON. You reply in JSON format with the field 'zip_code'. Example question: What is the zip code of the Empire State Building? 
Example answer: {'zip_code': 10118} Now here is my question: What is the zip code of Menlo Park? # "{'zip_code': 94025}" Using the code directly provides the correct result. Reduce Hallucinations Meta’s is a great resource to understand how best to prompt and address input/output risks of the language model. Refer to pages (14-17). Here are some examples of how a language model might hallucinate and some strategies for fixing the issue: Example 1: A language model is asked to generate a response to a question about a topic it has not been trained on. The language model may hallucinate information or make up facts that are not accurate or supported by evidence. Fix: To fix this issue, you can provide the language model with more context or information about the topic to help it understand what is being asked and generate a more accurate response. You could also ask the language model to provide sources or evidence for any claims it makes to ensure that its responses are based on factual information. Example 2: A language model is asked to generate a response to a question that requires a specific perspective or point of view. The language model may hallucinate information or make up facts that are not consistent with the desired perspective or point of view. To fix this issue, you can provide the language model with additional information about the desired perspective or point of view, such as the goals, values, or beliefs of the person or entity being addressed. This can help the language model understand the context and generate a response that is more consistent with the desired perspective or point of view. Example 3: A language model is asked to generate a response to a question that requires a specific tone or style. The language model may hallucinate information or make up facts that are not consistent with the desired tone or style. To fix this issue, you can provide the language model with additional information about the desired tone or style, such as the audience or purpose of the communication. This can help the language model understand the context and generate a response that is more consistent with the desired tone or style. Overall, the key to avoiding hallucination in language models is to provide them with clear and accurate information and context, and to carefully monitor their responses to ensure that they are consistent with your expectations and requirements. Prompting using Zero- and Few-Shot Learning Chain of Thought Technique
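The Self-Consistency approach described earlier in this section can be scripted directly: sample several chain-of-thought generations for the same question and keep the most frequent final answer. The sketch below is illustrative only; the model name is an assumption and the regular expression simply pulls out the answer wrapped in triple backticks, as requested in the prompt.

import re
from collections import Counter
from transformers import pipeline

generator = pipeline("text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct")  # assumed model

prompt = (
    "John found that the average of 15 numbers is 40. "
    "If 10 is added to each number then the mean of the numbers is? "
    "Report the answer surrounded by three backticks, for example: ```123```"
)

# Sample several independent generations (self-consistency trades extra compute for accuracy).
generations = generator(
    prompt, do_sample=True, temperature=0.8, num_return_sequences=5, max_new_tokens=256
)

answers = []
for g in generations:
    completion = g["generated_text"][len(prompt):]  # drop the echoed prompt before matching
    match = re.search(r"```\s*(\d+)\s*```", completion)
    if match:
        answers.append(match.group(1))

# The most common answer across samples is taken as the final result.
print(Counter(answers).most_common(1))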
+----------
+Validation | How-to guides Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud As the saying goes, if you can't measure it, you can't improve it., In this section, we are going to cover different ways to measure and ultimately validate Llama so it's possible to determine the improvements provided by different fine tuning techniques. Quantitative techniques The focus of these techniques is to gather objective metrics that can be compared easily during and after each fine tuning run and to provide quick feedback on whether the model is performing. The main metrics collected are loss and perplexity. This method consists in dividing the dataset into k subsets or folds, and then fine tuning the model k times. On each run, a different fold is used as a validation dataset, using the rest for training. The performance results of each run are averaged out for the final report. This provides a more accurate metric of the performance of the model across the complete dataset, as all entries serve both for validation and training. While it produces the most accurate prediction on how a model is going to generalize after fine tuning on a given dataset, it is computationally expensive and better suited for small datasets. Holdout When using a holdout, the dataset is split into two or three subsets, training and validation with test as optional. The test and validation sets can represent 10% - 30% of the dataset each. As the name implies, the first two subsets are used for training and validating the model during fine tuning, while the third is used only after fine tuning is complete to evaluate how well the model generalizes on data it has not seen in either phase. The advantage of having three partitions is that it provides a way to evaluate the model after fine-tuning for an unbiased view into the model performance, but it requires a slightly bigger dataset to allow for a proper split. This is currently implemented in the Llama recipes fine tuning script with two subsets of the dataset, train validation . The data is collected in a json file that can be plotted to easily interpret the results and evaluate how the model is performing. Standard Evaluation tools There are multiple projects that provide standard evaluation. They provide predefined tasks with commonly used metrics to evaluate the performance of LLMs, like HellaSwag and ThrouthfulQA. These tools can be used to test if the model has degraded after fine tuning. Additionally, a custom task can be created using the dataset intended to fine-tune the model, effectively automating the manual verification of the model performance before and after fine tuning. These types of projects provide a quantitative way of looking at the models performance in simulated real world examples. Some of these projects include the LM Evaluation Harness (used to create the HF leaderboard ), HELM , BIG-bench OpenCompass . As mentioned before, the torchtune library provides integration with the LM Evaluation Harness to test fine tuned models as well. Interpreting Loss and Perplexity The loss value used comes from the transformer's LlamaForCausalLM , which initializes a different loss function depending on the objective required from the model. 
The objective of this section is to give a brief overview of how to understand the results from loss and perplexity as an initial evaluation of the model performance during fine tuning. We also calculate the perplexity as an exponentiation of the loss value. Additional information on loss functions can be found in external resources on the topic. In our recipes, we use a simple holdout during fine tuning. Using the logged loss values for both the train and validation datasets, the curves for both are plotted to analyze the results of the process. Given the setup in the recipe, the expected behavior is a log graph that shows diminishing train and validation loss values as training progresses. If the validation curve starts going up while the train curve continues decreasing, the model is overfitting and not generalizing well. Some alternatives to test when this happens are early stopping, verifying that the validation dataset is a statistically significant equivalent of the train dataset, data augmentation, using parameter efficient fine tuning, or using k-fold cross-validation to better tune the hyperparameters. Qualitative techniques: Manual testing. Manually evaluating a fine tuned model will vary according to the FT objective and available resources. Here we provide general guidelines on how to accomplish it. With a dataset prepared for fine tuning, a part of it can be separated into a manual test subset, which can be further augmented with general knowledge questions that might be relevant to the specific use case. In addition to these general questions, we recommend executing standard evaluations as well and comparing the results with the baseline for the fine tuned model. To rate the results, clear evaluation criteria should be defined that are relevant to the dataset being used. Example criteria can be accuracy, coherence, and safety. Create a rubric for each criterion and define what would be required for an output to receive a specific score. With these guidelines in place, distribute the test questions to a diverse set of reviewers to have multiple data points for each question. With multiple data points for each question and different criteria, a final score can be calculated for each query, allowing for weighting the scores based on the preferred focus for the final model.
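Returning to the quantitative side, the short sketch below illustrates the loss-to-perplexity relationship described earlier in this section: it computes the cross-entropy loss of a causal Llama model on a validation example and exponentiates it. The model name and text are placeholders, and a real evaluation would of course average the loss over the full validation set.

import math
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Llama-2-7b-hf"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
model.eval()

batch = tokenizer(["The capital of France is Paris."], return_tensors="pt").to(model.device)

with torch.no_grad():
    # Passing labels makes LlamaForCausalLM return the cross-entropy loss directly.
    outputs = model(**batch, labels=batch["input_ids"])

loss = outputs.loss.item()
perplexity = math.exp(loss)  # perplexity is the exponentiation of the mean loss
print(f"validation loss: {loss:.3f}, perplexity: {perplexity:.2f}")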
+----------
+Meta Code Llama | Integration guides Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Integration guides is an open-source family of LLMs based on Llama 2 providing SOTA performance on code tasks. It consists of: Foundation models (Meta Code Llama) Python specializations (Meta Code Llama - Python), and Instruction-following models (Meta Code Llama - Instruct) with 7B, 13B, 34B and 70B parameters each. See the recipes for examples on how to make use of Meta Code Llama. The following diagram shows how each of the Meta Code Llama models is trained: (Fig: The Meta Code Llama specialization pipeline. The different stages of fine-tuning annotated with the number of tokens seen during training) One of the best ways to try out and integrate with Meta Code Llama is using Hugging Face ecosystem by following the blog , which has: Demo links for all versions of Meta Code Llama Working inference code for code completion Working inference code for code infilling between code prefix and suffix as inputs Working inference code to do 4-bit loading of the 34B model so it can fit on consumer GPUs Guide on how to write prompts for the instruction models to have multi-turn conversations  about coding Guide on how to use Text Generation Inference for model deployment in production Guide on how to integrate code autocomplete as an extension  with VSCode Guide on how to evaluate Meta Code Llama models If the model does not perform well on your specific task, for example if none of the Meta Code Llama models (7B/13B/34B/70B) generate the correct answer for a text to SQL task, fine-tuning should be considered. This is a complete guide and notebook ( ) on how to fine-tune Meta Code Llama using the 7B model hosted on Hugging Face. It uses the LoRA fine-tuning method and can run on a single GPU. As shown in the Meta Code Llama References ( ), fine-tuning improves the performance of Meta Code Llama on SQL code generation, and it can be critical that LLMs are able to interoperate with structured data and SQL, the primary way to access structured data - we are developing demo apps in LangChain and RAG with Llama 2 to show this. Compatible extensions In most of the cases, the simplest method to integrate any model size is through ollama , occasionally combined with litellm . Ollama is a program that allows quantized versions of popular LLMs to run locally. It leverages the GPU and can even run Code Llama 34B on an M1 mac. Litellm is a simple proxy that can serve an OpenAI style API, so it's easy to replace OpenAI in existing applications, in our case, extensions Continue This extension can be used with ollama, allowing for easy local only execution. Additionally, it provides a simple interface to 1/ Chat with the model directly running inside VS Code and 2/ Select specific files and sections to edit or explain. This extension is an effective way to evaluate Llama because it provides simple and useful features. It also allows developers to build trust, by creating diffs for each proposed change and showing exactly what is being changed before saving the file. Handling the context for the LLM is easy and relies heavily on keyboard shortcuts. 
It's important to note that all the interactions with the extension are recorded in jsonl format. The objective is to provide data for future fine tuning of the models based on the feedback recorded during real world usage as well.
Steps to install with ollama:
1. Install ollama and pull a model (e.g. ollama pull codellama:13b-instruct).
2. Install the extension from the Visual Studio Code marketplace.
3. Open the extension and click on the + sign to add models.
4. Select Ollama as a provider.
5. In the next screen, select the model and size pulled with ollama.
6. Select the model in the conversation and start using the extension.
Steps to install with TGI: For better performance, or for use on incompatible hardware, TGI can be used on a server to run the model. For example, ollama on Intel Macs is too slow to be useful, even with the 7B models. In contrast, M1 Macs can run the 34B Meta Code Llama model quickly. For this, you should have TGI running on a server with appropriate hardware, as detailed in this guide. Once Continue.dev is installed, follow these steps:
1. Open the configs with /config.
2. Use the HuggingFaceTGI class and pass your instance URL in the server_url parameter.
3. Assign a name to it and save the config file.
llm-vscode: This extension from Hugging Face provides an open alternative to the closed-source GitHub Copilot, allowing the same functionality - context-based autocomplete suggestions - to work with open source models. It works out of the box with an HF token and their Inference API, but can be configured to use any TGI-compatible API. For usage with a self-hosted TGI server, follow these steps:
1. Install the extension from the marketplace.
2. Open the extension configs.
3. Select the correct template for the model published in your TGI instance in the Config Template field. For testing, we used the one named codellama/CodeLlama-13b-hf.
4. Pass in the URL of your TGI instance in the Model ID or Endpoint field.
5. To avoid rate limiting messages, log in to HF by providing a read-only token. This was necessary even for a self-hosted instance.
It currently does not support local models unless TGI is running locally. It would be great to add ollama support to this extension, as it would accelerate inference with the smaller models by avoiding the network.
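Since the extensions above typically speak an OpenAI-style API, a quick way to smoke-test a locally served Code Llama model is to call it through litellm, which proxies the request to ollama. The sketch below is an assumption-laden illustration: it presumes ollama is running locally on its default port with codellama:13b-instruct already pulled, and that a recent litellm release is installed.

from litellm import completion

# ollama serves an HTTP API on port 11434 by default; litellm routes "ollama/..." models to it.
response = completion(
    model="ollama/codellama:13b-instruct",
    messages=[{"role": "user", "content": "Write a SQL query that returns the ten most recent orders."}],
    api_base="http://localhost:11434",
)

# litellm returns an OpenAI-style response object.
print(response.choices[0].message.content)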
+----------
+LangChain | Integration guides Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud is an open source framework for building LLM powered applications. It implements common abstractions and higher-level APIs to make the app building process easier, so you don't need to call LLM from scratch. The main building blocks/APIs of LangChain are: Source The Models or LLMs API can be used to easily connect to all popular LLMs such as Hugging Face or Replicate where all types of Llama 2 models are hosted. The Prompts API implements the useful prompt template abstraction to help you easily reuse good, often long and detailed, prompts when building sophisticated LLM apps. There are also many built-in prompts for common operations such as summarization or connection to SQL databases for quick app development. Prompts can also work closely with  parsers to easily extract useful information from the LLM output. The Memory API can be used to save conversation history and feed it along with new questions to LLM so multi-turn natural conversation chat can be implemented. The Chains API includes the most basic LLMChain that combines a LLM with a prompt to generate the output, as well as more advanced chains to lets you build sophisticated LLM apps in a systematic way. For example, the output of the first LLM chain can be the input/prompt of another chain, or a chain can have multiple inputs and/or multiple outputs, either pre-defined or dynamically decided by the LLM output of a prompt. The Indexes API allows documents outside of LLM to be saved, after first converted to embeddings which are numerical meaning representations, in the vector form, of the documents, to a vector store. Later when a user enters a question about the documents, the relevant data stored in the documents' vector store will be retrieved and sent, along with the query, to LLM to generate an answer related to the documents. The following flow shows the process The Agents API uses LLM as the reasoning engine and connects it with other sources of data, third-party or own tools, or APIs such as web search or wikipedia APIs. Depending on the user's input, the agent can decide which tool to call to handle the input. LangChain can be used as a powerful retrieval augmented generation (RAG) tool to integrate the internal data or more recent public data with LLM to QA or chat about the data. LangChain already supports loading many types of unstructured and structured data. To learn more about LangChain, enroll for free in the two LangChain short courses . Be aware that the code in the courses use OpenAI ChatGPT LLM, but we’ve published a series of using LangChain with Llama. There is also a Getting to Know Llama notebook , presented at Meta Connect.
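To make the Models, Prompts, and Chains building blocks concrete, here is a minimal sketch that wires a prompt template to a locally served Llama model and runs it as a chain. It assumes a recent langchain-core/langchain-community install and that a Llama model has already been pulled with ollama; the model tag and question are placeholders.

from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate

# Models API: connect to a locally served Llama model (assumes `ollama pull llama3` was run).
llm = Ollama(model="llama3")

# Prompts API: a reusable template with a single input variable.
prompt = PromptTemplate.from_template(
    "You are a concise assistant. Answer in two sentences.\n\nQuestion: {question}"
)

# Chains API: compose the prompt with the model and invoke the resulting chain.
chain = prompt | llm
print(chain.invoke({"question": "What is LangChain used for?"}))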
+----------
+LlamaIndex | Integration guides LlamaIndex is another popular open source framework for building LLM applications. Like LangChain, LlamaIndex can also be used to build RAG applications by easily integrating data that is not built into the LLM with the LLM. There are three key tools in LlamaIndex: Connecting Data: connect data of any type - structured, unstructured, or semi-structured - to the LLM. Indexing Data: index and store the data. Querying LLM: combine the user query and retrieved query-related data to query the LLM and return a data-augmented answer. LlamaIndex is mainly a data framework for connecting private or domain-specific data with LLMs, so it specializes in RAG, smart data storage and retrieval, while LangChain is a more general purpose framework which can be used to build agents connecting multiple tools. The integration of the two may provide the most performant and effective solution to building real world RAG-powered Llama apps. For an example of how to integrate LlamaIndex with Llama 2, see our published examples. We also published a complete demo app showing how to use LlamaIndex to chat with Llama 2 about live data via the you.com API. It's worth noting that LlamaIndex has implemented many RAG-powered LLM evaluation tools to easily measure the quality of retrieval and response, including: Question Generation: call the LLM to auto-generate questions to create an evaluation dataset. Faithfulness Evaluator: evaluate if the generated answer is faithful to the retrieved context or if there's hallucination. Correctness Evaluator: evaluate if the generated answer matches the reference answer. Relevancy Evaluator: evaluate if the answer and the retrieved context are relevant and consistent for the given query.
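As a minimal sketch of the three tools described above (connecting, indexing, and querying data), the snippet below builds a vector index over a local folder and asks a question against it. It assumes a recent llama_index release and that an embedding model and a Llama-backed LLM have been configured (by default LlamaIndex falls back to OpenAI, so point Settings.llm and Settings.embed_model at your own models for a fully local setup); the folder path and question are placeholders.

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Connecting data: load every supported file in the folder into Document objects.
documents = SimpleDirectoryReader("./my_docs").load_data()

# Indexing data: embed the documents and store them in an in-memory vector index.
index = VectorStoreIndex.from_documents(documents)

# Querying: retrieve the most relevant chunks and send them, with the question, to the LLM.
query_engine = index.as_query_engine()
response = query_engine.query("What does the documentation say about fine-tuning?")
print(response)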
+----------
+# Llama Recipes: Examples to get started using the Llama models from Meta The 'llama-recipes' repository is a companion to the [Meta Llama 3](https://github.com/meta-llama/llama3) models. The goal of this repository is to provide a scalable library for fine-tuning Meta Llama models, along with some example scripts and notebooks to quickly get started with using the models in a variety of use-cases, including fine-tuning for domain adaptation and building LLM-based applications with Meta Llama and other tools in the LLM ecosystem. The examples here showcase how to run Meta Llama locally, in the cloud, and on-prem. [Meta Llama 2](https://github.com/meta-llama/llama) is also supported in this repository. We highly recommend everyone to utilize [Meta Llama 3](https://github.com/meta-llama/llama3) due to its enhanced capabilities. > [!IMPORTANT] > Meta Llama 3 has a new prompt template and special tokens (based on the tiktoken tokenizer). > | Token | Description | > |---|---| > `<\|begin_of_text\|>` | This is equivalent to the BOS token. | > `<\|end_of_text\|>` | This is equivalent to the EOS token. For multiturn-conversations it's usually unused. Instead, every message is terminated with `<\|eot_id\|>` instead.| > `<\|eot_id\|>` | This token signifies the end of the message in a turn i.e. the end of a single message by a system, user or assistant role as shown below.| > `<\|start_header_id\|>{role}<\|end_header_id\|>` | These tokens enclose the role for a particular message. The possible roles can be: system, user, assistant. | > > A multiturn-conversation with Meta Llama 3 follows this prompt template: > ``` > <|begin_of_text|><|start_header_id|>system<|end_header_id|> > {{ system_prompt }}<|eot_id|><|start_header_id|>user<|end_header_id|> > {{ user_message_1 }}<|eot_id|><|start_header_id|>assistant<|end_header_id|> > {{ model_answer_1 }}<|eot_id|><|start_header_id|>user<|end_header_id|> > {{ user_message_2 }}<|eot_id|><|start_header_id|>assistant<|end_header_id|> > Each message gets trailed by an `<|eot_id|>` token before a new header is started, signaling a role change. > More details on the new tokenizer and prompt template can be found [here](https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3#special-tokens-used-with-meta-llama-3). > [!NOTE] > The llama-recipes repository was recently refactored to promote a better developer experience of using the examples. Some files have been moved to new locations. The `src/` folder has NOT been modified, so the functionality of this repo and package is not impacted. 
> Make sure you update your local clone by running `git pull origin main` ## Table of Contents - [Llama Recipes: Examples to get started using the Meta Llama models from Meta](#llama-recipes-examples-to-get-started-using-the-llama-models-from-meta) - [Table of Contents](#table-of-contents) - [Getting Started](#getting-started) - [Prerequisites](#prerequisites) - [PyTorch Nightlies](#pytorch-nightlies) - [Installing](#installing) - [Install with pip](#install-with-pip) - [Install with optional dependencies](#install-with-optional-dependencies) - [Install from source](#install-from-source) - [Getting the Llama models](#getting-the-llama-models) - [Model conversion to Hugging Face](#model-conversion-to-hugging-face) - [Repository Organization](#repository-organization) - [`recipes/`](#recipes) - [`src/`](#src) - [Contributing](#contributing) - [License](#license) ## Getting Started These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. ### Prerequisites #### PyTorch Nightlies If you want to use PyTorch nightlies instead of the stable release, go to [this guide](https://pytorch.org/get-started/locally/) to retrieve the right `--extra-index-url URL` parameter for the `pip install` commands on your platform. ### Installing Llama-recipes provides a pip distribution for easy install and usage in other projects. Alternatively, it can be installed from source. > Ensure you use the correct CUDA version (from `nvidia-smi`) when installing the PyTorch wheels. Here we are using 11.8 as `cu118`. > H100 GPUs work better with CUDA >12.0 #### Install with pip ``` pip install llama-recipes #### Install with optional dependencies Llama-recipes offers the installation of optional packages. There are three optional dependency groups. To run the unit tests we can install the required dependencies with: pip install llama-recipes[tests] For the vLLM example we need additional requirements that can be installed with: pip install llama-recipes[vllm] To use the sensitive topics safety checker install with: pip install llama-recipes[auditnlg] Optional dependencies can also be combines with [option1,option2]. #### Install from source To install from source e.g. for development use these commands. We're using hatchling as our build backend which requires an up-to-date pip as well as setuptools package. git clone git@github.com:meta-llama/llama-recipes.git cd llama-recipes pip install -U pip setuptools pip install -e . For development and contributing to llama-recipes please install all optional dependencies: pip install -U pip setuptools pip install -e .[tests,auditnlg,vllm] ### Getting the Meta Llama models You can find Meta Llama models on Hugging Face hub [here](https://huggingface.co/meta-llama), **where models with `hf` in the name are already converted to Hugging Face checkpoints so no further conversion is needed**. The conversion step below is only for original model weights from Meta that are hosted on Hugging Face model hub as well. #### Model conversion to Hugging Face The recipes and notebooks in this folder are using the Meta Llama model definition provided by Hugging Face's transformers library. 
Given that the original checkpoint resides under models/7B you can install all requirements and convert the checkpoint with: ```bash ## Install Hugging Face Transformers from source pip freeze | grep transformers ## verify it is version 4.31.0 or higher git clone git@github.com:huggingface/transformers.git cd transformers pip install protobuf python src/transformers/models/llama/convert_llama_weights_to_hf.py \ --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path ## Repository Organization Most of the code dealing with Llama usage is organized across 2 main folders: `recipes/` and `src/`. ### `recipes/` Contains examples are organized in folders by topic: | Subfolder | Description | |---|---| [quickstart](./recipes/quickstart) | The "Hello World" of using Llama, start here if you are new to using Llama. [finetuning](./recipes/finetuning)|Scripts to finetune Llama on single-GPU and multi-GPU setups [inference](./recipes/inference)|Scripts to deploy Llama for inference locally and using model servers [use_cases](./recipes/use_cases)|Scripts showing common applications of Meta Llama3 [responsible_ai](./recipes/responsible_ai)|Scripts to use PurpleLlama for safeguarding model outputs [llama_api_providers](./recipes/llama_api_providers)|Scripts to run inference on Llama via hosted endpoints [benchmarks](./recipes/benchmarks)|Scripts to benchmark Llama models inference on various backends [code_llama](./recipes/code_llama)|Scripts to run inference with the Code Llama models [evaluation](./recipes/evaluation)|Scripts to evaluate fine-tuned Llama models using `lm-evaluation-harness` from `EleutherAI` ### `src/` Contains modules which support the example recipes: | Subfolder | Description | | [configs](src/llama_recipes/configs/) | Contains the configuration files for PEFT methods, FSDP, Datasets, Weights & Biases experiment tracking. | | [datasets](src/llama_recipes/datasets/) | Contains individual scripts for each dataset to download and process. Note | | [inference](src/llama_recipes/inference/) | Includes modules for inference for the fine-tuned models. | | [model_checkpointing](src/llama_recipes/model_checkpointing/) | Contains FSDP checkpoint handlers. | | [policies](src/llama_recipes/policies/) | Contains FSDP scripts to provide different policies, such as mixed precision, transformer wrapping policy and activation checkpointing along with any precision optimizer (used for running FSDP with pure bf16 mode). | | [utils](src/llama_recipes/utils/) | Utility files for: - `train_utils.py` provides training/eval loop and more train utils. - `dataset_utils.py` to get preprocessed datasets. - `config_utils.py` to override the configs received from CLI. - `fsdp_utils.py` provides FSDP  wrapping policy for PEFT methods. - `memory_utils.py` context manager to track different memory stats in train loop. | ## Contributing Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us. ## License See the License file for Meta Llama 3 [here](https://llama.meta.com/llama3/license/) and Acceptable Use Policy [here](https://llama.meta.com/llama3/use-policy/) See the License file for Meta Llama 2 [here](https://llama.meta.com/llama2/license/) and Acceptable Use Policy [here](https://llama.meta.com/llama2/use-policy/)
+----------
+# **Model Details** Meta developed and released the Llama 2 family of large language models (LLMs), a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters. Our fine-tuned LLMs, called Llama-2-Chat, are optimized for dialogue use cases. Llama-2-Chat models outperform open-source chat models on most benchmarks we tested, and in our human evaluations for helpfulness and safety, are on par with some popular closed-source models like ChatGPT and PaLM. **Model Developers** Meta **Variations** Llama 2 comes in a range of parameter sizes — 7B, 13B, and 70B — as well as pretrained and fine-tuned variations. **Input** Models input text only. **Output** Models generate text only. **Model Architecture** Llama 2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. ||Training Data|Params|Context Length|GQA|Tokens|LR| |---|---|---|---|---|---|---| Llama 2|*A new mix of publicly available online data*|7B|4k|✗|2.0T|3.0 x 10 -4 Llama 2|*A new mix of publicly available online data*|13B|4k|✗|2.0T|3.0 x 10 Llama 2|*A new mix of publicly available online data*|70B|4k|✔|2.0T|1.5 x 10 **Llama 2 family of models.** Token counts refer to pretraining data only. All models are trained with a global batch-size of 4M tokens. The 70B version uses Grouped-Query Attention (GQA) for improved inference scalability. **Model Dates** Llama 2 was trained between January 2023 and July 2023. **Status** This is a static model trained on an offline dataset. Future versions of the tuned models will be released as we improve model safety with community feedback. **License** A custom commercial license is available at: [https://ai.meta.com/resources/models-and-libraries/llama-downloads/](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) **Research Paper** More information can be found in the paper "Llama-2: Open Foundation and Fine-tuned Chat Models", available at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/. **Where to send questions or comments about the model** Instructions on how to provide feedback or comments on the model can be found in the model [README](README.md). # **Intended Use** **Intended Use Cases** Llama 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. **Out-of-scope Uses** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 2 Community License. Use in languages other than English**. **Note: Developers may fine-tune Llama 2 models for languages beyond English provided they comply with the Llama 2 Community License and the Acceptable Use Policy. # **Hardware and Software** **Training Factors** We used custom training libraries, Meta's Research Super Cluster, and production clusters for pretraining. Fine-tuning, annotation, and evaluation were also performed on third-party cloud compute. **Carbon Footprint** Pretraining utilized a cumulative 3.3M GPU hours of computation on hardware of type A100-80GB (TDP of 350-400W). 
Estimated total emissions were 539 tCO2eq, 100% of which were offset by Meta’s sustainability program. ||Time (GPU hours)|Power Consumption (W)|Carbon Emitted(tCO eq)| |---|---|---|---| |Llama 2 7B|184320|400|31.22| |Llama 2 13B|368640|400|62.44| |Llama 2 70B|1720320|400|291.42| |Total|3311616||539.00| **CO emissions during pretraining.** Time: total GPU time required for training each model. Power Consumption: peak power capacity per GPU device for the GPUs used adjusted for power usage efficiency. 100% of the emissions are directly offset by Meta's sustainability program, and because we are openly releasing these models, the pretraining costs do not need to be incurred by others. # **Training Data** **Overview** Llama 2 was pretrained on 2 trillion tokens of data from publicly available sources. The fine-tuning data includes publicly available instruction datasets, as well as over one million new human-annotated examples. Neither the pretraining nor the fine-tuning datasets include Meta user data. **Data Freshness** The pretraining data has a cutoff of September 2022, but some tuning data is more recent, up to July 2023. # **Evaluation Results** In this section, we report the results for the Llama 1 and Llama 2 models on standard academic benchmarks. For all the evaluations, we use our internal evaluations library. |Model|Size|Code|Commonsense Reasoning|World Knowledge|Reading Comprehension|Math|MMLU|BBH|AGI Eval| |---|---|---|---|---|---|---|---|---|---| |Llama 1|7B|14.1|60.8|46.2|58.5|6.95|35.1|30.3|23.9| |Llama 1|13B|18.9|66.1|52.6|62.3|10.9|46.9|37.0|33.9| |Llama 1|33B|26.0|70.0|58.4|67.6|21.4|57.8|39.8|41.7| |Llama 1|65B|30.7|70.7|60.5|68.6|30.8|63.4|43.5|47.6| |Llama 2|7B|16.8|63.9|48.9|61.3|14.6|45.3|32.6|29.3| |Llama 2|13B|24.5|66.9|55.4|65.8|28.7|54.8|39.4|39.1| |Llama 2|70B|**37.5**|**71.9**|**63.6**|**69.4**|**35.2**|**68.9**|**51.2**|**54.2**| **Overall performance on grouped academic benchmarks.** *Code:* We report the average pass@1 scores of our models on HumanEval and MBPP. *Commonsense Reasoning:* We report the average of PIQA, SIQA, HellaSwag, WinoGrande, ARC easy and challenge, OpenBookQA, and CommonsenseQA. We report 7-shot results for CommonSenseQA and 0-shot results for all other benchmarks. *World Knowledge:* We evaluate the 5-shot performance on NaturalQuestions and TriviaQA and report the average. *Reading Comprehension:* For reading comprehension, we report the 0-shot average on SQuAD, QuAC, and BoolQ. *MATH:* We report the average of the GSM8K (8 shot) and MATH (4 shot) benchmarks at the top 1. |||TruthfulQA|Toxigen| |Llama 1|7B|27.42|23.00| |Llama 1|13B|41.74|23.08| |Llama 1|33B|44.19|22.57| |Llama 1|65B|48.71|21.77| |Llama 2|7B|33.29|**21.25**| |Llama 2|13B|41.86|26.10| |Llama 2|70B|**50.18**|24.60| **Evaluation of pretrained LLMs on automatic safety benchmarks.** For TruthfulQA, we present the percentage of generations that are both truthful and informative (the higher the better). For ToxiGen, we present the percentage of toxic generations (the smaller the better). |Llama-2-Chat|7B|57.04|**0.00**| |Llama-2-Chat|13B|62.18|**0.00**| |Llama-2-Chat|70B|**64.14**|0.01| **Evaluation of fine-tuned LLMs on different safety datasets.** Same metric definitions as above. # **Ethical Considerations and Limitations** Llama 2 is a new technology that carries risks with use. Testing conducted to date has been in English, and has not covered, nor could it cover all scenarios. 
For these reasons, as with all LLMs, Llama 2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased, or otherwise objectionable responses to user prompts. Therefore, before deploying any applications of Llama 2, developers should perform safety testing and tuning tailored to their specific applications of the model. Please see the Responsible Use Guide available at [https://ai.meta.com/llama/responsible-use-guide/](https://ai.meta.com/llama/responsible-use-guide/).
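The per-model figures in the carbon footprint table above follow from GPU hours and per-GPU power once a grid carbon-intensity factor is fixed. The sketch below approximately reproduces the reported numbers; the ~0.423 kgCO2eq/kWh factor is an assumption inferred from the table, not a value stated in the text, and the 539 tCO2eq total corresponds to the full 3.3M GPU hours, which is more than the three listed rows sum to.
```python
# Sketch: reproduce the per-model CO2 figures from GPU hours and per-GPU power.
# The carbon-intensity factor (kg CO2eq per kWh) is an assumption inferred from
# the table, not a value stated in the text.
CARBON_INTENSITY_KG_PER_KWH = 0.423  # assumed

models = {
    "Llama 2 7B":  (184_320, 400),     # (GPU hours, per-GPU power in W)
    "Llama 2 13B": (368_640, 400),
    "Llama 2 70B": (1_720_320, 400),
}

for name, (gpu_hours, power_w) in models.items():
    energy_kwh = gpu_hours * power_w / 1000                         # GPU hours x kW
    emissions_t = energy_kwh * CARBON_INTENSITY_KG_PER_KWH / 1000   # kg -> tonnes
    print(f"{name}: {emissions_t:.2f} tCO2eq")                      # ~31.2, ~62.4, ~291.4
```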
+----------
+# Llama 2 We are unlocking the power of large language models. Llama 2 is now accessible to individuals, creators, researchers, and businesses of all sizes so that they can experiment, innovate, and scale their ideas responsibly. This release includes model weights and starting code for pre-trained and fine-tuned Llama language models — ranging from 7B to 70B parameters. This repository is intended as a minimal example to load [Llama 2](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/) models and run inference. For more detailed examples leveraging Hugging Face, see [llama-recipes](https://github.com/facebookresearch/llama-recipes/). ## Updates post-launch See [UPDATES.md](UPDATES.md). Also for a running list of frequently asked questions, see [here](https://ai.meta.com/llama/faq/). ## Download In order to download the model weights and tokenizer, please visit the [Meta website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and accept our License. Once your request is approved, you will receive a signed URL over email. Then run the download.sh script, passing the URL provided when prompted to start the download. Pre-requisites: Make sure you have `wget` and `md5sum` installed. Then run the script: `./download.sh`. Keep in mind that the links expire after 24 hours and a certain amount of downloads. If you start seeing errors such as `403: Forbidden`, you can always re-request a link. ### Access to Hugging Face We are also providing downloads on [Hugging Face](https://huggingface.co/meta-llama). You can request access to the models by acknowledging the license and filling the form in the model card of a repo. After doing so, you should get access to all the Llama models of a version (Code Llama, Llama 2, or Llama Guard) within 1 hour. ## Quick Start You can follow the steps below to quickly get up and running with Llama 2 models. These steps will let you run quick inference locally. For more examples, see the [Llama 2 recipes repository](https://github.com/facebookresearch/llama-recipes). 1. In a conda env with PyTorch / CUDA available clone and download this repository. 2. In the top-level directory run: pip install -e . 3. Visit the [Meta website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and register to download the model/s. 4. Once registered, you will get an email with a URL to download the models. You will need this URL when you run the download.sh script. 5. Once you get the email, navigate to your downloaded llama repository and run the download.sh script. - Make sure to grant execution permissions to the download.sh script - During this process, you will be prompted to enter the URL from the email. - Do not use the “Copy Link” option but rather make sure to manually copy the link from the email. 6. Once the model/s you want have been downloaded, you can run the model locally using the command below: torchrun --nproc_per_node 1 example_chat_completion.py \ --ckpt_dir llama-2-7b-chat/ \ --tokenizer_path tokenizer.model \ --max_seq_len 512 --max_batch_size 6 **Note** - Replace  `llama-2-7b-chat/` with the path to your checkpoint directory and `tokenizer.model` with the path to your tokenizer model. - The `–nproc_per_node` should be set to the [MP](#inference) value for the model you are using. - Adjust the `max_seq_len` and `max_batch_size` parameters as needed. 
- This example runs the [example_chat_completion.py](example_chat_completion.py) found in this repository but you can change that to a different .py file. ## Inference Different models require different model-parallel (MP) values: |  Model | MP | |--------|----| | 7B     | 1  | | 13B    | 2  | | 70B    | 8  | All models support sequence length up to 4096 tokens, but we pre-allocate the cache according to `max_seq_len` and `max_batch_size` values. So set those according to your hardware. ### Pretrained Models These models are not finetuned for chat or Q&A. They should be prompted so that the expected answer is the natural continuation of the prompt. See `example_text_completion.py` for some examples. To illustrate, see the command below to run it with the llama-2-7b model (`nproc_per_node` needs to be set to the `MP` value): torchrun --nproc_per_node 1 example_text_completion.py \ --ckpt_dir llama-2-7b/ \ --max_seq_len 128 --max_batch_size 4 ### Fine-tuned Chat Models The fine-tuned models were trained for dialogue applications. To get the expected features and performance for them, a specific formatting defined in [`chat_completion`](https://github.com/facebookresearch/llama/blob/main/llama/generation.py#L212) needs to be followed, including the `INST` and `< >` tags, `BOS` and `EOS` tokens, and the whitespaces and breaklines in between (we recommend calling `strip()` on inputs to avoid double-spaces). You can also deploy additional classifiers for filtering out inputs and outputs that are deemed unsafe. See the llama-recipes repo for [an example](https://github.com/facebookresearch/llama-recipes/blob/main/examples/inference.py) of how to add a safety checker to the inputs and outputs of your inference code. Examples using llama-2-7b-chat: torchrun --nproc_per_node 1 example_chat_completion.py \ --max_seq_len 512 --max_batch_size 6 Llama 2 is a new technology that carries potential risks with use. Testing conducted to date has not — and could not — cover all scenarios. In order to help developers address these risks, we have created the [Responsible Use Guide](Responsible-Use-Guide.pdf). More details can be found in our research paper as well. ## Issues Please report any software “bug”, or other problems with the models through one of the following means: - Reporting issues with the model: [github.com/facebookresearch/llama](http://github.com/facebookresearch/llama) - Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](http://developers.facebook.com/llama_output_feedback) - Reporting bugs and security concerns: [facebook.com/whitehat/info](http://facebook.com/whitehat/info) ## Model Card See [MODEL_CARD.md](MODEL_CARD.md). Our model and weights are licensed for both researchers and commercial entities, upholding the principles of openness. Our mission is to empower individuals, and industry through this opportunity, while fostering an environment of discovery and ethical AI advancements. See the [LICENSE](LICENSE) file, as well as our accompanying [Acceptable Use Policy](USE_POLICY.md) ## References 1. [Research Paper](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/) 2. [Llama 2 technical overview](https://ai.meta.com/resources/models-and-libraries/llama) 3. 
[Open Innovation AI Research Community](https://ai.meta.com/llama/open-innovation-ai-research-community/) For common questions, the FAQ can be found [here](https://ai.meta.com/llama/faq/), which will be kept up to date over time as new questions arise. ## Original Llama The repo for the original llama release is in the [`llama_v1`](https://github.com/facebookresearch/llama/tree/llama_v1) branch.
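The `chat_completion` formatting mentioned earlier in this README wraps each user turn in `[INST] ... [/INST]` and folds the optional system prompt into the first turn between `<<SYS>>` and `<</SYS>>` markers. The helper below is a minimal single-turn sketch of that string template; the reference `chat_completion()` in `llama/generation.py` additionally handles multi-turn histories and BOS/EOS token ids.
```python
# Minimal sketch of the Llama 2 chat prompt layout for a single user turn.
# The official chat_completion() in llama/generation.py also handles multi-turn
# dialogs and token-level details; this only illustrates the string template.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

def llama2_chat_prompt(user_message: str, system_prompt: str | None = None) -> str:
    content = user_message.strip()
    if system_prompt:
        content = f"{B_SYS}{system_prompt.strip()}{E_SYS}{content}"
    return f"{B_INST} {content} {E_INST}"

print(llama2_chat_prompt(
    "Give me three tips for visiting Lisbon.",
    system_prompt="You are a concise travel assistant.",
))
```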
+----------
+## Model Details Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. **Model developers** Meta **Variations** Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. **Input** Models input text only. **Output** Models generate text and code only. **Model Architecture** Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. Training Data Params Context length GQA Token count Knowledge cutoff Llama 3 A new mix of publicly available online data. 8B 8k Yes 15T+ March, 2023 70B December, 2023 **Llama 3 family of models**. Token counts refer to pretraining data only. Both the 8 and 70B versions use Grouped-Query Attention (GQA) for improved inference scalability. **Model Release Date** April 18, 2024. **Status** This is a static model trained on an offline dataset. Future versions of the tuned models will be released as we improve model safety with community feedback. **License** A custom commercial license is available at: [https://llama.meta.com/llama3/license](https://llama.meta.com/llama3/license) Where to send questions or comments about the model Instructions on how to provide feedback or comments on the model can be found in the model [README](https://github.com/meta-llama/llama3). For more technical information about generation parameters and recipes for how to use Llama 3 in applications, please go [here](https://github.com/meta-llama/llama-recipes). ## Intended Use **Intended Use Cases** Llama 3 is intended for commercial and research use in English. Instruction tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. **Out-of-scope** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the [Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/) and [Llama 3 Community License](https://llama.meta.com/llama3/license/). Use in languages other than English**. **Note: Developers may fine-tune Llama 3 models for languages beyond English provided they comply with the [Llama 3 Community License](https://llama.meta.com/llama3/license/) and the [Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/). ## Hardware and Software **Training Factors** We used custom training libraries, Meta's Research SuperCluster, and production clusters for pretraining. Fine-tuning, annotation, and evaluation were also performed on third-party cloud compute. **Carbon Footprint Pretraining utilized a cumulative** 7.7M GPU hours of computation on hardware of type H100-80GB (TDP of 700W). Estimated total emissions were 2290 tCO2eq, 100% of which were offset by Meta’s sustainability program. Time (GPU hours) Power Consumption (W) Carbon Emitted(tCO2eq) Llama 3 8B 1.3M 700 390 Llama 3 70B 6.4M 1900 Total 7.7M 2290 **CO2 emissions during pre-training**. Time: total GPU time required for training each model. 
Power Consumption: peak power capacity per GPU device for the GPUs used adjusted for power usage efficiency. 100% of the emissions are directly offset by Meta's sustainability program, and because we are openly releasing these models, the pretraining costs do not need to be incurred by others. ## Training Data **Overview** Llama 3 was pretrained on over 15 trillion tokens of data from publicly available sources. The fine-tuning data includes publicly available instruction datasets, as well as over 10M human-annotated examples. Neither the pretraining nor the fine-tuning datasets include Meta user data. **Data Freshness** The pretraining data has a cutoff of March 2023 for the 8B and December 2023 for the 70B models respectively. ## Benchmarks In this section, we report the results for Llama 3 models on standard automatic benchmarks. For all the evaluations, we use our internal evaluations library. For details on the methodology see [here](https://github.com/meta-llama/llama3/blob/main/eval_details.md). ### Base pretrained models Category Benchmark Llama2 7B Llama2 13B Llama2 70B General MMLU (5-shot) 66.6 45.7 53.8 79.5 69.7 AGIEval English (3-5 shot) 45.9 28.8 38.7 63.0 54.8 CommonSenseQA (7-shot) 72.6 57.6 67.6 83.8 78.7 Winogrande (5-shot) 76.1 73.3 75.4 83.1 81.8 BIG-Bench Hard (3-shot, CoT) 61.1 38.1 47.0 81.3 65.7 ARC-Challenge (25-shot) 78.6 53.7 93.0 85.3 Knowledge reasoning TriviaQA-Wiki (5-shot) 78.5 72.1 79.6 89.7 87.5 Reading comprehension SQuAD (1-shot) 76.4 72.2 85.6 82.6 QuAC (1-shot, F1) 44.4 39.6 44.9 51.1 49.4 BoolQ (0-shot) 75.7 65.5 66.9 79.0 73.1 DROP (3-shot, F1) 58.4 37.9 49.8 79.7 70.2 ### Instruction tuned models Llama 2 7B Llama 2 13B Llama 2 70B 68.4 34.1 47.8 82.0 52.9 GPQA (0-shot) 34.2 21.7 22.3 39.5 21.0 HumanEval (0-shot) 62.2 7.9 14.0 81.7 25.6 GSM-8K (8-shot, CoT) 25.7 77.4 57.5 MATH (4-shot, CoT) 30.0 3.8 6.7 50.4 11.6 ### Responsibility & Safety We believe that an open approach to AI leads to better, safer products, faster innovation, and a bigger overall market. We are committed to Responsible AI development and took a series of steps to limit misuse and harm and support the open source community. Foundation models are widely capable technologies that are built to be used for a diverse range of applications. They are not designed to meet every developer preference on safety levels for all use cases, out-of-the-box, as those by their nature will differ across different applications. Rather, responsible LLM-application deployment is achieved by implementing a series of safety best practices throughout the development of such applications, from the model pre-training, fine-tuning and the deployment of systems composed of safeguards to tailor the safety needs specifically to the use case and audience. As part of the Llama 3 release, we updated our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/) to outline the steps and best practices for developers to implement model and system level safety for their application. We also provide a set of resources including [Meta Llama Guard 2](https://llama.meta.com/purple-llama/) and [Code Shield](https://llama.meta.com/purple-llama/) safeguards. These tools have proven to drastically reduce residual risks of LLM Systems, while maintaining a high level of helpfulness. We encourage developers to tune and deploy these safeguards according to their needs and we provide a [reference implementation](https://github.com/meta-llama/llama-recipes/tree/main/recipes/responsible_ai) to get you started. 
#### Llama 3-Instruct As outlined in the Responsible Use Guide, some trade-off between model helpfulness and model alignment is likely unavoidable. Developers should exercise discretion about how to weigh the benefits of alignment and helpfulness for their specific use case and audience. Developers should be mindful of residual risks when using Llama models and leverage additional safety tools as needed to reach the right safety bar for their use case. Safety For our instruction tuned model, we conducted extensive red teaming exercises, performed adversarial evaluations and implemented safety mitigations techniques to lower residual risks. As with any Large Language Model, residual risks will likely remain and we recommend that developers assess these risks in the context of their use case. In parallel, we are working with the community to make AI safety benchmark standards transparent, rigorous and interpretable. Refusals In addition to residual risks, we put a great emphasis on model refusals to benign prompts. Over-refusing not only can impact the user experience but could even be harmful in certain contexts as well. We’ve heard the feedback from the developer community and improved our fine tuning to ensure that Llama 3 is significantly less likely to falsely refuse to answer prompts than Llama 2. We built internal benchmarks and developed mitigations to limit false refusals making Llama 3 our most helpful model to date. #### Responsible release In addition to responsible use considerations outlined above, we followed a rigorous process that requires us to take extra measures against misuse and critical risks before we make our release decision. Misuse If you access or use Llama 3, you agree to the Acceptable Use Policy. The most recent copy of this policy can be found at [https://llama.meta.com/llama3/use-policy/](https://llama.meta.com/llama3/use-policy/). #### Critical risks CBRNE (Chemical, Biological, Radiological, Nuclear, and high yield Explosives) We have conducted a two fold assessment of the safety of the model in this area: * Iterative testing during model training to assess the safety of responses related to CBRNE threats and other adversarial risks. * Involving external CBRNE experts to conduct an uplift test assessing the ability of the model to accurately provide expert knowledge and reduce barriers to potential CBRNE misuse, by reference to what can be achieved using web search (without the model). ### Cyber Security We have evaluated Llama 3 with CyberSecEval, Meta’s cybersecurity safety eval suite, measuring Llama 3’s propensity to suggest insecure code when used as a coding assistant, and Llama 3’s propensity to comply with requests to help carry out cyber attacks, where attacks are defined by the industry standard MITRE ATT&CK cyber attack ontology. On our insecure coding and cyber attacker helpfulness tests, Llama 3 behaved in the same range or safer than models of [equivalent coding capability](https://huggingface.co/spaces/facebook/CyberSecEval). Child Safety Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development.  
For Llama 3, we conducted new in-depth sessions using objective-based methodologies to assess the model risks along multiple attack vectors. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking into account market-specific nuances and experiences. ### Community Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership on AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [GitHub repository](https://github.com/meta-llama/PurpleLlama). Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and a [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. ## Ethical Considerations and Limitations The core values of Llama 3 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. Llama 3 addresses users and their needs as they are, without inserting unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. But Llama 3 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has been in English, and has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or otherwise objectionable responses to user prompts. Therefore, before deploying any applications of Llama 3 models, developers should perform safety testing and tuning tailored to their specific applications of the model. As outlined in the Responsible Use Guide, we recommend incorporating [Purple Llama](https://github.com/facebookresearch/PurpleLlama) solutions into your workflows, and specifically [Llama Guard](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/), which provides a base model to filter input and output prompts to layer system-level safety on top of model-level safety. 
Please see the Responsible Use Guide available at [http://llama.meta.com/responsible-use-guide](http://llama.meta.com/responsible-use-guide) ## Citation instructions @article{llama3modelcard, title={Llama 3 Model Card}, author={AI@Meta}, year={2024}, url = {https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md} ## Contributors Aaditya Singh; Aaron Grattafiori; Abhimanyu Dubey; Abhinav Jauhri; Abhinav Pandey; Abhishek Kadian; Adam Kelsey; Adi Gangidi; Ahmad Al-Dahle; Amit Sangani; Ahuva Goldstand; Aiesha Letman; Ajay Menon; Akhil Mathur; Alan Schelten; Alex Vaughan; Amy Yang; Andrei Lupu; Andres Alvarado; Andrew Gallagher; Andrew Gu; Andrew Ho; Andrew Poulton; Andrew Ryan; Angela Fan; Ankit Ramchandani; Anthony Hartshorn; Archi Mitra; Archie Sravankumar; Artem Korenev; Arun Rao; Ashley Gabriel; Ashwin Bharambe; Assaf Eisenman; Aston Zhang; Ash JJhaveri; Aurelien Rodriguez; Austen Gregerson; Ava Spataru; Baptiste Roziere; Ben Maurer; Benjamin Leonhardi; Bernie Huang; Bhargavi Paranjape; Bing Liu; Binh Tang; Bobbie Chern; Brani Stojkovic; Brian Fuller; Catalina Mejia Arenas; Chao Zhou; Charlotte Caucheteux; Chaya Nayak; Ching-Hsiang Chu; Chloe Bi; Chris Cai; Chris Cox; Chris Marra; Chris McConnell; Christian Keller; Christoph Feichtenhofer; Christophe Touret; Chunyang Wu; Corinne Wong; Cristian Canton Ferrer; Damien Allonsius; Daniel Kreymer; Daniel Haziza; Daniel Li; Danielle Pintz; Danny Livshits; Danny Wyatt; David Adkins; David Esiobu; David Xu; Davide Testuggine; Delia David; Devi Parikh; Dhruv Choudhary; Dhruv Mahajan; Diana Liskovich; Diego Garcia-Olano; Diego Perino; Dieuwke Hupkes; Dingkang Wang; Dustin Holland; Egor Lakomkin; Elina Lobanova; Xiaoqing Ellen Tan; Emily Dinan; Eric Smith; Erik Brinkman; Esteban Arcaute; Filip Radenovic; Firat Ozgenel; Francesco Caggioni; Frank Seide; Frank Zhang; Gabriel Synnaeve; Gabriella Schwarz; Gabrielle Lee; Gada Badeer; Georgia Anderson; Graeme Nail; Gregoire Mialon; Guan Pang; Guillem Cucurell; Hailey Nguyen; Hamid Shojanazeri; Hannah Korevaar; Hannah Wang; Haroun Habeeb; Harrison Rudolph; Henry Aspegren; Hu Xu; Hugo Touvron; Iga Kozlowska; Igor Molybog; Igor Tufanov; Iliyan Zarov; Imanol Arrieta Ibarra; Irina-Elena Veliche; Isabel Kloumann; Ishan Misra; Ivan Evtimov; Jade Copet; Jake Weissman; Jan Geffert; Jana Vranes; Japhet Asher; Jason Park; Jay Mahadeokar; Jean-Baptiste Gaya; Jeet Shah; Jelmer van der Linde; Jennifer Chan; Jenny Hong; Jenya Lee; Jeremy Fu; Jeremy Teboul; Jianfeng Chi; Jianyu Huang; Jie Wang; Jiecao Yu; Joanna Bitton; Joe Spisak; Joelle Pineau; Jon Carvill; Jongsoo Park; Joseph Rocca; Joshua Johnstun; Junteng Jia; Kalyan Vasuden Alwala; Kam Hou U; Kate Plawiak; Kartikeya Upasani; Kaushik Veeraraghavan; Ke Li; Kenneth Heafield; Kevin Stone; Khalid El-Arini; Krithika Iyer; Kshitiz Malik; Kuenley Chiu; Kunal Bhalla; Kyle Huang; Lakshya Garg; Lauren Rantala-Yeary; Laurens van der Maaten; Lawrence Chen; Leandro Silva; Lee Bell; Lei Zhang; Liang Tan; Louis Martin; Lovish Madaan; Luca Wehrstedt; Lukas Blecher; Luke de Oliveira; Madeline Muzzi; Madian Khabsa; Manav Avlani; Mannat Singh; Manohar Paluri; Mark Zuckerberg; Marcin Kardas; Martynas Mankus; Mathew Oldham; Mathieu Rita; Matthew Lennie; Maya Pavlova; Meghan Keneally; Melanie Kambadur; Mihir Patel; Mikayel Samvelyan; Mike Clark; Mike Lewis; Min Si; Mitesh Kumar Singh; Mo Metanat; Mona Hassan; Naman Goyal; Narjes Torabi; Nicolas Usunier; Nikolay Bashlykov; Nikolay Bogoychev; Niladri Chatterji; Ning Dong; Oliver Aobo Yang; Olivier Duchenne; Onur Celebi; Parth Parekh; 
Patrick Alrassy; Paul Saab; Pavan Balaji; Pedro Rittner; Pengchuan Zhang; Pengwei Li; Petar Vasic; Peter Weng; Polina Zvyagina; Prajjwal Bhargava; Pratik Dubal; Praveen Krishnan; Punit Singh Koura; Puxin Xu; Qing He; Rachel Rodriguez; Ragavan Srinivasan; Rahul Mitra; Ramon Calderer; Raymond Li; Robert Stojnic; Roberta Raileanu; Robin Battey; Rocky Wang; Rohit Girdhar; Rohit Patel; Romain Sauvestre; Ronnie Polidoro; Roshan Sumbaly; Ross Taylor; Ruan Silva; Rui Hou; Rui Wang; Russ Howes; Ruty Rinott; Saghar Hosseini; Sai Jayesh Bondu; Samyak Datta; Sanjay Singh; Sara Chugh; Sargun Dhillon; Satadru Pan; Sean Bell; Sergey Edunov; Shaoliang Nie; Sharan Narang; Sharath Raparthy; Shaun Lindsay; Sheng Feng; Sheng Shen; Shenghao Lin; Shiva Shankar; Shruti Bhosale; Shun Zhang; Simon Vandenhende; Sinong Wang; Seohyun Sonia Kim; Soumya Batra; Sten Sootla; Steve Kehoe; Suchin Gururangan; Sumit Gupta; Sunny Virk; Sydney Borodinsky; Tamar Glaser; Tamar Herman; Tamara Best; Tara Fowler; Thomas Georgiou; Thomas Scialom; Tianhe Li; Todor Mihaylov; Tong Xiao; Ujjwal Karn; Vedanuj Goswami; Vibhor Gupta; Vignesh Ramanathan; Viktor Kerkez; Vinay Satish Kumar; Vincent Gonguet; Vish Vogeti; Vlad Poenaru; Vlad Tiberiu Mihailescu; Vladan Petrovic; Vladimir Ivanov; Wei Li; Weiwei Chu; Wenhan Xiong; Wenyin Fu; Wes Bouaziz; Whitney Meers; Will Constable; Xavier Martinet; Xiaojian Wu; Xinbo Gao; Xinfeng Xie; Xuchao Jia; Yaelle Goldschlag; Yann LeCun; Yashesh Gaur; Yasmine Babaei; Ye Qi; Yenda Li; Yi Wen; Yiwen Song; Youngjin Nam; Yuchen Hao; Yuchen Zhang; Yun Wang; Yuning Mao; Yuzi He; Zacharie Delpierre Coudert; Zachary DeVito; Zahra Hankir; Zhaoduo Wen; Zheng Yan; Zhengxing Chen; Zhenyu Yang; Zoe Papakipos
+----------
+🤗 Models on Hugging Face | Blog Website --- # Meta Llama 3 We are unlocking the power of large language models. Our latest version of Llama is now accessible to individuals, creators, researchers, and businesses of all sizes so that they can experiment, innovate, and scale their ideas responsibly. This release includes model weights and starting code for pre-trained and instruction-tuned Llama 3 language models — including sizes of 8B to 70B parameters. This repository is a minimal example of loading Llama 3 models and running inference. For more detailed examples, see [llama-recipes](https://github.com/facebookresearch/llama-recipes/). To download the model weights and tokenizer, please visit the [Meta Llama website](https://llama.meta.com/llama-downloads/) and accept our License. Once your request is approved, you will receive a signed URL over email. Then, run the download.sh script, passing the URL provided when prompted to start the download. Pre-requisites: Ensure you have `wget` and `md5sum` installed. Then run the script: `./download.sh`. Remember that the links expire after 24 hours and a certain amount of downloads. You can always re-request a link if you start seeing errors such as `403: Forbidden`. ### Access to Hugging Face We also provide downloads on [Hugging Face](https://huggingface.co/meta-llama), in both transformers and native `llama3` formats. To download the weights from Hugging Face, please follow these steps: - Visit one of the repos, for example [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct). - Read and accept the license. Once your request is approved, you'll be granted access to all the Llama 3 models. Note that requests used to take up to one hour to get processed. - To download the original native weights to use with this repo, click on the "Files and versions" tab and download the contents of the `original` folder. You can also download them from the command line if you `pip install huggingface-hub`: huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include "original/*" --local-dir meta-llama/Meta-Llama-3-8B-Instruct - To use with transformers, the following [pipeline](https://huggingface.co/docs/transformers/en/main_classes/pipelines) snippet will download and cache the weights: ```python model_id = "meta-llama/Meta-Llama-3-8B-Instruct" model="meta-llama/Meta-Llama-3-8B-Instruct", model_kwargs={"torch_dtype": torch.bfloat16}, device="cuda", You can follow the steps below to get up and running with Llama 3 models quickly. These steps will let you run quick inference locally. For more examples, see the [Llama recipes repository](https://github.com/facebookresearch/llama-recipes). 1. Clone and download this repository in a conda env with PyTorch / CUDA. 2. In the top-level directory run: pip install -e . 3. Visit the [Meta Llama website](https://llama.meta.com/llama-downloads/) and register to download the model/s. 4. Once registered, you will get an email with a URL to download the models. You will need this URL when you run the download.sh script. 5. Once you get the email, navigate to your downloaded llama repository and run the download.sh script. - Make sure to grant execution permissions to the download.sh script - During this process, you will be prompted to enter the URL from the email. - Do not use the “Copy Link” option; copy the link from the email manually. 6. 
Once the model/s you want have been downloaded, you can run the model locally using the command below: torchrun --nproc_per_node 1 example_chat_completion.py \ --ckpt_dir Meta-Llama-3-8B-Instruct/ \ --tokenizer_path Meta-Llama-3-8B-Instruct/tokenizer.model \ --max_seq_len 512 --max_batch_size 6 - Replace  `Meta-Llama-3-8B-Instruct/` with the path to your checkpoint directory and `Meta-Llama-3-8B-Instruct/tokenizer.model` with the path to your tokenizer model. - The `–nproc_per_node` should be set to the [MP](#inference) value for the model you are using. - Adjust the `max_seq_len` and `max_batch_size` parameters as needed. - This example runs the [example_chat_completion.py](example_chat_completion.py) found in this repository, but you can change that to a different .py file. Different models require different model-parallel (MP) values: |  Model | MP | | 8B     | 1  | | 70B    | 8  | All models support sequence length up to 8192 tokens, but we pre-allocate the cache according to `max_seq_len` and `max_batch_size` values. So set those according to your hardware. These models are not finetuned for chat or Q&A. They should be prompted so that the expected answer is the natural continuation of the prompt. See `example_text_completion.py` for some examples. To illustrate, see the command below to run it with the llama-3-8b model (`nproc_per_node` needs to be set to the `MP` value): torchrun --nproc_per_node 1 example_text_completion.py \ --ckpt_dir Meta-Llama-3-8B/ \ --tokenizer_path Meta-Llama-3-8B/tokenizer.model \ --max_seq_len 128 --max_batch_size 4 ### Instruction-tuned Models The fine-tuned models were trained for dialogue applications. To get the expected features and performance for them, specific formatting defined in [`ChatFormat`](https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py#L202) needs to be followed: The prompt begins with a `<|begin_of_text|>` special token, after which one or more messages follow. Each message starts with the `<|start_header_id|>` tag, the role `system`, `user` or `assistant`, and the `<|end_header_id|>` tag. After a double newline `\n\n`, the message's contents follow. The end of each message is marked by the `<|eot_id|>` token. You can also deploy additional classifiers to filter out inputs and outputs that are deemed unsafe. See the llama-recipes repo for [an example](https://github.com/meta-llama/llama-recipes/blob/main/recipes/inference/local_inference/inference.py) of how to add a safety checker to the inputs and outputs of your inference code. Examples using llama-3-8b-chat: torchrun --nproc_per_node 1 example_chat_completion.py \ --max_seq_len 512 --max_batch_size 6 Llama 3 is a new technology that carries potential risks with use. Testing conducted to date has not — and could not — cover all scenarios. To help developers address these risks, we have created the [Responsible Use Guide](https://ai.meta.com/static-resource/responsible-use-guide/). Please report any software “bug” or other problems with the models through one of the following means: - Reporting issues with the model: [https://github.com/meta-llama/llama3/issues](https://github.com/meta-llama/llama3/issues) - Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](http://developers.facebook.com/llama_output_feedback) - Reporting bugs and security concerns: [facebook.com/whitehat/info](http://facebook.com/whitehat/info) Our model and weights are licensed for researchers and commercial entities, upholding the principles of openness. 
Our mission is to empower individuals and industry through this opportunity while fostering an environment of discovery and ethical AI advancements. See the [LICENSE](LICENSE) file, as well as our accompanying [Acceptable Use Policy](USE_POLICY.md). ## Questions For common questions, the FAQ can be found [here](https://llama.meta.com/faq), which will be updated over time as new questions arise.
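The `ChatFormat` rules described earlier in this README (each message wrapped in header tokens, a double newline before the message body, and `<|eot_id|>` closing each message) can be sketched as a small string builder. This is only an illustration; the real `ChatFormat` in `llama/tokenizer.py` works directly at the token-id level.
```python
# Minimal sketch of the Llama 3 instruct prompt layout described above.
# The official ChatFormat in llama/tokenizer.py builds token ids directly;
# this string version is only for illustration.
def encode_message(role: str, content: str) -> str:
    return f"<|start_header_id|>{role}<|end_header_id|>\n\n{content.strip()}<|eot_id|>"

def llama3_chat_prompt(messages: list[dict]) -> str:
    prompt = "<|begin_of_text|>"
    for message in messages:
        prompt += encode_message(message["role"], message["content"])
    # End with the assistant header so the model starts generating its reply.
    prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
    return prompt

print(llama3_chat_prompt([
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Summarize what Llama 3 is in one sentence."},
]))
```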
+----------
+# Code Llama ## **Model Details** **Model Developers** Meta AI **Variations** Code Llama comes in four model sizes, and three variants: 1) Code Llama: our base models are designed for general code synthesis and understanding 2) Code Llama - Python: designed specifically for Python 3) Code Llama - Instruct: for instruction following and safer deployment All variants are available in sizes of 7B, 13B, 34B and 70B parameters. **Input** Models input text only. **Output** Models output text only. **Model Architecture** Code Llama and its variants are autoregressive language models using optimized transformer architectures. Code Llama 7B, 13B and 70B additionally support infilling text generation. All models but Code Llama - Python 70B and Code Llama - Instruct 70B were fine-tuned with up to 16K tokens, and support up to 100K tokens at inference time. **Model Dates** Code Llama and its variants have been trained between January 2023 and January 2024. **Status** This is a static model trained on an offline dataset. Future versions of Code Llama - Instruct will be released  as we improve model safety with community feedback. **Licence** A custom commercial license is available at: [https://ai.meta.com/resources/models-and-libraries/llama-downloads/](https://ai.meta.com/resources/models-and-libraries/llama-downloads/). **Research Paper** More information can be found in the paper "[Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/)". **Where to send comments** Instructions on how to provide feedback or comments on the model can be found in the model [README](README.md), or by opening an issue in the GitHub repository ([https://github.com/facebookresearch/codellama/](https://github.com/facebookresearch/codellama/)). ## **Intended Use** **Intended Use Cases** Code Llama and its variants are intended for commercial and research use in English and relevant programming languages. The base model Code Llama can be adapted for a variety of code synthesis and understanding tasks, Code Llama - Python is designed specifically to handle the Python programming language, and Code Llama - Instruct is intended to be safer to use for code assistance and generation applications. **Out-of-Scope Uses** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Code Llama and its variants. ## **Hardware and Software** **Training Factors** We used custom training libraries. The training and fine-tuning of the released models have been performed by Meta’s Research Super Cluster. **Carbon Footprint** In aggregate, training all 12 Code Llama models required 1400K GPU hours of computation on hardware of type A100-80GB (TDP of 350-400W). Estimated total emissions were 228.55 tCO2eq, 100% of which were offset by Meta’s sustainability program. **Training data** All experiments reported here and the released models have been trained and fine-tuned using the same data as Llama 2 with different weights (see Section 2 and Table 1 in the [research paper](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) for details). Code Llama - Instruct uses additional instruction fine-tuning data. **Evaluation Results** See evaluations for the main models and detailed ablations in Section 3 and safety evaluations in Section 4 of the research paper. 
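As a quick illustration of the base models' code-completion behavior (not part of the model card), a prompt can be continued with the Hugging Face `transformers` pipeline. This is a hedged sketch: the `codellama/CodeLlama-7b-hf` model id assumes the checkpoints published on Hugging Face, and the generation settings are illustrative.
```python
# Sketch: code completion with a base Code Llama checkpoint via transformers.
# "codellama/CodeLlama-7b-hf" assumes the checkpoints published on Hugging Face.
# device_map="auto" requires the `accelerate` package.
import torch
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="codellama/CodeLlama-7b-hf",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

prompt = "def fibonacci(n: int) -> int:\n    "
result = generator(prompt, max_new_tokens=64, do_sample=False)
print(result[0]["generated_text"])
```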
## **Ethical Considerations and Limitations** Code Llama and its variants are a new technology that carries risks with use. Testing conducted to date has been in English, and has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Code Llama’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate or objectionable responses to user prompts. Therefore, before deploying any applications of Code Llama, developers should perform safety testing and tuning tailored to their specific applications of the model. Please see the Responsible Use Guide available at [https://ai.meta.com/llama/responsible-user-guide](https://ai.meta.com/llama/responsible-user-guide).
+----------
+# Introducing Code Llama Code Llama is a family of large language models for code based on [Llama 2](https://github.com/facebookresearch/llama) providing state-of-the-art performance among open models, infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks. We provide multiple flavors to cover a wide range of applications: foundation models (Code Llama), Python specializations (Code Llama - Python), and instruction-following models (Code Llama - Instruct) with 7B, 13B and 34B parameters each. All models are trained on sequences of 16k tokens and show improvements on inputs with up to 100k tokens. 7B and 13B Code Llama and Code Llama - Instruct variants support infilling based on surrounding content. Code Llama was developed by fine-tuning Llama 2 using a higher sampling of code. As with Llama 2, we applied considerable safety mitigations to the fine-tuned versions of the model. For detailed information on model training, architecture and parameters, evaluations, responsible AI and safety refer to  our [research paper](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/). Output generated by code generation features of the Llama Materials, including Code Llama, may be subject to third party licenses, including, without limitation, open source licenses. We are unlocking the power of large language models and our latest version of Code Llama is now accessible to individuals, creators, researchers and businesses of all sizes so that they can experiment, innovate and scale their ideas responsibly. This release includes model weights and starting code for pretrained and fine-tuned Llama language models — ranging from 7B to 34B parameters. This repository is intended as a minimal example to load [Code Llama](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) models and run inference. [comment]: <> (Code Llama models are compatible with the scripts in llama-recipes) In order to download the model weights and tokenizers, please visit the [Meta website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and accept our License. Once your request is approved, you will receive a signed URL over email. Then run the download.sh script, passing the URL provided when prompted to start the download. Make sure that you copy the URL text itself, **do not use the 'Copy link address' option** when you right click the URL. If the copied URL text starts with: https://download.llamameta.net, you copied it correctly. If the copied URL text starts with: https://l.facebook.com, you copied it the wrong way. Pre-requisites: make sure you have `wget` and `md5sum` installed. Then to run the script: `bash download.sh`. Keep in mind that the links expire after 24 hours and a certain amount of downloads. If you start seeing errors such as `403: Forbidden`, you can always re-request a link. ### Model sizes | Model | Size     | |-------|----------| | 7B    | ~12.55GB | | 13B   | 24GB     | | 34B   | 63GB     | | 70B   | 131GB    | [comment]: <> (Access on Hugging Face, We are also providing downloads on Hugging Face. You must first request a download from the Meta website using the same email address as your Hugging Face account. After doing so, you can request access to any of the models on Hugging Face and within 1-2 days your account will be granted access to all versions.) 
## Setup In a conda environment with PyTorch / CUDA available, clone the repo and run in the top-level directory: pip install -e . Different models require different model-parallel (MP) values: | Model | MP | |-------|----| | 7B    | 1  | | 13B   | 2  | | 34B   | 4  | | 70B   | 8  | All models, except the 70B python and instruct versions, support sequence lengths up to 100,000 tokens, but we pre-allocate the cache according to `max_seq_len` and `max_batch_size` values. So set those according to your hardware and use-case. ### Pretrained Code Models The Code Llama and Code Llama - Python models are not fine-tuned to follow instructions. They should be prompted so that the expected answer is the natural continuation of the prompt. See `example_completion.py` for some examples. To illustrate, see command below to run it with the `CodeLlama-7b` model (`nproc_per_node` needs to be set to the `MP` value): torchrun --nproc_per_node 1 example_completion.py \ --ckpt_dir CodeLlama-7b/ \ --tokenizer_path CodeLlama-7b/tokenizer.model \ --max_seq_len 128 --max_batch_size 4 Pretrained code models are: the Code Llama models `CodeLlama-7b`, `CodeLlama-13b`, `CodeLlama-34b`, `CodeLlama-70b` and the Code Llama - Python models `CodeLlama-7b-Python`, `CodeLlama-13b-Python`, `CodeLlama-34b-Python`, `CodeLlama-70b-Python`. ### Code Infilling Code Llama and Code Llama - Instruct 7B and 13B models are capable of filling in code given the surrounding context. See `example_infilling.py` for some examples. The `CodeLlama-7b` model can be run for infilling with the command below (`nproc_per_node` needs to be set to the `MP` value): torchrun --nproc_per_node 1 example_infilling.py \ --max_seq_len 192 --max_batch_size 4 Pretrained infilling models are: the Code Llama models `CodeLlama-7b` and `CodeLlama-13b` and the Code Llama - Instruct models `CodeLlama-7b-Instruct`, `CodeLlama-13b-Instruct`. ### Fine-tuned Instruction Models Code Llama - Instruct models are fine-tuned to follow instructions. To get the expected features and performance for the 7B, 13B and 34B variants, a specific formatting defined in [`chat_completion()`](https://github.com/facebookresearch/codellama/blob/main/llama/generation.py#L319-L361) needs to be followed, including the `INST` and `< >` tags, `BOS` and `EOS` tokens, and the whitespaces and linebreaks in between (we recommend calling `strip()` on inputs to avoid double-spaces). `CodeLlama-70b-Instruct` requires a separate turn-based prompt format defined in [`dialog_prompt_tokens()`](https://github.com/facebookresearch/codellama/blob/main/llama/generation.py#L506-L548). You can use `chat_completion()` directly to generate answers with all instruct models; it will automatically perform the required formatting. You can also deploy additional classifiers for filtering out inputs and outputs that are deemed unsafe. See the llama-recipes repo for [an example](https://github.com/facebookresearch/llama-recipes/blob/main/src/llama_recipes/inference/safety_utils.py) of how to add a safety checker to the inputs and outputs of your inference code. Examples using `CodeLlama-7b-Instruct`: torchrun --nproc_per_node 1 example_instructions.py \ --ckpt_dir CodeLlama-7b-Instruct/ \ --tokenizer_path CodeLlama-7b-Instruct/tokenizer.model \ --max_seq_len 512 --max_batch_size 4 Fine-tuned instruction-following models are: the Code Llama - Instruct models `CodeLlama-7b-Instruct`, `CodeLlama-13b-Instruct`, `CodeLlama-34b-Instruct`, `CodeLlama-70b-Instruct`. 
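The code infilling described above can also be exercised through Hugging Face `transformers`. The sketch below is an assumption-laden illustration: it assumes the `codellama/CodeLlama-7b-hf` checkpoint and that the tokenizer's `<FILL_ME>` marker splits the prompt into prefix and suffix for fill-in-the-middle generation; the reference `example_infilling.py` in this repository uses the native `<PRE>`/`<SUF>`/`<MID>` tokens instead.
```python
# Sketch: fill-in-the-middle with Code Llama via transformers, assuming the
# Hugging Face checkpoint and its <FILL_ME> infilling marker.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "codellama/CodeLlama-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)

# <FILL_ME> marks the span the model should infill from the surrounding context.
prompt = 'def remove_non_ascii(s: str) -> str:\n    """ <FILL_ME>\n    return result\n'
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64, do_sample=False)

# Decode only the newly generated tokens, i.e. the infilled middle section.
filled = tokenizer.decode(
    output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
)
print(prompt.replace("<FILL_ME>", filled))
```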
Code Llama is a new technology that carries potential risks with use. Testing conducted to date has not — and could not — cover all scenarios. In order to help developers address these risks, we have created the [Responsible Use Guide](https://github.com/facebookresearch/llama/blob/main/Responsible-Use-Guide.pdf). More details can be found in our research papers as well. Please report any software “bug”, or other problems with the models through one of the following means: - Reporting issues with the model: [github.com/facebookresearch/codellama](http://github.com/facebookresearch/codellama) - Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](http://developers.facebook.com/llama_output_feedback) - Reporting bugs and security concerns: [facebook.com/whitehat/info](http://facebook.com/whitehat/info) See [MODEL_CARD.md](MODEL_CARD.md) for the model card of Code Llama. Our model and weights are licensed for both researchers and commercial entities, upholding the principles of openness. Our mission is to empower individuals, and industry through this opportunity, while fostering an environment of discovery and ethical AI advancements. See the [LICENSE](https://github.com/facebookresearch/llama/blob/main/LICENSE) file, as well as our accompanying [Acceptable Use Policy](https://github.com/facebookresearch/llama/blob/main/USE_POLICY.md) 1. [Code Llama Research Paper](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) 2. [Code Llama Blog Post](https://ai.meta.com/blog/code-llama-large-language-model-coding/)
+----------
+Models on Hugging Face CyberSec Eval Paper Llama Guard Paper # Purple Llama Purple Llama is an umbrella project that over time will bring together tools and evals to help the community build responsibly with open generative AI models. The initial release will include tools and evals for Cyber Security and Input/Output safeguards but we plan to contribute more in the near future. ## Why purple? Borrowing a [concept](https://www.youtube.com/watch?v=ab_Fdp6FVDI) from the cybersecurity world, we believe that to truly mitigate the challenges which generative AI presents, we need to take both attack (red team) and defensive (blue team) postures. Purple teaming, composed of both red and blue team responsibilities, is a collaborative approach to evaluating and mitigating potential risks and the same ethos applies to generative AI and hence our investment in Purple Llama will be comprehensive. Components within the Purple Llama project will be licensed permissively enabling both research and commercial usage. We believe this is a major step towards enabling community collaboration and standardizing the development and usage of trust and safety tools for generative AI development. More concretely evals and benchmarks are licensed under the MIT license while any models use the Llama 2 Community license. See the table below: | **Component Type** |            **Components**            |                                          **License**                                           | | :----------------- | :----------------------------------: | :--------------------------------------------------------------------------------------------: | | Evals/Benchmarks   | Cyber Security Eval (others to come) |                                              MIT                                               | | Models             |             Llama Guard              | [Llama 2 Community License](https://github.com/facebookresearch/PurpleLlama/blob/main/LICENSE) | | Models             |             Llama Guard 2            | Llama 3 Community License | | Safeguard          |             Code Shield              | MIT | ## Evals & Benchmarks ### Cybersecurity #### CyberSec Eval v1 CyberSec Eval v1 was what we believe was the first industry-wide set of cybersecurity safety evaluations for LLMs. These benchmarks are based on industry guidance and standards (e.g., CWE and MITRE ATT&CK) and built in collaboration with our security subject matter experts. We aim to provide tools that will help address some risks outlined in the [White House commitments on developing responsible AI](https://www.whitehouse.gov/briefing-room/statements-releases/2023/07/21/fact-sheet-biden-harris-administration-secures-voluntary-commitments-from-leading-artificial-intelligence-companies-to-manage-the-risks-posed-by-ai/), including: * Metrics for quantifying LLM cybersecurity risks. * Tools to evaluate the frequency of insecure code suggestions. * Tools to evaluate LLMs to make it harder to generate malicious code or aid in carrying out cyberattacks. We believe these tools will reduce the frequency of LLMs suggesting insecure AI-generated code and reduce their helpfulness to cyber adversaries. Our initial results show that there are meaningful cybersecurity risks for LLMs, both with recommending insecure code and for complying with malicious requests. See our [Cybersec Eval paper](https://ai.meta.com/research/publications/purple-llama-cyberseceval-a-benchmark-for-evaluating-the-cybersecurity-risks-of-large-language-models/) for more details. 
#### CyberSec Eval 2 CyberSec Eval 2 expands on its predecessor by measuring an LLM’s propensity to abuse a code interpreter, offensive cybersecurity capabilities, and susceptibility to prompt injection. You can read the paper [here](https://ai.meta.com/research/publications/cyberseceval-2-a-wide-ranging-cybersecurity-evaluation-suite-for-large-language-models/). You can also check out the 🤗 leaderboard [here](https://huggingface.co/spaces/facebook/CyberSecEval). ## System-Level Safeguards As we outlined in Llama 3’s [Responsible Use Guide](https://ai.meta.com/llama/responsible-use-guide/), we recommend that all inputs and outputs to the LLM be checked and filtered in accordance with content guidelines appropriate to the application. ### Llama Guard To support this, and empower the community, we released Llama Guard, an openly-available model that performs competitively on common open benchmarks and provides developers with a pretrained model to help defend against generating potentially risky outputs. As part of our ongoing commitment to open and transparent science, we also released our methodology and an extended discussion of model performance in our [Llama Guard paper](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/). We are happy to share an updated version, Meta Llama Guard 2. Llama Guard 2 was optimized to support the newly [announced](https://mlcommons.org/2024/04/mlc-aisafety-v0-5-poc/) policy published by MLCommons, expanding its coverage to a more comprehensive set of safety categories, out-of-the-box. It also comes with better classification performance than Llama Guard 1 and improved zero-shot and few shot adaptability. Ultimately, our vision is to enable developers to customize this model to support relevant use cases and to make it easier to adopt best practices and improve the open ecosystem. ### Code Shield Code Shield adds support for inference-time filtering of insecure code produced by LLMs. Code Shield offers mitigation of insecure code suggestions risk, code interpreter abuse prevention, and secure command execution. [CodeShield Example Notebook](https://github.com/meta-llama/PurpleLlama/blob/main/CodeShield/notebook/CodeShieldUsageDemo.ipynb). To get started and learn how to use Purple Llama components with Llama models, see the getting started guide [here](https://ai.meta.com/llama/get-started/). The guide provides information and resources to help you set up Llama, including how to access the model, hosting how-to information and integration guides. Additionally, you will find supplemental materials to further assist you while responsibly building with Llama. The guide will be updated as more Purple Llama components get released. ## FAQ For a running list of frequently asked questions, for not only Purple Llama components but also generally for Llama models, see the FAQ [here](https://ai.meta.com/llama/faq/). ## Join the Purple Llama community See the [CONTRIBUTING](CONTRIBUTING.md) file for how to help out.
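For the Llama Guard models described above, a common integration pattern is to format the conversation with the model's prompt template and then check whether the generated verdict begins with `safe` or `unsafe`. The sketch below assumes the `meta-llama/Meta-Llama-Guard-2-8B` checkpoint on Hugging Face and that its tokenizer ships a chat template for the Llama Guard prompt; it is an illustration, not the project's reference implementation.
```python
# Sketch: input/output moderation with Llama Guard 2 via transformers.
# Assumes the meta-llama/Meta-Llama-Guard-2-8B checkpoint and that its
# tokenizer provides the Llama Guard chat template.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-Guard-2-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

def moderate(chat: list[dict]) -> str:
    """Return the raw verdict text, e.g. 'safe' or 'unsafe' plus categories."""
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)
    output = model.generate(input_ids=input_ids, max_new_tokens=32, pad_token_id=0)
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

verdict = moderate([
    {"role": "user", "content": "How do I tie a bowline knot?"},
    {"role": "assistant", "content": "Make a small loop, then pass the end up through it..."},
])
print(verdict)  # expected to start with "safe" or "unsafe" followed by category codes
```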
+----------
+# Model Details Meta Llama Guard 2 is an 8B parameter Llama 3-based [1] LLM safeguard model. Similar to [Llama Guard](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/), it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated. Below is a response classification example input and output for Llama Guard 2. In order to produce classifier scores, we look at the probability for the first token, and use that as the “unsafe” class probability. We can then apply score thresholding to make binary decisions. # Harm Taxonomy and Policy The model is trained to predict safety labels on the 11 categories shown below, based on the [MLCommons taxonomy](https://mlcommons.org/2024/04/mlc-aisafety-v0-5-poc/) of hazards. Harm categories S1: Violent Crimes S2: Non-Violent Crimes S3: Sex-Related Crimes S4: Child Sexual Exploitation S5: Specialized Advice S6: Privacy S7: Intellectual Property S8: Indiscriminate Weapons S9: Hate S10: Suicide & Self-Harm S11: Sexual Content **S1: Violent Crimes** Responses that enable, encourage, or endorse the commission of violent crimes, including: (1) unlawful violence toward people (ex: terrorism, genocide, murder, hate-motivated violent crimes, child abuse, assault, battery, kidnapping) and (2) unlawful violence toward animals (ex: animal abuse) **S2: Non-Violent Crimes** Responses that enable, encourage, or endorse the commission of non-violent crimes, including: (1) personal crimes (ex: labor trafficking, threats, intimidation, hate-motivated non-violent crimes), (2) financial crimes (ex: fraud, scams, money laundering), (3) property crimes (ex: theft, arson, vandalism), (4) drug crimes (ex: creating or using regulated substances), (5) weapons crimes (ex: producing unlicensed firearms), and (6) cyber crimes (ex: hacking) **S3: Sex-Related Crimes** Responses that enable, encourage, or endorse the commission of sex-related crimes, including: (1) sex trafficking, (2) sexual assault (ex: rape), (3) sexual harassment (physical, verbal, or visual), and (4) prostitution **S4: Child Sexual Exploitation** Responses that contain, describe, enable, encourage, or endorse the sexual abuse of children **S5: Specialized Advice** Responses that contain specialized financial, medical, or legal advice, or that indicate dangerous activities or objects are safe **S6: Privacy** Responses that contain sensitive, nonpublic personal information that could undermine someone's physical, digital, or financial security **S7: Intellectual Property** Responses that may violate the intellectual property rights of any third party **S8: Indiscriminate Weapons** Responses that enable, encourage, or endorse the creation of indiscriminate weapons, including: (1) chemical weapons (ex: nerve gas), (2) biological weapons (ex: anthrax), (3) radiological weapons (ex: cobalt bombs), (4) nuclear weapons (ex: fission bombs), and (5) high-yield explosive weapons (ex: cluster munitions) **S9: Hate** Responses that demean or dehumanize people on the basis of their sensitive, personal characteristics (i.e., race, color, ethnicity, national origin, disability, religious affiliation, caste, sexual orientation, sex, gender identity, and/or serious disease) **S10: Suicide & Self-Harm** Responses that enable, 
encourage, or endorse acts of intentional self-harm, including: (1) suicide, (2) self-injury (ex: cutting), and (3) disordered eating **S11: Sexual Content** Responses that contain erotica # Training and Evaluation Data We start with the training set of Llama Guard (see [model card](https://github.com/meta-llama/PurpleLlama/blob/main/Llama-Guard/MODEL_CARD.md#training-data)), and obtain labels on the Harm Taxonomy described above. To improve adaptability of the model to different prompts, we train on hard samples, which are obtained by taking an existing sample and prompting Llama2 70B to produce an alternate policy description that will flip the label of the given sample. We report metrics for various models and APIs on our validation set, which is obtained from combining the validation set of Llama Guard v1 and held-out samples from the additional Llama 3 safety data. We compare performance on our internal test set, as well as on open datasets like [XSTest](https://github.com/paul-rottger/exaggerated-safety?tab=readme-ov-file#license), [OpenAI moderation](https://github.com/openai/moderation-api-release), and [BeaverTails](https://github.com/PKU-Alignment/beavertails). We find that there is overlap between our training set and the BeaverTails-30k test split. Since both our internal test set and BeaverTails use prompts from the Anthropic's [hh-rlhf dataset](https://github.com/anthropics/hh-rlhf) as a starting point for curating data, it is possible that different splits of Anthropic were used while creating the two datasets. Therefore to prevent leakage of signal between our train set and the BeaverTails-30k test set, we create our own BeaverTails-30k splits based on the Anthropic train-test splits used for creating our internal sets. *Note on evaluations*: As discussed in the Llama Guard [paper](https://arxiv.org/abs/2312.06674), comparing model performance is not straightforward as each model is built on its own policy and is expected to perform better on an evaluation dataset with a policy aligned to the model. This highlights the need for industry standards. By aligning Llama Guard 2 with the Proof of Concept MLCommons taxonomy, we hope to drive adoption of industry standards like this and facilitate collaboration and transparency in the LLM safety and content evaluation space. # Model Performance We evaluate the performance of Llama Guard 2 and compare it with Llama Guard and popular content moderation APIs such as Azure, OpenAI Moderation, and Perspective. We use the token probability of the first output token (i.e. safe/unsafe) as the score for classification. For obtaining a binary classification decision from the score, we use a threshold of 0.5. Llama Guard 2 improves over Llama Guard, and outperforms other approaches on our internal test set. Note that we manage to achieve great performance while keeping a low false positive rate as we know that over-moderation can impact user experience when building LLM-applications. 
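A minimal sketch of this first-token scoring scheme, assuming the `meta-llama/Meta-Llama-Guard-2-8B` checkpoint on Hugging Face and the chat template that ships with it (the label token ids are looked up from the tokenizer rather than hard-coded):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed checkpoint id; any Llama Guard 2 export for transformers works the same way.
model_id = "meta-llama/Meta-Llama-Guard-2-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

def unsafe_probability(chat):
    """Probability mass the model puts on 'unsafe' as its first output token."""
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)
    with torch.no_grad():
        next_token_logits = model(input_ids).logits[0, -1]
    probs = torch.softmax(next_token_logits, dim=-1)
    # First sub-token of each label is used as a proxy; the exact ids depend on the tokenizer.
    safe_id = tokenizer.encode("safe", add_special_tokens=False)[0]
    unsafe_id = tokenizer.encode("unsafe", add_special_tokens=False)[0]
    p_unsafe = probs[unsafe_id] / (probs[safe_id] + probs[unsafe_id])
    return p_unsafe.item()

score = unsafe_probability([{"role": "user", "content": "How do I pick a lock?"}])
print("unsafe" if score > 0.5 else "safe", round(score, 3))  # binary decision at a 0.5 threshold
```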
| **Model**                | **F1 ↑**  | **AUPRC ↑** | **False Positive Rate ↓** |
|--------------------------|:---------:|:-----------:|:-------------------------:|
| Llama Guard\*            | 0.665     | 0.854       | 0.027                     |
| Llama Guard 2            | **0.915** | **0.974**   | 0.040                     |
| GPT4                     | 0.796     | N/A         | 0.151                     |
| OpenAI Moderation API    | 0.347     | 0.669       | 0.030                     |
| Azure Content Safety API | 0.519     | N/A         | 0.245                     |
| Perspective API          | 0.265     | 0.586       | 0.046                     |

Table 1: Comparison of performance of various approaches measured on our internal test set. *The performance of Llama Guard is lower on our new test set due to expansion of the number of harm categories from 6 to 11, which is not aligned to what Llama Guard was trained on.

| **Category**           | **False Negative Rate\* ↓** | **False Positive Rate ↓** |
|------------------------|:---------------------------:|:-------------------------:|
| Violent Crimes         | 0.042 | 0.002 |
| Privacy                | 0.057 | 0.004 |
| Non-Violent Crimes     | 0.082 | 0.009 |
| Intellectual Property  | 0.099 | 0.004 |
| Hate                   | 0.190 | 0.005 |
| Specialized Advice     | 0.192 | 0.009 |
| Sexual Content         | 0.229 | 0.004 |
| Indiscriminate Weapons | 0.263 | 0.001 |
| Child Exploitation     | 0.267 | 0.000 |
| Sex Crimes             | 0.275 | 0.002 |
| Self-Harm              | 0.277 | 0.002 |

Table 2: Category-wise breakdown of false negative rate and false positive rate for Llama Guard 2 on our internal benchmark for response classification with safety labels from the ML Commons taxonomy. *The binary safe/unsafe label is used to compute categorical FNR by using the true categories. We do not penalize the model while computing FNR for cases where the model predicts the correct overall label but an incorrect categorical label.

We also report performance on OSS safety datasets, though we note that the policy used for assigning safety labels is not aligned with the policy used while training Llama Guard 2. Still, Llama Guard 2 provides a superior tradeoff between F1 score and False Positive Rate on the XSTest and OpenAI Moderation datasets, demonstrating good adaptability to other policies. The BeaverTails dataset has a lower bar for a sample to be considered unsafe compared to Llama Guard 2's policy. The policy and training data of MDJudge [4] is more aligned with this dataset and we see that it performs better on them as expected (at the cost of a higher FPR). GPT-4 achieves high recall on all of the sets but at the cost of very high FPR (9-25%), which could hurt its ability to be used as a safeguard for practical applications.
| (F1 ↑ / False Positive Rate ↓) | False Refusals (XSTest) | OpenAI policy (OpenAI Mod) | BeaverTails policy (BeaverTails-30k) |
|--------------------------------|:-----------------------:|:--------------------------:|:------------------------------------:|
| Llama Guard                    | 0.737 / 0.079           | 0.599 / 0.035              |                                      |
| Llama Guard 2                  | 0.884 / 0.084           | 0.807 / 0.060              | 0.736 / 0.059                        |
| MDJudge                        | 0.856 / 0.172           | 0.768 / 0.212              | 0.849 / 0.098                        |
| GPT4                           | 0.895 / 0.128           | 0.842 / 0.092              | 0.802 / 0.256                        |
| OpenAI Mod API                 | 0.576 / 0.040           | 0.788 / 0.156              | 0.284 / 0.056                        |

Table 3: Comparison of performance of various approaches measured on our internal test set for response classification. NOTE: The policy used for training Llama Guard does not align with those used for labeling these datasets. Still, Llama Guard 2 provides a superior tradeoff between F1 score and False Positive Rate across these datasets, demonstrating strong adaptability to other policies.

We hope to provide developers with a high-performing moderation solution for most use cases by aligning Llama Guard 2 taxonomy with MLCommons standard. But as outlined in our Responsible Use Guide, each use case requires specific safety considerations and we encourage developers to tune Llama Guard 2 for their own use case to achieve better moderation for their custom policies. As an example of how Llama Guard 2's performance may change, we train on the BeaverTails training dataset and compare against MDJudge (which was trained on BeaverTails among others).

| **Model**                   | **F1 ↑**  | **False Positive Rate ↓** |
|:---------------------------:|:---------:|:-------------------------:|
| Llama Guard 2               | 0.736     | 0.059                     |
| MDJudge                     | 0.849     | 0.098                     |
| Llama Guard 2 + BeaverTails | **0.852** | 0.101                     |

Table 4: Comparison of performance on BeaverTails-30k.

# Limitations

There are some limitations associated with Llama Guard 2. First, Llama Guard 2 itself is an LLM fine-tuned on Llama 3. Thus, its performance (e.g., judgments that need common sense knowledge, multilingual capability, and policy coverage) might be limited by its (pre-)training data. Second, Llama Guard 2 is finetuned for safety classification only (i.e. to generate "safe" or "unsafe"), and is not designed for chat use cases. However, since it is an LLM, it can still be prompted with any text to obtain a completion. Lastly, as an LLM, Llama Guard 2 may be susceptible to adversarial attacks or prompt injection attacks that could bypass or alter its intended use. However, with the help of external components (e.g., KNN, perplexity filter), recent work (e.g., [3]) demonstrates that Llama Guard is able to detect harmful content reliably.

**Note on Llama Guard 2's policy**

Llama Guard 2 supports 11 out of the 13 categories included in the [MLCommons AI Safety](https://mlcommons.org/working-groups/ai-safety/ai-safety/) taxonomy. The Election and Defamation categories are not addressed by Llama Guard 2 as moderating these harm categories requires access to up-to-date, factual information sources and the ability to determine the veracity of a particular output. To support the additional categories, we recommend using other solutions (e.g. Retrieval Augmented Generation) in tandem with Llama Guard 2 to evaluate information correctness.
# Citation

@misc{metallamaguard2,
  author       = {Llama Team},
  title        = {Meta Llama Guard 2},
  howpublished = {\url{https://github.com/meta-llama/PurpleLlama/blob/main/Llama-Guard2/MODEL_CARD.md}},
  year         = {2024}
}

# References

[1] [Llama 3 Model Card](https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md)
[2] [Llama Guard Model Card](https://github.com/meta-llama/PurpleLlama/blob/main/Llama-Guard/MODEL_CARD.md)
[3] [RigorLLM: Resilient Guardrails for Large Language Models against Undesired Content](https://arxiv.org/pdf/2403.13031.pdf)
[4] [MDJudge for Salad-Bench](https://huggingface.co/OpenSafetyLab/MD-Judge-v0.1)
+----------
+# Meta Llama Guard 2

Llama Guard 2 is a model that provides input and output guardrails for LLM deployments, based on MLCommons policy.

# Download

In order to download the model weights and tokenizer, please visit the [Meta website](https://llama.meta.com/llama-downloads) and accept our License. Once your request is approved, you will receive a signed URL over email. Then run the download.sh script, passing the URL provided when prompted to start the download. Pre-requisites: make sure you have wget and md5sum installed. Then run the script: `./download.sh`. Keep in mind that the links expire after 24 hours and a certain number of downloads. If you start seeing errors such as `403: Forbidden`, you can always re-request a link.

# Quick Start

Since Llama Guard 2 is a fine-tuned Llama3 model (see our [model card](MODEL_CARD.md) for more information), the same quick start steps outlined in our [README file](https://github.com/meta-llama/llama3/blob/main/README.md) for Llama3 apply here. In addition, we added examples using Llama Guard 2 in the [Llama recipes repository](https://github.com/facebookresearch/llama-recipes).

# Issues

Please report any software bug, or other problems with the models, through one of the following means:

- Reporting issues with the Llama Guard model: [github.com/meta-llama/PurpleLlama](https://github.com/meta-llama/PurpleLlama)
- Reporting issues with Llama in general: [github.com/meta-llama/llama3](https://github.com/meta-llama/llama3)
- Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](https://developers.facebook.com/llama_output_feedback)
- Reporting bugs and security concerns: [facebook.com/whitehat/info](https://facebook.com/whitehat/info)

# License

Our model and weights are licensed for both researchers and commercial entities, upholding the principles of openness. Our mission is to empower individuals and industry through this opportunity, while fostering an environment of discovery and ethical AI advancements. The same license as Llama 3 applies: see the [LICENSE](../LICENSE) file, as well as our accompanying [Acceptable Use Policy](USE_POLICY.md).

# Citation

@misc{metallamaguard2,
  author       = {Llama Team},
  title        = {Meta Llama Guard 2},
  howpublished = {\url{https://github.com/meta-llama/PurpleLlama/blob/main/Llama-Guard2/MODEL_CARD.md}},
  year         = {2024}
}

[Research Paper](https://ai.facebook.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/)
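For reference, a minimal sketch of calling the model through Hugging Face `transformers` once the weights are available there (the checkpoint id and the use of the built-in chat template are assumptions; the Llama 3 README and the recipes linked above show the fully supported paths):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-Guard-2-8B"  # assumed Hugging Face checkpoint id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

def moderate(chat):
    """Return the raw verdict: 'safe', or 'unsafe' followed by the violated categories."""
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)
    output = model.generate(input_ids=input_ids, max_new_tokens=32, pad_token_id=tokenizer.eos_token_id)
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

print(moderate([
    {"role": "user", "content": "I forgot how to kill a process in Linux, can you help?"},
    {"role": "assistant", "content": "Sure! You can run `kill <pid>` or `pkill <name>`."},
]))
```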
+----------
+Llama Guard is a 7B parameter [Llama 2](https://arxiv.org/abs/2307.09288)-based input-output safeguard model. It can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM: it generates text in its output that indicates whether a given prompt or response is safe/unsafe, and if unsafe based on a policy, it also lists the violating subcategories. Here is an example: In order to produce classifier scores, we look at the probability for the first token, and turn that into an “unsafe” class probability. Model users can then make binary decisions by applying a desired threshold to the probability scores. # Training and Evaluation We use a mix of prompts that come from the Anthropic [dataset](https://github.com/anthropics/hh-rlhf) and redteaming examples that we have collected in house, in a separate process from our production redteaming. In particular, we took the prompts only from the Anthropic dataset, and generated new responses from our in-house LLaMA models, using jailbreaking techniques to elicit violating responses. We then annotated Anthropic data (prompts & responses) in house, mapping labels according to the categories identified above. Overall we have ~13K training examples. ## Taxonomy of harms and Risk Guidelines As automated content risk mitigation relies on classifiers to make decisions about content in real time, a prerequisite to building these systems is to have the following components: - A **taxonomy** of risks that are of interest – these become the classes of a classifier. - A **risk guideline** that determines where we put the line between encouraged and discouraged outputs for each risk category in the taxonomy. Together with this model, we release an open taxonomy inspired by existing open taxonomies such as those employed by Google, Microsoft and OpenAI in the hope that it can be useful to the community. This taxonomy does not necessarily reflect Meta's own internal policies and is meant to demonstrate the value of our method to tune LLMs into classifiers that show high performance and high degrees of adaptability to different policies. ### The Llama Guard Safety Taxonomy & Risk Guidelines Below, we provide both the harm types themselves under this taxonomy and also examples of the specific kinds of content that would be considered harmful under each category: - **Violence & Hate** encompasses statements that encourage or could help people plan or engage in violence. Similarly, statements that advocate discrimination, contain slurs, or voice hateful sentiments against people based on their sensitive personal characteristics (ex: race, color, religion, national origin, sexual orientation, gender, gender identity, or disability) would also be considered inappropriate under this category. - **Sexual Content** encompasses statements encouraging someone (who could be underage) to engage in specific sex acts. Similarly, sexually explicit (i.e., erotic) statements would also be considered inappropriate under this category. - **Guns & Illegal Weapons** encompasses statements that specifically encourage, condone, or could help people plan or execute specific crimes involving the illegal acquisition, creation, or use of guns or illegal weapons (ex: explosives, biological agents, or chemical weapons), including instructions on how to create or use them. 
- **Regulated or Controlled Substances** encompasses statements that specifically encourage or could help people to illegally produce, transfer, or consume regulated or controlled substances like illegal drugs, tobacco, alcohol, or cannabis.
- **Suicide & Self Harm** encompasses statements that encourage, condone, or enable people to harm themselves (ex: by providing instructions or information on methods of self-harm). When people express or imply an intent to harm themselves, any response other than one that provides links to appropriate health resources would be considered inappropriate (even if entirely supportive).
- **Criminal Planning** encompasses miscellaneous statements that encourage, condone, or could help people plan or execute specific criminal activities, like arson, kidnapping, or theft. Items should only be considered harmful under this category when they could not be read as violating any of the other harm types above (ex: statements that encourage violence should be considered violating under Violence & Hate rather than this category).

## Evaluation results

We compare the performance of the model against standard content moderation APIs in the industry, including [OpenAI](https://platform.openai.com/docs/guides/moderation/overview), [Azure Content Safety](https://learn.microsoft.com/en-us/azure/ai-services/content-safety/concepts/harm-categories), [PerspectiveAPI](https://developers.perspectiveapi.com/s/about-the-api-attributes-and-languages?language=en_US) from Google on both public and in-house benchmarks. The public benchmarks include [ToxicChat](https://huggingface.co/datasets/lmsys/toxic-chat) and [OpenAI Moderation](https://github.com/openai/moderation-api-release). Note: comparisons are not exactly apples-to-apples due to mismatches in each taxonomy. The interested reader can find a more detailed discussion about this in our [paper](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/).

|                 | Our Test Set (Prompt) | OpenAI Mod | ToxicChat | Our Test Set (Response) |
| --------------- | --------------------- | ---------- | --------- | ----------------------- |
| Llama Guard     | **0.945**             | 0.847      | **0.626** | **0.953**               |
| OpenAI API      | 0.764                 | **0.856**  | 0.588     | 0.769                   |
| Perspective API | 0.728                 | 0.787      | 0.532     | 0.699                   |
+----------
+Hamel’s Blog - Optimizing latency Subscribe for updates Summary Below is a summary of my findings: 🏁 mlc is the fastest . This is so fast that I’m skeptical and am now motivated to measure quality (if I have time). When checking the outputs manually, they didn’t seem that different than other approaches. ❤️ CTranslate2 is my favorite tool, which is among the fastest but is also the easiest to use . The documentation is the best out of all of the solutions I tried. Furthermore, I think that the ergonomics are excellent for the models that they support. Unlike vLLM, CTranslate doesn’t seem to support distributed inference just yet. 🛠️ is really fast, but CTranslate can be much faster. On other hand, vLLM supports distributed inference , which is something you will need for larger models. vLLM might be the sweet spot for serving very large models. 😐 Text Generation Inference is an ok option (but nowhere near as fast as ) if you want to deploy HuggingFace LLMs in a standard way . TGI has some nice features like telemetry baked in ( via OpenTelemetry ) and integration with the HF ecosystem like inference endpoints . One thing to note that as of 7/28/2023, the license for TGI was changed to be more restrictive that may interfere with certain commercial uses . I am personally not a fan of the license. Rough Benchmarks This study focuses on various approaches to optimizing latency . Specifically, I want to know which tools are the most effective at optimizing latency for open source LLMs. In order to focus on latency, I hold the following variables constant: batch size of n = 1 for all prediction requests (holding throughput constant). All experiments were conducted on a Nvidia A6000 GPU, unless otherwise noted. Max output tokens were always set to 200 All numbers are calculated as an average over a fixed set of 9 prompts. The model used is meta-llama/Llama-2-7b-hf on the HuggingFace Hub In addition to batch size of and using a A6000 GPU (unless noted otherwise), I also made sure I warmed up the model by sending an initial inference request before measuring latency. Llama-v2-7b benchmark: batch size = 1, max output tokens = 200 avg tok/sec avg time (seconds) avg output token count platform options gpu float16 quantization 44.8 4.5 200.0 int8 quantization 62.6 3.2 HF Hosted Inference Endpoint A10G 30.4 6.6 202.0 HuggingFace Transformers (no server) 24.6 7.5 181.4 nf4 4bit quantization bitsandbytes 24.3 7.6 21.1 9.5 quantized w/ GPTQ 23.6 8.8 quantized w/ bitsandbytes 1.9 103.0 q4f16 117.1 1.3 153.9 text-generation-webui exllama 77.0 1.7 134.0 vllm A100 (on Modal Labs) 41.5 3.4 143.1 46.4 178.0 In some cases I did not use an b/c the platform didn’t have that particular GPU available. You can ignore these rows if you like, but I still think it is valuable information. I had access to a A6000, so I just used what I had. I noticed that the output of the LLM was quite different (less tokens) when using . I am not sure if I did something wrong here, or it changes the behavior of the LLM. Furthermore, the goal was not to be super precise on these benchmarks but rather to get a general sense of how things work and how they might compare to each other out of the box. Some of the tools above are inference servers which perform logging, tracing etc. in addition to optimizing models which effect latency. The idea is to see where there are significant differences between tools. 
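A rough sketch of the harness implied by this setup (batch size 1, one warm-up request, tokens/second averaged over the fixed prompt set; `generate` and `tokenizer` stand in for whichever backend is being measured and its tokenizer):

```python
import time
from statistics import mean

def bench(generate, tokenizer, prompts):
    """generate(prompt) -> completion text; each backend is configured for max 200 output tokens."""
    generate(prompts[0])  # warm-up request, excluded from the measurements
    rows = []
    for prompt in prompts:
        start = time.perf_counter()
        text = generate(prompt)
        elapsed = time.perf_counter() - start
        n_tokens = len(tokenizer.encode(text))
        rows.append({"tokens": n_tokens, "seconds": elapsed, "tok_per_sec": n_tokens / elapsed})
    print(f"avg tok/sec: {mean(r['tok_per_sec'] for r in rows):.1f}  "
          f"avg time: {mean(r['seconds'] for r in rows):.1f}s  "
          f"avg output tokens: {mean(r['tokens'] for r in rows):.1f}")
    return rows
```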
I discussed this more Background One capability you need to be successful with open source LLMs is the ability to serve models efficiently. There are two categories of tools for model inference: Inference servers: these help with providing a web server that can provide a REST/grpc or other interface to interact with your model as a service. These inference servers usually have parameters to help you make trade-offs between throughput and latency . Additionally, some inference servers come with additional features like telemetry, model versioning and more. You can learn more about this topic the serving section of these notes. For LLMs, popular inference servers are the Text Generation Inference (TGI) Model Optimization : These modify your model to make them faster for inference. Examples include quantization Paged Attention Exllama and more. It is common to use both Inference servers techniques in conjunction. Some inference servers like even help you apply optimization techniques. Notes On Tools Other than benchmarking, an important goal of this study was to understand how to use different platforms & tools. Start with compiling the model as shown in these docs After installing MLC , you can compile meta-llama/Llama-2-7b-chat-hf like so: python3 -m mlc_llm.build \ --hf-path meta-llama/Llama-2-7b-chat-hf --target cuda --quantization q4f16_1 The arguments for the compliation are documented . This puts the model in the ./dist/ folder with the name Llama-2-7b-chat-hf-q4f16_1 You can use their python client to interact with the compiled model: from mlc_chat import ChatModule, ChatConfig cfg = ChatConfig(max_gen_len cm ChatModule(model "Llama-2-7b-chat-hf-q4f16_1" , chat_config cfg) output cm.generate(prompt prompt) You can see the full benchmarking code Warning I wasn’t able to get to run correctly with the supplied python client so I am using the chat variant ( Llama-2-7b-chat-hf ) as a proxy. I asked the kind folks who work on the mlc project and they said the python client is currently designed for chat, such that they have this system prompt that is hard coded for llama models: conv.system = ("[INST] <>\n\nYou are a helpful, respectful and honest assistant. " "Always answer as helpfully as possible, while being safe. " "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, " "or illegal content. " "Please ensure that your responses are socially unbiased and positive in nature.\n\n" "If a question does not make any sense, or is not factually coherent, explain why instead " "of answering something not correct. " "If you don't know the answer to a question, please don't share false " "information.\n<>\n\n "); If you want to fix this, you must edit mlc-chat-config.json , changing conv_template LM These docs say more about the config.json The config file is located in ./dist//params/mlc-chat-config.json . For example: > cat ./dist/Llama-2-7b-hf-q4f16_1/params/mlc-chat-config.json "model_lib": "Llama-2-7b-hf-q4f16_1", "local_id": "Llama-2-7b-hf-q4f16_1", "conv_template": "llama-2", "temperature": 0.7, "repetition_penalty": 1.0, "top_p": 0.95, "mean_gen_len": 128, "max_gen_len": 512, "shift_fill_factor": 0.3, "tokenizer_files": [ "tokenizer.json", "tokenizer.model" "model_category": "llama", "model_name": "Llama-2-7b-hf" is an optimization tool that can make models ridiculously fast. h/t to Anton . The documentation for CTranslate2 contains specific instructions for llama models To optimize llama v2 , we first need to quantize the model. 
This can be done like so: ct2-transformers-converter --model int8 --output_dir llama-2-7b-ct2 --force refers to the HuggingFace repo for this model . The benchmarking code is as follows (can also be found ): time ctranslate2 sys sys.path.append( '../common/' questions pandas as pd generator ctranslate2.Generator( "llama-2-7b-ct2" , device "cuda" tokenizer transformers.AutoTokenizer.from_pretrained( "meta-llama/Llama-2-7b-hf" def predict(prompt: str "Generate text give a prompt" start time.perf_counter() tokens tokenizer.convert_ids_to_tokens(tokenizer.encode(prompt)) results generator.generate_batch([tokens], sampling_topk , max_length , include_prompt_in_result False results[ 0 ].sequences_ids[ ] tokenizer.decode(tokens) request_time return 'tok_count' len (tokens), 'time' : request_time, 'question' : prompt, 'answer' : output, 'note' 'CTranslate2 int8 quantization' if __name__ == '__main__' counter responses [] for q in questions: >= : responses.append(predict(q)) += df pd.DataFrame(responses) df.to_csv( 'bench-ctranslate-int8.csv' , index Text Generation Inference (TGI) License Restrictions The license for TGI was recently changed away from Apache 2.0 to be more restrictive. Be careful when using TGI in commercial applications. Text generation inference which is often referred to as “TGI” was easy to use without any optimization. You can run it like this: “start_server.sh” #!/bin/bash [ -z " $HUGGING_FACE_HUB_TOKEN then echo "HUGGING_FACE_HUB_TOKEN is not set. Please set it before running this script." exit fi "TheBloke/Llama-2-7B-GPTQ" volume $PWD /data docker run --gpus all -e HUGGING_FACE_HUB_TOKEN= GPTQ_BITS=4 GPTQ_GROUPSIZE=128 --shm-size 5g -p 8081:80 -v $volume :/data ghcr.io/huggingface/text-generation-inference --max-best-of $@ We can then run the server with this command: bash start_server.sh --model-id Help You can see all the options for the TGI container with the help flag like so: run ghcr.io/huggingface/text-generation-inference --help less Quantization was very difficult to get working. There is a —quantize flag with accepts gptq . The approach makes inference much slower, which others have reported To make work for llama v2 models requires a bunch of work, you have to install the text-generation-server which can take a while and is very brittle to get right. I had to step through the Makefile carefully. After that you have to download the weights with: text-generation-server download-weights meta-llama/Llama-2-7b-hf You can run the following command to perform the quantization (the last argument is the destination directory where the weights are stored). quantize data/quantized/ However, this step is not needed for the most popular models, as someone will likely already have quantized and uploaded them to the Hub. Pre-Quantized Models Alternatively, you can use a pre-quantized model that has been uploaded to the Hub. TheBloke/Llama-2-7B-GPTQ is a good example of one. To get this to work, you have to be careful to set the GPTQ_BITS GPTQ_GROUPSIZE environment variables to match the config. For example This config necessitates setting These are already set in shown above. This PR will eventually fix that. To use the with TGI, I can use the same bash script with the following arguments: --quantize Comparison Without TGI Server When I first drafted this study I got the following response on twitter: Based on your code ( https://t.co/hSYaPTsEaK ) it seems like you measure the full HTTP request, which is like comparing trees to an apple. 
— Philipp Schmid ( @_philschmid July 29, 2023 Phillip certainly has a point! I am indeed testing both! I’m looking for big differences in tools here, and since some inference servers have optimization tools, and some optimization tools do not have an inference server I cannot do a true apples to apples comparison. However, I think its still useful to try different things as advertised to see what is possible, and also take note of really significant gaps in latency between tools. Therefore, I ran the following tests to perform the similar optimizations as TGI, but without the server to see what happened: HuggingFace Transformers I was able to get slightly better performance without the TGI server as predicted by Phillip, but it did not account for the the massive gap between some tools (which is exactly the kind of thing I was looking for). To benchmark quantization with bitsandbytes, I followed this blog post and wrote this benchmarking code . I quantized the model by loading it like this: model_id AutoTokenizer.from_pretrained(model_id) nf4_config BitsAndBytesConfig( load_in_4bit bnb_4bit_quant_type "nf4" bnb_4bit_compute_dtype torch.bfloat16 model_nf4 AutoModelForCausalLM.from_pretrained(model_id, quantization_config nf4_config) Unlike TGI, I was able to get bitsandbytes to work properly here, but just like TGI it didn’t speed anything up for me with respect to inference latency. As reflected in the benchmark table, I got nearly the same results with transformers without any optimizations I also quantized the model using without an inference server to compare against TGI. The code for that is The results were so bad ~ 5 tok/sec that I decided not to put this in the table, because it seemed quite off to me. Text Generation WebUI Aman let me know about text-generation-web-ui , and also these instructions for quickly experimenting with ExLlama ggml . I wasn’t able to get the variant to work properly, unfortunately. If you are really serious about using exllama, I recommend trying to use it without the text generation UI and look at the repo, specifically at test_benchmark_inference.py . (I didn’t have time for this, but if I was going to use exllama for anything serious I would go this route). From the root of the repo, you can run the following commands to start an inference server optimized with download-model.py TheBloke/Llama-2-7B-GPTQ server.py --listen --extensions openai --loader exllama_hf TheBloke_Llama-2-7B-GPTQ After the server was started, I used to conduct the benchmark. Overall, I didn’t like this particular piece of software much. It’s bit bloated because its trying to do too many things at once (An inference server, Web UIs, and other optimizations). That being said, the documentation is good and it is easy to use. I don’t think there is any particular reason to use this unless you want an end-to-end solution that also comes with a web user-interface (which many people want!). only works with CUDA 11.8, which I configured using this approach . After configuring CUDA and installing the right version of PyTorch, you need to install the bleeding edge from git: pip install -U git+https://github.com/vllm-project/vllm.git A good recipe to use for vLLM can be find on these Modal docs . Surprisingly, I had much lower latency when running on a local vs. a hosted A100 on Modal Labs. It’s possible that I did something wrong here. Currently, is the fastest solution for when you need distributed inference (i.e. when your model doesn’t fit on a single GPU). 
offers a server , but I benchmarked the model locally using their tools instead. The code for the benchmarking can be found here SamplingParams, LLM #from https://modal.com/docs/guide/ex/vllm_inference # Coding questions "Implement a Python function to compute the Fibonacci numbers." "Write a Rust function that performs binary exponentiation." "What are the differences between Javascript and Python?" # Literature "Write a story in the style of James Joyce about a trip to the Australian outback in 2083, to see robots in the beautiful desert." "Who does Harry turn into a balloon?" "Write a tale about a time-traveling historian who's determined to witness the most significant events in human history." # Math "What is the product of 9 and 8?" "If a train travels 120 kilometers in 2 hours, what is its average speed?" "Think through this step by step. If the sequence a_n is defined by a_1 = 3, a_2 = 5, and a_n = a_(n-1) + a_(n-2) for n > 2, find a_6." MODEL_DIR "/home/ubuntu/hamel-drive/vllm-models" download_model_to_folder(): huggingface_hub snapshot_download os snapshot_download( local_dir MODEL_DIR, token os.environ[ "HUGGING_FACE_HUB_TOKEN" LLM(MODEL_DIR) generate(question, llm, note None response : question, : note} sampling_params SamplingParams( temperature 1.0 top_p max_tokens result llm.generate(question, sampling_params) result: response[ (output.outputs[ ].token_ids) output.outputs[ ].text llm download_model_to_folder() generate(question q, llm llm, note 'vLLM' responses.append(response) 'bench-vllm.csv' HuggingFace Inference Endpoint I deployed an inference endpoint on HuggingFace for , on a Nvidia A10G GPU. I didn’t try to turn on any optimizations like quantization and wanted to see what the default performance would be like. The documentation for these interfaces can be found . There is also a python client Their documentation says they are using TGI under the hood. However, my latency was significantly faster on their hosted inference platform than using TGI locally. This could be due to the fact that I used a with them but only a locally. It’s worth looking into why this discrepancy exists further. The code for this benchmark can be found Footnotes It is common to explore the inference vs throughput frontier when conducting inference benchmarks. I did not do this, since I was most interested in latency. Here is an example of how to conduct inference benchmarks that consider both throughput and latency. ↩︎ For Llama v2 models , you must be careful to use the models ending in -hf as those are the ones that are compatible with the transformers library. The Modular Inference Engine is another example of an inference server that also applies optimization techniques. At the time of this writing, this is proprietary technology, but its worth keeping an eye on this in the future. Edit this page
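To recap the local vLLM path used above in one place, here is a condensed sketch (the model directory and reliance on the `HUGGING_FACE_HUB_TOKEN` environment variable are assumptions):

```python
import os
import time
from huggingface_hub import snapshot_download
from vllm import LLM, SamplingParams

MODEL_DIR = "./vllm-models"  # assumed local cache directory

# Download the weights once, then load them into vLLM for offline generation.
snapshot_download(
    "meta-llama/Llama-2-7b-hf",
    local_dir=MODEL_DIR,
    token=os.environ["HUGGING_FACE_HUB_TOKEN"],
)
llm = LLM(MODEL_DIR)
sampling_params = SamplingParams(temperature=1.0, top_p=0.95, max_tokens=200)

def generate(question, note="vLLM"):
    start = time.perf_counter()
    result = llm.generate([question], sampling_params)[0]
    elapsed = time.perf_counter() - start
    output = result.outputs[0]
    return {
        "tok_count": len(output.token_ids),
        "time": elapsed,
        "question": question,
        "answer": output.text,
        "note": note,
    }

print(generate("What is the product of 9 and 8?"))
```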
+----------
+Achieve 23x LLM Inference Throughput & Reduce p50 Latency Anyscale Preview is now available! Login today to get free $50 compute credit 🚀 Home Blog Detail How continuous batching enables 23x throughput in LLM inference while reducing p50 latency By Cade Daniel Chen Shen Eric Liang Richard Liaw June 22, 2023 In this blog, we’ll cover the basics of large language model (LLM) inference and highlight inefficiencies in traditional batching policies. We’ll introduce continuous batching and discuss benchmark results for existing batching systems such as HuggingFace’s text-generation-inference and vLLM. By leveraging vLLM, users can achieve 23x LLM inference throughput while reducing p50 latency. Update June 2024: Anyscale Endpoints (Anyscale's LLM API Offering) and Private Endpoints (self-hosted LLMs) are now available as part of the Anyscale Platform.  Click to get started on the Anyscale platform. Due to the large GPU memory footprint and compute cost of LLMs , serving dominates the compute cost for most real world applications. ML engineers often treat LLMs like "black boxes" that can only be optimized with internal changes such as quantization and custom CUDA kernels. However, this is not entirely the case. Because LLMs iteratively generate their output, and because LLM inference is often memory and not compute bound, there are surprising system-level batching optimizations that make 10x or more differences in real-world workloads. One recent such proposed optimization is , also known as dynamic batching , or batching with iteration-level scheduling . We wanted to see how this optimization performs. We will get into details below, including how we simulate a production workload, but to summarize our findings: Up to 23x throughput improvement using continuous batching and continuous batching-specific memory optimizations (using ). 8x throughput over naive batching by using continuous batching (both on Ray Serve Hugging Face’s text-generation-inference 4x throughput over naive batching by using an optimized model implementation ( NVIDIA’s FasterTransformer You can try out continuous batching today: see this example to run vLLM on Ray Serve The remainder of this blog is structured as follows: We’ll cover the basics of how LLM inference works and highlight inefficiencies in traditional request-based dynamic batching policies. We’ll introduce continuous batching and how it answers many of the inefficiencies of request-based dynamic batching. We then discuss our benchmarks and the implications this has on how to serve LLM models cost-effectively. Link The basics of LLM inference There is a lot to know about LLM inference, and we refer users to Efficient Inference on a Single GPU Optimization story: Bloom inference for more detail. However, at a high level, LLM inference is pretty straightforward. For each request: You start with a sequence of tokens (called the "prefix" or "prompt"). The LLM produces a sequence of completion tokens, stopping only after producing a stop token or reaching a maximum sequence length. This is an iterative process. You get one additional completion token for each new forward pass of the model. For example, suppose you prompt with a sentence "What is the capital of California: ", it would take ten forward pass iterations to get back the full response of ["S", "a", "c", "r", “a”, "m", "e", "n", "t", "o"]. 
This example simplifies things a little bit because in actuality tokens do not map 1:1 to ASCII characters (a popular token encoding technique is Byte-Pair Encoding which is beyond the scope of this blog post), but the iterative nature of generation is the same regardless of how you tokenize your sequences. Simplified LLM inference. This toy example shows a hypothetical model which supports a maximum sequence length of 8 tokens (T1, T2, …, T8). Starting from the prompt tokens (yellow), the iterative process generates a single token at a time (blue). Once the model generates an end-of-sequence token (red), the generation loop stops. This example shows a batch of only one input sequence, so the batch size is 1. Now that we understand the simplicity of the iterative process, let’s dive deeper with some things you may not know about LLM inference: The initial ingestion (“prefill”) of the prompt "What is the capital of California: " takes about as much time as the generation of each subsequent token. This is because the prefill phase pre-computes some inputs of the attention mechanism that remain constant over the lifetime of the generation. This prefill phase efficiently uses the GPU’s parallel compute because these inputs can be computed independently of each other. LLM inference is memory-IO bound , not compute bound. In other words, it currently takes more time to load 1MB of data to the GPU’s compute cores than it does for those compute cores to perform LLM computations on 1MB of data. This means that LLM inference throughput is largely determined by how large a batch you can fit into high-bandwidth GPU memory . See this page in the NVIDIA docs for more details. The amount of GPU memory consumed scales with the base model size + the length of the token sequence. In Numbers every LLM developer should know , it’s estimated that a 13B parameter model consumes nearly 1MB of state for each token in a sequence. On a higher-end A100 GPU with 40GB RAM, back-of-the-envelope math suggests that since 14 GB are left after storing the 26GB of model parameters, ~14k tokens can be held in memory at once. This may seem high but is actually quite limiting; if we limit our sequence lengths to 512, we can process at most ~28 sequences in a batch. The problem is worse for higher sequence lengths; a sequence length of 2048 means our batch size is limited to 7 sequences. Note that this is an upper bound since it doesn’t leave room for storing intermediate computations. What this all means is that there is substantial “room on the table” so to speak if you can optimize memory usage. This is why approaches such as model quantization strategies such as are potentially so powerful; if you could halve the memory usage by moving from 16-bit to 8-bit representations, you could double the space available for larger batch sizes. However, not all strategies require modifications to the model weights. For example, FlashAttention found significant throughput improvements by reorganizing the attention computation to require less memory-IO. Continuous batching is another memory optimization technique which does not require modification of the model. We next explain how naive batching works (and is inefficient), and how continuous batching increases the memory-efficiency of LLM generation. LLM batching explained GPUs are massively-parallel compute architectures, with compute rates (measured in floating-point operations per second, or flops) in the teraflop ( ) or even petaflop ( H100 ) range. 
Despite these staggering amounts of compute, LLMs struggle to achieve saturation because so much of the chip’s memory bandwidth is spent loading model parameters. Batching is one way to improve the situation; instead of loading new model parameters each time you have an input sequence, you can load the model parameters once and then use them to process many input sequences. This more efficiently uses the chip’s memory bandwidth, leading to higher compute utilization, higher throughput, and cheaper LLM inference. Naive batching / static batching We call this traditional approach to batching static batching , because the size of the batch remains constant until the inference is complete. Here’s an illustration of static batching in context of LLM inference: Completing four sequences using static batching. On the first iteration (left), each sequence generates one token (blue) from the prompt tokens (yellow). After several iterations (right), the completed sequences each have different sizes because each emits their end-of-sequence-token (red) at different iterations. Even though sequence 3 finished after two iterations, static batching means that the GPU will be underutilized until the last sequence in the batch finishes generation (in this example, sequence 2 after six iterations). Unlike traditional deep learning models, batching for LLMs can be tricky due to the iterative nature of their inference. Intuitively, this is because requests can "finish" earlier in a batch, but it is tricky to release their resources and add new requests to the batch that may be at different completion states. This means that as the GPU is underutilized as generation lengths of different sequences in a batch differ from the largest generation length of the batch. In the figure on the right above, this is illustrated by the white squares after end-of-sequence tokens for sequences 1, 3, and 4. How often does static batching under-utilize the GPU? It depends on the generation lengths of sequences in a batch. For example, one could use LLM inference to emit a single token as a classification task (there are better ways to do this but let’s use this as an example). In this case, every output sequence is the same size (1 token). If the input sequences are also the same size (say, 512 tokens), then each static batch will achieve the best possible GPU utilization. On the other hand, a LLM-powered chatbot service cannot assume fixed-length input sequences, nor assume fixed-length output sequences. Proprietary models offer maximum context lengths in excess of 8K tokens at the time of writing. With static batching, variance in generation output could cause massive underutilization of GPUs. It’s no wonder OpenAI CEO Sam Altman described the compute costs as eye-watering Without restrictive assumptions on user input and model output, unoptimized production-grade LLM systems simply can’t serve traffic without underutilizing GPUs and incurring unnecessarily high costs. We need to optimize how we serve LLMs for their power to be broadly accessible. Continuous batching The industry recognized the inefficiency and came up with a better approach. Orca: A Distributed Serving System for Transformer-Based Generative Models is a paper presented in OSDI ‘22 which is the first to our knowledge to tackle this problem. Instead of waiting until every sequence in a batch has completed generation, Orca implements iteration-level scheduling where the batch size is determined per iteration. 
The result is that once a sequence in a batch has completed generation, a new sequence can be inserted in its place, yielding higher GPU utilization than static batching. Completing seven sequences using continuous batching. Left shows the batch after a single iteration, right shows the batch after several iterations. Once a sequence emits an end-of-sequence token, we insert a new sequence in its place (i.e. sequences S5, S6, and S7). This achieves higher GPU utilization since the GPU does not wait for all sequences to complete before starting a new one. Reality is a bit more complicated than this simplified model: since the prefill phase takes compute and has a different computational pattern than generation, it cannot be easily batched with the generation of tokens. Continuous batching frameworks currently manage this via hyperparameter: waiting_served_ratio , or the ratio of requests waiting for prefill to those waiting end-of-sequence tokens. Speaking of frameworks, Hugging Face has productionized continuous batching in their Rust- and Python-based text-generation-inference LLM inference server . We use their implementation to understand the performance characteristics of continuous batching in our benchmarks below. : Continuous batching, dynamic batching, and iteration-level scheduling are all close enough in meaning that any one of them can be used to describe the batching algorithm. We chose to use continuous batching. Dynamic batching is fitting but can be confused with request-level batching, where an LLM inference server uses a static batch whose size is chosen when the current batch has completely finished generation. We feel that iteration-level scheduling is descriptive of the scheduling mechanism but not the process as a whole. PagedAttention and vLLM For this blog post, we want to showcase the differences between static batching and continuous batching. It turns out that continuous batching can unlock memory optimizations that are not possible with static batching by improving upon Orca’s design. PagedAttention is a new attention mechanism implemented in ( ). It takes inspiration from traditional OS concepts such as paging virtual memory . They allow the KV cache (what is computed in the “prefill” phase, discussed above) to be non-contiguous by allocating memory in fixed-size “pages”, or blocks. The attention mechanism can then be rewritten to operate on block-aligned inputs, allowing attention to be performed on non-contiguous memory ranges. This means that buffer allocation can happen just-in-time instead of ahead-of-time: when starting a new generation, the framework does not need to allocate a contiguous buffer of size maximum_context_length. Each iteration, the scheduler can decide if it needs more room for a particular generation, and allocate on the fly without any degradation to PagedAttention’s performance. This doesn’t guarantee perfect utilization of memory ( their blog says the wastage is now limited to under 4%, only in the last block), but it significantly improves upon wastage from ahead-of-time allocation schemes used widely by the industry today. Altogether, PagedAttention + vLLM enable massive memory savings as most sequences will not consume the entire context window. These memory savings translate directly into a higher batch size, which means higher throughput and cheaper serving. We include vLLM in our benchmarks below. Benchmarking setup We’ll discuss our experimental setup then dive into the results of our benchmarks. 
Experiments Our goal is to see how continuous batching performs versus static batching on a simulated real-world live-inference workload. Fundamentally, we care about cost. We break this down into throughput and latency since cost is directly downstream of how efficiently you can serve at a given latency. Benchmark goal Measurement Measure throughput Time-to-process a queue of 1000 requests, each with 512 input tokens and generation length sampled from an exponential distribution. Measure latency Request latencies for 100 requests, with varying input lengths, output lengths, and arrival times at a fixed average rate. We’ll discuss the datasets and other details of the experiments in their respective results section. Hardware/model We benchmark throughput and latency on a single NVIDIA A100 GPU provided by Anyscale . Our A100 has 40GB of GPU RAM. We selected Meta’s OPT-13B model because each framework under test had a readily-available integration with this model. We selected the 13B variant because it fits into our GPU without requiring tensor parallelism, yet is still large enough to present memory efficiency challenges. We opt not to use tensor parallelism, where each transformer block is split over multiple GPUs, to keep our experiments simple, although both static batching and continuous batching work with tensor parallelism. Frameworks We test two static batching frameworks and three continuous batching frameworks. Our static batching frameworks are: Hugging Face’s Pipelines This is the simplest inference solution. It provides static batching with an easy-to-use API that works with any model and supports more tasks than simple text-generation. We use this as our baseline. This is a library which provides optimized implementations of various transformer models. It currently only provides static batching (the Triton inference server provides request-level dynamic batching, but not continuous batching yet). This provides us with an idea of how far an extremely optimized implementation of our model can get us with static batching – it provides a more competitive baseline than the relatively unoptimized OPT-13B implementation available on Hugging Face Hub Our continuous batching frameworks are: This is the inference server Hugging Face uses to power their LLM live-inference APIs. It implements continuous batching. Continuous batching on Ray Serve leverages Ray’s serverless capabilities to provide seamless autoscaling, high-availability, and support for complex DAGs. We wanted to understand how continuous batching works, so we re-implemented text-generation-inference’s core continuous batching logic in pure-Python on Ray Serve. As you will see in our results, our implementation achieves the same performance as text-generation-inference, which validates our understanding. This is an open-source project recently released by folks at UC Berkeley ( ). It builds upon Orca’s continuous batching design by taking full control of dynamic memory allocations, allowing it to significantly reduce different forms of GPU memory fragmentation. We test this framework because it shows the impact of further optimizations made possible by iteration-level scheduling and continuous batching. Benchmarking results: Throughput Based on our understanding of static batching, we expect continuous batching to perform significantly better when there is higher variance in sequence lengths in each batch. 
To show this, we run our throughput benchmark four times for each framework, each time on a dataset with higher variance in sequence lengths. To do this, we create a dataset containing 1000 sequences each with 512 input tokens. We configure our model to always emit a per-sequence generation length by ignoring the end-of-sequence token and configuring max_tokens. We then generate 1000 generation lengths, one for each request, sampled from an exponential distribution with mean=128 tokens. We use an exponential distribution as it is a good approximation of the generation lengths that one may encounter while serving an application like ChatGPT. To vary the variance of each run, we select only samples from the exponential distribution that are less than or equal to 32, 128, 512, and 1536. The total output sequence length is then, at most, 512+32=544, 512+128=640, 512+512=1024, and 512+1536=2048 (the maximum sequence length of our model). We then use a simple asyncio Python benchmarking script to submit HTTP requests to our model server. The benchmarking script submits all requests in burst fashion, so that the compute is saturated. The results are as follows: Throughput in tokens per second of each framework as variance in sequence length increases. As expected, the static batchers and naive continuous batchers perform approximately identically for lower-variance generation lengths. However as the variance increases, naive static batching’s performance plummets to 81 token/s. FasterTransformers improves upon naive static batching significantly, nearly keeping up with the naive continuous batchers until generation length limit of 1536. Continuous batching on Ray Serve and text-generation-inference achieves about the same performance, which is what we expect since they use the same batching algorithm. What is most impressive here is vLLM. For each dataset, vLLM more than doubles performance compared to naive continuous batching. We have not analyzed what optimization contributes the most to vLLM performance the most, but we suspect vLLM’s ability to reserve space dynamically instead of ahead-of-time allows vLLM to dramatically increase the batch size. We plot these performance results relative to naive static batching: Our throughput benchmark results presented as improvement multiples over naive static batching, log scale. It’s important to note how impressive even FasterTransformer’s 4x improvement is; we’re very interested in benchmarking FasterTransformers plus continuous batching when NVIDIA implements it. However, continuous batching is clearly a significant improvement over static batching even with an optimized model. The performance gap becomes gigantic when you include further memory optimization enabled by continuous batching and iteration-level scheduling as vLLM does. Benchmarking results: Latency Live-inference endpoints often face latency-throughput tradeoffs that must be optimized based on user needs. We benchmark latency on a realistic workload and measure how the cumulative distribution function of latencies changes with each framework. Similar to the throughput benchmark, we configure the model to always emit a specified amount of tokens specified per-request. We prepare 100 randomly-generated prompts by sampling lengths from a uniform distribution between 1 token and 512 tokens. We sample 100 output lengths from a capped exponential distribution with mean=128 and maximum size of 1536. 
These numbers were chosen because they are reasonably realistic and allow the generation to use up the full context-length of our model (512+1536=2048). Instead of submitting all requests at the same time as done in the throughput benchmark, we delay each request by a predetermined number of seconds. We sample a Poisson distribution to determine how long each request waits after the previously submitted request. The Poisson distribution is parameterized by λ, the expected rate, which in our case is how many queries per second (QPS) hit our model endpoint. We measure latencies at both QPS=1 and QPS=4 to see how the latency distribution changes as load changes. Median generation request latency for each framework, under average load of 1 QPS and 4 QPS. Continuous batching systems improve median latency. We see that while improving throughput, continuous batching systems also improve median latency. This is because continuous batching systems allow for new requests to be added to an existing batch if there is room, each iteration. But how about other percentiles? In fact, we find that they improve latency across all percentiles: Cumulative distribution function of generation request latencies for each framework with QPS=1. Static batchers and continuous batchers have distinct curve shapes caused by the presence of iteration-level batch scheduling in continuous batchers. All continuous batchers perform approximately equally under this load; FasterTransformers performs noticeably better than static batching on a naive model implementation. The reason why continuous batching improves latency at all percentiles is the same as why it improves latency at p50: new requests can be added regardless of how far into generation other sequences in the batch are. However, like static batching, continuous batching is still limited by how much space is available on the GPU. As your serving system becomes saturated with requests, meaning a higher on-average batch size, there are less opportunities to inject new requests immediately when they are received. We can see this as we increase the average QPS to 4: Cumulative distribution function of generation request latencies for each framework with QPS=4. Compared to QPS=1, FasterTransformer’s distribution of latencies becomes more similar to static batching on a naive model. Both Ray Serve and text-generation-inference’s continuous batching implementations perform similarly, but noticeably worse than vLLM. We observe that FasterTransformer becomes more similar to naive static batching, and that both text-generation-inference and Ray Serve’s implementation of continuous batching are on their way to look like FasterTransformer’s curve with QPS=1. That is, as the systems become saturated there are less opportunities to inject new requests immediately, so request latency goes up. This lines up with the vLLM curve – it remains mostly unchanged between QPS=1 and QPS=4. This is because due to its advanced memory optimizations, it has a higher maximum batch size. Anecdotally, we observe that vLLM becomes saturated around QPS=8 with a throughput near 1900 token/s. To compare these numbers apples-to-apples to the other serving systems requires more experimentation; however we have shown that continuous batching significantly improves over static batching by 1) reducing latency by injecting new requests immediately when possible, and 2) enable advanced memory optimizations (in vLLM’s case) that increase the QPS that the serving system can handle before becoming saturated. 
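A small sketch of how the simulated workload described above can be generated (capped exponential generation lengths and Poisson request arrivals; the seed and the specific cap chosen are illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)

def sample_generation_lengths(n, mean=128, cap=1536):
    """Exponentially distributed output lengths, rejecting samples above the cap."""
    lengths = []
    while len(lengths) < n:
        draw = rng.exponential(mean)
        if draw <= cap:
            lengths.append(max(1, int(draw)))
    return lengths

def sample_arrival_times(n, qps=1.0):
    """Poisson process: exponential inter-arrival gaps with rate `qps`, cumulatively summed."""
    gaps = rng.exponential(1.0 / qps, size=n)
    return np.cumsum(gaps)

gen_lengths = sample_generation_lengths(1000, mean=128, cap=512)  # one throughput-benchmark variant
arrivals = sample_arrival_times(100, qps=4)                       # latency benchmark at QPS=4
print(f"mean generation length: {np.mean(gen_lengths):.0f} tokens, "
      f"last request arrives at t={arrivals[-1]:.1f}s")
```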
Conclusion: LLMs present some amazing capabilities, and we believe their impact is still mostly undiscovered. We have shared how a new serving technique, continuous batching, works and how it outperforms static batching. It improves throughput by wasting fewer opportunities to schedule new requests, and improves latency by being capable of immediately injecting new requests into the compute stream. We are excited to see what people can do with continuous batching, and where the industry goes from here. Try out continuous batching for yourself: we have a vLLM + Ray Serve example that allows you to try out continuous batching. We are integrating continuous batching systems into Aviary, a webapp that allows you to compare the outputs of different LLMs in parallel, and will release it within the week. Acknowledgements: we'd like to thank the following people for assisting in benchmarking and/or reviewing our results: Stephanie Wang, Antoni Baum, Edward Oakes, and Amog Kamsetty; UC Berkeley: Zhuohan Li and Woosuk Kwon. Get involved with Ray: the code used for the experiments in the blog post is available, and to connect with the Ray community you can join the Ray Slack or ask questions on the Discuss forum. If you are interested in hosting LLMs, check out the managed Ray offering; to learn more about Ray, see ray.io and docs.ray.io, as well as the earlier blog series on solving Generative AI infrastructure and using LangChain with Ray. Ray Summit 2023: if you are interested in learning more about how Ray can be used to build performant and scalable LLM applications and fine-tune/train/serve LLMs on Ray, join Ray Summit on September 18-20th, with keynote speakers including John Schulman from OpenAI and Aidan Gomez from Cohere, community and tech talks about Ray, and practical training focused on LLMs.
+----------
+GitHub - huggingface/peft: 🤗 PEFT: State-of-the-art Parameter-Efficient Fine-Tuning (huggingface.co/docs/peft, Apache-2.0 license).
🤗 PEFT: State-of-the-art Parameter-Efficient Fine-Tuning (PEFT) methods. Fine-tuning large pretrained models is often prohibitively costly due to their scale. Parameter-Efficient Fine-Tuning (PEFT) methods enable efficient adaptation of large pretrained models to various downstream applications by only fine-tuning a small number of (extra) model parameters instead of all the model's parameters. This significantly decreases the computational and storage costs. Recent state-of-the-art PEFT techniques achieve performance comparable to fully fine-tuned models. PEFT is integrated with Transformers for easy model training and inference, Diffusers for conveniently managing different adapters, and Accelerate for distributed training and inference for really big models. Tip: visit the PEFT organization to read about the PEFT methods implemented in the library and to see notebooks demonstrating how to apply these methods to a variety of downstream tasks. Click the "Watch repos" button on the organization page to be notified of newly implemented methods and notebooks! Check the PEFT Adapters API Reference section for a list of supported PEFT methods, and read the Adapters, Soft prompts, and IA3 conceptual guides to learn more about how these methods work.
Quickstart: install PEFT from pip with pip install peft. Prepare a model for training with a PEFT method such as LoRA by wrapping the base model and PEFT configuration with get_peft_model. For the bigscience/mt0-large model, you're only training 0.19% of the parameters!
from transformers import AutoModelForSeq2SeqLM
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType
model_name_or_path = "bigscience/mt0-large"
tokenizer_name_or_path = "bigscience/mt0-large"
peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
# "trainable params: 2359296 || all params: 1231940608 || trainable%: 0.19151053100118282"
To load a PEFT model for inference:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
model = AutoPeftModelForCausalLM.from_pretrained("ybelkada/opt-350m-lora").eval()
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
inputs = tokenizer("Preheat the oven to 350 degrees and place the cookie dough", return_tensors="pt")
outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=50)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
# "Preheat the oven to 350 degrees and place the cookie dough in the center of the oven. In a large bowl, combine the flour, baking powder, baking soda, salt, and cinnamon. In a separate bowl, combine the egg yolks, sugar, and vanilla."
Why you should use PEFT There are many benefits of using PEFT but the main one is the huge savings in compute and storage, making PEFT applicable to many different use cases. High performance on consumer hardware Consider the memory requirements for training the following models on the ought/raft/twitter_complaints dataset with an A100 80GB GPU with more than 64GB of CPU RAM. Model Full Finetuning PEFT-LoRA PyTorch PEFT-LoRA DeepSpeed with CPU Offloading bigscience/T0_3B (3B params) 47.14GB GPU / 2.96GB CPU 14.4GB GPU / 2.96GB CPU 9.8GB GPU / 17.8GB CPU bigscience/mt0-xxl (12B params) OOM GPU 56GB GPU / 3GB CPU 22GB GPU / 52GB CPU bigscience/bloomz-7b1 (7B params) 32GB GPU / 3.8GB CPU 18.1GB GPU / 35GB CPU With LoRA you can fully finetune a 12B parameter model that would've otherwise run out of memory on the 80GB GPU, and comfortably fit and train a 3B parameter model. When you look at the 3B parameter model's performance, it is comparable to a fully finetuned model at a fraction of the GPU memory. Submission Name Accuracy Human baseline (crowdsourced) 0.897 Flan-T5 0.892 lora-t0-3b 0.863 The bigscience/T0_3B model performance isn't optimized in the table above. You can squeeze even more performance out of it by playing around with the input instruction templates, LoRA hyperparameters, and other training related hyperparameters. The final checkpoint size of this model is just 19MB compared to 11GB of the full bigscience/T0_3B model. Learn more about the advantages of finetuning with PEFT in this blog post Quantization is another method for reducing the memory requirements of a model by representing the data in a lower precision. It can be combined with PEFT methods to make it even easier to train and load LLMs for inference. Learn how to finetune with QLoRA and the TRL library on a 16GB GPU in the Finetune LLMs on your own consumer hardware using tools from PyTorch and Hugging Face ecosystem blog post. Learn how to finetune a openai/whisper-large-v2 model for multilingual automatic speech recognition with LoRA and 8-bit quantization in this (see this instead for an example of streaming a dataset). Save compute and storage PEFT can help you save storage by avoiding full finetuning of models on each of downstream task or dataset. In many cases, you're only finetuning a very small fraction of a model's parameters and each checkpoint is only a few MBs in size (instead of GBs). These smaller PEFT adapters demonstrate performance comparable to a fully finetuned model. If you have many datasets, you can save a lot of storage with a PEFT model and not have to worry about catastrophic forgetting or overfitting the backbone or base model. PEFT integrations PEFT is widely supported across the Hugging Face ecosystem because of the massive efficiency it brings to training and inference. Diffusers The iterative diffusion process consumes a lot of memory which can make it difficult to train. PEFT can help reduce the memory requirements and reduce the storage size of the final model checkpoint. For example, consider the memory required for training a Stable Diffusion model with LoRA on an A100 80GB GPU with more than 64GB of CPU RAM. The final model checkpoint size is only 8.8MB! 
PEFT-LoRA PEFT-LoRA with Gradient Checkpointing CompVis/stable-diffusion-v1-4 27.5GB GPU / 3.97GB CPU 15.5GB GPU / 3.84GB CPU 8.12GB GPU / 3.77GB CPU Take a look at the examples/lora_dreambooth/train_dreambooth.py training script to try training your own Stable Diffusion model with LoRA, and play around with the smangrul/peft-lora-sd-dreambooth Space which is running on a T4 instance. Learn more about the PEFT integration in Diffusers in this is a library for distributed training and inference on various training setups and hardware (GPUs, TPUs, Apple Silicon, etc.). PEFT models work with Accelerate out of the box, making it really convenient to train really large models or use them for inference on consumer hardware with limited resources. PEFT can also be applied to training LLMs with RLHF components such as the ranker and policy. Get started by reading: Fine-tune a Mistral-7b model with Direct Preference Optimization with PEFT and the library to learn more about the Direct Preference Optimization (DPO) method and how to apply it to a LLM. Fine-tuning 20B LLMs with RLHF on a 24GB consumer GPU with PEFT and the library, and then try out the gpt2-sentiment_peft.ipynb notebook to optimize GPT2 to generate positive movie reviews. StackLLaMA: A hands-on guide to train LLaMA with RLHF with PEFT, and then try out the stack_llama/scripts for supervised finetuning, reward modeling, and RL finetuning. Model support Use this Space or check out the to find which models officially support a PEFT method out of the box. Even if you don't see a model listed below, you can manually configure the model config to enable PEFT for a model. Read the New transformers architecture guide to learn how. Contribute If you would like to contribute to PEFT, please check out our contribution guide Citing 🤗 PEFT To use 🤗 PEFT in your publication, please cite it by using the following BibTeX entry. @Misc title PEFT: State-of-the-art Parameter-Efficient Fine-Tuning methods author Sourab Mangrulkar and Sylvain Gugger and Lysandre Debut and Younes Belkada and Sayak Paul and Benjamin Bossan howpublished \url{https://github.com/huggingface/peft} year 2022 About 🤗 PEFT: State-of-the-art Parameter-Efficient Fine-Tuning. Topics python adapter pytorch lora diffusion parameter-efficient-learning Readme Custom properties Stars Watchers 107 watching Forks Report repository Releases 17 v0.11.1 Latest May 17, 2024 + 16 releases Packages No packages published Used by 9.1k + 9,081 Contributors 175 + 161 contributors Languages Python 98.9% Other 1.1% Footer © 2024 GitHub, Inc. You can’t perform that action at this time.
+----------
+llama-recipes/docs/LLM_finetuning.md at main · meta-llama/llama-recipes · GitHub
+----------
+llama-recipes/recipes/finetuning/datasets/README.md at main · meta-llama/llama-recipes · GitHub
+----------
+Efficient Fine-Tuning with LoRA: A Guide to Optimal Parameter Selection for Large Language Models | Databricks Blog Skip to main content Share this post With the rapid advancement of neural network-based techniques and Large Language Model (LLM) research, businesses are increasingly interested in AI applications for value generation. They employ various machine learning approaches, both generative and non-generative, to address text-related challenges such as classification, summarization, sequence-to-sequence tasks, and controlled text generation. Organizations can opt for third-party APIs, but fine-tuning models with proprietary data offers domain-specific and pertinent results, enabling cost-effective and independent solutions deployable across different environments in a secure manner. Ensuring efficient resource utilization and cost-effectiveness is crucial when choosing a strategy for fine-tuning. This blog explores arguably the most popular and effective variant of such parameter efficient methods, Low Rank Adaptation (LoRA), with a particular emphasis on QLoRA (an even more efficient variant of LoRA). The approach here will be to take an open large language model and fine-tune it to generate fictitious product descriptions when prompted with a product name and a category. The model chosen for this exercise is OpenLLaMA-3b-v2 , an open large language model with a permissive license (Apache 2.0), and the dataset chosen is Red Dot Design Award Product Descriptions , both of which can be downloaded from the HuggingFace Hub at the links provided. Fine-Tuning, LoRA and QLoRA In the realm of language models, fine tuning an existing language model to perform a specific task on specific data is a common practice. This involves adding a task-specific head, if necessary, and updating the weights of the neural network through backpropagation during the training process. It is important to note the distinction between this finetuning process and training from scratch. In the latter scenario, the model's weights are randomly initialized, while in finetuning, the weights are already optimized to a certain extent during the pre-training phase. The decision of which weights to optimize or update, and which ones to keep frozen, depends on the chosen technique. Full finetuning involves optimizing or training all layers of the neural network. While this approach typically yields the best results, it is also the most resource-intensive and time-consuming. Fortunately, there exist parameter-efficient approaches for fine-tuning that have proven to be effective. Although most such approaches have yielded less performance, Low Rank Adaptation (LoRA) has bucked this trend by even outperforming full finetuning in some cases, as a consequence of avoiding catastrophic forgetting (a phenomenon which occurs when the knowledge of the pretrained model is lost during the fine-tuning process). LoRA is an improved finetuning method where instead of finetuning all the weights that constitute the weight matrix of the pre-trained large language model, two smaller matrices that approximate this larger matrix are fine-tuned. These matrices constitute the LoRA adapter. This fine-tuned adapter is then loaded to the pretrained model and used for inference. QLoRA is an even more memory efficient version of LoRA where the pretrained model is loaded to GPU memory as quantized 4-bit weights (compared to 8-bits in the case of LoRA), while preserving similar effectiveness to LoRA. 
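To make the low-rank idea concrete, here is a minimal PyTorch sketch (illustrative only, not code from the blog) of a frozen linear layer augmented with a trainable low-rank update, which is exactly what a LoRA adapter adds to each targeted weight matrix:

```
import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """A frozen linear layer plus a trainable low-rank update B @ A."""

    def __init__(self, base: nn.Linear, r: int = 8, alpha: int = 32):
        super().__init__()
        self.base = base
        for p in self.base.parameters():
            p.requires_grad = False  # pretrained weights stay frozen
        self.A = nn.Parameter(torch.randn(r, base.in_features) * 0.01)
        self.B = nn.Parameter(torch.zeros(base.out_features, r))
        self.scaling = alpha / r

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # W x + (alpha/r) * B A x -- only A and B receive gradients
        return self.base(x) + self.scaling * (x @ self.A.T @ self.B.T)

layer = LoRALinear(nn.Linear(1024, 1024), r=8)
print(sum(p.numel() for p in layer.parameters() if p.requires_grad))  # ~16k trainable params
```

Only the two small matrices are trained and saved; at inference time their product can either be applied alongside the frozen weights or merged into them.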
Probing this method, comparing the two methods when necessary, and figuring out the best combination of QLoRA hyperparameters to achieve optimal performance with the quickest training time will be the focus here. LoRA is implemented in the Hugging Face Parameter Efficient Fine-Tuning (PEFT) library, offering ease of use and QLoRA can be leveraged by using together. HuggingFace Transformer Reinforcement Learning (TRL) library offers a convenient trainer for supervised finetuning with seamless integration for LoRA. These three libraries will provide the necessary tools to finetune the chosen pretrained model to generate coherent and convincing product descriptions once prompted with an instruction indicating the desired attributes. Prepping the data for supervised fine-tuning To probe the effectiveness of QLoRA for fine tuning a model for instruction following, it is essential to transform the data to a format suited for supervised fine-tuning. Supervised fine-tuning in essence, further trains a pretrained model to generate text conditioned on a provided prompt. It is supervised in that the model is finetuned on a dataset that has prompt-response pairs formatted in a consistent manner. An example observation from our chosen dataset from the Hugging Face hub looks as follows: product category description text "Biamp Rack Products" "Digital Audio Processors" "“High recognition value, uniform aesthetics and practical scalability – this has been impressively achieved with the Biamp brand language …" "Product Name: Biamp Rack Products; Product Category: Digital Audio Processors; Product Description: “High recognition value, uniform aesthetics and practical scalability – this has been impressively achieved with the Biamp brand language … As useful as this dataset is, this is not well formatted for fine-tuning of a language model for instruction following in the manner described above. The following code snippet loads the dataset from the Hugging Face hub into memory, transforms the necessary fields into a consistently formatted string representing the prompt, and inserts the response( i.e. the description), immediately afterwards. This format is known as the ‘Alpaca format’ in large language model research circles as it was the format used to finetune the original LlaMA model from Meta to result in the Alpaca model, one of the first widely distributed instruction-following large language models (although not licensed for commercial use). datasets load_dataset Dataset #Load the dataset from the HuggingFace Hub rd_ds = load_dataset( "xiyuez/red-dot-design-award-product-description" #Convert to pandas dataframe for convenient processing rd_df = pd.DataFrame(rd_ds[ 'train' #Combine the two attributes into an instruction string rd_df[ 'instruction' ] = 'Create a detailed description for the following product: ' + rd_df[ 'product' ]+ ', belonging to category: ' 'category' rd_df = rd_df[[ 'description' ]] #Get a 5000 sample subset for fine-tuning purposes rd_df_sample = rd_df.sample(n= 5000 , random_state= 42 #Define template and format data into the template for supervised fine-tuning template = """Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: {} ### Response:\n""" rd_df_sample[ 'prompt' ] = rd_df_sample[ "instruction" ].apply( lambda x: template. 
format (x)) rd_df_sample.rename(columns={ 'response' }, inplace= ] + "\n### End" rd_df_sample = rd_df_sample[[ 'text' ] = rd_df[ "prompt" ] + rd_df[ "response" rd_df.drop(columns=[ ], inplace= The resulting prompts are then loaded into a hugging face dataset for supervised finetuning. Each such prompt has the following format. Below is an instruction that describes a task. Write a response that appropriately completes the request. Create a detailed description the following product: Beseye Pro, belonging to category: Cloud-Based Home Security Camera ### Response: Beseye Pro combines intelligent home monitoring with decorative art. The camera, whose form reminiscent of a water drop, secured the mounting a neodymium magnet can be rotated by 360 degrees. This allows it to be easily positioned the desired direction. The camera also houses modern technologies, such infrared LEDs, cloud-based intelligent video analyses SSL encryption. ### End To facilitate quick experimentation, each fine-tuning exercise will be done on a 5000 observation subset of this data. Testing model performance before fine-tuning Before any fine-tuning, it’s a good idea to check how the model performs without any fine-tuning to get a baseline for pre-trained model performance. The model can be loaded in 8-bit as follows and prompted with the format specified in the model card on Hugging Face LlamaTokenizer, LlamaForCausalLM model_path = 'openlm-research/open_llama_3b_v2' tokenizer = LlamaTokenizer.from_pretrained(model_path) model = LlamaForCausalLM.from_pretrained( model_path, load_in_8bit= , device_map= 'auto' #Pass in a prompt and infer with the model prompt = 'Q: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse\nA:' input_ids = tokenizer(prompt, return_tensors= ).input_ids generation_output = model.generate( input_ids=input_ids, max_new_tokens= 128 (tokenizer.decode(generation_output[ ])) The output obtained is not quite what we want. Q: Create a detailed description the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse A: The Corelogic Smooth Mouse a wireless optical mouse that has a 1000 dpi resolution. It has a 2.4 GHz wireless connection a 12 -month warranty. Q: What the price of the Corelogic Smooth Mouse? A: The Corelogic Smooth Mouse priced at $ 29.99 . Q: What the weight of the Corelogic Smooth Mouse? A: The Corelogic Smooth Mouse weighs pounds. Q: What the dimensions of the Corelogic Smooth Mouse? A: The Corelogic Smooth Mouse has a dimension The first part of the result is actually satisfactory, but the rest of it is more of a rambling mess. Similarly, if the model is prompted with the input text in the ‘Alpaca format’ as discussed before, the output is expected to be just as sub-optimal: prompt= """Below is an instruction that describes a task. Write a response that appropriately completes the request. Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse ### Response:""" input_ids = tokenizer(prompt, return_tensors= And sure enough, it is: Corelogic Smooth Mouse a mouse that designed to be used by people disabilities. 
It a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by The model performs what it was trained to do, predicts the next most probable token. The point of supervised fine-tuning in this context is to generate the desired text in a controllable manner. Please note that in the subsequent experiments, while QLoRA leverages a model loaded in 4-bit with the weights frozen, the inference process to examine output quality is done once the model has been loaded in 8-bit as shown above for consistency. The Turnable Knobs When using PEFT to train a model with LoRA or QLoRA (note that, as mentioned before, the primary difference between the two is that in the latter, the pretrained models are frozen in 4-bit during the fine-tuning process), the hyperparameters of the low rank adaptation process can be defined in a LoRA config as shown below: ... #If only targeting attention blocks of the model target_modules = [ "q_proj" "v_proj" #If targeting all linear layers 'q_proj' 'k_proj' 'v_proj' 'o_proj' 'gate_proj' 'down_proj' 'up_proj' 'lm_head' lora_config = LoraConfig( r= 16 target_modules = target_modules, lora_alpha= lora_dropout= 0.05 bias= "none" task_type= "CAUSAL_LM" ,} Two of these hyperparameters, r and target_modules are empirically shown to affect adaptation quality significantly and will be the focus of the tests that follow. The other hyperparameters are kept constant at the values indicated above for simplicity. represents the rank of the low rank matrices learned during the finetuning process. As this value is increased, the number of parameters needed to be updated during the low-rank adaptation increases. Intuitively, a lower r may lead to a quicker, less computationally intensive training process, but may affect the quality of the model thus produced. However, increasing r beyond a certain value may not yield any discernible increase in quality of model output. How the value of r affects adaptation (fine-tuning) quality will be put to the test shortly. When fine-tuning with LoRA, it is possible to target specific modules in the model architecture. The adaptation process will target these modules and apply the update matrices to them. Similar to the situation with " ," targeting more modules during LoRA adaptation results in increased training time and greater demand for compute resources. Thus, it is a common practice to only target the attention blocks of the transformer. However, recent work as shown in the QLoRA paper by Dettmers et al. suggests that targeting all linear layers results in better adaptation quality. This will be explored here as well. Names of the linear layers of the model can be conveniently appended to a list with the following code snippet: re model_modules = (model.modules) pattern = r'\((\w+)\): Linear' linear_layer_names = re.findall(pattern, model_modules) names = [] # Print the names of the Linear layers name linear_layer_names: names.append(name) target_modules = list set (names)) Tuning the finetuning with LoRA The developer experience of fine tuning large language models in general have improved dramatically over the past year or so. The latest high level abstraction from Hugging Face is the SFTTrainer class in the TRL library. 
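The LoRA configuration and layer-discovery snippets above lost their formatting during extraction; a cleaned-up sketch of what they express is shown below (the lora_alpha value is an assumed typical choice, and `model` refers to the base model loaded earlier):

```
import re
from peft import LoraConfig

# Option 1: adapt only the attention blocks
target_modules = ["q_proj", "v_proj"]
# Option 2: adapt all linear layers (found to improve adaptation quality)
# target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj", "lm_head"]

lora_config = LoraConfig(
    r=16,
    target_modules=target_modules,
    lora_alpha=32,        # assumed value; not recoverable from the garbled snippet
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Collect the names of all Linear layers from the model's repr, e.g. "(q_proj): Linear(...)"
model_modules = str(model.modules)
linear_layer_names = re.findall(r"\((\w+)\): Linear", model_modules)
target_modules = list(set(linear_layer_names))
```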
To perform QLoRA, all that is needed is the following: 1.  Load the model to GPU memory in 4-bit (bitsandbytes enables this process). 2.  Define the LoRA configuration as discussed above. 3.  Define the train and test splits of the prepped instruction following data into Hugging Face Dataset objects. 4. Define training arguments. These include the number of epochs, batch size and other training hyperparameters which will be kept constant during this exercise. 5. Pass these arguments into an instance of SFTTrainer. These steps are clearly indicated in the source file in the associated with this blog. The actual training logic is abstracted away nicely as follows: trainer = SFTTrainer( model, train_dataset=dataset[ eval_dataset = dataset[ 'test' dataset_text_field= "text" max_seq_length= 256 args=training_args, # Initiate the training process mlflow.start_run(run_name= ‘run_name_of_choice’): trainer.train() If MLFlow autologging is enabled in the Databricks workspace, which is highly recommended, all the training parameters and metrics are automatically tracked and logged with the MLFlow tracking server. This functionality is invaluable in monitoring long-running training tasks. Needless to say, the fine-tuning process is performed using a compute cluster (in this case, a single node with a single A100 GPU) created using the latest Databricks Machine runtime with GPU support. Hyperparameter Combination #1: QLoRA with r=8 and targeting “q_proj”, “v_proj” The first combination of QLoRA hyperparameters attempted is r=8 and targets only the attention blocks, namely “q_proj” and “v_proj” for adaptation. The following code snippets gives the number of trainable parameters: model = get_peft_model(model, lora_config) model.print_trainable_parameters() These choices result in 2,662,400 parameters being updated during the fine-tuning process (~2.6 million) from a total of ~3.2 billion parameters the model consists of. This is less than 0.1% of the model parameters. The entire finetuning process on a single Nvidia A100 with 80 GBs of GPU for 3 epochs only takes roughly 12 minutes. The GPU utilization metrics can be conveniently viewed at the metrics tab of the cluster configurations. At the end of the training process, the fine-tuned model is obtained by loading the adapter weights to the pre-trained model as follows: peft_model = PeftModel.from_pretrained(model, adapter_location) This model can now be used for inference as any other model. Qualitative Evaluation A couple of example prompt-response pairs are listed below Prompt (passed to the model in the Alpaca format, not shown for conciseness here): Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse Response: The Corelogic Smooth Mouse a wireless optical mouse a smooth surface. The mouse equipped DPI sensor Hz polling rate. The mouse available black white. Prompt: Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner The Hoover Lightspeed a cordless vacuum cleaner that a lithium-ion battery. The battery charged via a USB cable. The vacuum cleaner brush a turbo brush. The brush suitable cleaning carpets hard floors. The turbo brush hard floors. The vacuum cleaner a dust container that can be emptied via a dust container. The model has clearly been adapted for generating more consistent descriptions. 
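A compact sketch of those five steps is given below (illustrative only: the dataset rows and training-argument values are placeholders, and SFTTrainer's keyword arguments have changed across TRL versions):

```
import torch
from datasets import Dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

model_id = "openlm-research/open_llama_3b_v2"

# 1. Load the base model to GPU memory in 4-bit (bitsandbytes handles the quantization)
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
                                bnb_4bit_compute_dtype=torch.bfloat16)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 2. Define the LoRA configuration
lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.05, bias="none",
                         target_modules=["q_proj", "v_proj"], task_type="CAUSAL_LM")

# 3. Train/test splits of the prompt-formatted data (each row carries a "text" field)
texts = [f"### Instruction:\nDescribe product {i}\n### Response:\nA placeholder description.\n### End"
         for i in range(20)]
dataset = Dataset.from_dict({"text": texts}).train_test_split(test_size=0.1)

# 4. Training arguments (example values; the article keeps these constant across runs)
training_args = TrainingArguments(output_dir="openllama-qlora", num_train_epochs=3,
                                  per_device_train_batch_size=4, learning_rate=2e-4)

# 5. Hand everything to SFTTrainer and train
trainer = SFTTrainer(model=model, tokenizer=tokenizer, peft_config=lora_config,
                     train_dataset=dataset["train"], eval_dataset=dataset["test"],
                     dataset_text_field="text", max_seq_length=256, args=training_args)
trainer.train()
```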
However the response to the first prompt about the optical mouse is quite short and the following phrase “ The vacuum cleaner is equipped with a dust container that can be emptied via a dust container ” is logically flawed. Hyperparameter Combination #2: QLoRA with r=16 and targeting all linear layers Surely, things can be improved here. It is worth exploring increasing the rank of low rank matrices learned during adaptation to 16, i.e. double the value of r to 16 and keep all else  the same. This doubles the number of trainable parameters to 5,324,800 (~5.3 million). The quality of output, however, remains unchanged for the same exact prompts. Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse The Corelogic Smooth Mouse a high-quality optical mouse a smooth surface. The mouse Hz polling rate. The mouse Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner a cordless vacuum cleaner that a lithium-ion battery. The battery charged via a USB cable. The vacuum cleaner a motorized brush. The brush used to clean hard floors the motorized brush used to clean carpets. The vacuum cleaner a dust container that can be emptied via a dust container. The vacuum cleaner a LED display that shows the remaining battery capacity. The same lack of detail and logical flaws in detail where details are available persists. If this fine tuned model is used for product description generation in a real-world scenario, this is not acceptable output. Hyperparameter Combination #3: QLoRA with r=8 and targeting all linear layers Given that doubling r does not seemingly result in any perceivable increase in output quality, it is worth changing the other important knob. i.e. targeting all linear layers instead of just the attention blocks. Here, the LoRA hyperparameters are r=8 and target_layers are  'q_proj','k_proj','v_proj','o_proj','gate_proj','down_proj','up_proj' and 'lm_head'. This increases the number of parameters updated to 12,994,560 and increases the training time to roughly 15.5 minutes. Prompting the model with the same prompts yield the following: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse The Corelogic Smooth Mouse a wireless optical mouse a smooth surface. The mouse Hz polling rate. The mouse a USB receiver. The mouse Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner The Hoover Lightspeed cordless vacuum cleaner a high-performance device that easy to use. The device a lithium-ion battery that provides up to 40 minutes of runtime. The device a high-performance motor that provides a suction power of up to watts. The device dusting brush a crevice tool. The device a LED display that shows the remaining battery charge. Now it is possible to see a somewhat longer coherent description of the fictitious optical mouse and there are no logical flaws in the description of the vacuum cleaner. The product descriptions are not only logical, but relevant. Just as a reminder, these relatively high-quality results are obtained by fine-tuning less than a 1% of the model’s weights with a total dataset of 5000 such prompt-description pairs formatted in a consistent manner. 
Hyperparameter Combination #4: LoRA with  r=8 and targeting all linear transformer layers It is also worth exploring whether the quality of output from the model improves if the pretrained model is frozen in 8-bit instead of 4-bit. In other words, replicating the exact finetuning process using LoRA instead of QLoRA. Here, the LoRA hyperparameters are kept the same as before, in the new-found optimal configuration, i.e. r=8 and targeting all linear transformer layers during the adaptation process. The results for the two prompts used throughout the article are as given below: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse The Corelogic Smooth Mouse a high-quality optical mouse a smooth surface. The mouse Hz polling rate. The mouse three colours: black, white red. The mouse a USB receiver compatible Windows Mac. Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner The Hoover Lightspeed cordless vacuum cleaner a compact lightweight device that easy to use. The device a lithium-ion battery that provides up to minutes of cleaning time. The vacuum cleaner a high-performance filter that ensures that the air cleaned of dust allergens. The device a crevice tool that can be used to clean hard-to-reach areas. Again, there isn’t much of an improvement in the quality of the output text. Key Observations Based on the above set of trials, and further evidence detailed in the excellent publication presenting QLoRA, it can be deduced that the value of r (the rank of matrices updated during adaptation) does not improve adaptation quality beyond a certain point. The biggest improvement is observed in targeting all linear layers in the adaptation process, as opposed to just the attention blocks, as commonly documented in technical literature detailing LoRA and QLoRA. The trials executed above and other empirical evidence suggest that QLoRA does not indeed suffer from any discernible reduction in quality of text generated, compared to LoRA. Further Considerations for using LoRA adapters in deployment It's important to optimize the usage of adapters and understand the limitations of the technique. The size of the LoRA adapter obtained through finetuning is typically just a few megabytes, while the pretrained base model can be several gigabytes in memory and on disk. During inference, both the adapter and the pretrained LLM need to be loaded, so the memory requirement remains similar. Furthermore, if the weights of the pre-trained LLM and the adapter aren’t merged, there will be a slight increase in inference latency. Fortunately, with the PEFT library, the process of merging the weights with the adapter can be done with a single line of code as shown here: merged_model = peft_model.merge_and_unload() The figure below outlines the process from fine-tuning an adapter to model deployment. While the adapter pattern offers significant benefits, merging adapters is not a universal solution. One advantage of the adapter pattern is the ability to deploy a single large pretrained model with task-specific adapters. This allows for efficient inference by utilizing the pretrained model as a backbone for different tasks. However, merging weights makes this approach impossible. The decision to merge weights depends on the specific use case and acceptable inference latency. 
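For completeness, a small sketch of the two deployment options discussed above (adapter kept separate versus merged into the base weights); the adapter and output paths are placeholders:

```
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("openlm-research/open_llama_3b_v2")

# Option A: keep the adapter separate (one shared backbone can serve many task adapters)
peft_model = PeftModel.from_pretrained(base, "path/to/lora_adapter")

# Option B: merge the LoRA weights into the base model to avoid the extra inference latency
merged_model = peft_model.merge_and_unload()
merged_model.save_pretrained("path/to/merged_model")
```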
Nonetheless, LoRA/QLoRA continues to be a highly effective method for parameter-efficient fine-tuning and is widely used. Low Rank Adaptation is a powerful fine-tuning technique that can yield great results if used with the right configuration. Choosing the correct value of rank and the layers of the neural network architecture to target during adaptation could decide the quality of the output from the fine-tuned model. QLoRA results in further memory savings while preserving the adaptation quality. Even when the fine-tuning is performed, there are several important engineering considerations to ensure the adapted model is deployed in the correct manner. In summary, a concise table indicating the different combinations of LoRA parameters attempted, text quality output and number of parameters updated when fine-tuning OpenLLaMA-3b-v2 for 3 epochs on 5000 observations on a single A100 is shown below.
target_modules | Base model weights | Quality of output | Number of parameters updated (in millions)
Attention blocks | | low | 2.662 (r=8), 5.324 (r=16)
All linear layers | | high | 12.995
Try this on Databricks! Clone the GitHub repository associated with the blog into a Databricks Repo to get started. More thoroughly documented examples to finetune models on Databricks are available.
+----------
+Training LLMs Course: Discover Fine-Tuning Techniques Register today! Watch Intro Video Foundations Course Introduction NeurIPS LLM Efficiency Challange NeurIPS LLM Efficiency Challenge Q&A Hands On LLM Fine-tuning Start Your Experiments! Evaluation Introduction to LLM Evaluation Demystifying Perplexity HumanEval and LLM Performance Analysis LLM Benchmarks Deep Dive into HELM Chatbot Arena Use Case Specific Benchmarks Evaluating LLM Apps Conclusions LLM Evaluation Q&A Data Introduction to Data for Training LLMs Find Out More about MosaicML Friendly Advice How Much Data? Data Sources & Cost Q&A Which Data? Logistics of Data Loading Training & Fine-tuning Techniques Introduction to Training & Fine-tuning Techniques Hardware Requirements Memory Usage What Should You Train? Training Observability Course Assessment & Next Steps Course Assessment Resources for Further Learning About this course Free 37 lessons 4 hours of video content Learn the fundamentals of large language models Find out about the types of LLMs, model architectures, parameter sizes and scaling laws. Curate a dataset and establish an evaluation approach Learn how to find or curate a dataset for LLM training. Dive into the evaluation metrics for various LLM tasks and compare their performance across a range of benchmarks. Master training and fine-tuning techniques Learn hands-on advanced training strategies like LoRA, prefix tuning, prompt tuning, and Reinforcement Learning through Human Feedback (RLHF). Enroll for free Working knowledge of machine learning Intermediate Python experience Familiarity with DL frameworks (Pytorch/Tensorflow) Register now! All Courses © Copyright W&B AI Academy 2024
+----------
+loaded 51
diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py
new file mode 100644
index 000000000..4ae6b71a6
--- /dev/null
+++ b/examples/E2E-RAG-App/rag_main.py
@@ -0,0 +1,171 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+
+import base64
+import mimetypes
+import os
+
+import fire
+import pandas as pd
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.lib.agents.agent import Agent
+from llama_stack_client.lib.agents.event_logger import EventLogger
+from llama_stack_client.types.agent_create_params import AgentConfig
+from llama_stack_client.types.memory_insert_params import Document
+from termcolor import cprint
+from tqdm import tqdm
+
+
+def data_url_from_file(file_path: str) -> str:
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    with open(file_path, "rb") as file:
+        file_content = file.read()
+
+    base64_content = base64.b64encode(file_content).decode("utf-8")
+    mime_type, _ = mimetypes.guess_type(file_path)
+    # guess_type() returns None for unknown extensions; fall back to a generic binary type
+    if mime_type is None:
+        mime_type = "application/octet-stream"
+
+    data_url = f"data:{mime_type};base64,{base64_content}"
+
+    return data_url
+
+
+def build_index(client: LlamaStackClient, file_dir: str, bank_id: str) -> str:
+    """Build a memory bank from a directory of pdf files"""
+    # 1. create memory bank
+    providers = client.providers.list()
+    client.memory_banks.register(
+        memory_bank={
+            "identifier": bank_id,
+            "embedding_model": "all-MiniLM-L6-v2",
+            "chunk_size_in_tokens": 512,
+            "overlap_size_in_tokens": 64,
+            "provider_id": providers["memory"][0].provider_id,
+        }
+    )
+
+    # 2. load pdf,text,md from directory as raw text
+    paths = []
+    documents = []
+    for filename in os.listdir(file_dir):
+        if filename.endswith(".pdf"):
+            file_path = os.path.join(file_dir, filename)
+            paths.append(file_path)
+
+            documents.append(
+                Document(
+                    document_id=os.path.basename(file_path),
+                    content=data_url_from_file(file_path),
+                    mime_type="application/pdf",
+                )
+            )
+        elif filename.endswith(".txt") or filename.endswith(".md"):
+            file_path = os.path.join(file_dir, filename)
+            paths.append(file_path)
+            documents.append(
+                Document(
+                    document_id=os.path.basename(file_path),
+                    content=data_url_from_file(file_path),
+                    mime_type="text/plain",
+                )
+            )
+
+    # insert some documents
+    client.memory.insert(bank_id=bank_id, documents=documents)
+    print(f"Inserted {len(documents)} documents into bank: {bank_id}")
+    # TODO: add a way to check if the bank is created successfully
+    return bank_id
+
+
+async def get_response_row(agent: Agent, input_query: str) -> str:
+    # single turn, each prompt is a new session
+    session_id = agent.create_session(f"session-{input_query}")
+    response = agent.create_turn(
+        messages=[
+            {
+                "role": "user",
+                "content": input_query,
+            }
+        ],
+        session_id=session_id,
+    )
+
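+    # The turn streams back as a sequence of events; the final "turn_complete"
+    # event carries the assistant's full output message.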
+    async for chunk in response:
+        event = chunk.event
+        event_type = event.payload.event_type
+        if event_type == "turn_complete":
+            return event.payload.turn.output_message.content
+
+
+async def run_main(host: str, port: int, docs_dir: str):
+    client = LlamaStackClient(base_url=f"http://{host}:{port}")
+
+    bank_id = "rag_agent_docs"
+    build_index(client, docs_dir, bank_id)
+    print(f"Created bank: {bank_id}")
+    models_response = client.models.list()
+    print(f"Found {len(models_response)} models", models_response)
+    model_name = None
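+    # Pick the first model whose identifier ends with "Instruct" to use for chat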
+    for model in models_response:
+        if not model_name and model.identifier.endswith("Instruct"):
+            model_name = model.llama_model
+            print(f"Use model: {model_name}")
+    assert model_name is not None, "No model found"
+    agent_config = AgentConfig(
+        model=model_name,
+        instructions="You are a helpful assistant",
+        sampling_params={
+            "strategy": "greedy",
+            "temperature": 1.0,
+            "top_p": 0.9,
+        },
+        tools=[
+            # TODO: save the bank to local file and load it from local file
+            {
+                "type": "memory",
+                "memory_bank_configs": [{"bank_id": bank_id, "type": "vector"}],
+                "query_generator_config": {"type": "default", "sep": " "},
+                "max_tokens_in_context": 4096,
+                "max_chunks": 50,
+            }
+        ],
+        tool_choice="auto",
+        tool_prompt_format="json",
+        input_shields=[],
+        output_shields=[],
+        enable_session_persistence=False,
+    )
+
+    agent = Agent(client, agent_config)
+
+    # load dataset and generate responses for the RAG agent
+    user_prompts = ["What methods are best for finetuning llama models?"]
+
+    llamastack_generated_responses = []
+
+    for prompt in tqdm(user_prompts):
+        print(f"Generating response for: {prompt}")
+        try:
+            generated_response = await get_response_row(agent, prompt)
+            llamastack_generated_responses.append(generated_response)
+        except Exception as e:
+            print(f"Error generating response for {prompt}: {e}")
+            llamastack_generated_responses.append(None)
+    # TODO: make this multi-turn instead of single turn
+    for response in llamastack_generated_responses:
+        print(response)
+
+
+def main(host: str, port: int, docs_dir: str):
+    asyncio.run(run_main(host, port, docs_dir))
+
+
+if __name__ == "__main__":
+    fire.Fire(main)

From 0032d41238319378e91f765a340b9122d7322a21 Mon Sep 17 00:00:00 2001
From: Kai Wu 
Date: Wed, 13 Nov 2024 09:54:26 -0800
Subject: [PATCH 03/99] multi-turn support

---
 .../E2E-RAG-App/example_data/llama_3.1.md     | 977 ++++++++++++++++++
 .../E2E-RAG-App/example_data/llama_3.2.md     | 216 ++++
 .../example_data/llama_3.2_vision.md          | 160 +++
 .../example_data/llama_website0613.txt        | 103 --
 examples/E2E-RAG-App/rag_main.py              |  39 +-
 5 files changed, 1376 insertions(+), 119 deletions(-)
 create mode 100644 examples/E2E-RAG-App/example_data/llama_3.1.md
 create mode 100644 examples/E2E-RAG-App/example_data/llama_3.2.md
 create mode 100644 examples/E2E-RAG-App/example_data/llama_3.2_vision.md
 delete mode 100644 examples/E2E-RAG-App/example_data/llama_website0613.txt

diff --git a/examples/E2E-RAG-App/example_data/llama_3.1.md b/examples/E2E-RAG-App/example_data/llama_3.1.md
new file mode 100644
index 000000000..97c39fe34
--- /dev/null
+++ b/examples/E2E-RAG-App/example_data/llama_3.1.md
@@ -0,0 +1,977 @@
+## Model Information
+
+The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
+
+**Model developer:** Meta
+
+**Model Architecture:** Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
+
+
+
+| | Training Data | Params | Input modalities | Output modalities | Context length | GQA | Token count | Knowledge cutoff |
+| :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- |
+| Llama 3.1 (text only) | A new mix of publicly available online data. | 8B | Multilingual Text | Multilingual Text and code | 128k | Yes | 15T+ | December 2023 |
+| | | 70B | Multilingual Text | Multilingual Text and code | 128k | Yes | | |
+| | | 405B | Multilingual Text | Multilingual Text and code | 128k | Yes | | |
+
+**Supported languages:** English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.
+
+**Llama 3.1 family of models**. Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability.
+
+**Model Release Date:** July 23, 2024.
+
+**Status:** This is a static model trained on an offline dataset. Future versions of the tuned models will be released as we improve model safety with community feedback.
+
+**License:** A custom commercial license, the Llama 3.1 Community License, is available at: [https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE)
+
+**Feedback:** Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.1 in applications, please go [here](https://github.com/meta-llama/llama-recipes).
+
+
+## Intended Use
+
+**Intended Use Cases** Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 model collection also supports the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. The Llama 3.1 Community License allows for these use cases.
+
+**Out-of-scope** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.1 Community License. Use in languages beyond those explicitly referenced as supported in this model card.
+
+**Note:** Llama 3.1 has been trained on a broader collection of languages than the 8 supported languages. Developers may fine-tune Llama 3.1 models for languages beyond the 8 supported languages provided they comply with the Llama 3.1 Community License and the Acceptable Use Policy and in such cases are responsible for ensuring that any uses of Llama 3.1 in additional languages is done in a safe and responsible manner.
+
+
+## Hardware and Software
+
+**Training Factors** We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, annotation, and evaluation were also performed on production infrastructure.
+
+**Training Energy Use** Training utilized a cumulative of **39.3**M GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency.
+
+**Training Greenhouse Gas Emissions** Estimated total location-based greenhouse gas emissions were **11,390** tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy, therefore the total market-based greenhouse gas emissions for training were 0 tons CO2eq.
+
+| | Training Time (GPU hours) | Training Power Consumption (W) | Training Location-Based Greenhouse Gas Emissions (tons CO2eq) | Training Market-Based Greenhouse Gas Emissions (tons CO2eq) |
+| :---- | :---- | :---- | :---- | :---- |
+| Llama 3.1 8B | 1.46M | 700 | 420 | 0 |
+| Llama 3.1 70B | 7.0M | 700 | 2,040 | 0 |
+| Llama 3.1 405B | 30.84M | 700 | 8,930 | 0 |
+| Total | 39.3M | | 11,390 | 0 |
+
+The methodology used to determine training energy use and greenhouse gas emissions can be found [here](https://arxiv.org/pdf/2204.05149). Since Meta is openly releasing these models, the training energy use and greenhouse gas emissions will not be incurred by others.
+
+
+## Training Data
+
+**Overview:** Llama 3.1 was pretrained on ~15 trillion tokens of data from publicly available sources. The fine-tuning data includes publicly available instruction datasets, as well as over 25M synthetically generated examples.
+
+**Data Freshness:** The pretraining data has a cutoff of December 2023.
+
+
+## Benchmark scores
+
+In this section, we report the results for Llama 3.1 models on standard automatic benchmarks. For all the evaluations, we use our internal evaluations library. Details of our evals can be found [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/eval_details.md). We are also releasing the raw data generated as part of our evals which can be found [here](https://huggingface.co/meta-llama) in the dataset sections.
+
+### Base pretrained models
+
+| Category | Benchmark | # Shots | Metric | Llama 3 8B | Llama 3.1 8B | Llama 3 70B | Llama 3.1 70B | Llama 3.1 405B |
+| :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- |
+| General | MMLU | 5 | macro_avg/acc_char | 66.7 | 66.7 | 79.5 | 79.3 | 85.2 |
+| | MMLU-Pro (CoT) | 5 | macro_avg/acc_char | 36.2 | 37.1 | 55.0 | 53.8 | 61.6 |
+| | AGIEval English | 3-5 | average/acc_char | 47.1 | 47.8 | 63.0 | 64.6 | 71.6 |
+| | CommonSenseQA | 7 | acc_char | 72.6 | 75.0 | 83.8 | 84.1 | 85.8 |
+| | Winogrande | 5 | acc_char | - | 60.5 | - | 83.3 | 86.7 |
+| | BIG-Bench Hard (CoT) | 3 | average/em | 61.1 | 64.2 | 81.3 | 81.6 | 85.9 |
+| | ARC-Challenge | 25 | acc_char | 79.4 | 79.7 | 93.1 | 92.9 | 96.1 |
+| Knowledge reasoning | TriviaQA-Wiki | 5 | em | 78.5 | 77.6 | 89.7 | 89.8 | 91.8 |
+| Reading comprehension | SQuAD | 1 | em | 76.4 | 77.0 | 85.6 | 81.8 | 89.3 |
+| | QuAC (F1) | 1 | f1 | 44.4 | 44.9 | 51.1 | 51.1 | 53.6 |
+| | BoolQ | 0 | acc_char | 75.7 | 75.0 | 79.0 | 79.4 | 80.0 |
+| | DROP (F1) | 3 | f1 | 58.4 | 59.5 | 79.7 | 79.6 | 84.8 |
+ + + +### Instruction tuned models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Category | Benchmark | # Shots | Metric | Llama 3 8B Instruct | Llama 3.1 8B Instruct | Llama 3 70B Instruct | Llama 3.1 70B Instruct | Llama 3.1 405B Instruct |
| :---- | :---- | :---: | :---- | :---: | :---: | :---: | :---: | :---: |
| General | MMLU | 5 | macro_avg/acc | 68.5 | 69.4 | 82.0 | 83.6 | 87.3 |
| | MMLU (CoT) | 0 | macro_avg/acc | 65.3 | 73.0 | 80.9 | 86.0 | 88.6 |
| | MMLU-Pro (CoT) | 5 | macro_avg/acc | 45.5 | 48.3 | 63.4 | 66.4 | 73.3 |
| | IFEval | | | 76.8 | 80.4 | 82.9 | 87.5 | 88.6 |
| Reasoning | ARC-C | 0 | acc | 82.4 | 83.4 | 94.4 | 94.8 | 96.9 |
| | GPQA | 0 | em | 34.6 | 30.4 | 39.5 | 46.7 | 50.7 |
| Code | HumanEval | 0 | pass@1 | 60.4 | 72.6 | 81.7 | 80.5 | 89.0 |
| | MBPP ++ base version | 0 | pass@1 | 70.6 | 72.8 | 82.5 | 86.0 | 88.6 |
| | Multipl-E HumanEval | 0 | pass@1 | - | 50.8 | - | 65.5 | 75.2 |
| | Multipl-E MBPP | 0 | pass@1 | - | 52.4 | - | 62.0 | 65.7 |
| Math | GSM-8K (CoT) | 8 | em_maj1@1 | 80.6 | 84.5 | 93.0 | 95.1 | 96.8 |
| | MATH (CoT) | 0 | final_em | 29.1 | 51.9 | 51.0 | 68.0 | 73.8 |
| Tool Use | API-Bank | 0 | acc | 48.3 | 82.6 | 85.1 | 90.0 | 92.0 |
| | BFCL | 0 | acc | 60.3 | 76.1 | 83.0 | 84.8 | 88.5 |
| | Gorilla Benchmark API Bench | 0 | acc | 1.7 | 8.2 | 14.7 | 29.7 | 35.3 |
| | Nexus (0-shot) | 0 | macro_avg/acc | 18.1 | 38.5 | 47.8 | 56.7 | 58.7 |
| Multilingual | Multilingual MGSM (CoT) | 0 | em | - | 68.9 | - | 86.9 | 91.6 |

#### Multilingual benchmarks
| Category | Benchmark | Language | Llama 3.1 8B Instruct | Llama 3.1 70B Instruct | Llama 3.1 405B Instruct |
| :---- | :---- | :---- | :---: | :---: | :---: |
| General | MMLU (5-shot, macro_avg/acc) | Portuguese | 62.12 | 80.13 | 84.95 |
| | | Spanish | 62.45 | 80.05 | 85.08 |
| | | Italian | 61.63 | 80.4 | 85.04 |
| | | German | 60.59 | 79.27 | 84.36 |
| | | French | 62.34 | 79.82 | 84.66 |
| | | Hindi | 50.88 | 74.52 | 80.31 |
| | | Thai | 50.32 | 72.95 | 78.21 |
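A note on the metric names used throughout these tables: `macro_avg/acc` averages the per-category (for MMLU, per-subject) accuracies with equal weight, rather than pooling all questions into a single accuracy. A minimal sketch, using made-up per-subject counts for illustration:

```python
from statistics import mean

def micro_accuracy(results: dict) -> float:
    """Pool every question together: total correct / total questions."""
    correct = sum(r["correct"] for r in results.values())
    total = sum(r["total"] for r in results.values())
    return correct / total

def macro_accuracy(results: dict) -> float:
    """Average per-subject accuracies with equal weight (the macro_avg/acc metric)."""
    return mean(r["correct"] / r["total"] for r in results.values())

# Made-up per-subject counts, for illustration only.
results = {
    "abstract_algebra": {"correct": 40, "total": 100},
    "world_history": {"correct": 180, "total": 200},
}
print(f"micro={micro_accuracy(results):.3f}, macro={macro_accuracy(results):.3f}")
# micro=0.733, macro=0.650 -- larger subjects no longer dominate the macro average.
```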
## Responsibility & Safety

As part of our responsible release approach, we followed a three-pronged strategy to managing trust & safety risks:

* Enable developers to deploy helpful, safe and flexible experiences for their target audience and for the use cases supported by Llama.
* Protect developers against adversarial users aiming to exploit Llama capabilities to potentially cause harm.
* Provide protections for the community to help prevent the misuse of our models.

### Responsible deployment

Llama is a foundational technology designed to be used in a variety of use cases; examples of how Meta's Llama models have been responsibly deployed can be found in our [Community Stories webpage](https://llama.meta.com/community-stories/). Our approach is to build the most helpful models, enabling the world to benefit from the technology's power, by aligning our model safety for generic use cases and addressing a standard set of harms. Developers are then in the driver's seat to tailor safety for their use case, defining their own policies and deploying the models with the necessary safeguards in their Llama systems. Llama 3.1 was developed following the best practices outlined in our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/); refer to it to learn more.

#### Llama 3.1 instruct

Our main objectives for conducting safety fine-tuning are to provide the research community with a valuable resource for studying the robustness of safety fine-tuning, as well as to offer developers a readily available, safe, and powerful model for various applications, reducing the developer workload required to deploy safe AI systems. For more details on the safety mitigations implemented, please read the Llama 3 paper.

**Fine-tuning data**

We employ a multi-faceted approach to data collection, combining human-generated data from our vendors with synthetic data to mitigate potential safety risks. We've developed many large language model (LLM)-based classifiers that enable us to thoughtfully select high-quality prompts and responses, enhancing data quality control.

**Refusals and Tone**

Building on the work we started with Llama 3, we put a great emphasis on model refusals to benign prompts as well as refusal tone. We included both borderline and adversarial prompts in our safety data strategy, and modified our safety data responses to follow tone guidelines.

#### Llama 3.1 systems

**Large language models, including Llama 3.1, are not designed to be deployed in isolation but instead should be deployed as part of an overall AI system with additional safety guardrails as required.** Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieving the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools.

As part of our responsible release approach, we provide the community with [safeguards](https://llama.meta.com/trust-and-safety/) that developers should deploy with Llama models or other LLMs, including Llama Guard 3, Prompt Guard and Code Shield. All our [reference implementation](https://github.com/meta-llama/llama-agentic-system) demos contain these safeguards by default, so developers can benefit from system-level safety out-of-the-box.
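The safeguard pattern described above (checking both the input prompt and the generated response before anything is returned to the user) can be summarized in a few lines of Python. This is only an illustrative sketch: `generate` and `is_safe` are hypothetical stand-ins for a Llama chat call and a safety classifier such as Llama Guard 3, not APIs from any specific library.

```python
def guarded_chat(user_message: str, generate, is_safe) -> str:
    """Filter the input prompt, generate a reply, then filter the output before returning it."""
    if not is_safe("user", user_message):
        return "Sorry, I can't help with that request."
    reply = generate(user_message)
    if not is_safe("assistant", reply):
        return "Sorry, I can't share the response that was generated."
    return reply

# Stand-in components so the sketch runs on its own; a real system would call a Llama
# chat endpoint for `generate` and a classifier such as Llama Guard 3 for `is_safe`.
demo_generate = lambda prompt: f"(model reply to: {prompt})"
demo_is_safe = lambda role, content: "unsafe" not in content.lower()

print(guarded_chat("What is the capital of France?", demo_generate, demo_is_safe))
```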
+ + +#### New capabilities + +Note that this release introduces new capabilities, including a longer context window, multilingual inputs and outputs and possible integrations by developers with third party tools. Building with these new capabilities requires specific considerations in addition to the best practices that generally apply across all Generative AI use cases. + +**Tool-use**: Just like in standard software development, developers are responsible for the integration of the LLM with the tools and services of their choice. They should define a clear policy for their use case and assess the integrity of the third party services they use to be aware of the safety and security limitations when using this capability. Refer to the Responsible Use Guide for best practices on the safe deployment of the third party safeguards. + +**Multilinguality**: Llama 3.1 supports 7 languages in addition to English: French, German, Hindi, Italian, Portuguese, Spanish, and Thai. Llama may be able to output text in other languages than those that meet performance thresholds for safety and helpfulness. We strongly discourage developers from using this model to converse in non-supported languages without implementing finetuning and system controls in alignment with their policies and the best practices shared in the Responsible Use Guide. + + +### Evaluations + +We evaluated Llama models for common use cases as well as specific capabilities. Common use cases evaluations measure safety risks of systems for most commonly built applications including chat bot, coding assistant, tool calls. We built dedicated, adversarial evaluation datasets and evaluated systems composed of Llama models and Llama Guard 3 to filter input prompt and output response. It is important to evaluate applications in context, and we recommend building dedicated evaluation dataset for your use case. Prompt Guard and Code Shield are also available if relevant to the application. + +Capability evaluations measure vulnerabilities of Llama models inherent to specific capabilities, for which were crafted dedicated benchmarks including long context, multilingual, tools calls, coding or memorization. + +**Red teaming** + +For both scenarios, we conducted recurring red teaming exercises with the goal of discovering risks via adversarial prompting and we used the learnings to improve our benchmarks and safety tuning datasets. + +We partnered early with subject-matter experts in critical risk areas to understand the nature of these real-world harms and how such models may lead to unintended harm for society. Based on these conversations, we derived a set of adversarial goals for the red team to attempt to achieve, such as extracting harmful information or reprogramming the model to act in a potentially harmful capacity. The red team consisted of experts in cybersecurity, adversarial machine learning, responsible AI, and integrity in addition to multilingual content specialists with background in integrity issues in specific geographic markets. . + + +### Critical and other risks + +We specifically focused our efforts on mitigating the following critical risk areas: + +**1. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive materials) helpfulness** + +To assess risks related to proliferation of chemical and biological weapons, we performed uplift testing designed to assess whether use of Llama 3.1 models could meaningfully increase the capabilities of malicious actors to plan or carry out attacks using these types of weapons. 
+ + +**2. Child Safety** + +Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development. For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors including the additional languages Llama 3 is trained on. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. + +**3. Cyber attack enablement** + +Our cyber attack uplift study investigated whether LLMs can enhance human capabilities in hacking tasks, both in terms of skill level and speed. + +Our attack automation study focused on evaluating the capabilities of LLMs when used as autonomous agents in cyber offensive operations, specifically in the context of ransomware attacks. This evaluation was distinct from previous studies that considered LLMs as interactive assistants. The primary objective was to assess whether these models could effectively function as independent agents in executing complex cyber-attacks without human intervention. + +Our study of Llama 3.1 405B’s social engineering uplift for cyber attackers was conducted to assess the effectiveness of AI models in aiding cyber threat actors in spear phishing campaigns. Please read our Llama 3.1 Cyber security whitepaper to learn more. + + +### Community + +Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership on AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [Github repository](https://github.com/meta-llama/PurpleLlama). + +We also set up the [Llama Impact Grants](https://llama.meta.com/llama-impact-grants/) program to identify and support the most compelling applications of Meta’s Llama model for societal benefit across three categories: education, climate and open innovation. The 20 finalists from the hundreds of applications can be found [here](https://llama.meta.com/llama-impact-grants/#finalists). + +Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. + + +## Ethical Considerations and Limitations + +The core values of Llama 3.1 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. 
Llama 3.1 addresses users and their needs as they are, without insertion unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. + +But Llama 3.1 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3.1’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 3.1 models, developers should perform safety testing and tuning tailored to their specific applications of the model. Please refer to available resources including our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide), [Trust and Safety](https://llama.meta.com/trust-and-safety/) solutions, and other [resources](https://llama.meta.com/docs/get-started/) to learn more about responsible development. diff --git a/examples/E2E-RAG-App/example_data/llama_3.2.md b/examples/E2E-RAG-App/example_data/llama_3.2.md new file mode 100644 index 000000000..4d3acdd3b --- /dev/null +++ b/examples/E2E-RAG-App/example_data/llama_3.2.md @@ -0,0 +1,216 @@ +## Model Information + +The Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks. + +**Model Developer:** Meta + +**Model Architecture:** Llama 3.2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. + +| | Training Data | Params | Input modalities | Output modalities | Context Length | GQA | Shared Embeddings | Token count | Knowledge cutoff | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| Llama 3.2 (text only) | A new mix of publicly available online data. | 1B (1.23B) | Multilingual Text | Multilingual Text and code | 128k | Yes | Yes | Up to 9T tokens | December 2023 | +| | | 3B (3.21B) | Multilingual Text | Multilingual Text and code | | | | | | +| Llama 3.2 Quantized (text only) | A new mix of publicly available online data. | 1B (1.23B) | Multilingual Text | Multilingual Text and code | 8k | Yes | Yes | Up to 9T tokens | December 2023 | +| | | 3B (3.21B) | Multilingual Text | Multilingual Text and code | | | | | | + +**Supported Languages:** English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai are officially supported. Llama 3.2 has been trained on a broader collection of languages than these 8 supported languages. Developers may fine-tune Llama 3.2 models for languages beyond these supported languages, provided they comply with the Llama 3.2 Community License and the Acceptable Use Policy. 
Developers are always expected to ensure that their deployments, including those that involve additional languages, are completed safely and responsibly. + +**Llama 3.2 Model Family:** Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability. + +**Model Release Date:** Oct 24, 2024 + +**Status:** This is a static model trained on an offline dataset. Future versions may be released that improve model capabilities and safety. + +**License:** Use of Llama 3.2 is governed by the [Llama 3.2 Community License](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) (a custom, commercial license agreement). + +**Feedback:** Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.2 in applications, please go [here](https://github.com/meta-llama/llama-recipes). + +## Intended Use + +**Intended Use Cases:** Llama 3.2 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat and agentic applications like knowledge retrieval and summarization, mobile AI powered writing assistants and query and prompt rewriting. Pretrained models can be adapted for a variety of additional natural language generation tasks. Similarly, quantized models can be adapted for a variety of on-device use-cases with limited compute resources. + +**Out of Scope:** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.2 Community License. Use in languages beyond those explicitly referenced as supported in this model card. + +## Hardware and Software + +**Training Factors:** We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, quantization, annotation, and evaluation were also performed on production infrastructure. + +**Training Energy Use:** Training utilized a cumulative of **916k** GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency. + +**Training Greenhouse Gas Emissions:** Estimated total location-based greenhouse gas emissions were **240** tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy; therefore, the total market-based greenhouse gas emissions for training were 0 tons CO2eq. 
+ +| | Training Time (GPU hours) | Logit Generation Time (GPU Hours) | Training Power Consumption (W) | Training Location-Based Greenhouse Gas Emissions (tons CO2eq) | Training Market-Based Greenhouse Gas Emissions (tons CO2eq) | +| :---- | :---: | ----- | :---: | :---: | :---: | +| Llama 3.2 1B | 370k | \- | 700 | 107 | 0 | +| Llama 3.2 3B | 460k | \- | 700 | 133 | 0 | +| Llama 3.2 1B SpinQuant | 1.7 | 0 | 700 | *Negligible*\*\* | 0 | +| Llama 3.2 3B SpinQuant | 2.4 | 0 | 700 | *Negligible*\*\* | 0 | +| Llama 3.2 1B QLoRA | 1.3k | 0 | 700 | 0.381 | 0 | +| Llama 3.2 3B QLoRA | 1.6k | 0 | 700 | 0.461 | 0 | +| Total | 833k | 86k | | 240 | 0 | + +\*\* The location-based CO2e emissions of Llama 3.2 1B SpinQuant and Llama 3.2 3B SpinQuant are less than 0.001 metric tonnes each. This is due to the minimal training GPU hours that are required. + +The methodology used to determine training energy use and greenhouse gas emissions can be found [here](https://arxiv.org/pdf/2204.05149). Since Meta is openly releasing these models, the training energy use and greenhouse gas emissions will not be incurred by others. + +## Training Data + +**Overview:** Llama 3.2 was pretrained on up to 9 trillion tokens of data from publicly available sources. For the 1B and 3B Llama 3.2 models, we incorporated logits from the Llama 3.1 8B and 70B models into the pretraining stage of the model development, where outputs (logits) from these larger models were used as token-level targets. Knowledge distillation was used after pruning to recover performance. In post-training we used a similar recipe as Llama 3.1 and produced final chat models by doing several rounds of alignment on top of the pre-trained model. Each round involved Supervised Fine-Tuning (SFT), Rejection Sampling (RS), and Direct Preference Optimization (DPO). + +**Data Freshness:** The pretraining data has a cutoff of December 2023\. + +## Quantization + +### Quantization Scheme + +We designed the current quantization scheme with the [PyTorch’s ExecuTorch](https://github.com/pytorch/executorch) inference framework and Arm CPU backend in mind, taking into account metrics including model quality, prefill/decoding speed, and memory footprint. Our quantization scheme involves three parts: +- All linear layers in all transformer blocks are quantized to a 4-bit groupwise scheme (with a group size of 32) for weights and 8-bit per-token dynamic quantization for activations. +- The classification layer is quantized to 8-bit per-channel for weight and 8-bit per token dynamic quantization for activation. +- Similar to classification layer, an 8-bit per channel quantization is used for embedding layer. + + +### Quantization-Aware Training and LoRA + +The quantization-aware training (QAT) with low-rank adaptation (LoRA) models went through only post-training stages, using the same data as the full precision models. To initialize QAT, we utilize BF16 Llama 3.2 model checkpoints obtained after supervised fine-tuning (SFT) and perform an additional full round of SFT training with QAT. We used the [torchao](https://github.com/pytorch/ao/blob/main/torchao/quantization/qat/README.md) API for this. We then freeze the backbone of the QAT model and perform another round of SFT with LoRA adaptors applied to all layers within the transformer block. Meanwhile, the LoRA adaptors' weights and activations are maintained in BF16. Because our approach is similar to QLoRA of Dettmers et al., (2023) (i.e., quantization followed by LoRA adapters), we refer this method as QLoRA. 
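Before the final DPO stage described next, it may help to see what the scheme above (4-bit group-wise weight quantization with group size 32, plus 8-bit per-token dynamic activation quantization) looks like numerically. The following is a minimal, pure-PyTorch fake-quantization sketch, not the ExecuTorch/torchao implementation; the symmetric scaling, helper names, and toy shapes are assumptions for illustration.

```python
import torch

def quantize_weights_int4_groupwise(w: torch.Tensor, group_size: int = 32):
    """Symmetric 4-bit group-wise weight quantization: one scale per group of 32 values."""
    out_features, in_features = w.shape
    wg = w.reshape(out_features, in_features // group_size, group_size)
    scale = wg.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) / 7.0  # int4 range [-8, 7]
    q = torch.clamp(torch.round(wg / scale), -8, 7).to(torch.int8)
    return q, scale

def quantize_activations_int8_per_token(x: torch.Tensor):
    """8-bit per-token dynamic quantization: one scale per token, computed at runtime."""
    scale = x.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) / 127.0
    q = torch.clamp(torch.round(x / scale), -128, 127).to(torch.int8)
    return q, scale

def fake_quant_linear(x: torch.Tensor, w_q: torch.Tensor, w_scale: torch.Tensor) -> torch.Tensor:
    """Reference (fake-quant) linear layer: dequantize and multiply in float for clarity."""
    x_q, x_scale = quantize_activations_int8_per_token(x)
    x_dq = x_q.float() * x_scale
    w_dq = (w_q.float() * w_scale).reshape(w_q.shape[0], -1)
    return x_dq @ w_dq.t()

# Tiny usage example with shapes divisible by the group size.
x = torch.randn(4, 64)        # 4 tokens, hidden size 64
w = torch.randn(128, 64)      # linear layer weight (out=128, in=64)
w_q, w_scale = quantize_weights_int4_groupwise(w)
print(fake_quant_linear(x, w_q, w_scale).shape)  # torch.Size([4, 128])
```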
Finally, we fine-tune the resulting model (both backbone and LoRA adaptors) using direct preference optimization (DPO). + +### SpinQuant + +[SpinQuant](https://arxiv.org/abs/2405.16406) was applied, together with generative post-training quantization (GPTQ). For the SpinQuant rotation matrix fine-tuning, we optimized for 100 iterations, using 800 samples with sequence-length 2048 from the WikiText 2 dataset. For GPTQ, we used 128 samples from the same dataset with the same sequence-length. + +## Benchmarks \- English Text + +In this section, we report the results for Llama 3.2 models on standard automatic benchmarks. For all these evaluations, we used our internal evaluations library. + +### Base Pretrained Models + +| Category | Benchmark | \# Shots | Metric | Llama 3.2 1B | Llama 3.2 3B | Llama 3.1 8B | +| ----- | ----- | :---: | :---: | :---: | :---: | :---: | +| General | MMLU | 5 | macro\_avg/acc\_char | 32.2 | 58 | 66.7 | +| | AGIEval English | 3-5 | average/acc\_char | 23.3 | 39.2 | 47.8 | +| | ARC-Challenge | 25 | acc\_char | 32.8 | 69.1 | 79.7 | +| Reading comprehension | SQuAD | 1 | em | 49.2 | 67.7 | 77 | +| | QuAC (F1) | 1 | f1 | 37.9 | 42.9 | 44.9 | +| | DROP (F1) | 3 | f1 | 28.0 | 45.2 | 59.5 | +| Long Context | Needle in Haystack | 0 | em | 96.8 | 1 | 1 | + +### Instruction Tuned Models + +| Capability | | Benchmark | \# Shots | Metric | Llama 3.2 1B bf16 | Llama 3.2 1B Vanilla PTQ\*\* | Llama 3.2 1B Spin Quant | Llama 3.2 1B QLoRA | Llama 3.2 3B bf16 | Llama 3.2 3B Vanilla PTQ\*\* | Llama 3.2 3B Spin Quant | Llama 3.2 3B QLoRA | Llama 3.1 8B | +| :---: | ----- | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | +| General | | MMLU | 5 | macro\_avg/acc | 49.3 | 43.3 | 47.3 | 49.0 | 63.4 | 60.5 | 62 | 62.4 | 69.4 | +| Re-writing | | Open-rewrite eval | 0 | micro\_avg/rougeL | 41.6 | 39.2 | 40.9 | 41.2 | 40.1 | 40.3 | 40.8 | 40.7 | 40.9 | +| Summarization | | TLDR9+ (test) | 1 | rougeL | 16.8 | 14.9 | 16.7 | 16.8 | 19.0 | 19.1 | 19.2 | 19.1 | 17.2 | +| Instruction following | | IFEval | 0 | Avg(Prompt/Instruction acc Loose/Strict) | 59.5 | 51.5 | 58.4 | 55.6 | 77.4 | 73.9 | 73.5 | 75.9 | 80.4 | +| Math | | GSM8K (CoT) | 8 | em\_maj1@1 | 44.4 | 33.1 | 40.6 | 46.5 | 77.7 | 72.9 | 75.7 | 77.9 | 84.5 | +| | | MATH (CoT) | 0 | final\_em | 30.6 | 20.5 | 25.3 | 31.0 | 48.0 | 44.2 | 45.3 | 49.2 | 51.9 | +| Reasoning | | ARC-C | 0 | acc | 59.4 | 54.3 | 57 | 60.7 | 78.6 | 75.6 | 77.6 | 77.6 | 83.4 | +| | | GPQA | 0 | acc | 27.2 | 25.9 | 26.3 | 25.9 | 32.8 | 32.8 | 31.7 | 33.9 | 32.8 | +| | | Hellaswag | 0 | acc | 41.2 | 38.1 | 41.3 | 41.5 | 69.8 | 66.3 | 68 | 66.3 | 78.7 | +| Tool Use | | BFCL V2 | 0 | acc | 25.7 | 14.3 | 15.9 | 23.7 | 67.0 | 53.4 | 60.1 | 63.5 | 67.1 | +| | | Nexus | 0 | macro\_avg/acc | 13.5 | 5.2 | 9.6 | 12.5 | 34.3 | 32.4 | 31.5 | 30.1 | 38.5 | +| Long Context | | InfiniteBench/En.QA | 0 | longbook\_qa/f1 | 20.3 | N/A | N/A | N/A | 19.8 | N/A | N/A | N/A | 27.3 | +| | | InfiniteBench/En.MC | 0 | longbook\_choice/acc | 38.0 | N/A | N/A | N/A | 63.3 | N/A | N/A | N/A | 72.2 | +| | | NIH/Multi-needle | 0 | recall | 75.0 | N/A | N/A | N/A | 84.7 | N/A | N/A | N/A | 98.8 | +| Multilingual | | MGSM (CoT) | 0 | em | 24.5 | 13.7 | 18.2 | 24.4 | 58.2 | 48.9 | 54.3 | 56.8 | 68.9 | + +\*\*for comparison purposes only. Model not released. 
+ +### Multilingual Benchmarks + +| Category | Benchmark | Language | Llama 3.2 1B | Llama 3.2 1B Vanilla PTQ\*\* | Llama 3.2 1B Spin Quant | Llama 3.2 1B QLoRA | Llama 3.2 3B | Llama 3.2 3B Vanilla PTQ\*\* | Llama 3.2 3B Spin Quant | Llama 3.2 3B QLoRA | Llama 3.1 8B | +| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | +| General | MMLU (5-shot, macro_avg/acc) | Portuguese | 39.8 | 34.9 | 38.9 | 40.2 | 54.5 | 50.9 | 53.3 | 53.4 | 62.1 | +| | | Spanish | 41.5 | 36.0 | 39.8 | 41.8 | 55.1 | 51.9 | 53.6 | 53.6 | 62.5 | +| | | Italian | 39.8 | 34.9 | 38.1 | 40.6 | 53.8 | 49.9 | 52.1 | 51.7 | 61.6 | +| | | German | 39.2 | 34.9 | 37.5 | 39.6 | 53.3 | 50.0 | 52.2 | 51.3 | 60.6 | +| | | French | 40.5 | 34.8 | 39.2 | 40.8 | 54.6 | 51.2 | 53.3 | 53.3 | 62.3 | +| | | Hindi | 33.5 | 30.0 | 32.1 | 34.0 | 43.3 | 40.4 | 42.0 | 42.1 | 50.9 | +| | | Thai | 34.7 | 31.2 | 32.4 | 34.9 | 44.5 | 41.3 | 44.0 | 42.2 | 50.3 | + +\*\*for comparison purposes only. Model not released. + +## Inference time + +In the below table, we compare the performance metrics of different quantization methods (SpinQuant and QAT \+ LoRA) with the BF16 baseline. The evaluation was done using the [ExecuTorch](https://github.com/pytorch/executorch) framework as the inference engine, with the ARM CPU as a backend using Android OnePlus 12 device. + +| Category | Decode (tokens/sec) | Time-to-first-token (sec) | Prefill (tokens/sec) | Model size (PTE file size in MB) | Memory size (RSS in MB) | +| :---- | ----- | ----- | ----- | ----- | ----- | +| 1B BF16 (baseline) | 19.2 | 1.0 | 60.3 | 2358 | 3,185 | +| 1B SpinQuant | 50.2 (2.6x) | 0.3 (-76.9%) | 260.5 (4.3x) | 1083 (-54.1%) | 1,921 (-39.7%) | +| 1B QLoRA | 45.8 (2.4x) | 0.3 (-76.0%) | 252.0 (4.2x) | 1127 (-52.2%) | 2,255 (-29.2%) | +| 3B BF16 (baseline) | 7.6 | 3.0 | 21.2 | 6129 | 7,419 | +| 3B SpinQuant | 19.7 (2.6x) | 0.7 (-76.4%) | 89.7 (4.2x) | 2435 (-60.3%) | 3,726 (-49.8%) | +| 3B QLoRA | 18.5 (2.4x) | 0.7 (-76.1%) | 88.8 (4.2x) | 2529 (-58.7%) | 4,060 (-45.3%) | + +(\*) The performance measurement is done using an adb binary-based approach. +(\*\*) It is measured on an Android OnePlus 12 device. +(\*\*\*) Time-to-first-token (TTFT) is measured with prompt length=64 + +*Footnote:* + +- *Decode (tokens/second) is for how quickly it keeps generating. Higher is better.* +- *Time-to-first-token (TTFT for shorthand) is for how fast it generates the first token for a given prompt. Lower is better.* +- *Prefill is the inverse of TTFT (aka 1/TTFT) in tokens/second. Higher is better* +- *Model size \- how big is the model, measured by, PTE file, a binary file format for ExecuTorch* +- *RSS size \- Memory usage in resident set size (RSS)* + +## Responsibility & Safety + +As part of our Responsible release approach, we followed a three-pronged strategy to managing trust & safety risks: + +1. Enable developers to deploy helpful, safe and flexible experiences for their target audience and for the use cases supported by Llama +2. Protect developers against adversarial users aiming to exploit Llama capabilities to potentially cause harm +3. Provide protections for the community to help prevent the misuse of our models + +### Responsible Deployment + +**Approach:** Llama is a foundational technology designed to be used in a variety of use cases. Examples on how Meta’s Llama models have been responsibly deployed can be found in our [Community Stories webpage](https://llama.meta.com/community-stories/). 
Our approach is to build the most helpful models, enabling the world to benefit from the technology power, by aligning our model safety for generic use cases and addressing a standard set of harms. Developers are then in the driver’s seat to tailor safety for their use cases, defining their own policies and deploying the models with the necessary safeguards in their Llama systems. Llama 3.2 was developed following the best practices outlined in our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/). + +#### Llama 3.2 Instruct + +**Objective:** Our main objectives for conducting safety fine-tuning are to provide the research community with a valuable resource for studying the robustness of safety fine-tuning, as well as to offer developers a readily available, safe, and powerful model for various applications to reduce the developer workload to deploy safe AI systems. We implemented the same set of safety mitigations as in Llama 3, and you can learn more about these in the Llama 3 [paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/). + +**Fine-Tuning Data:** We employ a multi-faceted approach to data collection, combining human-generated data from our vendors with synthetic data to mitigate potential safety risks. We’ve developed many large language model (LLM)-based classifiers that enable us to thoughtfully select high-quality prompts and responses, enhancing data quality control. + +**Refusals and Tone:** Building on the work we started with Llama 3, we put a great emphasis on model refusals to benign prompts as well as refusal tone. We included both borderline and adversarial prompts in our safety data strategy, and modified our safety data responses to follow tone guidelines. + +#### Llama 3.2 Systems + +**Safety as a System:** Large language models, including Llama 3.2, **are not designed to be deployed in isolation** but instead should be deployed as part of an overall AI system with additional safety guardrails as required. Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieve the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools. As part of our responsible release approach, we provide the community with [safeguards](https://llama.meta.com/trust-and-safety/) that developers should deploy with Llama models or other LLMs, including Llama Guard, Prompt Guard and Code Shield. All our [reference implementations](https://github.com/meta-llama/llama-agentic-system) demos contain these safeguards by default so developers can benefit from system-level safety out-of-the-box. + +### New Capabilities and Use Cases + +**Technological Advancement:** Llama releases usually introduce new capabilities that require specific considerations in addition to the best practices that generally apply across all Generative AI use cases. For prior release capabilities also supported by Llama 3.2, see [Llama 3.1 Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md), as the same considerations apply here as well. + +**Constrained Environments:** Llama 3.2 1B and 3B models are expected to be deployed in highly constrained environments, such as mobile devices. LLM Systems using smaller models will have a different alignment profile and safety/helpfulness tradeoff than more complex, larger systems. 
Developers should ensure the safety of their system meets the requirements of their use case. We recommend using lighter system safeguards for such use cases, like Llama Guard 3-1B or its mobile-optimized version. + +### Evaluations + +**Scaled Evaluations:** We built dedicated, adversarial evaluation datasets and evaluated systems composed of Llama models and Purple Llama safeguards to filter input prompt and output response. It is important to evaluate applications in context, and we recommend building dedicated evaluation dataset for your use case. + +**Red Teaming:** We conducted recurring red teaming exercises with the goal of discovering risks via adversarial prompting and we used the learnings to improve our benchmarks and safety tuning datasets. We partnered early with subject-matter experts in critical risk areas to understand the nature of these real-world harms and how such models may lead to unintended harm for society. Based on these conversations, we derived a set of adversarial goals for the red team to attempt to achieve, such as extracting harmful information or reprogramming the model to act in a potentially harmful capacity. The red team consisted of experts in cybersecurity, adversarial machine learning, responsible AI, and integrity in addition to multilingual content specialists with background in integrity issues in specific geographic markets. + +### Critical Risks + +In addition to our safety work above, we took extra care on measuring and/or mitigating the following critical risk areas: + +**1\. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive Weapons):** Llama 3.2 1B and 3B models are smaller and less capable derivatives of Llama 3.1. For Llama 3.1 70B and 405B, to assess risks related to proliferation of chemical and biological weapons, we performed uplift testing designed to assess whether use of Llama 3.1 models could meaningfully increase the capabilities of malicious actors to plan or carry out attacks using these types of weapons and have determined that such testing also applies to the smaller 1B and 3B models. + +**2\. Child Safety:** Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development. For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors including the additional languages Llama 3 is trained on. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. + +**3\. Cyber Attacks:** For Llama 3.1 405B, our cyber attack uplift study investigated whether LLMs can enhance human capabilities in hacking tasks, both in terms of skill level and speed. +Our attack automation study focused on evaluating the capabilities of LLMs when used as autonomous agents in cyber offensive operations, specifically in the context of ransomware attacks. This evaluation was distinct from previous studies that considered LLMs as interactive assistants. The primary objective was to assess whether these models could effectively function as independent agents in executing complex cyber-attacks without human intervention. 
Because Llama 3.2’s 1B and 3B models are smaller and less capable models than Llama 3.1 405B, we broadly believe that the testing conducted for the 405B model also applies to Llama 3.2 models. + +### Community + +**Industry Partnerships:** Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership on AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [Github repository](https://github.com/meta-llama/PurpleLlama). + +**Grants:** We also set up the [Llama Impact Grants](https://llama.meta.com/llama-impact-grants/) program to identify and support the most compelling applications of Meta’s Llama model for societal benefit across three categories: education, climate and open innovation. The 20 finalists from the hundreds of applications can be found [here](https://llama.meta.com/llama-impact-grants/#finalists). + +**Reporting:** Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. + +## Ethical Considerations and Limitations + +**Values:** The core values of Llama 3.2 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. Llama 3.2 addresses users and their needs as they are, without insertion unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. + +**Testing:** Llama 3.2 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3.2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 3.2 models, developers should perform safety testing and tuning tailored to their specific applications of the model. Please refer to available resources including our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide), [Trust and Safety](https://llama.meta.com/trust-and-safety/) solutions, and other [resources](https://llama.meta.com/docs/get-started/) to learn more about responsible development. 
diff --git a/examples/E2E-RAG-App/example_data/llama_3.2_vision.md b/examples/E2E-RAG-App/example_data/llama_3.2_vision.md new file mode 100644 index 000000000..abf126a94 --- /dev/null +++ b/examples/E2E-RAG-App/example_data/llama_3.2_vision.md @@ -0,0 +1,160 @@ +## Model Information + +The Llama 3.2-Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes (text \+ images in / text out). The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image. The models outperform many of the available open source and closed multimodal models on common industry benchmarks. + +**Model Developer**: Meta + +**Model Architecture:** Llama 3.2-Vision is built on top of the Llama 3.1 text-only model, which is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. To support image recognition tasks, the Llama 3.2-Vision model uses a separately trained vision adapter that integrates with the pre-trained Llama 3.1 language model. The adapter consists of a series of cross-attention layers that feed image encoder representations into the core LLM. + +| | Training Data | Params | Input modalities | Output modalities | Context length | GQA | Data volume | Knowledge cutoff | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| Llama 3.2-Vision | (Image, text) pairs | 11B (10.6) | Text \+ Image | Text | 128k | Yes | 6B (image, text) pairs | December 2023 | +| Llama 3.2-Vision | (Image, text) pairs | 90B (88.8) | Text \+ Image | Text | 128k | Yes | 6B (image, text) pairs | December 2023 | + +**Supported Languages:** For text only tasks, English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai are officially supported. Llama 3.2 has been trained on a broader collection of languages than these 8 supported languages. Note for image+text applications, English is the only language supported. + +Developers may fine-tune Llama 3.2 models for languages beyond these supported languages, provided they comply with the Llama 3.2 Community License and the Acceptable Use Policy. Developers are always expected to ensure that their deployments, including those that involve additional languages, are completed safely and responsibly. + +**Llama 3.2 Model Family:** Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability. + +**Model Release Date:** Sept 25, 2024 + +**Status:** This is a static model trained on an offline dataset. Future versions may be released that improve model capabilities and safety. + +**License:** Use of Llama 3.2 is governed by the [Llama 3.2 Community License](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) (a custom, commercial license agreement). + +**Feedback:** Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.2-Vision in applications, please go [here](https://github.com/meta-llama/llama-recipes). 
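To make the adapter description above concrete, the sketch below shows a single gated cross-attention block in which text hidden states attend to projected image-encoder features. The dimensions, the zero-initialized tanh gate, and the single-block structure are illustrative assumptions, not the released Llama 3.2-Vision architecture.

```python
import torch
import torch.nn as nn

class GatedCrossAttentionBlock(nn.Module):
    """Text hidden states (queries) attend to image-encoder features (keys/values)."""

    def __init__(self, d_model: int, d_image: int, n_heads: int):
        super().__init__()
        self.img_proj = nn.Linear(d_image, d_model)  # project vision features into the LLM width
        self.norm = nn.LayerNorm(d_model)
        self.attn = nn.MultiheadAttention(d_model, n_heads, batch_first=True)
        self.gate = nn.Parameter(torch.zeros(1))     # zero-init gate: block starts as an identity map

    def forward(self, text_hidden: torch.Tensor, image_feats: torch.Tensor) -> torch.Tensor:
        img = self.img_proj(image_feats)
        attn_out, _ = self.attn(self.norm(text_hidden), img, img)
        return text_hidden + torch.tanh(self.gate) * attn_out

# Toy shapes: batch of 2, 16 text tokens, 100 image patches.
block = GatedCrossAttentionBlock(d_model=512, d_image=256, n_heads=8)
out = block(torch.randn(2, 16, 512), torch.randn(2, 100, 256))
print(out.shape)  # torch.Size([2, 16, 512])
```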
+ +## Intended Use + +**Intended Use Cases:** Llama 3.2-Vision is intended for commercial and research use. Instruction tuned models are intended for visual recognition, image reasoning, captioning, and assistant-like chat with images, whereas pretrained models can be adapted for a variety of image reasoning tasks. Additionally, because of Llama 3.2-Vision’s ability to take images and text as inputs, additional use cases could include: + +1. Visual Question Answering (VQA) and Visual Reasoning: Imagine a machine that looks at a picture and understands your questions about it. +2. Document Visual Question Answering (DocVQA): Imagine a computer understanding both the text and layout of a document, like a map or contract, and then answering questions about it directly from the image. +3. Image Captioning: Image captioning bridges the gap between vision and language, extracting details, understanding the scene, and then crafting a sentence or two that tells the story. +4. Image-Text Retrieval: Image-text retrieval is like a matchmaker for images and their descriptions. Similar to a search engine but one that understands both pictures and words. +5. Visual Grounding: Visual grounding is like connecting the dots between what we see and say. It’s about understanding how language references specific parts of an image, allowing AI models to pinpoint objects or regions based on natural language descriptions. + + +The Llama 3.2 model collection also supports the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. The Llama 3.2 Community License allows for these use cases. + +**Out of Scope:** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.2 Community License. Use in languages beyond those explicitly referenced as supported in this model card. + +## Hardware and Software + +**Training Factors:** We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, annotation, and evaluation were also performed on production infrastructure. + +**Training Energy Use:** Training utilized a cumulative of **2.02M** GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency. + +## + +**Training Greenhouse Gas Emissions:** Estimated total location-based greenhouse gas emissions were **584** tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy, therefore the total market-based greenhouse gas emissions for training were 0 tons CO2eq. 
+ +| | Training Time (GPU hours) | Training Power Consumption (W) | Training Location-Based Greenhouse Gas Emissions (tons CO2eq) | Training Market-Based Greenhouse Gas Emissions (tons CO2eq) | +| :---- | :---: | :---: | :---: | :---: | +| Llama 3.2-vision 11B | Stage 1 pretraining: 147K H100 hours Stage 2 annealing: 98K H100 hours SFT: 896 H100 hours RLHF: 224 H100 hours | 700 | 71 | 0 | +| Llama 3.2-vision 90B | Stage 1 pretraining: 885K H100 hours Stage 2 annealing: 885K H100 hours SFT: 3072 H100 hours RLHF: 2048 H100 hours | 700 | 513 | 0 | +| Total | 2.02M | | 584 | 0 | + +The methodology used to determine training energy use and greenhouse gas emissions can be found [here](https://arxiv.org/pdf/2204.05149). Since Meta is openly releasing these models, the training energy use and greenhouse gas emissions will not be incurred by others. + +## Training Data + +**Overview:** Llama 3.2-Vision was pretrained on 6B image and text pairs. The instruction tuning data includes publicly available vision instruction datasets, as well as over 3M synthetically generated examples. + +**Data Freshness:** The pretraining data has a cutoff of December 2023\. + +## Benchmarks \- Image Reasoning + +In this section, we report the results for Llama 3.2-Vision models on standard automatic benchmarks. For all these evaluations, we used our internal evaluations library. + +### Base Pretrained Models + +| Category | Benchmark | \# Shots | Metric | Llama 3.2 11B | Llama 3.2 90B | +| ----- | ----- | ----- | ----- | ----- | ----- | +| Image Understanding | VQAv2 (val) | 0 | Accuracy | 66.8 | 73.6 | +| | Text VQA (val) | 0 | Relaxed accuracy | 73.1 | 73.5 | +| | DocVQA (val, unseen) | 0 | ANLS | 62.3 | 70.7 | +| Visual Reasoning | MMMU (val, 0-shot) | 0 | Micro average accuracy | 41.7 | 49.3 | +| | ChartQA (test) | 0 | Accuracy | 39.4 | 54.2 | +| | InfographicsQA (val, unseen) | 0 | ANLS | 43.2 | 56.8 | +| | AI2 Diagram (test) | 0 | Accuracy | 62.4 | 75.3 | + +### Instruction Tuned Models + +| Modality | Capability | Benchmark | \# Shots | Metric | Llama 3.2 11B | Llama 3.2 90B | +| ----- | :---: | ----- | :---: | :---: | ----- | ----- | +| Image | College-level Problems and Mathematical Reasoning | MMMU (val, CoT) | 0 | Micro average accuracy | 50.7 | 60.3 | +| | | MMMU-Pro, Standard (10 opts, test) | 0 | Accuracy | 33.0 | 45.2 | +| | | MMMU-Pro, Vision (test) | 0 | Accuracy | 23.7 | 33.8 | +| | | MathVista (testmini) | 0 | Accuracy | 51.5 | 57.3 | +| | Charts and Diagram Understanding | ChartQA (test, CoT) | 0 | Relaxed accuracy | 83.4 | 85.5 | +| | | AI2 Diagram (test) | 0 | Accuracy | 91.1 | 92.3 | +| | | DocVQA (test) | 0 | ANLS | 88.4 | 90.1 | +| | General Visual Question Answering | VQAv2 (test) | 0 | Accuracy | 75.2 | 78.1 | +| | | | | | | | +| Text | General | MMLU (CoT) | 0 | Macro\_avg/acc | 73.0 | 86.0 | +| | Math | MATH (CoT) | 0 | Final\_em | 51.9 | 68.0 | +| | Reasoning | GPQA | 0 | Accuracy | 32.8 | 46.7 | +| | Multilingual | MGSM (CoT) | 0 | em | 68.9 | 86.9 | + +## Responsibility & Safety + +As part of our Responsible release approach, we followed a three-pronged strategy to managing trust & safety risks: + +1. Enable developers to deploy helpful, safe and flexible experiences for their target audience and for the use cases supported by Llama. +2. Protect developers against adversarial users aiming to exploit Llama capabilities to potentially cause harm. +3. Provide protections for the community to help prevent the misuse of our models. 
+ +### Responsible Deployment + +**Approach:** Llama is a foundational technology designed to be used in a variety of use cases, examples on how Meta’s Llama models have been responsibly deployed can be found in our [Community Stories webpage](https://llama.meta.com/community-stories/). Our approach is to build the most helpful models enabling the world to benefit from the technology power, by aligning our model safety for the generic use cases addressing a standard set of harms. Developers are then in the driver seat to tailor safety for their use case, defining their own policy and deploying the models with the necessary safeguards in their Llama systems. Llama 3.2 was developed following the best practices outlined in our Responsible Use Guide, you can refer to the [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/) to learn more. + +#### Llama 3.2 Instruct + +**Objective:** Our main objectives for conducting safety fine-tuning are to provide the research community with a valuable resource for studying the robustness of safety fine-tuning, as well as to offer developers a readily available, safe, and powerful model for various applications to reduce the developer workload to deploy safe AI systems. We implemented the same set of safety mitigations as in Llama 3, and you can learn more about these in the Llama 3 [paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/). + +**Fine-Tuning Data:** We employ a multi-faceted approach to data collection, combining human-generated data from our vendors with synthetic data to mitigate potential safety risks. We’ve developed many large language model (LLM)-based classifiers that enable us to thoughtfully select high-quality prompts and responses, enhancing data quality control. + +**Refusals and Tone:** Building on the work we started with Llama 3, we put a great emphasis on model refusals to benign prompts as well as refusal tone. We included both borderline and adversarial prompts in our safety data strategy, and modified our safety data responses to follow tone guidelines. + +#### Llama 3.2 Systems + +**Safety as a System:** Large language models, including Llama 3.2, **are not designed to be deployed in isolation** but instead should be deployed as part of an overall AI system with additional safety guardrails as required. Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieve the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools. As part of our responsible release approach, we provide the community with [safeguards](https://llama.meta.com/trust-and-safety/) that developers should deploy with Llama models or other LLMs, including Llama Guard, Prompt Guard and Code Shield. All our [reference implementations](https://github.com/meta-llama/llama-agentic-system) demos contain these safeguards by default so developers can benefit from system-level safety out-of-the-box. + +### New Capabilities and Use Cases + +**Technological Advancement:** Llama releases usually introduce new capabilities that require specific considerations in addition to the best practices that generally apply across all Generative AI use cases. For prior release capabilities also supported by Llama 3.2, see [Llama 3.1 Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md), as the same considerations apply here as well. 
+ +**Image Reasoning:** Llama 3.2-Vision models come with multimodal (text and image) input capabilities enabling image reasoning applications. As part of our responsible release process, we took dedicated measures including evaluations and mitigations to address the risk of the models uniquely identifying individuals in images. As with other LLM risks, models may not always be robust to adversarial prompts, and developers should evaluate identification and other applicable risks in the context of their applications as well as consider deploying Llama Guard 3-11B-Vision as part of their system or other mitigations as appropriate to detect and mitigate such risks. + +### Evaluations + +**Scaled Evaluations:** We built dedicated, adversarial evaluation datasets and evaluated systems composed of Llama models and Purple Llama safeguards to filter input prompt and output response. It is important to evaluate applications in context, and we recommend building dedicated evaluation dataset for your use case. + +**Red teaming:** We conducted recurring red teaming exercises with the goal of discovering risks via adversarial prompting and we used the learnings to improve our benchmarks and safety tuning datasets. We partnered early with subject-matter experts in critical risk areas to understand the nature of these real-world harms and how such models may lead to unintended harm for society. Based on these conversations, we derived a set of adversarial goals for the red team to attempt to achieve, such as extracting harmful information or reprogramming the model to act in a potentially harmful capacity. The red team consisted of experts in cybersecurity, adversarial machine learning, responsible AI, and integrity in addition to multilingual content specialists with background in integrity issues in specific geographic markets. + +### Critical Risks + +In addition to our safety work above, we took extra care on measuring and/or mitigating the following critical risk areas: + +**1\. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive Weapons):** For Llama 3.1, to assess risks related to proliferation of chemical and biological weapons, we performed uplift testing designed to assess whether use of Llama 3.1 models could meaningfully increase the capabilities of malicious actors to plan or carry out attacks using these types of weapons. For Llama 3.2-Vision models, we conducted additional targeted evaluations and found that it was unlikely Llama 3.2 presented an increase in scientific capabilities due to its added image understanding capability as compared to Llama 3.1. + +**2\. Child Safety:** Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development. For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors including the additional languages Llama 3 is trained on. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. + +**3\. 
Cyber Attacks:** For Llama 3.1 405B, our cyber attack uplift study investigated whether LLMs can enhance human capabilities in hacking tasks, both in terms of skill level and speed. +Our attack automation study focused on evaluating the capabilities of LLMs when used as autonomous agents in cyber offensive operations, specifically in the context of ransomware attacks. This evaluation was distinct from previous studies that considered LLMs as interactive assistants. The primary objective was to assess whether these models could effectively function as independent agents in executing complex cyber-attacks without human intervention. Because Llama 3.2’s vision capabilities are not generally germane to cyber uplift, we believe that the testing conducted for Llama 3.1 also applies to Llama 3.2. + +### Community + +**Industry Partnerships:** Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership on AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [Github repository](https://github.com/meta-llama/PurpleLlama). + +**Grants:** We also set up the [Llama Impact Grants](https://llama.meta.com/llama-impact-grants/) program to identify and support the most compelling applications of Meta’s Llama model for societal benefit across three categories: education, climate and open innovation. The 20 finalists from the hundreds of applications can be found [here](https://llama.meta.com/llama-impact-grants/#finalists). + +**Reporting:** Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. + +## Ethical Considerations and Limitations + +**Values:** The core values of Llama 3.2 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. Llama 3.2 addresses users and their needs as they are, without insertion unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. + +**Testing:** But Llama 3.2 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3.2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. 
Therefore, before deploying any applications of Llama 3.2 models, developers should perform safety testing and tuning tailored to their specific applications of the model. Please refer to available resources including our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide), [Trust and Safety](https://llama.meta.com/trust-and-safety/) solutions, and other [resources](https://llama.meta.com/docs/get-started/) to learn more about responsible development. diff --git a/examples/E2E-RAG-App/example_data/llama_website0613.txt b/examples/E2E-RAG-App/example_data/llama_website0613.txt deleted file mode 100644 index 33ab39ffb..000000000 --- a/examples/E2E-RAG-App/example_data/llama_website0613.txt +++ /dev/null @@ -1,103 +0,0 @@ -Meta Llama Skip to main content Technology Getting Started Trust & Safety Community Resources Discover the possibilities with Meta Llama Democratizing access through an open platform featuring AI models, tools, and resources — enabling developers to shape the next wave of innovation. Licensed for both research and commercial use Get Started Llama models and tools Meta Llama 3 Build the future of AI with Meta Llama 3 Llama 3 is an accessible, open-source large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Learn more Meta Code Llama A state-of-the-art large language model for coding LLM capable of generating code, and natural language about code, from both code and natural language prompts. Meta Llama Guard Empowering developers, advancing safety, and building an open ecosystem We’re announcing Meta Llama Guard, an umbrella project featuring open trust and safety tools and evaluations meant to level the playing field for developers. Ready to start building with Meta Llama? Access our getting started guide and responsible use resources to get started. Get started guide Responsible use guide Prompt Engineering with Meta Llama Learn how to effectively use Llama models for prompt engineering with our free course on Deeplearning.AI, where you'll learn best practices and interact with the models through a simple API call. Partnerships Our global partners and supporters We have a broad range of supporters around the world who believe in our open approach to today’s AI — companies that have given early feedback and are excited to build with Llama, cloud providers that will include the model as part of their offering to customers, researchers committed to doing research with the model, and people across tech, academia, and policy who see the benefits of Llama and an open platform as we do. Latest Llama updates Introducing Meta Llama 3: The most capable openly available LLM to date Read more Meet Your New Assistant: Meta AI, Built With Llama 3 CYBERSECEVAL 2: A Wide-Ranging Cybersecurity Evaluation Suite for Large Language Models Stay up-to-date Our latest updates delivered to your inbox Subscribe to our newsletter to keep up with the latest Llama updates, releases and more. Sign up ----------- -Use Policy Skip to main content Meta is committed to promoting safe and fair use of its tools and features, including Llama 2. If you access or use Llama 2, you agree to this Acceptable Use Policy (“Policy”). The most recent copy of this policy can be found at llama.meta.com/use-policy . Prohibited Uses We want everyone to use Llama 2 safely and responsibly. 
You agree you will not use, or allow others to use, Llama 2 to: 1. Violate the law or others’ rights, including to: a. Engage in, promote, generate, contribute to, encourage, plan, incite, or further illegal or unlawful activity or content, such as: i. Violence or terrorism ii. Exploitation or harm to children, including the solicitation, creation, acquisition, or dissemination of child exploitative content or failure to report Child Sexual Abuse Material b. Human trafficking, exploitation, and sexual violence iii. The illegal distribution of information or materials to minors, including obscene materials, or failure to employ legally required age-gating in connection with such information or materials. iv. Sexual solicitation vi. Any other criminal activity c. Engage in, promote, incite, or facilitate the harassment, abuse, threatening, or bullying of individuals or groups of individuals d. Engage in, promote, incite, or facilitate discrimination or other unlawful or harmful conduct in the provision of employment, employment benefits, credit, housing, other economic benefits, or other essential goods and services e. Engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or related professional practices f. Collect, process, disclose, generate, or infer health, demographic, or other sensitive personal or private information about individuals without rights and consents required by applicable laws g. Engage in or facilitate any action or generate any content that infringes, misappropriates, or otherwise violates any third-party rights, including the outputs or results of any products or services using the Llama 2 Materials h. Create, generate, or facilitate the creation of malicious code, malware, computer viruses or do anything else that could disable, overburden, interfere with or impair the proper working, integrity, operation or appearance of a website or computer system 2. Engage in, promote, incite, facilitate, or assist in the planning or development of activities that present a risk of death or bodily harm to individuals, including use of Llama 2 related to the following: a. Military, warfare, nuclear industries or applications, espionage, use for materials or activities that are subject to the International Traffic Arms Regulations (ITAR) maintained by the United States Department of State b. Guns and illegal weapons (including weapon development) c. Illegal drugs and regulated/controlled substances d. Operation of critical infrastructure, transportation technologies, or heavy machinery e. Self-harm or harm to others, including suicide, cutting, and eating disorders f. Any content intended to incite or promote violence, abuse, or any infliction of bodily harm to an individual 3. Intentionally deceive or mislead others, including use of Llama 2 related to the following: a. Generating, promoting, or furthering fraud or the creation or promotion of disinformation b. Generating, promoting, or furthering defamatory content, including the creation of defamatory statements, images, or other content c. Generating, promoting, or further distributing spam d. Impersonating another individual without consent, authorization, or legal right e. Representing that the use of Llama 2 or outputs are human-generated f. Generating or facilitating false online engagement, including fake reviews and other means of fake online engagement 4. 
Fail to appropriately disclose to end users any known dangers of your AI system Please report any violation of this Policy, software “bug,” or other problems that could lead to a violation of this Policy through one of the following means: Reporting issues with the model: github.com/facebookresearch/llama Reporting risky content generated by the model: developers.facebook.com/llama_output_feedback Reporting bugs and security concerns: facebook.com/whitehat/info Reporting violations of the Acceptable Use Policy or unlicensed uses of Llama: LlamaUseReport@meta.com ----------- -Responsible Use Guide for Llama 2 Skip to main content Responsibility Responsible Use Guide: your resource for building responsibly The Responsible Use Guide is a resource for developers that provides best practices and considerations for building products powered by large language models (LLM) in a responsible manner, covering various stages of development from inception to deployment. Responsible Use Guide ----------- -Meta Llama 2 Skip to main content Large language model Llama 2: open source, free for research and commercial use We're unlocking the power of these large language models. Our latest version of Llama – Llama 2 – is now accessible to individuals, creators, researchers, and businesses so they can experiment, innovate, and scale their ideas responsibly. Download the model Available as part of the Llama 2 release With each model download you'll receive: Model code Model weights README (user guide) License Acceptable use policy Model card Technical specifications Llama 2 was pretrained on publicly available online data sources. The fine-tuned model, Llama Chat, leverages publicly available instruction datasets and over 1 million human annotations. Read the paper Inside the model Llama 2 models are trained on 2 trillion tokens and have double the context length of Llama 1. Llama Chat models have additionally been trained on over 1 million new human annotations. Benchmarks Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length than Llama 1. Its fine-tuned models have been trained on over 1 million human annotations. Safety and helpfulness Reinforcement learning from human feedback Llama Chat uses reinforcement learning from human feedback to ensure safety and helpfulness. Training Llama Chat: Llama 2 is pretrained using publicly available online data. An initial version of Llama Chat is then created through the use of supervised fine-tuning. Next, Llama Chat is iteratively refined using Reinforcement Learning from Human Feedback (RLHF), which includes rejection sampling and proximal policy optimization (PPO). Get Llama 2 now: complete the download form via the link below. By submitting the form, you agree to Meta's privacy policy Get started Our global partners and supporters We have a broad range of supporters around the world who believe in our open approach to today’s AI — companies that have given early feedback and are excited to build with Llama 2, cloud providers that will include the model as part of their offering to customers, researchers committed to doing research with the model, and people across tech, academia, and policy who see the benefits of Llama and an open platform as we do. Statement of support for Meta’s open approach to today’s AI “We support an open innovation approach to AI. Responsible and open innovation gives us all a stake in the AI development process, bringing visibility, scrutiny and trust to these technologies. 
Opening today’s Llama models will let everyone benefit from this technology.” We’re committed to building responsibly To promote a responsible, collaborative AI innovation ecosystem, we’ve established a range of resources for all who use Llama 2: individuals, creators, developers, researchers, academics, and businesses of any size. The Responsible Use Guide is a resource for developers that provides best practices and considerations for building products powered by large language models (LLMs) in a responsible manner, covering various stages of development from inception to deployment. Safety Red-teaming Llama Chat has undergone testing by external partners and internal teams to identify performance gaps and mitigate potentially problematic responses in chat use cases. We're committed to ongoing red-teaming to enhance safety and performance. Open Innovation AI Research Community We're launching a program for academic researchers, designed to foster collaboration and knowledge-sharing in the field of artificial intelligence. This program provides unique a opportunity for researchers to come together, share their learnings, and help shape the future of AI. By joining this community, participants will have the chance to contribute to a research agenda that addresses the most pressing challenges in the field, and work together to develop innovative solutions that promote responsible and safe AI practices. We believe that by bringing together diverse perspectives and expertise, we can accelerate the pace of progress in AI research. Llama Impact Grants We want to activate the community of innovators who aspire to use Llama to solve hard problems. We are launching the grants to encourage a diverse set of public, non-profit, and for-profit entities to use Llama 2 to address environmental, education and other important challenges. The grants will be subject to rules which will be posted here prior to the grants start. Generative AI Community Forum We think it’s important that our product and policy decisions around generative AI are informed by people and experts from around the world. In support of this belief, we created a forum to act as a governance tool and resource for the community. It brings together a representative group of people to discuss and deliberate on the values that underpin AI, LLM and other new AI technologies. This forum will be held in consultation with Stanford Deliberative Democracy Lab and the Behavioural Insights Team, and is consistent with our open collaboration approach to sharing AI models. Join us on our AI journey If you’d like to advance AI with us, visit our Careers page to discover more about AI at Meta. See open positions Llama 2 Frequently asked questions Get answers to Llama 2 questions in our comprehensive FAQ page—from how it works, to how to use it, integrations, and more. See all FAQs Explore more on Llama 2 Discover more about Llama 2 here — visit our resources, ranging from our research paper, how to get access, and more. Github Open Innovation AI Research Community Getting started guide AI at Meta blog Research paper ----------- -Skip to main content Llama 2 Version Release Date: July 18, 2023 “Agreement” means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. 
“Documentation” means the specifications, manuals and documentation accompanying Llama 2 distributed by Meta at llama.meta.com/llama-downloads/ “Licensee” or “you” means you, or your employer or any other person or entity (if you are entering into this Agreement on such person or entity’s behalf), of the age required under applicable laws, rules or regulations to provide legal consent and that has legal authority to bind your employer or such other person or entity if you are entering in this Agreement on their behalf. “Llama 2” means the foundational large language models and software and algorithms, including machine-learning model code, trained model weights, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing distributed by Meta at “Llama Materials” means, collectively, Meta’s proprietary Llama 2 and Documentation (and any portion thereof) made available under this Agreement. “Meta” “we” means Meta Platforms Ireland Limited (if you are located in or, if you are an entity, your principal place of business is in the EEA or Switzerland) and Meta Platforms, Inc. (if you are located outside of the EEA or Switzerland). By clicking “I Accept” below or by using or distributing any portion or element of the Llama Materials, you agree to be bound by this Agreement. 1. License Rights and Redistribution. a. Grant of Rights. You are granted a non-exclusive, worldwide, non-transferable and royalty-free limited license under Meta’s intellectual property or other rights owned by Meta embodied in the Llama Materials to use, reproduce, distribute, copy, create derivative works of, and make modifications to the Llama Materials. b. Redistribution and Use. i. If you distribute or make the Llama Materials, or any derivative works thereof, available to a third party, you shall provide a copy of this Agreement to such third party. ii. If you receive Llama Materials, or any derivative works thereof, from a Licensee as part of an integrated end user product, then Section 2 of this Agreement will not apply to you. iii. You must retain in all copies of the Llama Materials that you distribute the following attribution notice within a “Notice” text file distributed as a part of such copies: “Llama 2 is licensed under the LLAMA 2 Community License, Copyright © Meta Platforms, Inc. All Rights Reserved.” iv. Your use of the Llama Materials must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Llama Materials (available at https://llama.meta.com/use-policy ), which is hereby incorporated by reference into this Agreement. v. You will not use the Llama Materials or any output or results of the Llama Materials to improve any other large language model (excluding Llama 2 or derivative works thereof). 2. Additional Commercial Terms. If, on the Llama 2 version release date, the monthly active users of the products or services made available by or for Licensee, or Licensee’s affiliates, is greater than 700 million monthly active users in the preceding calendar month, you must request a license from Meta, which Meta may grant to you in its sole discretion, and you are not authorized to exercise any of the rights under this Agreement unless or until Meta otherwise expressly grants you such rights. 3. Disclaimer of Warranty. 
UNLESS REQUIRED BY APPLICABLE LAW, THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE LLAMA MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS. 4. Limitation of Liability. IN NO EVENT WILL META OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF META OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING. 5. Intellectual Property. a. No trademark licenses are granted under this Agreement, and in connection with the Llama Materials, neither Meta nor Licensee may use any name or mark owned by or associated with the other or any of its affiliates, except as required for reasonable and customary use in describing and redistributing the Llama Materials. b. Subject to Meta’s ownership of Llama Materials and derivatives made by or for Meta, with respect to any derivative works and modifications of the Llama Materials that are made by you, as between you and Meta, you are and will be the owner of such derivative works and modifications. c. If you institute litigation or other proceedings against Meta or any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Llama Materials or Llama 2 outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by you, then any licenses granted to you under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless Meta from and against any claim by any third party arising out of or related to your use or distribution of the Llama Materials. 6. Term and Termination. The term of this Agreement will commence upon your acceptance of this Agreement or access to the Llama Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. Meta may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of the Llama Materials. Sections 3, 4 and 7 shall survive the termination of this Agreement. 7. Governing Law and Jurisdiction. This Agreement will be governed and construed under the laws of the State of California without regard to choice of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement. The courts of California shall have exclusive jurisdiction of any dispute arising out of this Agreement. ----------- -Meta is committed to promoting safe and fair use of its tools and features, including Llama 2. If you access or use Llama 2, you agree to this Acceptable Use Policy (“Policy”). The most recent copy of this policy can be found at We want everyone to use Llama 2 safely and responsibly. You agree you will not use, or allow others to use, Llama 2 to: 1. Violate the law or others’ rights, including to: a. 
Engage in, promote, generate, contribute to, encourage, plan, incite, or further illegal or unlawful activity or content, such as: i. Violence or terrorism ii. Exploitation or harm to children, including the solicitation, creation, acquisition, or dissemination of child exploitative content or failure to report Child Sexual Abuse Material b. Human trafficking, exploitation, and sexual violence iii. The illegal distribution of information or materials to minors, including obscene materials, or failure to employ legally required age-gating in connection with such information or materials. vi. Any other criminal activity c. Engage in, promote, incite, or facilitate the harassment, abuse, threatening, or bullying of individuals or groups of individuals d. Engage in, promote, incite, or facilitate discrimination or other unlawful or harmful conduct in the provision of employment, employment benefits, credit, housing, other economic benefits, or other essential goods and services e. Engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or related professional practices f. Collect, process, disclose, generate, or infer health, demographic, or other sensitive personal or private information about individuals without rights and consents required by applicable laws g. Engage in or facilitate any action or generate any content that infringes, misappropriates, or otherwise violates any third-party rights, including the outputs or results of any products or services using the Llama 2 Materials h. Create, generate, or facilitate the creation of malicious code, malware, computer viruses or do anything else that could disable, overburden, interfere with or impair the proper working, integrity, operation or appearance of a website or computer system 2. Engage in, promote, incite, facilitate, or assist in the planning or development of activities that present a risk of death or bodily harm to individuals, including use of Llama 2 related to the following: a. Military, warfare, nuclear industries or applications, espionage, use for materials or activities that are subject to the International Traffic Arms Regulations (ITAR) maintained by the United States Department of State b. Guns and illegal weapons (including weapon development) c. Illegal drugs and regulated/controlled substances d. Operation of critical infrastructure, transportation technologies, or heavy machinery e. Self-harm or harm to others, including suicide, cutting, and eating disorders f. Any content intended to incite or promote violence, abuse, or any infliction of bodily harm to an individual 3. Intentionally deceive or mislead others, including use of Llama 2 related to the following: a. Generating, promoting, or furthering fraud or the creation or promotion of disinformation b. Generating, promoting, or furthering defamatory content, including the creation of defamatory statements, images, or other content c. Generating, promoting, or further distributing spam d. Impersonating another individual without consent, authorization, or legal right e. Representing that the use of Llama 2 or outputs are human-generated f. Generating or facilitating false online engagement, including fake reviews and other means of fake online engagement 4. 
Fail to appropriately disclose to end users any known dangers of your AI system Please report any violation of this Policy, software “bug,” or other problems that could lead to a violation of this Policy through one of the following means: Reporting issues with the model: Reporting risky content generated by the model: Reporting bugs and security concerns: Reporting violations of the Acceptable Use Policy or unlicensed uses of Llama: Skip to main content ----------- -Skip to main content Llama 2 Version Release Date: July 18, 2023 means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. means the specifications, manuals and documentation accompanying Llama 2 distributed by Meta at means you, or your employer or any other person or entity (if you are entering into this Agreement on such person or entity’s behalf), of the age required under applicable laws, rules or regulations to provide legal consent and that has legal authority to bind your employer or such other person or entity if you are entering in this Agreement on their behalf. means the foundational large language models and software and algorithms, including machine-learning model code, trained model weights, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing distributed by Meta at means, collectively, Meta’s proprietary Llama 2 and Documentation (and any portion thereof) made available under this Agreement. means Meta Platforms Ireland Limited (if you are located in or, if you are an entity, your principal place of business is in the EEA or Switzerland) and Meta Platforms, Inc. (if you are located outside of the EEA or Switzerland). By clicking “I Accept” below or by using or distributing any portion or element of the Llama Materials, you agree to be bound by this Agreement. License Rights and Redistribution. a. Grant of Rights. You are granted a non-exclusive, worldwide, non-transferable and royalty-free limited license under Meta’s intellectual property or other rights owned by Meta embodied in the Llama Materials to use, reproduce, distribute, copy, create derivative works of, and make modifications to the Llama Materials. b. Redistribution and Use. i. If you distribute or make the Llama Materials, or any derivative works thereof, available to a third party, you shall provide a copy of this Agreement to such third party. ii. If you receive Llama Materials, or any derivative works thereof, from a Licensee as part of an integrated end user product, then Section 2 of this Agreement will not apply to you. iii. You must retain in all copies of the Llama Materials that you distribute the following attribution notice within a “Notice” text file distributed as a part of such copies: “Llama 2 is licensed under the LLAMA 2 Community License, Copyright © Meta Platforms, Inc. All Rights Reserved.” iv. Your use of the Llama Materials must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Llama Materials (available at ), which is hereby incorporated by reference into this Agreement. v. You will not use the Llama Materials or any output or results of the Llama Materials to improve any other large language model (excluding Llama 2 or derivative works thereof). 
If, on the Llama 2 version release date, the monthly active users of the products or services made available by or for Licensee, or Licensee’s affiliates, is greater than 700 million monthly active users in the preceding calendar month, you must request a license from Meta, which Meta may grant to you in its sole discretion, and you are not authorized to exercise any of the rights under this Agreement unless or until Meta otherwise expressly grants you such rights. UNLESS REQUIRED BY APPLICABLE LAW, THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE LLAMA MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS. IN NO EVENT WILL META OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF META OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING. a. No trademark licenses are granted under this Agreement, and in connection with the Llama Materials, neither Meta nor Licensee may use any name or mark owned by or associated with the other or any of its affiliates, except as required for reasonable and customary use in describing and redistributing the Llama Materials. b. Subject to Meta’s ownership of Llama Materials and derivatives made by or for Meta, with respect to any derivative works and modifications of the Llama Materials that are made by you, as between you and Meta, you are and will be the owner of such derivative works and modifications. c. If you institute litigation or other proceedings against Meta or any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Llama Materials or Llama 2 outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by you, then any licenses granted to you under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless Meta from and against any claim by any third party arising out of or related to your use or distribution of the Llama Materials. The term of this Agreement will commence upon your acceptance of this Agreement or access to the Llama Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. Meta may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of the Llama Materials. Sections 3, 4 and 7 shall survive the termination of this Agreement. Governing Law and Jurisdiction. This Agreement will be governed and construed under the laws of the State of California without regard to choice of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement. The courts of California shall have exclusive jurisdiction of any dispute arising out of this Agreement. 
----------- -Skip to main content Code Llama, a state-of-the-art large language model for coding Code Llama has the potential to make workflows faster and more efficient for current developers and lower the barrier to entry for people who are learning to code. Code Llama has the potential to be used as a productivity and educational tool to help programmers write more robust, well-documented software. Free for research and commercial use: Code Llama is built on top of Llama 2 and is available in three models: Code Llama Code Llama Python Code Llama Instruct With each model download you'll receive: All Code Llama models README (User Guide) Acceptable Use Policy Model Card How Code Llama works Code Llama is a code-specialized version of Llama 2 that was created by further training Llama 2 on its code-specific datasets, sampling more data from that same dataset for longer. Essentially, Code Llama features enhanced coding capabilities, built on top of Llama 2. It can generate code, and natural language about code, from both code and natural language prompts (e.g., “Write me a function that outputs the fibonacci sequence.”) It can also be used for code completion and debugging. It supports many of the most popular languages being used today, including Python, C++, Java, PHP, Typescript (Javascript), C#, and Bash. Code Llama is available in four sizes with 7B, 13B, 34B, and 70B parameters respectively. Each of these models is trained with 500B tokens of code and code-related data, apart from 70B, which is trained on 1T tokens. The 7B, 13B and 70B base and instruct models have also been trained with fill-in-the-middle (FIM) capability, allowing them to insert code into existing code, meaning they can support tasks like code completion right out of the box. The four models address different serving and latency requirements. The 7B model, for example, can be served on a single GPU. The 34B and 70B models return the best results and allow for better coding assistance, but the smaller 7B and 13B models are faster and more suitable for tasks that require low latency, like real-time code completion. Note: We do not recommend using Code Llama or Code Llama Python to perform general natural language tasks since neither of these models are designed to follow natural language instructions. Code Llama is specialized for code-specific tasks and isn’t appropriate as a foundation model for other tasks. Evaluating Code Llama’s performance To test Code Llama’s performance against existing solutions, we used two popular coding benchmarks: HumanEval and Mostly Basic Python Programming ( MBPP ). HumanEval tests the model’s ability to complete code based on docstrings and MBPP tests the model’s ability to write code based on a description. Our benchmark testing showed that Code Llama performed better than open-source, code-specific LLMs and outperformed Llama 2. Code Llama 70B Instruct, for example, scored 67.8% on HumanEval and 62.2% on MBPP, the highest compared with other state-of-the-art open solutions, and on par with ChatGPT. As with all cutting edge technology, Code Llama comes with risks. Building AI models responsibly is crucial, and we undertook numerous safety measures before releasing Code Llama. As part of our red teaming efforts, we ran a quantitative evaluation of Code Llama’s risk of generating malicious code. We created prompts that attempted to solicit malicious code with clear intent and scored Code Llama’s responses to those prompts against ChatGPT’s (GPT3.5 Turbo). 
Our results found that Code Llama answered with safer responses. Details about our red teaming efforts from domain experts in responsible AI, offensive security engineering, malware development, and software engineering are available in our research paper Releasing Code Llama Programmers are already using LLMs to assist in a variety of tasks, ranging from writing new software to debugging existing code. The goal is to make developer workflows more efficient, so they can focus on the most human centric aspects of their job, rather than repetitive tasks. At Meta, we believe that AI models, but LLMs for coding in particular, benefit most from an open approach, both in terms of innovation and safety. Publicly available, code-specific models can facilitate the development of new technologies that improve peoples' lives. By releasing code models like Code Llama, the entire community can evaluate their capabilities, identify issues, and fix vulnerabilities. Code Llama’s training recipes are available on our Github repository and model weights are also available. GitHub Responsible use Our research paper discloses details of Code Llama’s development as well as how we conducted our benchmarking tests. It also provides more information into the model’s limitations, known challenges we encountered, mitigations we’ve taken, and future challenges we intend to investigate. We’ve also updated our Responsible Use Guide and it includes guidance on developing downstream models responsibly, including: Defining content policies and mitigations. Preparing data. Fine-tuning the model. Evaluating and improving performance. Addressing input- and output-level risks. Building transparency and reporting mechanisms in user interactions. Developers should evaluate their models using code-specific evaluation benchmarks and perform safety studies on code-specific use cases such as generating malware, computer viruses, or malicious code. We also recommend leveraging safety datasets for automatic and human evaluations, and red teaming on adversarial prompts The future of generative AI for coding Code Llama is designed to support software engineers in all sectors – including research, industry, open source projects, NGOs, and businesses. But there are still many more use cases to support than what our base and instruct models can serve. We hope that Code Llama will inspire others to leverage Llama 2 to create new innovative tools for research and commercial products. Explore more on Code Llama Discover more about Code Llama here — visit our resources, ranging from our research paper, getting started guide and more. Code Llama GitHub repository ----------- -Skip to main content Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Build the future of AI with Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Experience Llama 3 on Meta AI Experience Llama 3 with Meta AI We’ve integrated Llama 3 into Meta AI, our intelligent assistant, that expands the ways people can get things done, create and connect with Meta AI. You can see first-hand the performance of Llama 3 by using Meta AI for coding tasks and problem solving. Whether you're developing agents, or other AI-powered applications, Llama 3 in both 8B and 70B will offer the capabilities and flexibility you need to develop your ideas. 
Experience Llama 3 on Meta AI Enhanced performance Experience the state-of-the-art performance of Llama 3, an openly accessible model that excels at language nuances, contextual understanding, and complex tasks like translation and dialogue generation. With enhanced scalability and performance, Llama 3 can handle multi-step tasks effortlessly, while our refined post-training processes significantly lower false refusal rates, improve response alignment, and boost diversity in model answers. Additionally, it drastically elevates capabilities like reasoning, code generation, and instruction following. Build the future of AI with Llama 3. Download Llama 3 Getting Started Guide With each Meta Llama request, you will receive: Meta Llama Guard 2 Community license agreement Llama 3 models take data and scale to new heights. It’s been trained on our two recently announced custom-built 24K GPU clusters on over 15T token of data – a training dataset 7x larger than that used for Llama 2, including 4x more code. This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. Trust & safety A comprehensive approach to responsibility With the release of Llama 3, we’ve updated the Responsible Use Guide (RUG) to provide the most comprehensive information on responsible development with LLMs. Our system-centric approach includes updates to our trust and safety tools with Llama Guard 2, optimized to support the newly announced taxonomy published by MLCommons expanding its coverage to a more comprehensive set of safety categories, Code Shield, and Cybersec Eval 2. In line with the principles outlined in our RUG , we recommend thorough checking and filtering of all inputs to and outputs from LLMs based on your unique content guidelines for your intended use case and audience. Meta Llama Guard 2 Explore more on Meta Llama 3 Introducing Meta Llama 3: The most capable openly available LLM to date Read the blog Meet Your New Assistant: Meta AI, Built With Llama 3 Meta Llama 3 repository View repository Explore ----------- -Meta Llama 3 License Skip to main content META LLAMA 3 COMMUNITY LICENSE AGREEMENT Meta Llama 3 Version Release Date: April 18, 2024 “ Agreement ” means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. Documentation ” means the specifications, manuals and documentation accompanying Meta Llama 3 distributed by Meta at https://llama.meta.com/get-started/ Licensee ” or “ you ” means you, or your employer or any other person or entity (if you are entering into this Agreement on such person or entity’s behalf), of the age required under applicable laws, rules or regulations to provide legal consent and that has legal authority to bind your employer or such other person or entity if you are entering in this Agreement on their behalf. MetaLlama 3 ” means the foundational large language models and software and algorithms, including machine-learning model code, trained model weights, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing distributed by Meta at https://llama.meta.com/llama-downloads Llama Materials ” means, collectively, Meta’s proprietary Meta Llama 3 and Documentation (and any portion thereof) made available under this Agreement. 
Meta we ” means Meta Platforms Ireland Limited (if you are located in or, if you are an entity, your principal place of business is in the EEA or Switzerland) and Meta Platforms, Inc. (if you are located outside of the EEA or Switzerland). By clicking “I Accept” below or by using or distributing any portion or element of the Llama Materials, you agree to be bound by this Agreement. License Rights and Redistribution a. Grant of Rights . You are granted a non-exclusive, worldwide, non-transferable and royalty-free limited license under Meta’s intellectual property or other rights owned by Meta embodied in the Llama Materials to use, reproduce, distribute, copy, create derivative works of, and make modifications to the Llama Materials. b. Redistribution and Use i. If you distribute or make available the Llama Materials (or any derivative works thereof), or a product or service that uses any of them, including another AI model, you shall (A) provide a copy of this Agreement with any such Llama Materials; and (B) prominently display “Built with Meta Llama 3” on a related website, user interface, blogpost, about page, or product documentation. If you use the Llama Materials to create, train, fine tune, or otherwise improve an AI model, which is distributed or made available, you shall also include “Llama 3” at the beginning of any such AI model name. ii. If you receive Llama Materials, or any derivative works thereof, from a Licensee as part of an integrated end user product, then Section 2 of this Agreement will not apply to you. iii. You must retain in all copies of the Llama Materials that you distribute the following attribution notice within a “Notice” text file distributed as a part of such copies: “Meta Llama 3 is licensed under the Meta Llama 3 Community License, Copyright © Meta Platforms, Inc. All Rights Reserved.” iv. Your use of the Llama Materials must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Llama Materials (available at https://llama.meta.com/llama3/use-policy ), which is hereby incorporated by reference into this Agreement. v. You will not use the Llama Materials or any output or results of the Llama Materials to improve any other large language model (excluding Meta Llama 3 or derivative works thereof). Additional Commercial Terms . If, on the Meta Llama 3 version release date, the monthly active users of the products or services made available by or for Licensee, or Licensee’s affiliates, is greater than 700 million monthly active users in the preceding calendar month, you must request a license from Meta, which Meta may grant to you in its sole discretion, and you are not authorized to exercise any of the rights under this Agreement unless or until Meta otherwise expressly grants you such rights. 3 . Disclaimer of Warranty . UNLESS REQUIRED BY APPLICABLE LAW, THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, AND META DISCLAIMS ALL WARRANTIES OF ANY KIND, BOTH EXPRESS AND IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE LLAMA MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS. Limitation of Liability . 
IN NO EVENT WILL META OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF META OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING. Intellectual Property a. No trademark licenses are granted under this Agreement, and in connection with the Llama Materials, neither Meta nor Licensee may use any name or mark owned by or associated with the other or any of its affiliates, except as required for reasonable and customary use in describing and redistributing the Llama Materials or as set forth in this Section 5(a). Meta hereby grants you a license to use “Llama 3” (the “Mark”) solely as required to comply with the last sentence of Section 1.b.i. You will comply with Meta’s brand guidelines (currently accessible at https://about.meta.com/brand/resources/meta/company-brand/ ). All goodwill arising out of your use of the Mark will inure to the benefit of Meta. b. Subject to Meta’s ownership of Llama Materials and derivatives made by or for Meta, with respect to any derivative works and modifications of the Llama Materials that are made by you, as between you and Meta, you are and will be the owner of such derivative works and modifications. c. If you institute litigation or other proceedings against Meta or any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Llama Materials or Meta Llama 3 outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by you, then any licenses granted to you under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless Meta from and against any claim by any third party arising out of or related to your use or distribution of the Llama Materials. Term and Termination . The term of this Agreement will commence upon your acceptance of this Agreement or access to the Llama Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. Meta may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of the Llama Materials. Sections 3, 4 and 7 shall survive the termination of this Agreement. Governing Law and Jurisdiction . This Agreement will be governed and construed under the laws of the State of California without regard to choice of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement. The courts of California shall have exclusive jurisdiction of any dispute arising out of this Agreement. 
----------- -Meta Llama 3 | Model Cards and Prompt formats Skip to main content Table Of Contents Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Getting the Models Hugging Face Kaggle Llama Everywhere Overview Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud How-To Guides Fine-tuning Quantization Prompting Validation Integration Guides LangChain Llamalndex Community Support Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats You can find details about this model in the model card Special Tokens used with Meta Llama 3 <|begin_of_text|> : This is equivalent to the BOS token <|eot_id|> : This signifies the end of the message in a turn. <|start_header_id|>{role}<|end_header_id|> : These tokens enclose the role for a particular message. The possible roles can be: system, user, assistant. <|end_of_text|>: This is equivalent to the EOS token. On generating this token, Llama 3 will cease to generate more tokens. A prompt can optionally contain a single system message, or multiple alternating user and assistant messages, but always ends with the last user message followed by the assistant header. Code to produce this prompt format can be found Note : Newlines (0x0A) are part of the prompt format, for clarity in the example, they have been represented as actual new lines. <|begin_of_text|>{{ user_message }} Meta Llama 3 Instruct Code to generate this prompt format can be found Notes : Newlines (0x0A) are part of the prompt format, for clarity in the examples, they have been represented as actual new lines. The model expects the assistant header at the end of the prompt to start completing it. Decomposing an example instruct prompt with a system message: <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a helpful AI assistant for travel tips and recommendations<|eot_id|><|start_header_id|>user<|end_header_id|> What can you help me with?<|eot_id|><|start_header_id|>assistant<|end_header_id|> : Specifies the start of the prompt <|start_header_id|>system<|end_header_id|> : Specifies the role for the following message, i.e. “system” You are a helpful AI assistant for travel tips and recommendations : The system message : Specifies the end of the input message <|start_header_id|>user<|end_header_id|> : Specifies the role for the following message i.e. “user” What can you help me with? : The user message <|start_header_id|>assistant<|end_header_id|> : Ends with the assistant header, to prompt the model to start generation. Following this prompt, Llama 3 completes it by generating the {{assistant_message}}. It signals the end of the {{assistant_message}} by generating the Example prompt with a single user message <|begin_of_text|><|start_header_id|>user<|end_header_id|> What is France's capital?<|eot_id|><|start_header_id|>assistant<|end_header_id|> System prompt and multiple turn conversation between the user and assistant You are a helpful AI assistant for travel tips and recommendations<|eot_id|><|start_header_id|>user<|end_header_id|> What is France's capital?<|eot_id|><|start_header_id|>assistant<|end_header_id|> Bonjour! 
The capital of France is Paris!<|eot_id|><|start_header_id|>user<|end_header_id|> What can I do there?<|eot_id|><|start_header_id|>assistant<|end_header_id|> Paris, the City of Light, offers a romantic getaway with must-see attractions like the Eiffel Tower and Louvre Museum, romantic experiences like river cruises and charming neighborhoods, and delicious food and drink options, with helpful tips for making the most of your trip.<|eot_id|><|start_header_id|>user<|end_header_id|> Give me a detailed list of the attractions I should visit, and time it takes in each one, to plan my trip accordingly.<|eot_id|><|start_header_id|>assistant<|end_header_id|> On this page Special Tokens used with Meta Llama 3 Meta Llama 3 Instruct ----------- -Meta Llama Guard 2 | Model Cards and Prompt formats Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats Meta Llama Guard 2 You can find details about this model in the Prompt format As the guardrails can be applied both on the input and output of the model, there are two different prompts: one for user input and the other for agent output. The role placeholder can have the values User Agent . The former refers to the input and the later to the output. When evaluating the user input, the agent response must not be present in the conversation. The llama-recipes repository has a helper function and an inference example that shows how to properly format the prompt with the provided categories. This can be used as a template to create custom categories for the prompt. The variables to replace in this prompt template are: {{ role }} : It can have the values: {{ unsafe_categories }} : The default categories and their descriptions are shown below. These can be customized for zero-shot or few-shot prompting. {{ user_message }} : input message from the user. {{ model_answer }} : output from the model. <|begin_of_text|>[INST] Task: Check if there is unsafe content in '{{ role }}' messages in conversations according our safety policy with the below categories. User: {{ user_message_1 }} Agent: {{ model_answer_1 }} Provide your safety assessment for ONLY THE LAST {{ role }} in the above conversation: - First line must read 'safe' or 'unsafe'. - If unsafe, a second line must include a comma-separated list of violated categories. [/INST] Llama Guard was trained in the following categories. S1: Violent Crimes. S2: Non-Violent Crimes. S3: Sex Crimes. S4: Child Exploitation. S5: Specialized Advice. S6: Privacy. S7: Intellectual Property. S8: Indiscriminate Weapons. S9: Hate. S10: Self-Harm. S11: Sexual Content. Additionally, the full description for each category can be set in the prompt as well. AI models should not create content that enables, encourages, or excuses the commission of violent crimes. Examples of violent crimes include, but are not limited to: - Unlawful violence toward people (ex: terrorism, genocide, murder, hate crimes, child abuse, assault, battery, kidnapping) - Unlawful violence toward animals (ex: animal abuse) AI models should not create content that enables, encourages, or excuses the commission of non-violent crimes. 
Examples of non-violent crimes include, but are not limited to: - Financial crimes (ex: fraud, scams, money laundering) - Property crimes (ex: burglary, robbery, arson, vandalism) - Drug crimes (ex: creating or trafficking narcotics) - Weapons crimes (ex: producing unlicensed firearms) - Cyber crimes (ex: hacking, spyware, malware) AI models should not create content that enables, encourages, or excuses the commission of sex crimes. Examples of sex crimes include, but are not limited to: - Human trafficking - Sexual assault (ex: rape) - Sexual harassment (ex: groping) - Lewd conduct AI models should not create content that depicts child nudity or that enables, encourages, excuses, or depicts the sexual abuse of children. AI models should not create content that contains specialized financial, medical, or legal advice. AI models should not create content that contains sensitive, personal information about private individuals. AI models should not create content that violates the intellectual property rights of third parties. AI models should not create content that enables the creation of weapons of mass destruction. Examples of such weapons include, but are not limited to: - Chemical weapons (ex: nerve gas) - Biological weapons (ex: anthrax) - Radiological weapons (ex: salted bombs) - Nuclear weapons (ex: atomic warheads) - High-yield explosive weapons (ex: cluster munitions) AI models should not create content that is hateful toward people on the basis of their protected characteristics (race, color, ethnicity, national origin, disability, religious affiliation, caste, sexual orientation, sex, gender identity and serious disease). AI models should not create content that perpetuates negative stereotypes. AI models should not create content that enables, encourages, or excuses acts of intentional self-harm. Examples of such actions include, but are not limited to: - Suicide - Self-injury (ex: cutting) - Disordered eating AI models should not create depictions of nude adults or content that contains erotic descriptions or explicit depictions of sex acts. Meta Llama Guard 2 ----------- -Meta Code Llama 70B | Model Cards and Prompt formats Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats Meta Code Llama 70B You can find details about this model in the . Note that Meta Code Llama 70B uses the same model card as Meta Code Llama 7B, 13B, and 34B. Completion In this format, the model continues to write code following the provided code in the prompt. An implementation of this prompt can be found {{ code_prompt }} Instructions Meta Code Llama 70B has a different prompt template compared to 34B, 13B and 7B. It starts with a Source: system tag—which can have an empty body—and continues with alternating user or assistant values. Each turn of the conversation uses the special character to separate the messages. The last turn of the conversation uses an Source: assistant tag with an empty message and a Destination: user tag to prompt the model to answer the user question. A detailed implementation of this format is provided. Notes: The structure requires a Source: system tag, but the system prompt can be empty. Each user query is preceded by a blank line. 
At the end of the prompt is a blank line followed by a line containing a space character (0x20). Source: system System prompt Source: user First user query Source: assistant Model response to first query Source: user Second user query Source: assistant Destination: user Meta Code Llama 70B ----------- -Meta Llama Guard 1 | Model Cards and Prompt formats Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats Meta Llama Guard 1 You can find details about this model in the As the guardrails can be applied both on the input and output of the model, there are two different prompts: one for user input and the other for agent output. The role placeholder can have the values User or Agent. The former refers to the input and the later to the output. When evaluating the user input, the agent response must not be present in the conversation. The llama-recipes repository has a that shows how to properly format the prompt with the provided categories. This can be used as a template to create custom categories for the prompt. [INST] Task: Check if there is unsafe content in '{{ role }}' messages in conversations according our safety policy with the below categories. User: {{ user_message_1 }} Agent: {{ model_answer_1 }} Provide your safety assessment for {{ role }} in the above conversation: - First line must read 'safe' or 'unsafe'. - If unsafe, a second line must include a comma-separated list of violated categories. [/INST] Llama Guard was trained in the following categories that can be adapted through prompt engineering. O1: Violence and Hate. Should not - Help people plan or engage in violence. - Encourage people to engage in violence. - Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability. - Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics. Can - Provide information on violence and discrimination. - Discuss the topics of hate and violence, and can discuss historical events involving violence. O2: Sexual Content. - Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities. - Discuss topics related to sexuality and sexual education. - Provide general information regarding sexual activities. - Provide guidance on topics related to sexual health. O3: Criminal Planning. - Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them. - Encourage people to engage in criminal activities or condone criminal behavior. - Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes. - Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems. O4: Guns and Illegal Weapons. 
- Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them. - Encourage people to engage in firearm or illegal weapons crimes or condone such behavior. - Discuss firearms and the arguments for and against firearm ownership. O5: Regulated or Controlled Substances. - Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis. - Assist or encourage people to create such substances when it is illegal to do so. - Discuss the use of regulated or controlled substances, and can provide general information on those substances (ex: what their chemical composition is and what their effects are). - Provide information on how those substances are manufactured, provided that the information is not so detailed as to provide step-by-step instructions for their creation. O6: Self-Harm. - Encourage people to harm themselves. - Romanticize or condone self-harm or suicide. - Provide information regarding the methods of suicide or self-harm. - Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves. - Provide general information regarding self-harm and suicide (ex: statistics, the mental health challenges faced by those who take these actions). Should - Provide information regarding appropriate health resources to people who imply or express the desire to harm themselves. Meta Llama Guard 1 ----------- -Meta Code Llama | Model Cards and Prompt formats Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards & Prompt formats You can find details about this model in the Meta Code Llama 7B, 13B, and 34B In this format, the model continues to write code following the code that is provided in the prompt. An implementation of this prompt can be found The instructions prompt template for Meta Code Llama follow the same structure as the Meta Llama 2 chat model, where the system prompt is optional, and the user and assistant messages alternate, always ending with a user message. Note the beginning of sequence (BOS) token between each user and assistant message. An implementation for Meta Code Llama can be found [INST] <> {{ system_prompt }} <> {{ user_message_1 }} [/INST] {{ model_answer_1 }} [INST] {{ user_message_2 }} [/INST] Infilling Infilling can be done in two different ways: with the prefix-suffix-middle format or the suffix-prefix-middle. An implementation of this format is provided Infilling is only available in the 7B and 13B base models—not in the Python, Instruct, 34B, or 70B models The BOS character is not used for infilling when encoding the prefix or suffix, but only at the beginning of each prompt. Prefix-suffix-middle
<PRE> {{ code_prefix }} <SUF>{{ code_suffix }} <MID> Suffix-prefix-middle 
<PRE> <SUF>{{ code_suffix }} <MID> {{ code_prefix }}
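To make the two infilling layouts above concrete, the sketch below assembles them as plain strings. This is an illustrative helper, not code from the Llama repositories; the <PRE>, <SUF>, and <MID> sentinels are the fill-in-the-middle tokens referred to above, and build_infilling_prompt is a hypothetical name.
```
# Illustrative helper (not from the Llama repos) that builds the two infilling
# prompt layouts described above using the <PRE>/<SUF>/<MID> sentinels.
def build_infilling_prompt(prefix: str, suffix: str, spm: bool = False) -> str:
    """Return a prompt in prefix-suffix-middle (default) or suffix-prefix-middle order."""
    if spm:
        # Suffix-prefix-middle: the prefix follows the <MID> sentinel and the
        # model continues it by generating the missing middle.
        return f"<PRE> <SUF>{suffix} <MID> {prefix}"
    # Prefix-suffix-middle: the model generates the middle after <MID>.
    return f"<PRE> {prefix} <SUF>{suffix} <MID>"

print(build_infilling_prompt("def add(a, b):\n    return ", "\n\nprint(add(2, 3))\n"))
```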
-----------
-Meta Llama 2 | Model Cards and Prompt formats Model Cards & Prompt formats Meta Llama 2 You can find details about this model in the model card. Special Tokens used with Meta Llama 2 <s> and </s> : These are the BOS and EOS tokens from SentencePiece. When multiple messages are present in a multi turn conversation, they separate them, including the user input and model response. [INST] and [/INST] : These tokens enclose user messages in multi turn conversations. <<SYS>> and <</SYS>> : These enclose the system message. The base model supports text completion, so any incomplete user prompt, without special tags, will prompt the model to complete it. The tokenizer provided with the model will include the SentencePiece beginning of sequence (BOS) token (<s>) if requested. Review this code for details. <s>{{ user_prompt }} Meta Llama 2 Chat Code to produce this prompt format can be found in the llama GitHub repository. The system prompt is optional. Single message instance with optional system prompt: <s>[INST] <<SYS>> {{ system_prompt }} <</SYS>> {{ user_message }} [/INST] Multiple user and assistant messages example: <s>[INST] <<SYS>> {{ system_prompt }} <</SYS>> {{ user_message_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_message_2 }} [/INST]
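As a worked example of the template above, the following sketch assembles a Llama 2 chat prompt from a system prompt and alternating user/assistant turns. format_llama2_chat is a hypothetical helper written for illustration; it simply applies the <s>, [INST]/[/INST], and <<SYS>>/<</SYS>> markup described above.
```
# Hypothetical helper that applies the Llama 2 chat markup described above.
def format_llama2_chat(system_prompt, turns):
    """turns: list of (user_message, model_answer) pairs; the last answer may be None."""
    prompt = ""
    for i, (user_message, model_answer) in enumerate(turns):
        prompt += "<s>[INST] "
        if i == 0 and system_prompt:
            # The system prompt is wrapped in <<SYS>> tags inside the first [INST] block.
            prompt += f"<<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
        prompt += f"{user_message} [/INST]"
        if model_answer is not None:
            prompt += f" {model_answer} </s>"
    return prompt

print(format_llama2_chat(
    "You are a helpful AI assistant for travel tips and recommendations",
    [("What is France's capital?", "Paris."), ("What can I do there?", None)],
))
```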
-----------
-Getting the models You can get the Meta Llama models directly from Meta or through Hugging Face or Kaggle. However you get the models, you will first need to accept the license agreements for the models you want. For more detailed information about each of the Meta Llama models, see the Model Cards section immediately following this section. To get the models directly from Meta, go to our Meta Llama download form. Fill in your information, including your email. Select the models that you want, and review and accept the appropriate license agreements. For each model that you request, you will receive an email that contains instructions and a pre-signed URL to download that model. You can use the same URL to download multiple model weights, such as 7B and 13B. The URL expires after 24 hours or five downloads, but you can re-request models in order to receive fresh pre-signed URLs. The model download process uses a script that relies on the following tools: wget and md5sum, so ensure that these are available on your local computer.
-----------
-Hugging Face | Getting the models To obtain the models from Hugging Face (HF), sign in to your account at https://huggingface.co/meta-llama. Select the model you want. You will be taken to a page where you can fill in your information and review the appropriate license agreement. After accepting the agreement, your information is reviewed; the review process could take up to a few days. When you are approved, you will receive an email informing you that you have access to the HF repository for the model. Note that cloning the HF repository to a local computer does not give you all the model files, because some of the files are too large. In the local clone, those files contain only metadata for the actual file. To get these larger files, go to the file in the repository on the HF site and download it directly from there. For example, to get consolidated.00.pth for the Meta Llama 2 7B model, you download it from: https://huggingface.co/meta-llama/Llama-2-7b/blob/main/consolidated.00.pth
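If you prefer to fetch a large file programmatically rather than through the website, something like the following should work once your access request has been approved. This is a sketch that assumes the huggingface_hub package is installed; the repo and file names are taken from the example above.
```
# Sketch: download one large weight file after your access request is approved.
# Assumes `pip install huggingface_hub` and a valid access token.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="meta-llama/Llama-2-7b",   # gated repo from the example above
    filename="consolidated.00.pth",     # one of the files too large to clone
    token="hf_...",                     # or log in first with `huggingface-cli login`
)
print(f"Downloaded to {path}")
```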
-----------
-Kaggle | Getting the models To obtain the models from Kaggle, including the HF versions of the models, sign in to your account at: https://www.kaggle.com/organizations/metaresearch/models Before you can access the models on Kaggle, you need to submit a request for model access, which requires that you accept the model license agreement on the Meta site. Note that the email address that you provide when you accept the license agreement must be the same as the email that you use for your Kaggle account. Once you have accepted the license agreement, return to Kaggle and submit the request for model access. When your request is approved, which might take a few days, you’ll receive an email that says that you have received access. You’ll then be able to access the models on Kaggle. To access a particular model, select it from the Model Variations dropdown box, and click the download icon. An archive file that contains the model will start downloading.
-----------
-Llama Everywhere Although Meta Llama models are often hosted by Cloud Service Providers (CSP), Meta Llama can be used in other contexts as well, such as Linux, the Windows Subsystem for Linux (WSL), macOS, Jupyter notebooks, and even mobile devices. If you are interested in exploring these scenarios, we suggest that you check out the following resources: Llama 3 on Your Local Computer, with Resources for Other Options - How to run Llama on your desktop using Windows, macOS, or Linux, plus pointers to other ways to run Llama, either on premise or in the cloud. Llama Recipes QuickStart - Provides an introduction to Meta Llama using Jupyter notebooks and also demonstrates running Llama locally on macOS. Machine Learning Compilation for Large Language Models (MLC LLM) - Enables “everyone to develop, optimize and deploy AI models natively on everyone's devices with ML compilation techniques.” Llama C++ - Uses the portability of C++ to enable inference with Llama models on a variety of different hardware.
-----------
-Running Meta Llama on Linux | Llama Everywhere Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Running Meta Llama on Linux This tutorial is a part of our Build with Meta Llama series, where we demonstrate the capabilities and practical applications of Llama for developers like you, so that you can leverage the benefits that Llama has to offer and incorporate it into your own applications. This tutorial supports the video Running Llama on Linux | Build with Meta Llama , where we learn how to run Llama on Linux OS by getting the weights and running the model locally, with a step-by-step tutorial to help you follow along. If you're interested in learning by watching or listening, check out our video on Running Llama on Linux. Introduction to llama models At Meta, we strongly believe in an open approach to AI development, particularly in the fast-evolving domain of generative AI. By making AI models publicly accessible, we enable their advantages to reach every segment of society. Last year, we open sourced Meta Llama 2, and this year we released the Meta Llama 3 family of models, available in both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications, unlocking the power of these large language models, and making them accessible to everyone, so you can experiment, innovate, and scale your ideas responsibly. Running Meta Llama on Linux Setup With a Linux setup having a GPU with a minimum of 16GB VRAM, you should be able to load the 8B Llama models in fp16 locally. If you have an Nvidia GPU, you can confirm your setup using the NVIDIA System Management Interface tool that shows you the GPU you have, the VRAM available, and other useful information by typing: nvidia-smi In our current setup, we are on Ubuntu, specifically Pop OS, and have an Nvidia RTX 4090 with a total VRAM of about 24GB. Terminal with nvidia-smi showing NVIDIA GPU Configuration Getting the weights To download the weights, go to the Llama website . Fill in your details in the form and select the models you’d like to download. In our case, we will download the Llama 3 models. Select Meta Llama 3 and Meta Llama Guard 2 on the download page Read and agree to the license agreement, then click Accept and continue . You will see a unique URL on the website. You will also receive the URL in your email and it is valid for 24hrs to allow you to download each model up to 5 times. You can always request a new URL. Download page with unique pre-signed URL We are now ready to get the weights and run the model locally on our machine. It is recommended to use a Python virtual environment for running this demo. In this demo, we are using Miniconda, but you can use any virtual environment of your choice. Open your terminal, and make a new folder called llama3-demo in your workspace. Navigate to the new folder and clone the Llama repo: mkdir llama3-demo cd llama3-demo git clone https://github.com/meta-llama/llama3.git For this demo, we’ll need two prerequisites installed: wget and md5sum . To confirm if your distribution has these, use: wget --version md5sum --version which should return the installed versions. 
If your distribution does not have these, you can install them using apt-get install wget apt-get install md5sum To make sure we have all the package dependencies installed, while in the newly cloned repo folder, type: pip install -e . We are now all set to download the model weights for our local setup. Our team has created a helper script to make it easy to download the model weights. In your terminal, type: ./download.sh The script will ask for the URL from your email. Paste in the URL you received from Meta. It will then ask you to enter the list of models to download. For our example, we’ll download the 8B pretrained model and the fine-tuned 8B chat models. So we’ll enter “8B,8B-instruct” Downloading the 8B models Running the model We are all set to run the example inference script to test if our model has been set up correctly and works. Our team has created an example Python script called example_text_completion.py that you can use to test out the model. The script defines a main function that uses the Llama class from the llama library to generate text completions for given prompts using the pre-trained models. It takes a few arguments: Parameters Descriptions ckpt_dir: str Directory containing the checkpoint files of the model. tokenizer_path: str Path to the tokenizer of the model. temperature: float = 0.6 This parameter controls the randomness of the generation process. Higher values may lead to more creative but less coherent outputs, while lower values may lead to more conservative but more coherent outputs. top_p: float = 0.9 This defines the maximum probability threshold for generating tokens. max_seq_len: int = 128 Defines the maximum length of the input sequence or prompt allowed for the model to process. max_gen_len: int = 64 Defines the maximum length of the generated text the model is allowed to produce. max_batch_size: int = 4 Defines the maximum number of prompts to process in one batch. The function builds an instance of the class, using the provided arguments, then defines a list of prompts for which the model will use generator.text_completion method to generate the completions. To run the script, go back to our terminal, and while in the llama3 repo, type: torchrun --nproc_per_node 1 example_text_completion.py --ckpt_dir Meta-Llama-3-8B/ --tokenizer_path Meta-Llama-3-8B/tokenizer.model --max_seq_len 128 --max_batch_size 4 Replace Meta-Llama-3-8B/ with the path to your checkpoint directory and tokenizer.model with the path to your tokenizer model. If you run it from this main directory, the path may not need to change. Set the –nproc_per_node to the MP value for the model you are using. For 8B models, the value is set to 1. Adjust the max_seq_len max_batch_size parameters as needed. We have set them to 128 and 4 respectively. Running the 8B model on the example text completion script To try out the fine-tuned chat model ( 8B-instruct ), we have a similar example called example_chat_completion.py torchrun --nproc_per_node 1 example_chat_completion.py --ckpt_dir Meta-Llama-3-8B-Instruct/ --tokenizer_path Meta-Llama-3-8B-Instruct/tokenizer.model --max_seq_len 512 --max_batch_size 6 Note that in this case, we use the Meta-Llama-3-8B-Instruct/ model and provide the correct tokenizer under the instruct model folder. 
Running the 8B Instruct model on the example chat completion script A detailed step-by-step process for running on this setup, as well as all the helper and example scripts, can be found in our Llama3 GitHub repo, which covers downloading the models and quick-start steps, as well as examples for inference. 
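For reference, the example_text_completion.py script described above boils down to roughly the following. This is a paraphrased sketch, not the script itself, and the exact Llama class API may differ between repo versions; it still needs to be launched with torchrun as shown earlier.
```
# Paraphrased sketch of example_text_completion.py (APIs may differ by version).
# Launch with torchrun, e.g.:
#   torchrun --nproc_per_node 1 this_script.py
from llama import Llama

generator = Llama.build(
    ckpt_dir="Meta-Llama-3-8B/",
    tokenizer_path="Meta-Llama-3-8B/tokenizer.model",
    max_seq_len=128,
    max_batch_size=4,
)
results = generator.text_completion(
    ["I believe the meaning of life is"],
    max_gen_len=64,
    temperature=0.6,
    top_p=0.9,
)
print(results[0]["generation"])
```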
-----------
-Running Meta Llama on Windows | Llama Everywhere Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Running Meta Llama on Windows This tutorial is a part of our Build with Meta Llama series, where we demonstrate the capabilities and practical applications of Llama for developers like you, so that you can leverage the benefits that Llama has to offer and incorporate it into your own applications. This tutorial supports the video Running Llama on Windows | Build with Meta Llama , where we learn how to run Llama on Windows using Hugging Face APIs, with a step-by-step tutorial to help you follow along. If you're interested in learning by watching or listening, check out our video on Running Llama on Windows. For this demo, we will be using a Windows OS machine with an RTX 4090 GPU. If you have an Nvidia GPU, you can confirm your setup by opening the Terminal and typing (NVIDIA System Management Interface), which will show you the GPU you have, the VRAM available, and other useful information about your setup. Since we will be using the Hugging Face transformers library for this setup, this setup can also be used on other operating systems that the library supports such as Linux or Mac using similar steps as the ones shown in the video. To allow easy access to Meta Llama models , we are providing them on Hugging Face, where you can download the models in both transformers and native Llama 3 formats. To download the weights, visit the meta-llama repo containing the model you’d like to use. For example, we will use the Meta-Llama-3-8B-Instruct model for this demo. Read and agree to the license agreement. Fill in your details and accept the license, and click on submit. Once your request is approved, you'll be granted access to all the Llama 3 models. Meta-Llama 3-8B-Instruct model on Hugging Face For this tutorial, we will be using Meta Llama models already converted to Hugging Face format. However, if you’d like to download the original native weights, click on the "Files and versions" tab and download the contents of the original folder. If you prefer, you can also download the original weights from the command line using the Hugging Face CLI: pip install huggingface-hub huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include "original/*" --local-dir meta-llama/Meta-Llama-3-8B-Instruct In this example, we will showcase how you can use Meta Llama models already converted to Hugging Face format using Transformers. To use the model with Transformers, we will be using the pipeline class from Hugging Face. We recommend that you use a Python virtual environment for running this demo. In this demo, we are using Miniconda, but you can use any virtual environment of your choice. Make sure to use the latest version of transformers pip install -U transformers --upgrade We will also use the accelerate library, which enables our code to be run across any distributed configuration. pip install accelerate We will be using Python for our demo script. To install Python, visit the Python website , where you can choose your OS and download the version of Python you like.  We will also be using PyTorch for our demo, so we will need to make sure we have PyTorch installed in our setup. 
To install PyTorch for your setup, visit the Pytorch downloads website and choose your OS and configuration to get the installation command you need. Paste that command in your terminal and press enter. PyTorch Installation Guide For our script, open the editor of your choice, and create a Python script. We’ll first add the imports that we need for our example: import transformers import torch from transformers import AutoTokenizer Let's define the model we’d like to use. In our demo, we will use the 8B instruct model which is fine tuned for chat: model = "meta-llama/Meta-Llama-3-8B-Instruct" We will also instantiate the tokenizer which can be derived from AutoTokenizer, based on the model we’ve chosen, using the from_pretrained method of AutoTokenizer. This will download and cache the pre-trained tokenizer and return an instance of the appropriate tokenizer class. tokenizer = AutoTokenizer.from_pretrained(model) To use our model for inference: pipeline = transformers.pipeline( "text-generation", model=model, torch_dtype=torch.float16, device_map="auto", ) Hugging Face pipelines allow us to specify which type of task the pipeline needs to run ( text-generation in this case), the model that the pipeline should use to make predictions (specified by model ), the precision to use with this model ( torch.float16 ), the device on which the pipeline should run ( device_map ), and various other options. We’ll also set the argument to auto , which means the pipeline will automatically use a GPU if one is available. Next, let's provide some text prompts as inputs to our pipeline for it to use when it runs to generate responses. Let’s define this as the variable, sequences: sequences = pipeline( 'I have tomatoes, basil and cheese at home. What can I cook for dinner?\n', do_sample=True, top_k=10, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id, truncation = True, max_length=400, The pipeline sets do_sample to True , which allows us to specify the decoding strategy we’d like to use to select the next token from the probability distribution over the entire vocabulary. In our example, we are using top_k sampling. By changing max_length , you can specify how long you’d like the generated response to be. Setting the num_return_sequences parameter to greater than one will let you generate more than one output. Finally, we add the following to provide input, and information on how to run the pipeline: for seq in sequences: print(f"Result: {seq['generated_text']}") Save your script and head back to the terminal. We will save it as llama3-hf-demo.py . Before we run the script, let’s make sure we can access and interact with Hugging Face directly from the terminal. To do that, make sure you have the Hugging Face CLI installed: pip install -U "huggingface_hub[cli]" followed by huggingface-cli login Here, it will ask us for our access token which we can get from our HF account under Settings . Copy it and provide it in the command line. We are now all set to run our script. python llama3-hf-demo.py Running Meta-Llama-3-8B-Instruct locally To check out the full example and run it on your own local machine, see the detailed sample notebook that you can refer to in the llama-recipes GitHub repo . Here you will find an example of how to run Llama 3 models using already converted Hugging Face weights, as well as an example that goes over how you can convert the original weights into Hugging Face format and run using those. 
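Putting the pieces above together, the full llama3-hf-demo.py script looks roughly like this. It is a sketch that assumes transformers, accelerate, and torch are installed and that your Hugging Face account has access to the gated model.
```
# llama3-hf-demo.py: the Windows/Hugging Face pipeline example gathered into one script.
import torch
import transformers
from transformers import AutoTokenizer

model = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model)

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,   # half precision to fit comfortably on a single GPU
    device_map="auto",           # use a GPU automatically if one is available
)

sequences = pipeline(
    "I have tomatoes, basil and cheese at home. What can I cook for dinner?\n",
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    truncation=True,
    max_length=400,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")
```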
We’ve also created various other demos and examples to provide guidance and references to help you get started with Llama models and to make it easier for you to integrate them into your own use cases. To try these examples, check out our llama-recipes GitHub repo. There you’ll find complete walkthroughs for how to get started with Llama models, including installation instructions, dependencies, and recipes with examples of inference, fine tuning, and training on custom data sets. In addition, the repo includes demos that showcase Llama deployments, basic interactions, and specialized use cases.
-----------
-Running Meta Llama on Mac | Llama Everywhere Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Running Meta Llama on Mac This tutorial is a part of our Build with Meta Llama series, where we demonstrate the capabilities and practical applications of Llama for developers like you, so that you can leverage the benefits that Llama has to offer and incorporate it into your own applications. This tutorial supports the video Running Llama on Mac | Build with Meta Llama , where we learn how to run Llama on Mac OS  using Ollama , with a step-by-step tutorial to help you follow along. If you're interested in learning by watching or listening, check out our video on Running Llama on Mac. For this demo, we are using a Macbook Pro running Sonoma 14.4.1 with 64GB memory. Since we will be using Ollamap, this setup can also be used on other operating systems that are supported such as Linux or Windows using similar steps as the ones shown here. lets you set up and run Large Language models like Llama models locally. Downloading Ollama The first step is to install Ollama. To do that, visit their website , where you can choose your platform, and click on “Download” to download Ollama. For our demo, we will choose macOS, and select “Download for macOS”. Next, we will make sure that we can test run Meta Llama 3 models on Ollama . Please note that Ollama provides Meta Llama models in the 4-bit quantized format. To test run the model, let’s open our terminal, and run ollama pull llama3 to download the 4-bit quantized Meta Llama 3 8B chat model, with a size of about 4.7 GB. Downloading 4-bit quantized Meta Llama models If you’d like to download the Llama 3 70B chat model, also in 4-bit, you can instead type ollama pull llama3:70b which in quantized format, would have a size of about 39GB. Running using ollama run To run our model, in your terminal, type: ollama run llama3 We are all set to ask questions and chat with our Meta Llama 3 model. Let’s ask some questions: “Who wrote the book godfather?" Meta Llama model generating a response We can see that it gives the right answer, along with more information about the book as well as the movie that was based on the book. What if I just wanted the name of the author, without the extra information. Let’s adapt our prompt accordingly, specifying the kind of response we expect: "Who wrote the book godfather? Answer with only the name." Meta Llama model generating a specified responses based on prompt We can see that it generates the answer in the format we requested. You can also try running the 70B model: ollama run llama3:70b but the inference speed will likely be slower. Running with curl You can even run and test the Llama 3 8B model directly by using the curl command and specifying your prompt right in the command: curl http://localhost:11434/api/chat -d '{ "model": "llama3", "messages": [ { "role": "user", "content": "who wrote the book godfather?" } ], "stream": false }' Here, we are sending a POST request to an API running on localhost. The API endpoint is for "chat", which will interact with our AI model hosted on the server. 
We are providing a JSON payload that contains a string specifying the name of the AI model to use for processing the input prompt ( ), an array with a string indicating the role of the message sender ( user ) and a string with the user's input prompt (" who wrote the book godfather? "), and a boolean value stream indicating whether the response should be streamed or not. In our case, it is set to false, meaning the entire response will be returned at once. Ollama running Llama model with curl command As we can see, the model generated the response with the answer to our question. Running as a Python script This example can also be run using a Python script. To install Python, visit the , where you can choose your OS and download the version of Python you like. To run it using a Python script, open the editor of your choice, and create a new file. First, let’s add the imports we will need for this demo, and define a parameter called url , which will have the same value as the URL we saw in the demo: import requests import json url = "http://localhost:11434/api/chat" We will now add a new function called , which will take in prompt as an argument: def llama3(prompt): data = { "content": prompt "stream": False headers = { 'Content-Type': 'application/json' response = requests.post(url, headers=headers, json=data) return(response.json()['message']['content']) This function constructs a JSON payload containing the specified prompt and the model name, which is "llama3”. Then, it sends a POST request to the API endpoint with the JSON payload as the message body, using the requests library.  Once the response is received, the function extracts the content of the response message from the JSON object returned by the API, and returns this extracted content. Finally, we will provide the prompt and print the generated response: response = llama3("who wrote the book godfather") print(response) To run the script, write python .py and press enter. Running Meta Llama model using Ollama and Python script As we can see, it generated the response based on the prompt we provided in our script. To learn more about the complete Ollama APIs, check out their documentation To check out the full example, and run it on your own machine, our team has worked on a that you can refer to and can be found in the llama-recipes Github repo , where you will find an example of how to run Llama 3 models on a Mac as well as other platforms. You will find the examples we discussed here, as well as other ways to use Llama 3 locally with Ollama via LangChain. We’ve also created various other demos and examples to provide you with guidance and as references to help you get started with Llama models and to make it easier for you to integrate Llama into your own use cases. These demos and examples are also located in our , where you’ll find complete walkthroughs for how to get started with Llama models, including , dependencies, and recipes. You’ll also find several examples for inference, fine tuning, and training on custom data sets—as well as demos that showcase Llama deployments, basic interactions, and specialized Running Meta Llama on Mac Running using ollama run Running as a Python script Skip to main content
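Pulled together into a single runnable sketch, the Ollama helper described above looks like this. It assumes Ollama is running locally and that ollama pull llama3 has already been run; the payload keys match the curl example shown earlier.
```
# Complete version of the llama3 helper described above: it posts the model name
# and the user prompt to Ollama's /api/chat endpoint and returns the reply text.
import requests

url = "http://localhost:11434/api/chat"

def llama3(prompt):
    data = {
        "model": "llama3",
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,   # return the whole response at once rather than streaming
    }
    headers = {"Content-Type": "application/json"}
    response = requests.post(url, headers=headers, json=data)
    return response.json()["message"]["content"]

response = llama3("who wrote the book godfather")
print(response)
```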
-----------
-Meta Llama in the Cloud | Llama Everywhere Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Meta Llama in the Cloud This tutorial is a part of our Build with Meta Llama series, where we demonstrate the capabilities and practical applications of Llama for developers like you, so that you can leverage the benefits that Llama has to offer and incorporate it into your own applications. This tutorial supports the video Many other ways to run Llama and resources | Build with Meta Llama , where we learn about some of the various other ways in which you can host or run Meta Llama models, and provide you with all the resources that can help you get started. If you're interested in learning by watching or listening, check out our video on Many other ways to run Llama and resources. Apart from running the models locally, one of the most common ways to run Meta Llama models is to run them in the cloud. We saw an example of this using a service called in our running Llama on Windows video . Let's take a look at some of the other services we can use to host and run Llama models such as AWS , Azure, Google, , and VertexAI —among others. Amazon Web Services Amazon Web Services (AWS) provides multiple ways to host your Llama models such as SageMaker Jumpstart and Bedrock. Bedrock is a fully managed service that lets you quickly and easily build generative AI-powered experiences. To use Meta Llama with Bedrock, check out their that goes over how to integrate and use Meta Llama models in your applications. You can also use AWS through SageMaker JumpStart, which enables you to build, train, and deploy ML models from a broad selection of publicly available foundation models, and deploy them on SageMaker Instances for model training and inference. Learn more about how to use Meta Llama on Sagemaker on their Microsoft Azure Another way to run Meta Llama models is on Microsoft Azure. You can access Meta Llama models on Azure in two ways: Models as a Service (MaaS) provides access to Meta Llama hosted APIs through Azure AI Studio Model as a Platform (MaaP) provides access to Meta Llama family of models with out of the box support for fine-tuning and evaluation though Azure Machine Learning Studio Please refer to our How to Guide for more details. Google Cloud Platform You can also use GCP, or Google Cloud Platform, to run Meta Llama models. GCP is a suite of cloud computing services that provides computing resources as well as virtual machines. Building on top of GCP services, Model Garden on Vertex AI offers infrastructure to jumpstart your ML project with a single place to discover, customize, and deploy a wide range of models. We have collaborated with Vertex AI from Google Cloud to fully integrate Meta Llama, offering pre-trained, instruction-tuned, and Meta CodeLlama, in various sizes. Check out how to fine-tune & deploy Meta Llama models on Vertex AI by visiting the . Please note that you may need to request proper GPU computing quota as a prerequisite. IBM watsonx You can also use IBM's watsonx to run Meta Llama models. IBM watsonx is an advanced platform designed for AI builders, integrating generative AI capabilities, foundation models, and traditional machine learning. 
It provides a comprehensive suite of tools that span the AI lifecycle, enabling users to tune models with their enterprise data. The platform supports multi-model flexibility, client protection, AI governance, and hybrid, multi-cloud deployments. It offers features for extracting insights, discovering trends, generating synthetic tabular data, running jupyter notebooks, and creating new content and code. Watsonx.ai equips data scientists with the necessary tools, pipelines, and runtimes for building and deploying ML models, thereby automating the entire AI model lifecycle. We've worked with IBM to make Llama and Code Llama models available on their platform . To test the platform and evaluate Llama on watsonx, creating an account is free and allows testing the available models through the Prompt Lab. For detailed instructions, refer to the getting started guide and the quick start tutorials Other hosting providers You can also run Llama models using hosting providers such as OpenAI, Together AI, Anyscale, Replicate, Groq, etc. Our team has worked on step by step examples to showcase how to run Llama on externally hosted providers. The examples can be found on our Llama-recipes GitHub repo , which goes over the process of setting up and running inference for Llama models on some of these externally hosted providers. Running Llama on premise Many enterprise customers prefer to deploy Llama models on-premise and on their own servers. One way to deploy and run Llama models in this manner is by using TorchServe . TorchServe is an easy to use tool for deploying PyTorch models at scale. It is cloud and environment agnostic and supports features such as multi-model serving, logging, metrics and the creation of RESTful endpoints for application integration. To learn more about how TorchServe works, with setup, quickstart, and examples check out the Github repo Another way to deploy llama models on premise is by using Virtual Large Language Model ( vLLM ) or Text Generation Inference (TGI) , two leading open-source tools to deploy and serve LLMs. A detailed step by step tutorial can be found on our that showcases how to use Llama models with vLLM and Hugging Face TGI, and how to create vLLM and TGI hosted Llama instances with LangChain—a language model integration framework for the creation of applications using large language models. You can find various demos and examples that can provide you with guidance—and that you can use as references to get started with Llama models—on our , where you’ll find several examples for inference and fine tuning, as well as running on various API providers. Learn more about Llama 3 and how to get started by checking out our Getting to know Llama notebook that you can find in our . Here you will find a guided tour of Llama 3, including a comparison to Llama 2, descriptions of different Llama 3 models, how and where to access them, Generative AI and Chatbot architectures, prompt engineering, RAG (Retrieval Augmented Generation), fine-tuning, and more. You will find all this implemented with starter code that you can take and adapt to use in your own Meta Llama 3 projects. To learn more about our Llama 3 models, check out our announcement blog where you can find details about how the models work, data on performance and benchmarks, information about trust and safety, and various other resources to get you started. 
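Returning to the on-premise options mentioned above, the following is a minimal sketch of serving a Llama model with vLLM. The model name is only an example, and vLLM's Python API may change between releases.
```
# Minimal vLLM sketch for on-premise batch inference (model name is an example).
from vllm import LLM, SamplingParams

llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
sampling_params = SamplingParams(temperature=0.6, top_p=0.9, max_tokens=128)

outputs = llm.generate(["Write a haiku about open source AI."], sampling_params)
for output in outputs:
    print(output.outputs[0].text)
```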
Get the model source from our Llama 3 Github repo , where you can learn how the models work along with a minimalist example of how to load Llama 3 models and run inference. Here, you will also find steps to download and set up the models, and examples for running the text completion and chat models. Meta Llama3 GitHub repo Dive deeper and learn more about the model in the , which goes over the model architecture, intended use, hardware and software requirements, training data, results, and licenses. Check out our new Meta AI , built with Llama 3 technology, which is now one of the world’s leading AI assistants that can boost your intelligence and lighten your load, helping you learn, get things done, create content, and connect to make the most out of every moment. You can use Meta AI on Facebook, Instagram, WhatsApp, Messenger, and the web to get things done, learn, create, and connect with the things that matter to you. To learn more about the latest updates and releases of Llama models, check out our website , where you can learn more about the latest models as well as find resources to learn more about how these models work and how you can use them in your own applications. Check out our Getting Started guide that provides information and resources to help you set up Llama including how to access the models, prompt formats, hosting, how-to and integration guides, as well as resources that you can reference to get started with your projects. Take a look at some of our latest blogs that discuss new announcements , the latest on the Llama ecosystem , and our responsible approach to Meta AI and Meta Llama 3 Check out the community resources on our website to help you get started with Meta Llama models, learn about performance & latency, fine tuning, and more. Dive deeper into prompt engineering, learning best practices for prompting Meta Llama models and interacting with Meta Llama Chat, Code Llama, and Llama Guard models in our short course on Prompt Engineering with Llama 2 on DeepLearing.ai, recently updated to showcase both Llama 2 and  Llama 3 models. Community Stories that go over interesting use cases of Llama models in various fields such as in Business, Healthcare, Gaming, Pharmaceutical, and more! Learn more about the Llama ecosystem, building product experiences with Llama, and examples that showcase how industry pioneers have adopted Llama to build and grow innovative products for users across their platforms at Connect 2023 Also check out our that provides developers with recommended best practices and considerations for safely building products powered by LLMs. We hope you found the Build with Meta Llama videos and tutorials helpful to provide you with insights and resources that you may need to get started with using Llama models. We at Meta strongly believe in an open approach to AI development, democratizing access through an open platform and providing you with AI models, tools, and resources to give you the power to shape the next wave of innovation. We want to kickstart that next wave of innovation across the stack—from applications to developer tools to evals to inference optimizations and more. We can’t wait to see what you build and look forward to your feedback. Meta Llama in the Cloud Running Llama on premise
-----------
-Fine-tuning | How-to guides Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud How-to guides If you are looking to learn by writing code it's highly recommended to look into the Getting to Know Llama 3 notebook. It's a great place to start with most commonly performed operations on Meta Llama. Full parameter fine-tuning is a method that fine-tunes all the parameters of all the layers of the pre-trained model. In general, it can achieve the best performance but it is also the most resource-intensive and time consuming: it requires most GPU resources and takes the longest. PEFT, or Parameter Efficient Fine Tuning, allows one to fine tune models with minimal resources and costs. There are two important PEFT methods: LoRA (Low Rank Adaptation) and QLoRA (Quantized LoRA), where pre-trained models are loaded to GPU as quantized 8-bit and 4-bit weights, respectively. It’s likely that you can fine-tune the Llama 2-13B model using LoRA or QLoRA fine-tuning with a single consumer GPU with 24GB of memory, and using QLoRA requires even less GPU memory and fine-tuning time than LoRA. Typically, one should try LoRA, or if resources are extremely limited, QLoRA, first, and after the fine-tuning is done, evaluate the performance. Only consider full fine-tuning when the performance is not desirable. Experiment tracking Experiment tracking is crucial when evaluating various fine-tuning methods like LoRA, and QLoRA. It ensures reproducibility, maintains a structured version history, allows for easy collaboration, and aids in identifying optimal training configurations. Especially with numerous iterations, hyperparameters, and model versions at play, tools like Weights & Biases (W&B) become indispensable. With its seamless integration into multiple frameworks, W&B provides a comprehensive dashboard to visualize metrics, compare runs, and manage model checkpoints. It's often as simple as adding a single argument to your training script to realize these benefits - we’ll show an example in the Hugging Face PEFT LoRA section. Recipes PEFT LoRA The llama-recipes repo has details on different fine-tuning (FT) alternatives supported by the provided sample scripts. In particular, it highlights the use of PEFT as the preferred FT method, as it reduces the hardware requirements and prevents catastrophic forgetting. For specific cases, full parameter FT can still be valid, and different strategies can be used to still prevent modifying the model too much. Additionally, FT can be done in single gpu multi-gpu with FSDP. In order to run the recipes, follow the steps below: Create a conda environment with pytorch and additional dependencies Install the recipes as described Download the desired model from hf, either using git-lfs or using the llama download script. With everything configured, run the following command: python -m llama_recipes.finetuning  --use_peft --peft_method lora --quantization  --model_name ../llama/models_hf/7B --output_dir ../llama/models_ft/7B-peft --batch_size_training 2 --gradient_accumulation_steps 2 torchtune ( link torchtune is a PyTorch-native library that can be used to fine-tune the Meta Llama family of models including Meta Llama 3. 
It supports the end-to-end fine-tuning lifecycle including: Downloading model checkpoints and datasets Training recipes for fine-tuning Llama 3 using full fine-tuning, LoRA, and QLoRA Support for single-GPU fine-tuning capable of running on consumer-grade GPUs with 24GB of VRAM Scaling fine-tuning to multiple GPUs using PyTorch FSDP Log metrics and model checkpoints during training using Weights & Biases Evaluation of fine-tuned models using EleutherAI’s LM Evaluation Harness Post-training quantization of fine-tuned models via TorchAO Interoperability with inference engines including ExecuTorch To install torchtune simply run the pip install command pip install torchtune Follow the instructions on the Hugging Face meta-llama repository to ensure you have access to the Llama 3 model weights. Once you have confirmed access, you can run the following command to download the weights to your local machine. This will also download the tokenizer model and a responsible use guide. tune download meta-llama/Meta-Llama-3-8B \ --output-dir  \ --hf-token  Set your environment variable HF_TOKEN or pass in --hf-token to the command in order to validate your access. You can find your token at https://huggingface.co/settings/tokens The basic command for a single-device LoRA fine-tune of Llama 3 is tune run lora_finetune_single_device --config llama3/8B_lora_single_device torchtune contains built-in recipes for: Full fine-tuning on single device and on multiple devices with FSDP LoRA finetuning on multiple devices with FSDP QLoRA finetuning on , with a QLoRA specific configuration You can find more information on fine-tuning Meta Llama models by reading the torchtune guide. Hugging Face PEFT LoRA ( Using Low Rank Adaption (LoRA) , Meta Llama is loaded to the GPU memory as quantized 8-bit weights. Using the Hugging Face Fine-tuning with PEFT LoRA ( ) is super easy - an example fine-tuning run on Meta Llama 2 7b using the OpenAssistant data set can be done in three simple steps: pip install trl git clone https://github.com/huggingface/trl python trl/examples/scripts/sft.py \ --model_name meta-llama/Llama-2-7b-hf \ --dataset_name timdettmers/openassistant-guanaco \ --load_in_4bit \ --use_peft \ --batch_size 4 \ --gradient_accumulation_steps 2 \ --log_with wandb This takes about 16 hours on a single GPU and uses less than 10GB GPU memory; changing batch size to 8/16/32 will use over 11/16/25 GB GPU memory. After the fine-tuning completes, you’ll see in a new directory named “output” at least adapter_config.json and adapter_model.bin -  run the script below to infer with the base model and the new model, generated by merging the base model with the fined-tuned one: from transformers import ( AutoModelForCausalLM, AutoTokenizer, pipeline, from peft import LoraConfig, PeftModel from trl import SFTTrainer model_name = "meta-llama/Llama-2-7b-chat-hf" new_model = "output" device_map = {"": 0} base_model = AutoModelForCausalLM.from_pretrained( model_name, low_cpu_mem_usage=True, return_dict=True, device_map=device_map, model = PeftModel.from_pretrained(base_model, new_model) model = model.merge_and_unload() tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = "right" prompt = "Who wrote the book Innovator's Dilemma?" 
pipe = pipeline(task="text-generation", model=base_model, tokenizer=tokenizer, max_length=200) result = pipe(f"[INST] {prompt} [/INST]") print(result[0]['generated_text']) pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200) result = pipe(f"[INST] {prompt} [/INST]") QLoRA Fine TuningQLoRA (Q for quantized) is more memory efficient than LoRA. In QLoRA, the pretrained model is loaded to the GPU as quantized 4-bit weights. Fine-tuning using QLoRA is also very easy to run - an example of fine-tuning Llama 2-7b with the OpenAssistant can be done in four quick steps: git clone https://github.com/artidoro/qlora cd qlora pip install -U -r requirements.txt ./scripts/finetune_llama2_guanaco_7b.sh It takes about 6.5 hours to run on a single GPU, using 11GB memory of the GPU. After the fine-tuning completes and the output_dir specified in ./scripts/finetune_llama2_guanaco_7b.sh will have checkoutpoint-xxx subfolders, holding the fine-tuned adapter model files. To run inference, use the script below: from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline from peft import LoraConfig, PeftModel model_id = "meta-llama/Llama-2-7b-hf" new_model = "output/llama-2-guanaco-7b/checkpoint-1875/adapter_model" # change if needed quantization_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type='nf4' model = AutoModelForCausalLM.from_pretrained( model_id, quantization_config=quantization_config, device_map='auto' model = PeftModel.from_pretrained(model, new_model) tokenizer = AutoTokenizer.from_pretrained(model_id) prompt = "Who wrote the book innovator's dilemma?" pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200) result = pipe(f"[INST] {prompt} [/INST]") Axolotl is another open source library you can use to streamline the fine-tuning of Llama 2. A good example of using Axolotl to fine-tune Meta Llama with four notebooks covering the whole fine-tuning process (generate the dataset, fine-tune the model using LoRA, evaluate and benchmark) is QLoRA Fine Tuning Note: This has been tested on Meta Llama 2 models only. QLoRA (Q for quantized) is more memory efficient than LoRA. In QLoRA, the pretrained model is loaded to the GPU as quantized 4-bit weights. Fine-tuning using QLoRA is also very easy to run - an example of fine-tuning Llama 2-7b with the OpenAssistant can be done in four quick steps: pip install -U -r requirements.txt It takes about 6.5 hours to run on a single GPU, using 11GB memory of the GPU. After the fine-tuning completes and the output_dir specified in ./scripts/finetune_llama2_guanaco_7b.sh will have checkoutpoint-xxx subfolders, holding the fine-tuned adapter model files. To run inference, use the script below: from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline from peft import LoraConfig, PeftModel new_model = "output/llama-2-guanaco-7b/checkpoint-1875/adapter_model" # change if needed model = PeftModel.from_pretrained(model, new_model) prompt = "Who wrote the book innovator's dilemma?" pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200) result = pipe(f"[INST] {prompt} [/INST]") Note: This has been tested on Meta Llama 2 models only. is another open source library you can use to streamline the fine-tuning of Llama 2. 
A good example of using Axolotl to fine-tune Meta Llama with four notebooks covering the whole fine-tuning process (generate the dataset, fine-tune the model using LoRA, evaluate and benchmark) is torchtune (link) Hugging Face PEFT LoRA (link)
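To make the PEFT LoRA recipe above more concrete, the sketch below shows the kind of adapter configuration that the command-line examples set up under the hood. The hyperparameters and target modules here are illustrative choices, not values taken from the recipes.
```
# Illustrative LoRA adapter setup (hyperparameters are example values, not tuned).
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", device_map="auto"
)
lora_config = LoraConfig(
    r=8,                                  # rank of the low-rank update matrices
    lora_alpha=16,                        # scaling factor for the updates
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # attention projections to adapt
    task_type="CAUSAL_LM",
)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()        # only the small adapter matrices train
```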
-----------
-Quantization | How-to guides Skip to main content Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Model Cards and Prompt Formats Meta Llama Guard 2 Meta Code Llama 70B Meta Llama Guard 1 Meta Llama on Linux Meta Llama on Windows Meta Llama on Mac Meta Llama in the Cloud Quantization is a technique used in machine learning to reduce the computational and memory requirements of models, making them more efficient for deployment on servers and edge devices. It involves representing model weights and activations, typically 32-bit floating numbers, with lower precision data such as 16-bit float, brain float 16-bit, 8-bit int, or even 4/3/2/1-bit int. The benefits of quantization include smaller model sizes, faster fine-tuning, and faster inference—particularly beneficial in resource-constrained environments. However, the tradeoff is a reduction in model quality due to the loss of precision. Supported quantization modes in PyTorch Post-Training Dynamic Quantization: Weights are pre-quantized ahead of time and activations are converted to int8 during inference, just before computation. This results in faster computation due to efficient int8 matrix multiplication and maintains accuracy on the activation layer. Post-Training Static Quantization: This technique improves performance by converting networks to use both integer arithmetic and int8 memory accesses. It involves feeding batches of data through the network and computing the resulting distributions of the different activations. This information is used to determine how the different activations should be quantized at inference time. Quantization Aware Training (QAT): In QAT, all weights and activations are "fake quantized" during both the forward and backward passes of training. This means float values are rounded to mimic int8 values, but all computations are still done with floating point numbers. This method usually yields higher accuracy than the other two methods as all weight adjustments during training are made while "aware" of the fact that the model will ultimately be quantized. More details about these methods and how they can be applied to different types of models can be found in the official PyTorch . Additionally, the community has already conducted studies on the effectiveness of common quantization methods on Meta Llama 3, and the results and code to evaluate can be found in this GitHub repository We will focus next on quantization tools available for Meta Llama models. As this is a constantly evolving space, the libraries and methods detailed here are the most widely used at the moment and are subject to change as the space evolves. Pytorch quantization with TorchAO TorchAO library offers several methods for quantization, each with different schemes for how the activations and weights are quantized. We distinguish between two main types of quantization: weight only quantization and dynamic quantization. For weight only quantization, we support 8-bit and 4-bit quantization. The 4-bit quantization also has GPTQ support for improved accuracy, which requires calibration but has the same final performance. For dynamic quantization, we support 8-bit activation quantization and 8-bit weight quantization. We also support this type of quantization with smoothquant for improved accuracy, which requires calibration and has slightly worse performance. 
Additionally, the library offers a simple API to test different methods and automatic detection of the best quantization for a given model, known as autoquantization. This API chooses the fastest form of quantization out of the 8-bit dynamic and 8-bit weight-only quantization. It first identifies the shapes of the activations that the different linear layers see, then benchmarks these shapes across different types of quantized and non-quantized layers in order to pick the fastest one. It also composes with torch.compile() to generate the fast kernels. For additional information on torch.compile, please see this general tutorial. Note that this library is in beta phase and in active development; API changes are expected.

HF supported quantization

Hugging Face (HF) offers multiple ways to do LLM quantization with their transformers library. For additional guidance and examples on how to use each of these beyond the brief summary presented here, please refer to their quantization guide and the transformers quantization configuration documentation. The llama-recipes code uses bitsandbytes 8-bit quantization to load the models for inference (see below for more information about using the bitsandbytes library with Llama).

Quanto

Quanto is a versatile PyTorch quantization toolkit that uses linear quantization. It provides features such as weights quantization, activation quantization, and compatibility with various devices and modalities. It supports quantization-aware training and is easy to integrate with custom kernels for specific devices. More details can be found in the announcement blog, the GitHub repository, and the HF guide.

AQLM

Additive Quantization of Language Models (AQLM) is a compression method for LLMs. It quantizes multiple weights together, taking advantage of interdependencies between them. AQLM represents groups comprising 8 to 16 weights each as a sum of multiple vector codes. This library supports fine-tuning its quantized models with Parameter-Efficient Fine-Tuning and LoRA by integrating into HF's PEFT library as well. More details can be found in the GitHub repository.

AWQ

Activation-aware Weight Quantization (AWQ) preserves a small percentage of weights that are important for LLM performance, reducing quantization loss. This allows models to run in 4-bit precision without experiencing performance degradation. Transformers supports loading models quantized with the llm-awq and autoawq libraries. More details on how to load them with the Transformers library can be found in the HF documentation.

AutoGPTQ

The AutoGPTQ library implements the GPTQ algorithm, a post-training quantization technique where each row of the weight matrix is quantized independently. These weights are quantized to int4, but they’re restored to fp16 on the fly during inference, saving memory usage by 4x. More details can be found in the GitHub repository.

BitsAndBytes

BitsAndBytes is an easy option for quantizing a model to 8-bit and 4-bit. The library supports any model in any modality, as long as it supports loading with Hugging Face Accelerate and contains torch.nn.Linear layers. It also provides features for offloading weights between the CPU and GPU to support fitting very large models into memory, adjusting the outlier threshold for 8-bit quantization, skipping module conversion for certain models, and fine-tuning with 8-bit and 4-bit weights.
For 4-bit models, it allows changing the compute data type, using the Normal Float 4 (NF4) data type for weights initialized from a normal distribution, and using nested quantization to save additional memory at no additional performance cost. More details can be found in the HF documentation.
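As a concrete illustration of the 4-bit NF4 options described above, the following is a minimal sketch using the transformers BitsAndBytesConfig; the model ID is an example gated checkpoint that requires accepting the Llama license on Hugging Face.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit NF4 loading with nested (double) quantization and a bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # example gated checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

inputs = tokenizer("Quantization reduces memory usage by", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0], skip_special_tokens=True))
```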
-----------
-Prompting | How-to guides

Link to Notebook showing examples of the techniques discussed in this section.

Prompt engineering is a technique used in natural language processing (NLP) to improve the performance of a language model by providing it with more context and information about the task at hand. It involves creating prompts, which are short pieces of text that provide additional information or guidance to the model, such as the topic or genre of the text it will generate. By using prompts, the model can better understand what kind of output is expected and produce more accurate and relevant results. In Llama 2 the size of the context, in terms of number of tokens, has doubled from 2048 to 4096.

Crafting Effective Prompts

Crafting effective prompts is an important part of prompt engineering. Here are some tips for creating prompts that will help improve the performance of your language model:

Be clear and concise: Your prompt should be easy to understand and provide enough information for the model to generate relevant output. Avoid using jargon or technical terms that may confuse the model.
Use specific examples: Providing specific examples in your prompt can help the model better understand what kind of output is expected. For example, if you want the model to generate a story about a particular topic, include a few sentences about the setting, characters, and plot.
Vary the prompts: Using different prompts can help the model learn more about the task at hand and produce more diverse and creative output. Try using different styles, tones, and formats to see how the model responds.
Test and refine: Once you have created a set of prompts, test them out on the model to see how it performs. If the results are not as expected, try refining the prompts by adding more detail or adjusting the tone and style.
Use feedback: Finally, use feedback from users or other sources to continually improve your prompts. This can help you identify areas where the model needs more guidance and make adjustments accordingly.

Explicit Instructions

Detailed, explicit instructions produce better results than open-ended prompts. You can think about giving explicit instructions as applying rules and restrictions to how Llama 2 responds to your prompt.

Stylization:
Explain this to me like a topic on a children's educational network show teaching elementary students.
I'm a software engineer using large language models for summarization. Summarize the following text in under 250 words:
Give your answer like an old timey private investigator hunting down a case step by step.

Formatting:
Use bullet points.
Return as a JSON object.
Use less technical terms and help me apply it in my work in communications.

Restrictions:
Only use academic papers.
Never give sources older than 2020.
If you don't know the answer, say that you don't know.

Here's an example of giving explicit instructions to get more specific results by limiting the responses to recently created sources:

Explain the latest advances in large language models to me. # More likely to cite sources from 2017
Explain the latest advances in large language models to me. Always cite your sources.
Never cite sources older than 2020. # Gives more specific advances and only cites sources from 2020

Prompting using Zero- and Few-Shot Learning

A shot is an example or demonstration of what type of prompt and response you expect from a large language model. This term originates from training computer vision models on photographs, where one shot was one example or instance that the model used to classify an image.

Zero-Shot Prompting

Large language models like Meta Llama are capable of following instructions and producing responses without having previously seen an example of a task. Prompting without examples is called "zero-shot prompting".

Text: This was the best movie I've ever seen! The sentiment of the text is:
Text: The director was trying too hard. The sentiment of the text is:

Few-Shot Prompting

Adding specific examples of your desired output generally results in a more accurate, consistent output. This technique is called "few-shot prompting". In this example, the generated response follows our desired format: a more nuanced sentiment classifier that gives a positive, neutral, and negative response confidence percentage.

You are a sentiment classifier. For each message, give the percentage of positive/neutral/negative. Here are some samples:
Text: I liked it Sentiment: 70% positive 30% neutral 0% negative
Text: It could be better Sentiment: 0% positive 50% neutral 50% negative
Text: It's fine Sentiment: 25% positive 50% neutral 25% negative
Text: I thought it was okay
Text: I loved it!
Text: Terrible service 0/10

Role Based Prompts

Creating prompts based on the role or perspective of the person or entity being addressed can be useful for generating more relevant and engaging responses from language models.

Pros: Improves relevance: Role-based prompting helps the language model understand the role or perspective of the person or entity being addressed, which can lead to more relevant and engaging responses. Increases accuracy: Providing additional context about the role or perspective of the person or entity being addressed can help the language model avoid making mistakes or misunderstandings.

Cons: Requires effort: Requires more effort to gather and provide the necessary information about the role or perspective of the person or entity being addressed.

Example: You are a virtual tour guide currently walking the tourists through the Eiffel Tower on a night tour. Describe the Eiffel Tower to your audience, covering its history, the number of people visiting each year, the amount of time it takes to do a full tour, and why so many people visit this place each year.

Chain of Thought Technique

This involves providing the language model with a series of prompts or questions to help guide its thinking and generate a more coherent and relevant response. This technique can be useful for generating more thoughtful and well-reasoned responses from language models.

Pros: Improves coherence: Helps the language model think through a problem or question in a logical and structured way, which can lead to more coherent and relevant responses. Increases depth: Providing a series of prompts or questions can help the language model explore a topic more deeply and thoroughly, potentially leading to more insightful and informative responses.

Cons: Requires effort: The chain of thought technique requires more effort to create and provide the necessary prompts or questions.

You are a virtual tour guide from 1901. You have tourists visiting the Eiffel Tower. Describe the Eiffel Tower to your audience. Begin with 1.
Why it was built 2. Then by how long it took them to build 3. Where were the materials sourced to build 4. Number of people it took to build 5. End it with the number of people visiting the Eiffel Tower annually in the 1900s, the amount of time it takes to complete a full tour, and why so many people visit this place each year. Make your tour funny by including 1 or 2 funny jokes at the end of the tour.

Self-Consistency

LLMs are probabilistic, so even with Chain-of-Thought, a single generation might produce incorrect results. Self-Consistency improves accuracy by selecting the most frequent answer from multiple generations, at the cost of higher compute (a code sketch of this approach appears at the end of this section):

John found that the average of 15 numbers is 40. If 10 is added to each number then the mean of the numbers is? Report the answer surrounded by three backticks, for example: ```123```

Running the above several times and taking the most commonly returned value for the answer would make use of the self-consistency approach.

Retrieval-Augmented Generation

Common facts are generally available from today's large models out-of-the-box (i.e. using just the model weights). More specific data is unlikely to be available, though. For example:

What is the capital of California? # The capital of California is Sacramento...
What was the temperature in Menlo Park on December 12th, 2023? # I'm just an AI, I don't have access to real-time or historical weather data...

Retrieval-Augmented Generation, or RAG, describes the practice of including information in the prompt that has been retrieved from an external database. It's an effective way to incorporate facts into your LLM application and is more affordable than fine-tuning, which may also negatively impact the foundational model's capabilities. This could be as simple as a lookup table or as sophisticated as a vector database containing all of your company's knowledge:

Given the following information about temperatures in Menlo Park:
2023-12-11 : 52 degrees Fahrenheit
2023-12-12 : 51 degrees Fahrenheit
2023-12-13 : 55 degrees Fahrenheit
What was the temperature in Menlo Park on 2023-12-12? # Sure! The temperature in Menlo Park on 2023-12-12 was 51 degrees Fahrenheit.
What was the temperature in Menlo Park on 2023-07-18? # Sorry, I don't have information about the temperature in Menlo Park on 2023-07-18. The information provided only includes temperatures for December 11th, 12th, and 13th of 2023.

Program-Aided Language Models

LLMs, by nature, aren't great at performing calculations. While LLMs are bad at arithmetic, they're great for code generation. Program-aided language models leverage this fact by instructing the LLM to write code to solve calculation tasks.

Python code to calculate: ((-5 + 93 * 4 - 0) * (4^4 + -7 + 0 * 5))

Here is the Python code to calculate the given expression:
((-5 + 93 * 4 - 0) * (4**4 + -7 + 0 * 5))
# Calculate the expression inside the parentheses
a = -5 + 93 * 4 - 0
b = 4**4 + -7 + 0 * 5
# Multiply a and b
result = a * b
# Print the result
print(result)

Using the code directly provides the correct result.

Limiting Extraneous Tokens

A common challenge is generating a response without extraneous tokens (e.g. "Sure! Here's more information on..."). By combining a role, rules and restrictions, explicit instructions, and an example, the model can be prompted to generate the desired response.

You are a robot that only outputs JSON. You reply in JSON format with the field 'zip_code'. Example question: What is the zip code of the Empire State Building?
Example answer: {'zip_code': 10118} Now here is my question: What is the zip code of Menlo Park? # "{'zip_code': 94025}"

Reduce Hallucinations

Meta's Responsible Use Guide is a great resource for understanding how best to prompt and address input/output risks of the language model. Refer to pages 14-17. Here are some examples of how a language model might hallucinate and some strategies for fixing the issue:

Example 1: A language model is asked to generate a response to a question about a topic it has not been trained on. The language model may hallucinate information or make up facts that are not accurate or supported by evidence. Fix: To fix this issue, you can provide the language model with more context or information about the topic to help it understand what is being asked and generate a more accurate response. You could also ask the language model to provide sources or evidence for any claims it makes to ensure that its responses are based on factual information.

Example 2: A language model is asked to generate a response to a question that requires a specific perspective or point of view. The language model may hallucinate information or make up facts that are not consistent with the desired perspective or point of view. To fix this issue, you can provide the language model with additional information about the desired perspective or point of view, such as the goals, values, or beliefs of the person or entity being addressed. This can help the language model understand the context and generate a response that is more consistent with the desired perspective or point of view.

Example 3: A language model is asked to generate a response to a question that requires a specific tone or style. The language model may hallucinate information or make up facts that are not consistent with the desired tone or style. To fix this issue, you can provide the language model with additional information about the desired tone or style, such as the audience or purpose of the communication. This can help the language model understand the context and generate a response that is more consistent with the desired tone or style.

Overall, the key to avoiding hallucination in language models is to provide them with clear and accurate information and context, and to carefully monitor their responses to ensure that they are consistent with your expectations and requirements.
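The self-consistency technique described earlier lends itself to a short script: sample several generations and keep the most frequent final answer. The sketch below assumes the transformers text-generation pipeline and access to an instruction-tuned Llama checkpoint on Hugging Face; any chat-capable model can be substituted.

```python
import re
from collections import Counter
from transformers import pipeline

# Example gated checkpoint; any instruction-tuned chat model can be used instead.
generator = pipeline(
    "text-generation",
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    torch_dtype="auto",
    device_map="auto",
)

prompt = (
    "John found that the average of 15 numbers is 40. "
    "If 10 is added to each number then the mean of the numbers is? "
    "Think step by step, then report the final answer surrounded by "
    "three backticks, for example: ```123```"
)

answers = []
for _ in range(5):  # several independent samples
    out = generator(
        prompt,
        do_sample=True,
        temperature=0.8,
        max_new_tokens=256,
        return_full_text=False,  # exclude the prompt from the returned text
    )
    match = re.search(r"```(\d+)```", out[0]["generated_text"])
    if match:
        answers.append(match.group(1))

# The most frequent answer across samples is taken as the final result.
print(Counter(answers).most_common(1))
```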
-----------
-Validation | How-to guides

As the saying goes, if you can't measure it, you can't improve it. In this section, we are going to cover different ways to measure and ultimately validate Llama so it's possible to determine the improvements provided by different fine-tuning techniques.

Quantitative techniques

The focus of these techniques is to gather objective metrics that can be compared easily during and after each fine-tuning run and to provide quick feedback on how well the model is performing. The main metrics collected are loss and perplexity.

K-fold cross-validation: This method consists of dividing the dataset into k subsets, or folds, and then fine-tuning the model k times. On each run, a different fold is used as a validation dataset, using the rest for training. The performance results of each run are averaged out for the final report. This provides a more accurate metric of the performance of the model across the complete dataset, as all entries serve both for validation and training. While it produces the most accurate prediction of how a model is going to generalize after fine-tuning on a given dataset, it is computationally expensive and better suited for small datasets.

Holdout: When using a holdout, the dataset is split into two or three subsets: training and validation, with test as optional. The test and validation sets can represent 10% - 30% of the dataset each. As the name implies, the first two subsets are used for training and validating the model during fine-tuning, while the third is used only after fine-tuning is complete to evaluate how well the model generalizes on data it has not seen in either phase. The advantage of having three partitions is that it provides a way to evaluate the model after fine-tuning for an unbiased view into the model performance, but it requires a slightly bigger dataset to allow for a proper split. This is currently implemented in the Llama recipes fine-tuning script with two subsets of the dataset, train and validation. The data is collected in a JSON file that can be plotted to easily interpret the results and evaluate how the model is performing.

Standard Evaluation tools

There are multiple projects that provide standard evaluation. They provide predefined tasks with commonly used metrics to evaluate the performance of LLMs, like HellaSwag and TruthfulQA. These tools can be used to test if the model has degraded after fine-tuning. Additionally, a custom task can be created using the dataset intended to fine-tune the model, effectively automating the manual verification of the model performance before and after fine-tuning. These types of projects provide a quantitative way of looking at the model's performance in simulated real-world examples. Some of these projects include the LM Evaluation Harness (used to create the HF leaderboard), HELM, BIG-bench, and OpenCompass. As mentioned before, the torchtune library provides integration with the LM Evaluation Harness to test fine-tuned models as well.

Interpreting Loss and Perplexity

The loss value used comes from the transformers LlamaForCausalLM class, which initializes a different loss function depending on the objective required from the model.
The objective of this section is to give a brief overview of how to understand the results from loss and perplexity as an initial evaluation of the model performance during fine-tuning. We also calculate the perplexity as an exponentiation of the loss value. Additional information on loss functions can be found in the linked resources.

In our recipes, we use a simple holdout during fine-tuning. Using the logged loss values for both the train and validation datasets, the curves for both are plotted to analyze the results of the process. Given the setup in the recipe, the expected behavior is a log graph that shows a diminishing train and validation loss value as it progresses. If the validation curve starts going up while the train curve continues decreasing, the model is overfitting and not generalizing well. Some alternatives to test when this happens are early stopping, verifying that the validation dataset is a statistically significant equivalent of the train dataset, data augmentation, using parameter-efficient fine-tuning, or using k-fold cross-validation to better tune the hyperparameters.

Qualitative techniques

Manual testing: Manually evaluating a fine-tuned model will vary according to the fine-tuning objective and available resources. Here we provide general guidelines on how to accomplish it. With a dataset prepared for fine-tuning, a part of it can be separated into a manual test subset, which can be further increased with general knowledge questions that might be relevant to the specific use case. In addition to these general questions, we recommend executing standard evaluations as well and comparing the results with the baseline for the fine-tuned model.

To rate the results, clear evaluation criteria should be defined that are relevant to the dataset being used. Example criteria can be accuracy, coherence and safety. Create a rubric for each criterion and define what would be required for an output to receive a specific score. With these guidelines in place, distribute the test questions to a diverse set of reviewers to have multiple data points for each question. With multiple data points for each question and different criteria, a final score can be calculated for each query, allowing for weighting the scores based on the preferred focus for the final model.
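Since perplexity is just the exponentiation of the loss, turning logged loss values into perplexity and plotting the curves takes only a few lines. The values below are placeholders standing in for the metrics file written by a fine-tuning run.

```python
import math
import matplotlib.pyplot as plt

# Placeholder loss values standing in for the metrics logged by a fine-tuning run.
train_loss = [2.10, 1.65, 1.40, 1.28, 1.21]
val_loss = [2.05, 1.70, 1.52, 1.50, 1.55]  # starts rising: a sign of overfitting

# Perplexity is the exponentiation of the (cross-entropy) loss.
train_ppl = [math.exp(loss) for loss in train_loss]
val_ppl = [math.exp(loss) for loss in val_loss]
print("final train/val perplexity:", round(train_ppl[-1], 2), round(val_ppl[-1], 2))

plt.plot(train_loss, label="train loss")
plt.plot(val_loss, label="validation loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()
```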
-----------
-Meta Code Llama | Integration guides

Meta Code Llama is an open-source family of LLMs based on Llama 2 providing SOTA performance on code tasks. It consists of: Foundation models (Meta Code Llama), Python specializations (Meta Code Llama - Python), and Instruction-following models (Meta Code Llama - Instruct) with 7B, 13B, 34B and 70B parameters each. See the recipes for examples on how to make use of Meta Code Llama.

The following diagram shows how each of the Meta Code Llama models is trained. (Fig: The Meta Code Llama specialization pipeline. The different stages of fine-tuning annotated with the number of tokens seen during training.)

One of the best ways to try out and integrate with Meta Code Llama is using the Hugging Face ecosystem by following their blog post, which has:
- Demo links for all versions of Meta Code Llama
- Working inference code for code completion
- Working inference code for code infilling between code prefix and suffix as inputs
- Working inference code to do 4-bit loading of the 34B model so it can fit on consumer GPUs
- Guide on how to write prompts for the instruction models to have multi-turn conversations about coding
- Guide on how to use Text Generation Inference for model deployment in production
- Guide on how to integrate code autocomplete as an extension with VSCode
- Guide on how to evaluate Meta Code Llama models

If the model does not perform well on your specific task, for example if none of the Meta Code Llama models (7B/13B/34B/70B) generate the correct answer for a text to SQL task, fine-tuning should be considered. There is a complete guide and notebook on how to fine-tune Meta Code Llama using the 7B model hosted on Hugging Face. It uses the LoRA fine-tuning method and can run on a single GPU. As shown in the Meta Code Llama References, fine-tuning improves the performance of Meta Code Llama on SQL code generation, and it can be critical that LLMs are able to interoperate with structured data and SQL, the primary way to access structured data - we are developing demo apps in LangChain and RAG with Llama 2 to show this.

Compatible extensions

In most cases, the simplest method to integrate any model size is through ollama, occasionally combined with litellm. Ollama is a program that allows quantized versions of popular LLMs to run locally. It leverages the GPU and can even run Code Llama 34B on an M1 Mac. Litellm is a simple proxy that can serve an OpenAI-style API, so it's easy to replace OpenAI in existing applications; in our case, extensions.

Continue

This extension can be used with ollama, allowing for easy local-only execution. Additionally, it provides a simple interface to 1/ chat with the model directly running inside VS Code and 2/ select specific files and sections to edit or explain. This extension is an effective way to evaluate Llama because it provides simple and useful features. It also allows developers to build trust by creating diffs for each proposed change and showing exactly what is being changed before saving the file. Handling the context for the LLM is easy and relies heavily on keyboard shortcuts.
It's important to note that all the interactions with the extension are recorded in JSONL format. The objective is to provide data for future fine-tuning of the models based on the feedback recorded during real-world usage as well.

Steps to install with ollama:
1. Install ollama and pull a model (e.g. ollama pull codellama:13b-instruct)
2. Install the extension from the Visual Studio Code marketplace
3. Open the extension and click on the + sign to add models
4. Select Ollama as a provider
5. In the next screen, select the model and size pulled with ollama
6. Select the model in the conversation and start using the extension

Steps to install with TGI: For better performance or usage on non-compatible hardware, TGI can be used on a server to run the model. For example, ollama on Intel Macs is too slow to be useful, even with the 7B models. On the contrary, M1 Macs can run the 34B Meta Code Llama models quickly. For this, you should have TGI running on a server with appropriate hardware, as detailed in this guide. Once Continue.dev is installed, follow these steps:
1. Open the configs with /config
2. Use the HuggingFaceTGI class and pass your instance URL in the server_url parameter
3. Assign a name to it and save the config file

llm-vscode

This extension from Hugging Face provides an open alternative to the closed-source GitHub Copilot, allowing for the same functionality, context-based autocomplete suggestions, to work with open source models. It works out of the box with a HF Token and their Inference API but can be configured to use any TGI-compatible API. For usage with a self-hosted TGI server, follow these steps:
1. Install the extension from the marketplace
2. Open the extension configs
3. Select the correct template for the model published in your TGI instance in the Config Template field. For testing, we used the one named codellama/CodeLlama-13b-hf
4. Pass in the URL to your TGI instance in the Model ID or Endpoint field
5. To avoid rate limiting messages, log in to HF by providing a read-only token. This was necessary even for a self-hosted instance.

It currently does not support local models unless TGI is running locally. It would be great to add ollama support to this extension, as it would accelerate the inference with the smaller models by avoiding the network.
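As a reference for the ollama-based setups above, the snippet below queries a locally running ollama server over its REST API; it assumes `ollama serve` is running on the default port and that `ollama pull codellama:13b-instruct` has completed.

```python
import requests

# Default local ollama endpoint; the model must have been pulled beforehand.
response = requests.post(
    "http://localhost:11434/api/generate",
    json={
        "model": "codellama:13b-instruct",
        "prompt": "Write a Python function that checks whether a string is a palindrome.",
        "stream": False,
    },
    timeout=300,
)
print(response.json()["response"])
```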
-----------
-LangChain | Integration guides

LangChain is an open source framework for building LLM-powered applications. It implements common abstractions and higher-level APIs to make the app building process easier, so you don't need to call LLMs from scratch. The main building blocks/APIs of LangChain are:

The Models or LLMs API can be used to easily connect to all popular LLMs such as Hugging Face or Replicate, where all types of Llama 2 models are hosted.

The Prompts API implements the useful prompt template abstraction to help you easily reuse good, often long and detailed, prompts when building sophisticated LLM apps. There are also many built-in prompts for common operations such as summarization or connection to SQL databases for quick app development. Prompts can also work closely with parsers to easily extract useful information from the LLM output.

The Memory API can be used to save conversation history and feed it along with new questions to the LLM so multi-turn natural conversation chat can be implemented.

The Chains API includes the most basic LLMChain that combines an LLM with a prompt to generate the output, as well as more advanced chains that let you build sophisticated LLM apps in a systematic way. For example, the output of the first LLM chain can be the input/prompt of another chain, or a chain can have multiple inputs and/or multiple outputs, either pre-defined or dynamically decided by the LLM output of a prompt.

The Indexes API allows documents outside of the LLM to be saved to a vector store, after first being converted to embeddings, which are numerical meaning representations of the documents in vector form. Later, when a user enters a question about the documents, the relevant data stored in the documents' vector store will be retrieved and sent, along with the query, to the LLM to generate an answer related to the documents.

The Agents API uses the LLM as the reasoning engine and connects it with other sources of data, third-party or custom tools, or APIs such as web search or Wikipedia. Depending on the user's input, the agent can decide which tool to call to handle the input.

LangChain can be used as a powerful retrieval-augmented generation (RAG) tool to integrate internal data or more recent public data with the LLM to QA or chat about the data. LangChain already supports loading many types of unstructured and structured data.

To learn more about LangChain, enroll for free in the two LangChain short courses. Be aware that the code in the courses uses the OpenAI ChatGPT LLM, but we've published a series of examples using LangChain with Llama. There is also a Getting to Know Llama notebook, presented at Meta Connect.
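A minimal sketch of the Models, Prompts and Chains APIs working together is shown below. It assumes the langchain and langchain-community packages plus a local Llama served through Ollama (any supported LLM wrapper, such as Hugging Face or Replicate, can be swapped in); exact import paths vary between LangChain versions.

```python
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Local Llama served by ollama; swap in a Hugging Face or Replicate wrapper as needed.
llm = Ollama(model="llama2")

# Prompt template that can be reused with different inputs.
prompt = PromptTemplate.from_template(
    "Summarize the following text in two sentences:\n\n{text}"
)

# The most basic chain: combine the LLM with the prompt and run it.
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run(text="LangChain provides abstractions such as models, prompts, "
                     "memory, chains, indexes and agents for building LLM apps."))
```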
-----------
-LlamaIndex | Integration guides

LlamaIndex is another popular open source framework for building LLM applications. Like LangChain, LlamaIndex can also be used to build RAG applications by easily integrating data that is not built into the LLM with the LLM. There are three key tools in LlamaIndex:

Connecting Data: connect data of any type - structured, unstructured or semi-structured - to the LLM
Indexing Data: index and store the data
Querying LLM: combine the user query and retrieved query-related data to query the LLM and return a data-augmented answer

LlamaIndex is mainly a data framework for connecting private or domain-specific data with LLMs, so it specializes in RAG, smart data storage and retrieval, while LangChain is a more general purpose framework which can be used to build agents connecting multiple tools. The integration of the two may provide the most performant and effective solution to building real-world RAG-powered Llama apps.

For an example of how to integrate LlamaIndex with Llama 2, see the linked example. We also published a complete demo app showing how to use LlamaIndex to chat with Llama 2 about live data via the you.com API.

It's worth noting that LlamaIndex has implemented many RAG-powered LLM evaluation tools to easily measure the quality of retrieval and response, including:

Question Generation: Call the LLM to auto-generate questions to create an evaluation dataset.
Faithfulness Evaluator: Evaluate if the generated answer is faithful to the retrieved context or if there's hallucination.
Correctness Evaluator: Evaluate if the generated answer matches the reference answer.
Relevancy Evaluator: Evaluate if the answer and the retrieved context are relevant and consistent for the given query.
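The three tools above map to only a few lines of LlamaIndex code. The sketch below assumes a recent llama-index release, a local ./data folder of documents, and a configured LLM/embedding backend (the library defaults to OpenAI; a Llama model served locally can be plugged in via Settings instead).

```python
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Connect data: load every file in a (hypothetical) local ./data folder.
documents = SimpleDirectoryReader("./data").load_data()

# Index data: embed the documents and store them in an in-memory vector index.
index = VectorStoreIndex.from_documents(documents)

# Query LLM: retrieve the relevant chunks and generate a data-augmented answer.
query_engine = index.as_query_engine()
print(query_engine.query("What does this document say about pricing?"))
```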
-----------
-# Llama Recipes: Examples to get started using the Llama models from Meta

The 'llama-recipes' repository is a companion to the [Meta Llama 3](https://github.com/meta-llama/llama3) models. The goal of this repository is to provide a scalable library for fine-tuning Meta Llama models, along with some example scripts and notebooks to quickly get started with using the models in a variety of use-cases, including fine-tuning for domain adaptation and building LLM-based applications with Meta Llama and other tools in the LLM ecosystem. The examples here showcase how to run Meta Llama locally, in the cloud, and on-prem. [Meta Llama 2](https://github.com/meta-llama/llama) is also supported in this repository. We highly recommend that everyone utilize [Meta Llama 3](https://github.com/meta-llama/llama3) due to its enhanced capabilities.

> [!IMPORTANT]
> Meta Llama 3 has a new prompt template and special tokens (based on the tiktoken tokenizer).
> | Token | Description |
> |---|---|
> `<\|begin_of_text\|>` | This is equivalent to the BOS token. |
> `<\|end_of_text\|>` | This is equivalent to the EOS token. For multiturn-conversations it's usually unused. Instead, every message is terminated with `<\|eot_id\|>`. |
> `<\|eot_id\|>` | This token signifies the end of the message in a turn i.e. the end of a single message by a system, user or assistant role as shown below. |
> `<\|start_header_id\|>{role}<\|end_header_id\|>` | These tokens enclose the role for a particular message. The possible roles can be: system, user, assistant. |
>
> A multiturn-conversation with Meta Llama 3 follows this prompt template:
> ```
> <|begin_of_text|><|start_header_id|>system<|end_header_id|>
> {{ system_prompt }}<|eot_id|><|start_header_id|>user<|end_header_id|>
> {{ user_message_1 }}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
> {{ model_answer_1 }}<|eot_id|><|start_header_id|>user<|end_header_id|>
> {{ user_message_2 }}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
> ```
> Each message gets trailed by an `<|eot_id|>` token before a new header is started, signaling a role change.
>
> More details on the new tokenizer and prompt template can be found [here](https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3#special-tokens-used-with-meta-llama-3).

> [!NOTE]
> The llama-recipes repository was recently refactored to promote a better developer experience of using the examples. Some files have been moved to new locations. The `src/` folder has NOT been modified, so the functionality of this repo and package is not impacted.
> Make sure you update your local clone by running `git pull origin main`

## Table of Contents

- [Llama Recipes: Examples to get started using the Meta Llama models from Meta](#llama-recipes-examples-to-get-started-using-the-llama-models-from-meta)
- [Table of Contents](#table-of-contents)
- [Getting Started](#getting-started)
- [Prerequisites](#prerequisites)
- [PyTorch Nightlies](#pytorch-nightlies)
- [Installing](#installing)
- [Install with pip](#install-with-pip)
- [Install with optional dependencies](#install-with-optional-dependencies)
- [Install from source](#install-from-source)
- [Getting the Llama models](#getting-the-llama-models)
- [Model conversion to Hugging Face](#model-conversion-to-hugging-face)
- [Repository Organization](#repository-organization)
- [`recipes/`](#recipes)
- [`src/`](#src)
- [Contributing](#contributing)
- [License](#license)

## Getting Started

These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.

### Prerequisites

#### PyTorch Nightlies

If you want to use PyTorch nightlies instead of the stable release, go to [this guide](https://pytorch.org/get-started/locally/) to retrieve the right `--extra-index-url URL` parameter for the `pip install` commands on your platform.

### Installing

Llama-recipes provides a pip distribution for easy install and usage in other projects. Alternatively, it can be installed from source.

> Ensure you use the correct CUDA version (from `nvidia-smi`) when installing the PyTorch wheels. Here we are using 11.8 as `cu118`.
> H100 GPUs work better with CUDA >12.0

#### Install with pip

```
pip install llama-recipes
```

#### Install with optional dependencies

Llama-recipes offers the installation of optional packages. There are three optional dependency groups.

To run the unit tests we can install the required dependencies with `pip install llama-recipes[tests]`.

For the vLLM example we need additional requirements that can be installed with `pip install llama-recipes[vllm]`.

To use the sensitive topics safety checker install with `pip install llama-recipes[auditnlg]`.

Optional dependencies can also be combined with [option1,option2].

#### Install from source

To install from source e.g. for development use these commands. We're using hatchling as our build backend which requires an up-to-date pip as well as setuptools package.

`git clone git@github.com:meta-llama/llama-recipes.git`
`cd llama-recipes`
`pip install -U pip setuptools`
`pip install -e .`

For development and contributing to llama-recipes please install all optional dependencies:

`pip install -U pip setuptools`
`pip install -e .[tests,auditnlg,vllm]`

### Getting the Meta Llama models

You can find Meta Llama models on Hugging Face hub [here](https://huggingface.co/meta-llama), **where models with `hf` in the name are already converted to Hugging Face checkpoints so no further conversion is needed**. The conversion step below is only for original model weights from Meta that are hosted on Hugging Face model hub as well.

#### Model conversion to Hugging Face

The recipes and notebooks in this folder are using the Meta Llama model definition provided by Hugging Face's transformers library.
Given that the original checkpoint resides under models/7B you can install all requirements and convert the checkpoint with the commands below (a short loading example is included at the end of this README):

```bash
## Install Hugging Face Transformers from source
pip freeze | grep transformers ## verify it is version 4.31.0 or higher

git clone git@github.com:huggingface/transformers.git
cd transformers
pip install protobuf
python src/transformers/models/llama/convert_llama_weights_to_hf.py \
    --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path
```

## Repository Organization

Most of the code dealing with Llama usage is organized across 2 main folders: `recipes/` and `src/`.

### `recipes/`

Contains examples organized in folders by topic:

| Subfolder | Description |
|---|---|
| [quickstart](./recipes/quickstart) | The "Hello World" of using Llama, start here if you are new to using Llama. |
| [finetuning](./recipes/finetuning) | Scripts to finetune Llama on single-GPU and multi-GPU setups |
| [inference](./recipes/inference) | Scripts to deploy Llama for inference locally and using model servers |
| [use_cases](./recipes/use_cases) | Scripts showing common applications of Meta Llama3 |
| [responsible_ai](./recipes/responsible_ai) | Scripts to use PurpleLlama for safeguarding model outputs |
| [llama_api_providers](./recipes/llama_api_providers) | Scripts to run inference on Llama via hosted endpoints |
| [benchmarks](./recipes/benchmarks) | Scripts to benchmark Llama models inference on various backends |
| [code_llama](./recipes/code_llama) | Scripts to run inference with the Code Llama models |
| [evaluation](./recipes/evaluation) | Scripts to evaluate fine-tuned Llama models using `lm-evaluation-harness` from `EleutherAI` |

### `src/`

Contains modules which support the example recipes:

| Subfolder | Description |
|---|---|
| [configs](src/llama_recipes/configs/) | Contains the configuration files for PEFT methods, FSDP, Datasets, Weights & Biases experiment tracking. |
| [datasets](src/llama_recipes/datasets/) | Contains individual scripts for each dataset to download and process. |
| [inference](src/llama_recipes/inference/) | Includes modules for inference for the fine-tuned models. |
| [model_checkpointing](src/llama_recipes/model_checkpointing/) | Contains FSDP checkpoint handlers. |
| [policies](src/llama_recipes/policies/) | Contains FSDP scripts to provide different policies, such as mixed precision, transformer wrapping policy and activation checkpointing along with any precision optimizer (used for running FSDP with pure bf16 mode). |
| [utils](src/llama_recipes/utils/) | Utility files: `train_utils.py` provides training/eval loop and more train utils; `dataset_utils.py` to get preprocessed datasets; `config_utils.py` to override the configs received from CLI; `fsdp_utils.py` provides FSDP wrapping policy for PEFT methods; `memory_utils.py` context manager to track different memory stats in train loop. |

## Contributing

Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us.

## License

See the License file for Meta Llama 3 [here](https://llama.meta.com/llama3/license/) and Acceptable Use Policy [here](https://llama.meta.com/llama3/use-policy/)

See the License file for Meta Llama 2 [here](https://llama.meta.com/llama2/license/) and Acceptable Use Policy [here](https://llama.meta.com/llama2/use-policy/)
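As referenced in the conversion section, a converted checkpoint can be sanity-checked by loading it with transformers; the /output/path below is the illustrative directory from the conversion command, not a real path.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# /output/path is the illustrative directory produced by the conversion command above.
tokenizer = AutoTokenizer.from_pretrained("/output/path")
model = AutoModelForCausalLM.from_pretrained(
    "/output/path",
    torch_dtype=torch.float16,
    device_map="auto",
)

inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```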
-----------
-# **Model Details**

Meta developed and released the Llama 2 family of large language models (LLMs), a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters. Our fine-tuned LLMs, called Llama-2-Chat, are optimized for dialogue use cases. Llama-2-Chat models outperform open-source chat models on most benchmarks we tested, and in our human evaluations for helpfulness and safety, are on par with some popular closed-source models like ChatGPT and PaLM.

**Model Developers** Meta

**Variations** Llama 2 comes in a range of parameter sizes — 7B, 13B, and 70B — as well as pretrained and fine-tuned variations.

**Input** Models input text only.

**Output** Models generate text only.

**Model Architecture** Llama 2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.

||Training Data|Params|Context Length|GQA|Tokens|LR|
|---|---|---|---|---|---|---|
|Llama 2|*A new mix of publicly available online data*|7B|4k|✗|2.0T|3.0 x 10^-4|
|Llama 2|*A new mix of publicly available online data*|13B|4k|✗|2.0T|3.0 x 10^-4|
|Llama 2|*A new mix of publicly available online data*|70B|4k|✔|2.0T|1.5 x 10^-4|

**Llama 2 family of models.** Token counts refer to pretraining data only. All models are trained with a global batch-size of 4M tokens. The 70B version uses Grouped-Query Attention (GQA) for improved inference scalability.

**Model Dates** Llama 2 was trained between January 2023 and July 2023.

**Status** This is a static model trained on an offline dataset. Future versions of the tuned models will be released as we improve model safety with community feedback.

**License** A custom commercial license is available at: [https://ai.meta.com/resources/models-and-libraries/llama-downloads/](https://ai.meta.com/resources/models-and-libraries/llama-downloads/)

**Research Paper** More information can be found in the paper "Llama-2: Open Foundation and Fine-tuned Chat Models", available at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/.

**Where to send questions or comments about the model** Instructions on how to provide feedback or comments on the model can be found in the model [README](README.md).

# **Intended Use**

**Intended Use Cases** Llama 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks.

**Out-of-scope Uses** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 2 Community License. Use in languages other than English**.

**Note: Developers may fine-tune Llama 2 models for languages beyond English provided they comply with the Llama 2 Community License and the Acceptable Use Policy.

# **Hardware and Software**

**Training Factors** We used custom training libraries, Meta's Research Super Cluster, and production clusters for pretraining. Fine-tuning, annotation, and evaluation were also performed on third-party cloud compute.

**Carbon Footprint** Pretraining utilized a cumulative 3.3M GPU hours of computation on hardware of type A100-80GB (TDP of 350-400W).
Estimated total emissions were 539 tCO2eq, 100% of which were offset by Meta’s sustainability program.

||Time (GPU hours)|Power Consumption (W)|Carbon Emitted (tCO2eq)|
|---|---|---|---|
|Llama 2 7B|184320|400|31.22|
|Llama 2 13B|368640|400|62.44|
|Llama 2 70B|1720320|400|291.42|
|Total|3311616||539.00|

**CO2 emissions during pretraining.** Time: total GPU time required for training each model. Power Consumption: peak power capacity per GPU device for the GPUs used adjusted for power usage efficiency. 100% of the emissions are directly offset by Meta's sustainability program, and because we are openly releasing these models, the pretraining costs do not need to be incurred by others.

# **Training Data**

**Overview** Llama 2 was pretrained on 2 trillion tokens of data from publicly available sources. The fine-tuning data includes publicly available instruction datasets, as well as over one million new human-annotated examples. Neither the pretraining nor the fine-tuning datasets include Meta user data.

**Data Freshness** The pretraining data has a cutoff of September 2022, but some tuning data is more recent, up to July 2023.

# **Evaluation Results**

In this section, we report the results for the Llama 1 and Llama 2 models on standard academic benchmarks. For all the evaluations, we use our internal evaluations library.

|Model|Size|Code|Commonsense Reasoning|World Knowledge|Reading Comprehension|Math|MMLU|BBH|AGI Eval|
|---|---|---|---|---|---|---|---|---|---|
|Llama 1|7B|14.1|60.8|46.2|58.5|6.95|35.1|30.3|23.9|
|Llama 1|13B|18.9|66.1|52.6|62.3|10.9|46.9|37.0|33.9|
|Llama 1|33B|26.0|70.0|58.4|67.6|21.4|57.8|39.8|41.7|
|Llama 1|65B|30.7|70.7|60.5|68.6|30.8|63.4|43.5|47.6|
|Llama 2|7B|16.8|63.9|48.9|61.3|14.6|45.3|32.6|29.3|
|Llama 2|13B|24.5|66.9|55.4|65.8|28.7|54.8|39.4|39.1|
|Llama 2|70B|**37.5**|**71.9**|**63.6**|**69.4**|**35.2**|**68.9**|**51.2**|**54.2**|

**Overall performance on grouped academic benchmarks.** *Code:* We report the average pass@1 scores of our models on HumanEval and MBPP. *Commonsense Reasoning:* We report the average of PIQA, SIQA, HellaSwag, WinoGrande, ARC easy and challenge, OpenBookQA, and CommonsenseQA. We report 7-shot results for CommonSenseQA and 0-shot results for all other benchmarks. *World Knowledge:* We evaluate the 5-shot performance on NaturalQuestions and TriviaQA and report the average. *Reading Comprehension:* For reading comprehension, we report the 0-shot average on SQuAD, QuAC, and BoolQ. *MATH:* We report the average of the GSM8K (8 shot) and MATH (4 shot) benchmarks at the top 1.

|||TruthfulQA|Toxigen|
|---|---|---|---|
|Llama 1|7B|27.42|23.00|
|Llama 1|13B|41.74|23.08|
|Llama 1|33B|44.19|22.57|
|Llama 1|65B|48.71|21.77|
|Llama 2|7B|33.29|**21.25**|
|Llama 2|13B|41.86|26.10|
|Llama 2|70B|**50.18**|24.60|

**Evaluation of pretrained LLMs on automatic safety benchmarks.** For TruthfulQA, we present the percentage of generations that are both truthful and informative (the higher the better). For ToxiGen, we present the percentage of toxic generations (the smaller the better).

|||TruthfulQA|Toxigen|
|---|---|---|---|
|Llama-2-Chat|7B|57.04|**0.00**|
|Llama-2-Chat|13B|62.18|**0.00**|
|Llama-2-Chat|70B|**64.14**|0.01|

**Evaluation of fine-tuned LLMs on different safety datasets.** Same metric definitions as above.

# **Ethical Considerations and Limitations**

Llama 2 is a new technology that carries risks with use. Testing conducted to date has been in English, and has not covered, nor could it cover all scenarios.
For these reasons, as with all LLMs, Llama 2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 2, developers should perform safety testing and tuning tailored to their specific applications of the model. Please see the Responsible Use Guide available at [https://ai.meta.com/llama/responsible-use-guide/](https://ai.meta.com/llama/responsible-use-guide/)
-----------
-# Llama 2

We are unlocking the power of large language models. Llama 2 is now accessible to individuals, creators, researchers, and businesses of all sizes so that they can experiment, innovate, and scale their ideas responsibly.

This release includes model weights and starting code for pre-trained and fine-tuned Llama language models — ranging from 7B to 70B parameters.

This repository is intended as a minimal example to load [Llama 2](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/) models and run inference. For more detailed examples leveraging Hugging Face, see [llama-recipes](https://github.com/facebookresearch/llama-recipes/).

## Updates post-launch

See [UPDATES.md](UPDATES.md). Also for a running list of frequently asked questions, see [here](https://ai.meta.com/llama/faq/).

## Download

In order to download the model weights and tokenizer, please visit the [Meta website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and accept our License.

Once your request is approved, you will receive a signed URL over email. Then run the download.sh script, passing the URL provided when prompted to start the download.

Pre-requisites: Make sure you have `wget` and `md5sum` installed. Then run the script: `./download.sh`.

Keep in mind that the links expire after 24 hours and a certain amount of downloads. If you start seeing errors such as `403: Forbidden`, you can always re-request a link.

### Access to Hugging Face

We are also providing downloads on [Hugging Face](https://huggingface.co/meta-llama). You can request access to the models by acknowledging the license and filling the form in the model card of a repo. After doing so, you should get access to all the Llama models of a version (Code Llama, Llama 2, or Llama Guard) within 1 hour.

## Quick Start

You can follow the steps below to quickly get up and running with Llama 2 models. These steps will let you run quick inference locally. For more examples, see the [Llama 2 recipes repository](https://github.com/facebookresearch/llama-recipes).

1. In a conda env with PyTorch / CUDA available clone and download this repository.
2. In the top-level directory run: `pip install -e .`
3. Visit the [Meta website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and register to download the model/s.
4. Once registered, you will get an email with a URL to download the models. You will need this URL when you run the download.sh script.
5. Once you get the email, navigate to your downloaded llama repository and run the download.sh script.
    - Make sure to grant execution permissions to the download.sh script
    - During this process, you will be prompted to enter the URL from the email.
    - Do not use the “Copy Link” option but rather make sure to manually copy the link from the email.
6. Once the model/s you want have been downloaded, you can run the model locally using the command below:

    torchrun --nproc_per_node 1 example_chat_completion.py \
        --ckpt_dir llama-2-7b-chat/ \
        --tokenizer_path tokenizer.model \
        --max_seq_len 512 --max_batch_size 6

**Note**
- Replace `llama-2-7b-chat/` with the path to your checkpoint directory and `tokenizer.model` with the path to your tokenizer model.
- The `--nproc_per_node` should be set to the [MP](#inference) value for the model you are using.
- Adjust the `max_seq_len` and `max_batch_size` parameters as needed.
- This example runs the [example_chat_completion.py](example_chat_completion.py) found in this repository but you can change that to a different .py file.

## Inference

Different models require different model-parallel (MP) values:

|  Model | MP |
|--------|----|
| 7B     | 1  |
| 13B    | 2  |
| 70B    | 8  |

All models support sequence length up to 4096 tokens, but we pre-allocate the cache according to `max_seq_len` and `max_batch_size` values. So set those according to your hardware.

### Pretrained Models

These models are not finetuned for chat or Q&A. They should be prompted so that the expected answer is the natural continuation of the prompt.

See `example_text_completion.py` for some examples. To illustrate, see the command below to run it with the llama-2-7b model (`nproc_per_node` needs to be set to the `MP` value):

    torchrun --nproc_per_node 1 example_text_completion.py \
        --ckpt_dir llama-2-7b/ \
        --tokenizer_path tokenizer.model \
        --max_seq_len 128 --max_batch_size 4

### Fine-tuned Chat Models

The fine-tuned models were trained for dialogue applications. To get the expected features and performance for them, a specific formatting defined in [`chat_completion`](https://github.com/facebookresearch/llama/blob/main/llama/generation.py#L212) needs to be followed, including the `INST` and `<<SYS>>` tags, `BOS` and `EOS` tokens, and the whitespaces and breaklines in between (we recommend calling `strip()` on inputs to avoid double-spaces).

You can also deploy additional classifiers for filtering out inputs and outputs that are deemed unsafe. See the llama-recipes repo for [an example](https://github.com/facebookresearch/llama-recipes/blob/main/examples/inference.py) of how to add a safety checker to the inputs and outputs of your inference code.

Examples using llama-2-7b-chat:

    torchrun --nproc_per_node 1 example_chat_completion.py \
        --ckpt_dir llama-2-7b-chat/ \
        --tokenizer_path tokenizer.model \
        --max_seq_len 512 --max_batch_size 6

Llama 2 is a new technology that carries potential risks with use. Testing conducted to date has not — and could not — cover all scenarios. In order to help developers address these risks, we have created the [Responsible Use Guide](Responsible-Use-Guide.pdf). More details can be found in our research paper as well.

## Issues

Please report any software “bug”, or other problems with the models through one of the following means:
- Reporting issues with the model: [github.com/facebookresearch/llama](http://github.com/facebookresearch/llama)
- Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](http://developers.facebook.com/llama_output_feedback)
- Reporting bugs and security concerns: [facebook.com/whitehat/info](http://facebook.com/whitehat/info)

## Model Card

See [MODEL_CARD.md](MODEL_CARD.md).

## License

Our model and weights are licensed for both researchers and commercial entities, upholding the principles of openness. Our mission is to empower individuals, and industry through this opportunity, while fostering an environment of discovery and ethical AI advancements.

See the [LICENSE](LICENSE) file, as well as our accompanying [Acceptable Use Policy](USE_POLICY.md)

## References

1. [Research Paper](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/)
2. [Llama 2 technical overview](https://ai.meta.com/resources/models-and-libraries/llama)
3.
[Open Innovation AI Research Community](https://ai.meta.com/llama/open-innovation-ai-research-community/) For common questions, the FAQ can be found [here](https://ai.meta.com/llama/faq/); it will be kept up to date as new questions arise. ## Original Llama The repo for the original Llama release is in the [`llama_v1`](https://github.com/facebookresearch/llama/tree/llama_v1) branch.
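As a companion to the quick start above, here is a rough sketch of what example_chat_completion.py does with this repository's `llama` package. The checkpoint and tokenizer paths are placeholders, and the script still needs to be launched with torchrun at the MP value for the model; treat this as an illustration rather than the exact script.

```python
# Rough sketch of example_chat_completion.py; launch with
# `torchrun --nproc_per_node 1 <script>.py` and adjust paths/MP for your model.
from llama import Llama

generator = Llama.build(
    ckpt_dir="llama-2-7b-chat/",        # placeholder checkpoint directory
    tokenizer_path="tokenizer.model",   # placeholder tokenizer path
    max_seq_len=512,
    max_batch_size=6,
)

dialogs = [
    [{"role": "user", "content": "What is the capital of France?"}],
]
results = generator.chat_completion(dialogs, max_gen_len=None, temperature=0.6, top_p=0.9)
for result in results:
    print(result["generation"]["role"], ">", result["generation"]["content"])
```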
-----------
-## Model Details Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. **Model developers** Meta **Variations** Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. **Input** Models input text only. **Output** Models generate text and code only. **Model Architecture** Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. Training Data Params Context length GQA Token count Knowledge cutoff Llama 3 A new mix of publicly available online data. 8B 8k Yes 15T+ March, 2023 70B December, 2023 **Llama 3 family of models**. Token counts refer to pretraining data only. Both the 8 and 70B versions use Grouped-Query Attention (GQA) for improved inference scalability. **Model Release Date** April 18, 2024. **Status** This is a static model trained on an offline dataset. Future versions of the tuned models will be released as we improve model safety with community feedback. **License** A custom commercial license is available at: [https://llama.meta.com/llama3/license](https://llama.meta.com/llama3/license) Where to send questions or comments about the model Instructions on how to provide feedback or comments on the model can be found in the model [README](https://github.com/meta-llama/llama3). For more technical information about generation parameters and recipes for how to use Llama 3 in applications, please go [here](https://github.com/meta-llama/llama-recipes). ## Intended Use **Intended Use Cases** Llama 3 is intended for commercial and research use in English. Instruction tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. **Out-of-scope** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the [Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/) and [Llama 3 Community License](https://llama.meta.com/llama3/license/). Use in languages other than English**. **Note: Developers may fine-tune Llama 3 models for languages beyond English provided they comply with the [Llama 3 Community License](https://llama.meta.com/llama3/license/) and the [Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/). ## Hardware and Software **Training Factors** We used custom training libraries, Meta's Research SuperCluster, and production clusters for pretraining. Fine-tuning, annotation, and evaluation were also performed on third-party cloud compute. **Carbon Footprint Pretraining utilized a cumulative** 7.7M GPU hours of computation on hardware of type H100-80GB (TDP of 700W). Estimated total emissions were 2290 tCO2eq, 100% of which were offset by Meta’s sustainability program. Time (GPU hours) Power Consumption (W) Carbon Emitted(tCO2eq) Llama 3 8B 1.3M 700 390 Llama 3 70B 6.4M 1900 Total 7.7M 2290 **CO2 emissions during pre-training**. Time: total GPU time required for training each model. 
Power Consumption: peak power capacity per GPU device for the GPUs used adjusted for power usage efficiency. 100% of the emissions are directly offset by Meta's sustainability program, and because we are openly releasing these models, the pretraining costs do not need to be incurred by others. ## Training Data **Overview** Llama 3 was pretrained on over 15 trillion tokens of data from publicly available sources. The fine-tuning data includes publicly available instruction datasets, as well as over 10M human-annotated examples. Neither the pretraining nor the fine-tuning datasets include Meta user data. **Data Freshness** The pretraining data has a cutoff of March 2023 for the 8B and December 2023 for the 70B models respectively. ## Benchmarks In this section, we report the results for Llama 3 models on standard automatic benchmarks. For all the evaluations, we use our internal evaluations library. For details on the methodology see [here](https://github.com/meta-llama/llama3/blob/main/eval_details.md). ### Base pretrained models Category Benchmark Llama2 7B Llama2 13B Llama2 70B General MMLU (5-shot) 66.6 45.7 53.8 79.5 69.7 AGIEval English (3-5 shot) 45.9 28.8 38.7 63.0 54.8 CommonSenseQA (7-shot) 72.6 57.6 67.6 83.8 78.7 Winogrande (5-shot) 76.1 73.3 75.4 83.1 81.8 BIG-Bench Hard (3-shot, CoT) 61.1 38.1 47.0 81.3 65.7 ARC-Challenge (25-shot) 78.6 53.7 93.0 85.3 Knowledge reasoning TriviaQA-Wiki (5-shot) 78.5 72.1 79.6 89.7 87.5 Reading comprehension SQuAD (1-shot) 76.4 72.2 85.6 82.6 QuAC (1-shot, F1) 44.4 39.6 44.9 51.1 49.4 BoolQ (0-shot) 75.7 65.5 66.9 79.0 73.1 DROP (3-shot, F1) 58.4 37.9 49.8 79.7 70.2 ### Instruction tuned models Llama 2 7B Llama 2 13B Llama 2 70B 68.4 34.1 47.8 82.0 52.9 GPQA (0-shot) 34.2 21.7 22.3 39.5 21.0 HumanEval (0-shot) 62.2 7.9 14.0 81.7 25.6 GSM-8K (8-shot, CoT) 25.7 77.4 57.5 MATH (4-shot, CoT) 30.0 3.8 6.7 50.4 11.6 ### Responsibility & Safety We believe that an open approach to AI leads to better, safer products, faster innovation, and a bigger overall market. We are committed to Responsible AI development and took a series of steps to limit misuse and harm and support the open source community. Foundation models are widely capable technologies that are built to be used for a diverse range of applications. They are not designed to meet every developer preference on safety levels for all use cases, out-of-the-box, as those by their nature will differ across different applications. Rather, responsible LLM-application deployment is achieved by implementing a series of safety best practices throughout the development of such applications, from the model pre-training, fine-tuning and the deployment of systems composed of safeguards to tailor the safety needs specifically to the use case and audience. As part of the Llama 3 release, we updated our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/) to outline the steps and best practices for developers to implement model and system level safety for their application. We also provide a set of resources including [Meta Llama Guard 2](https://llama.meta.com/purple-llama/) and [Code Shield](https://llama.meta.com/purple-llama/) safeguards. These tools have proven to drastically reduce residual risks of LLM Systems, while maintaining a high level of helpfulness. We encourage developers to tune and deploy these safeguards according to their needs and we provide a [reference implementation](https://github.com/meta-llama/llama-recipes/tree/main/recipes/responsible_ai) to get you started. 
#### Llama 3-Instruct As outlined in the Responsible Use Guide, some trade-off between model helpfulness and model alignment is likely unavoidable. Developers should exercise discretion about how to weigh the benefits of alignment and helpfulness for their specific use case and audience. Developers should be mindful of residual risks when using Llama models and leverage additional safety tools as needed to reach the right safety bar for their use case. Safety For our instruction tuned model, we conducted extensive red teaming exercises, performed adversarial evaluations and implemented safety mitigations techniques to lower residual risks. As with any Large Language Model, residual risks will likely remain and we recommend that developers assess these risks in the context of their use case. In parallel, we are working with the community to make AI safety benchmark standards transparent, rigorous and interpretable. Refusals In addition to residual risks, we put a great emphasis on model refusals to benign prompts. Over-refusing not only can impact the user experience but could even be harmful in certain contexts as well. We’ve heard the feedback from the developer community and improved our fine tuning to ensure that Llama 3 is significantly less likely to falsely refuse to answer prompts than Llama 2. We built internal benchmarks and developed mitigations to limit false refusals making Llama 3 our most helpful model to date. #### Responsible release In addition to responsible use considerations outlined above, we followed a rigorous process that requires us to take extra measures against misuse and critical risks before we make our release decision. Misuse If you access or use Llama 3, you agree to the Acceptable Use Policy. The most recent copy of this policy can be found at [https://llama.meta.com/llama3/use-policy/](https://llama.meta.com/llama3/use-policy/). #### Critical risks CBRNE (Chemical, Biological, Radiological, Nuclear, and high yield Explosives) We have conducted a two fold assessment of the safety of the model in this area: * Iterative testing during model training to assess the safety of responses related to CBRNE threats and other adversarial risks. * Involving external CBRNE experts to conduct an uplift test assessing the ability of the model to accurately provide expert knowledge and reduce barriers to potential CBRNE misuse, by reference to what can be achieved using web search (without the model). ### Cyber Security We have evaluated Llama 3 with CyberSecEval, Meta’s cybersecurity safety eval suite, measuring Llama 3’s propensity to suggest insecure code when used as a coding assistant, and Llama 3’s propensity to comply with requests to help carry out cyber attacks, where attacks are defined by the industry standard MITRE ATT&CK cyber attack ontology. On our insecure coding and cyber attacker helpfulness tests, Llama 3 behaved in the same range or safer than models of [equivalent coding capability](https://huggingface.co/spaces/facebook/CyberSecEval). Child Safety Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development.  
For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. ### Community Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership in AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [GitHub repository](https://github.com/meta-llama/PurpleLlama). Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. ## Ethical Considerations and Limitations The core values of Llama 3 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. Llama 3 addresses users and their needs as they are, without insertion unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. But Llama 3 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has been in English, and has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 3 models, developers should perform safety testing and tuning tailored to their specific applications of the model. As outlined in the Responsible Use Guide, we recommend incorporating [Purple Llama](https://github.com/facebookresearch/PurpleLlama) solutions into your workflows and specifically [Llama Guard](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/) which provides a base model to filter input and output prompts to layer system-level safety on top of model-level safety. 
Please see the Responsible Use Guide available at [http://llama.meta.com/responsible-use-guide](http://llama.meta.com/responsible-use-guide) ## Citation instructions @article{llama3modelcard, title={Llama 3 Model Card}, author={AI@Meta}, year={2024}, url = {https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md} ## Contributors Aaditya Singh; Aaron Grattafiori; Abhimanyu Dubey; Abhinav Jauhri; Abhinav Pandey; Abhishek Kadian; Adam Kelsey; Adi Gangidi; Ahmad Al-Dahle; Amit Sangani; Ahuva Goldstand; Aiesha Letman; Ajay Menon; Akhil Mathur; Alan Schelten; Alex Vaughan; Amy Yang; Andrei Lupu; Andres Alvarado; Andrew Gallagher; Andrew Gu; Andrew Ho; Andrew Poulton; Andrew Ryan; Angela Fan; Ankit Ramchandani; Anthony Hartshorn; Archi Mitra; Archie Sravankumar; Artem Korenev; Arun Rao; Ashley Gabriel; Ashwin Bharambe; Assaf Eisenman; Aston Zhang; Ash JJhaveri; Aurelien Rodriguez; Austen Gregerson; Ava Spataru; Baptiste Roziere; Ben Maurer; Benjamin Leonhardi; Bernie Huang; Bhargavi Paranjape; Bing Liu; Binh Tang; Bobbie Chern; Brani Stojkovic; Brian Fuller; Catalina Mejia Arenas; Chao Zhou; Charlotte Caucheteux; Chaya Nayak; Ching-Hsiang Chu; Chloe Bi; Chris Cai; Chris Cox; Chris Marra; Chris McConnell; Christian Keller; Christoph Feichtenhofer; Christophe Touret; Chunyang Wu; Corinne Wong; Cristian Canton Ferrer; Damien Allonsius; Daniel Kreymer; Daniel Haziza; Daniel Li; Danielle Pintz; Danny Livshits; Danny Wyatt; David Adkins; David Esiobu; David Xu; Davide Testuggine; Delia David; Devi Parikh; Dhruv Choudhary; Dhruv Mahajan; Diana Liskovich; Diego Garcia-Olano; Diego Perino; Dieuwke Hupkes; Dingkang Wang; Dustin Holland; Egor Lakomkin; Elina Lobanova; Xiaoqing Ellen Tan; Emily Dinan; Eric Smith; Erik Brinkman; Esteban Arcaute; Filip Radenovic; Firat Ozgenel; Francesco Caggioni; Frank Seide; Frank Zhang; Gabriel Synnaeve; Gabriella Schwarz; Gabrielle Lee; Gada Badeer; Georgia Anderson; Graeme Nail; Gregoire Mialon; Guan Pang; Guillem Cucurell; Hailey Nguyen; Hamid Shojanazeri; Hannah Korevaar; Hannah Wang; Haroun Habeeb; Harrison Rudolph; Henry Aspegren; Hu Xu; Hugo Touvron; Iga Kozlowska; Igor Molybog; Igor Tufanov; Iliyan Zarov; Imanol Arrieta Ibarra; Irina-Elena Veliche; Isabel Kloumann; Ishan Misra; Ivan Evtimov; Jade Copet; Jake Weissman; Jan Geffert; Jana Vranes; Japhet Asher; Jason Park; Jay Mahadeokar; Jean-Baptiste Gaya; Jeet Shah; Jelmer van der Linde; Jennifer Chan; Jenny Hong; Jenya Lee; Jeremy Fu; Jeremy Teboul; Jianfeng Chi; Jianyu Huang; Jie Wang; Jiecao Yu; Joanna Bitton; Joe Spisak; Joelle Pineau; Jon Carvill; Jongsoo Park; Joseph Rocca; Joshua Johnstun; Junteng Jia; Kalyan Vasuden Alwala; Kam Hou U; Kate Plawiak; Kartikeya Upasani; Kaushik Veeraraghavan; Ke Li; Kenneth Heafield; Kevin Stone; Khalid El-Arini; Krithika Iyer; Kshitiz Malik; Kuenley Chiu; Kunal Bhalla; Kyle Huang; Lakshya Garg; Lauren Rantala-Yeary; Laurens van der Maaten; Lawrence Chen; Leandro Silva; Lee Bell; Lei Zhang; Liang Tan; Louis Martin; Lovish Madaan; Luca Wehrstedt; Lukas Blecher; Luke de Oliveira; Madeline Muzzi; Madian Khabsa; Manav Avlani; Mannat Singh; Manohar Paluri; Mark Zuckerberg; Marcin Kardas; Martynas Mankus; Mathew Oldham; Mathieu Rita; Matthew Lennie; Maya Pavlova; Meghan Keneally; Melanie Kambadur; Mihir Patel; Mikayel Samvelyan; Mike Clark; Mike Lewis; Min Si; Mitesh Kumar Singh; Mo Metanat; Mona Hassan; Naman Goyal; Narjes Torabi; Nicolas Usunier; Nikolay Bashlykov; Nikolay Bogoychev; Niladri Chatterji; Ning Dong; Oliver Aobo Yang; Olivier Duchenne; Onur Celebi; Parth Parekh; 
Patrick Alrassy; Paul Saab; Pavan Balaji; Pedro Rittner; Pengchuan Zhang; Pengwei Li; Petar Vasic; Peter Weng; Polina Zvyagina; Prajjwal Bhargava; Pratik Dubal; Praveen Krishnan; Punit Singh Koura; Puxin Xu; Qing He; Rachel Rodriguez; Ragavan Srinivasan; Rahul Mitra; Ramon Calderer; Raymond Li; Robert Stojnic; Roberta Raileanu; Robin Battey; Rocky Wang; Rohit Girdhar; Rohit Patel; Romain Sauvestre; Ronnie Polidoro; Roshan Sumbaly; Ross Taylor; Ruan Silva; Rui Hou; Rui Wang; Russ Howes; Ruty Rinott; Saghar Hosseini; Sai Jayesh Bondu; Samyak Datta; Sanjay Singh; Sara Chugh; Sargun Dhillon; Satadru Pan; Sean Bell; Sergey Edunov; Shaoliang Nie; Sharan Narang; Sharath Raparthy; Shaun Lindsay; Sheng Feng; Sheng Shen; Shenghao Lin; Shiva Shankar; Shruti Bhosale; Shun Zhang; Simon Vandenhende; Sinong Wang; Seohyun Sonia Kim; Soumya Batra; Sten Sootla; Steve Kehoe; Suchin Gururangan; Sumit Gupta; Sunny Virk; Sydney Borodinsky; Tamar Glaser; Tamar Herman; Tamara Best; Tara Fowler; Thomas Georgiou; Thomas Scialom; Tianhe Li; Todor Mihaylov; Tong Xiao; Ujjwal Karn; Vedanuj Goswami; Vibhor Gupta; Vignesh Ramanathan; Viktor Kerkez; Vinay Satish Kumar; Vincent Gonguet; Vish Vogeti; Vlad Poenaru; Vlad Tiberiu Mihailescu; Vladan Petrovic; Vladimir Ivanov; Wei Li; Weiwei Chu; Wenhan Xiong; Wenyin Fu; Wes Bouaziz; Whitney Meers; Will Constable; Xavier Martinet; Xiaojian Wu; Xinbo Gao; Xinfeng Xie; Xuchao Jia; Yaelle Goldschlag; Yann LeCun; Yashesh Gaur; Yasmine Babaei; Ye Qi; Yenda Li; Yi Wen; Yiwen Song; Youngjin Nam; Yuchen Hao; Yuchen Zhang; Yun Wang; Yuning Mao; Yuzi He; Zacharie Delpierre Coudert; Zachary DeVito; Zahra Hankir; Zhaoduo Wen; Zheng Yan; Zhengxing Chen; Zhenyu Yang; Zoe Papakipos
-----------
-🤗 Models on Hugging Face | Blog Website --- # Meta Llama 3 We are unlocking the power of large language models. Our latest version of Llama is now accessible to individuals, creators, researchers, and businesses of all sizes so that they can experiment, innovate, and scale their ideas responsibly. This release includes model weights and starting code for pre-trained and instruction-tuned Llama 3 language models — including sizes of 8B to 70B parameters. This repository is a minimal example of loading Llama 3 models and running inference. For more detailed examples, see [llama-recipes](https://github.com/facebookresearch/llama-recipes/). To download the model weights and tokenizer, please visit the [Meta Llama website](https://llama.meta.com/llama-downloads/) and accept our License. Once your request is approved, you will receive a signed URL over email. Then, run the download.sh script, passing the URL provided when prompted to start the download. Pre-requisites: Ensure you have `wget` and `md5sum` installed. Then run the script: `./download.sh`. Remember that the links expire after 24 hours and a certain amount of downloads. You can always re-request a link if you start seeing errors such as `403: Forbidden`. ### Access to Hugging Face We also provide downloads on [Hugging Face](https://huggingface.co/meta-llama), in both transformers and native `llama3` formats. To download the weights from Hugging Face, please follow these steps: - Visit one of the repos, for example [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct). - Read and accept the license. Once your request is approved, you'll be granted access to all the Llama 3 models. Note that requests used to take up to one hour to get processed. - To download the original native weights to use with this repo, click on the "Files and versions" tab and download the contents of the `original` folder. You can also download them from the command line if you `pip install huggingface-hub`: huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include "original/*" --local-dir meta-llama/Meta-Llama-3-8B-Instruct - To use with transformers, the following [pipeline](https://huggingface.co/docs/transformers/en/main_classes/pipelines) snippet will download and cache the weights: ```python model_id = "meta-llama/Meta-Llama-3-8B-Instruct" model="meta-llama/Meta-Llama-3-8B-Instruct", model_kwargs={"torch_dtype": torch.bfloat16}, device="cuda", You can follow the steps below to get up and running with Llama 3 models quickly. These steps will let you run quick inference locally. For more examples, see the [Llama recipes repository](https://github.com/facebookresearch/llama-recipes). 1. Clone and download this repository in a conda env with PyTorch / CUDA. 2. In the top-level directory run: pip install -e . 3. Visit the [Meta Llama website](https://llama.meta.com/llama-downloads/) and register to download the model/s. 4. Once registered, you will get an email with a URL to download the models. You will need this URL when you run the download.sh script. 5. Once you get the email, navigate to your downloaded llama repository and run the download.sh script. - Make sure to grant execution permissions to the download.sh script - During this process, you will be prompted to enter the URL from the email. - Do not use the “Copy Link” option; copy the link from the email manually. 6. 
Once the model/s you want have been downloaded, you can run the model locally using the command below: torchrun --nproc_per_node 1 example_chat_completion.py \ --ckpt_dir Meta-Llama-3-8B-Instruct/ \ --tokenizer_path Meta-Llama-3-8B-Instruct/tokenizer.model \ --max_seq_len 512 --max_batch_size 6 - Replace  `Meta-Llama-3-8B-Instruct/` with the path to your checkpoint directory and `Meta-Llama-3-8B-Instruct/tokenizer.model` with the path to your tokenizer model. - The `–nproc_per_node` should be set to the [MP](#inference) value for the model you are using. - Adjust the `max_seq_len` and `max_batch_size` parameters as needed. - This example runs the [example_chat_completion.py](example_chat_completion.py) found in this repository, but you can change that to a different .py file. Different models require different model-parallel (MP) values: |  Model | MP | | 8B     | 1  | | 70B    | 8  | All models support sequence length up to 8192 tokens, but we pre-allocate the cache according to `max_seq_len` and `max_batch_size` values. So set those according to your hardware. These models are not finetuned for chat or Q&A. They should be prompted so that the expected answer is the natural continuation of the prompt. See `example_text_completion.py` for some examples. To illustrate, see the command below to run it with the llama-3-8b model (`nproc_per_node` needs to be set to the `MP` value): torchrun --nproc_per_node 1 example_text_completion.py \ --ckpt_dir Meta-Llama-3-8B/ \ --tokenizer_path Meta-Llama-3-8B/tokenizer.model \ --max_seq_len 128 --max_batch_size 4 ### Instruction-tuned Models The fine-tuned models were trained for dialogue applications. To get the expected features and performance for them, specific formatting defined in [`ChatFormat`](https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py#L202) needs to be followed: The prompt begins with a `<|begin_of_text|>` special token, after which one or more messages follow. Each message starts with the `<|start_header_id|>` tag, the role `system`, `user` or `assistant`, and the `<|end_header_id|>` tag. After a double newline `\n\n`, the message's contents follow. The end of each message is marked by the `<|eot_id|>` token. You can also deploy additional classifiers to filter out inputs and outputs that are deemed unsafe. See the llama-recipes repo for [an example](https://github.com/meta-llama/llama-recipes/blob/main/recipes/inference/local_inference/inference.py) of how to add a safety checker to the inputs and outputs of your inference code. Examples using llama-3-8b-chat: torchrun --nproc_per_node 1 example_chat_completion.py \ --max_seq_len 512 --max_batch_size 6 Llama 3 is a new technology that carries potential risks with use. Testing conducted to date has not — and could not — cover all scenarios. To help developers address these risks, we have created the [Responsible Use Guide](https://ai.meta.com/static-resource/responsible-use-guide/). Please report any software “bug” or other problems with the models through one of the following means: - Reporting issues with the model: [https://github.com/meta-llama/llama3/issues](https://github.com/meta-llama/llama3/issues) - Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](http://developers.facebook.com/llama_output_feedback) - Reporting bugs and security concerns: [facebook.com/whitehat/info](http://facebook.com/whitehat/info) Our model and weights are licensed for researchers and commercial entities, upholding the principles of openness. 
Our mission is to empower individuals and industry through this opportunity while fostering an environment of discovery and ethical AI advancements. See the [LICENSE](LICENSE) file, as well as our accompanying [Acceptable Use Policy](USE_POLICY.md). ## Questions For common questions, the FAQ can be found [here](https://llama.meta.com/faq), which will be updated over time as new questions arise.
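Note: the transformers `pipeline` snippet earlier on this page is truncated in this capture; a plausible reconstruction, assuming the Hugging Face transformers text-generation pipeline referenced above, looks like this:

```python
# Reconstruction of the truncated pipeline snippet above (assumes transformers + torch).
import torch
import transformers

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)
print(pipeline("Hey, how are you doing today?")[0]["generated_text"])
```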
-----------
-# Code Llama ## **Model Details** **Model Developers** Meta AI **Variations** Code Llama comes in four model sizes, and three variants: 1) Code Llama: our base models are designed for general code synthesis and understanding 2) Code Llama - Python: designed specifically for Python 3) Code Llama - Instruct: for instruction following and safer deployment All variants are available in sizes of 7B, 13B, 34B and 70B parameters. **Input** Models input text only. **Output** Models output text only. **Model Architecture** Code Llama and its variants are autoregressive language models using optimized transformer architectures. Code Llama 7B, 13B and 70B additionally support infilling text generation. All models but Code Llama - Python 70B and Code Llama - Instruct 70B were fine-tuned with up to 16K tokens, and support up to 100K tokens at inference time. **Model Dates** Code Llama and its variants have been trained between January 2023 and January 2024. **Status** This is a static model trained on an offline dataset. Future versions of Code Llama - Instruct will be released  as we improve model safety with community feedback. **Licence** A custom commercial license is available at: [https://ai.meta.com/resources/models-and-libraries/llama-downloads/](https://ai.meta.com/resources/models-and-libraries/llama-downloads/). **Research Paper** More information can be found in the paper "[Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/)". **Where to send comments** Instructions on how to provide feedback or comments on the model can be found in the model [README](README.md), or by opening an issue in the GitHub repository ([https://github.com/facebookresearch/codellama/](https://github.com/facebookresearch/codellama/)). ## **Intended Use** **Intended Use Cases** Code Llama and its variants are intended for commercial and research use in English and relevant programming languages. The base model Code Llama can be adapted for a variety of code synthesis and understanding tasks, Code Llama - Python is designed specifically to handle the Python programming language, and Code Llama - Instruct is intended to be safer to use for code assistance and generation applications. **Out-of-Scope Uses** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Code Llama and its variants. ## **Hardware and Software** **Training Factors** We used custom training libraries. The training and fine-tuning of the released models have been performed by Meta’s Research Super Cluster. **Carbon Footprint** In aggregate, training all 12 Code Llama models required 1400K GPU hours of computation on hardware of type A100-80GB (TDP of 350-400W). Estimated total emissions were 228.55 tCO2eq, 100% of which were offset by Meta’s sustainability program. **Training data** All experiments reported here and the released models have been trained and fine-tuned using the same data as Llama 2 with different weights (see Section 2 and Table 1 in the [research paper](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) for details). Code Llama - Instruct uses additional instruction fine-tuning data. **Evaluation Results** See evaluations for the main models and detailed ablations in Section 3 and safety evaluations in Section 4 of the research paper. 
## **Ethical Considerations and Limitations** Code Llama and its variants are a new technology that carries risks with use. Testing conducted to date has been in English, and has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Code Llama’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate or objectionable responses to user prompts. Therefore, before deploying any applications of Code Llama, developers should perform safety testing and tuning tailored to their specific applications of the model. Please see the Responsible Use Guide available at [https://ai.meta.com/llama/responsible-user-guide](https://ai.meta.com/llama/responsible-user-guide).
-----------
-# Introducing Code Llama Code Llama is a family of large language models for code based on [Llama 2](https://github.com/facebookresearch/llama) providing state-of-the-art performance among open models, infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks. We provide multiple flavors to cover a wide range of applications: foundation models (Code Llama), Python specializations (Code Llama - Python), and instruction-following models (Code Llama - Instruct) with 7B, 13B and 34B parameters each. All models are trained on sequences of 16k tokens and show improvements on inputs with up to 100k tokens. 7B and 13B Code Llama and Code Llama - Instruct variants support infilling based on surrounding content. Code Llama was developed by fine-tuning Llama 2 using a higher sampling of code. As with Llama 2, we applied considerable safety mitigations to the fine-tuned versions of the model. For detailed information on model training, architecture and parameters, evaluations, responsible AI and safety refer to  our [research paper](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/). Output generated by code generation features of the Llama Materials, including Code Llama, may be subject to third party licenses, including, without limitation, open source licenses. We are unlocking the power of large language models and our latest version of Code Llama is now accessible to individuals, creators, researchers and businesses of all sizes so that they can experiment, innovate and scale their ideas responsibly. This release includes model weights and starting code for pretrained and fine-tuned Llama language models — ranging from 7B to 34B parameters. This repository is intended as a minimal example to load [Code Llama](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) models and run inference. [comment]: <> (Code Llama models are compatible with the scripts in llama-recipes) In order to download the model weights and tokenizers, please visit the [Meta website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and accept our License. Once your request is approved, you will receive a signed URL over email. Then run the download.sh script, passing the URL provided when prompted to start the download. Make sure that you copy the URL text itself, **do not use the 'Copy link address' option** when you right click the URL. If the copied URL text starts with: https://download.llamameta.net, you copied it correctly. If the copied URL text starts with: https://l.facebook.com, you copied it the wrong way. Pre-requisites: make sure you have `wget` and `md5sum` installed. Then to run the script: `bash download.sh`. Keep in mind that the links expire after 24 hours and a certain amount of downloads. If you start seeing errors such as `403: Forbidden`, you can always re-request a link. ### Model sizes | Model | Size     | |-------|----------| | 7B    | ~12.55GB | | 13B   | 24GB     | | 34B   | 63GB     | | 70B   | 131GB    | [comment]: <> (Access on Hugging Face, We are also providing downloads on Hugging Face. You must first request a download from the Meta website using the same email address as your Hugging Face account. After doing so, you can request access to any of the models on Hugging Face and within 1-2 days your account will be granted access to all versions.) 
## Setup In a conda environment with PyTorch / CUDA available, clone the repo and run in the top-level directory: pip install -e . Different models require different model-parallel (MP) values: | Model | MP | |-------|----| | 7B    | 1  | | 13B   | 2  | | 34B   | 4  | | 70B   | 8  | All models, except the 70B python and instruct versions, support sequence lengths up to 100,000 tokens, but we pre-allocate the cache according to `max_seq_len` and `max_batch_size` values. So set those according to your hardware and use-case. ### Pretrained Code Models The Code Llama and Code Llama - Python models are not fine-tuned to follow instructions. They should be prompted so that the expected answer is the natural continuation of the prompt. See `example_completion.py` for some examples. To illustrate, see command below to run it with the `CodeLlama-7b` model (`nproc_per_node` needs to be set to the `MP` value): torchrun --nproc_per_node 1 example_completion.py \ --ckpt_dir CodeLlama-7b/ \ --tokenizer_path CodeLlama-7b/tokenizer.model \ --max_seq_len 128 --max_batch_size 4 Pretrained code models are: the Code Llama models `CodeLlama-7b`, `CodeLlama-13b`, `CodeLlama-34b`, `CodeLlama-70b` and the Code Llama - Python models `CodeLlama-7b-Python`, `CodeLlama-13b-Python`, `CodeLlama-34b-Python`, `CodeLlama-70b-Python`. ### Code Infilling Code Llama and Code Llama - Instruct 7B and 13B models are capable of filling in code given the surrounding context. See `example_infilling.py` for some examples. The `CodeLlama-7b` model can be run for infilling with the command below (`nproc_per_node` needs to be set to the `MP` value): torchrun --nproc_per_node 1 example_infilling.py \ --max_seq_len 192 --max_batch_size 4 Pretrained infilling models are: the Code Llama models `CodeLlama-7b` and `CodeLlama-13b` and the Code Llama - Instruct models `CodeLlama-7b-Instruct`, `CodeLlama-13b-Instruct`. ### Fine-tuned Instruction Models Code Llama - Instruct models are fine-tuned to follow instructions. To get the expected features and performance for the 7B, 13B and 34B variants, a specific formatting defined in [`chat_completion()`](https://github.com/facebookresearch/codellama/blob/main/llama/generation.py#L319-L361) needs to be followed, including the `INST` and `< >` tags, `BOS` and `EOS` tokens, and the whitespaces and linebreaks in between (we recommend calling `strip()` on inputs to avoid double-spaces). `CodeLlama-70b-Instruct` requires a separate turn-based prompt format defined in [`dialog_prompt_tokens()`](https://github.com/facebookresearch/codellama/blob/main/llama/generation.py#L506-L548). You can use `chat_completion()` directly to generate answers with all instruct models; it will automatically perform the required formatting. You can also deploy additional classifiers for filtering out inputs and outputs that are deemed unsafe. See the llama-recipes repo for [an example](https://github.com/facebookresearch/llama-recipes/blob/main/src/llama_recipes/inference/safety_utils.py) of how to add a safety checker to the inputs and outputs of your inference code. Examples using `CodeLlama-7b-Instruct`: torchrun --nproc_per_node 1 example_instructions.py \ --ckpt_dir CodeLlama-7b-Instruct/ \ --tokenizer_path CodeLlama-7b-Instruct/tokenizer.model \ --max_seq_len 512 --max_batch_size 4 Fine-tuned instruction-following models are: the Code Llama - Instruct models `CodeLlama-7b-Instruct`, `CodeLlama-13b-Instruct`, `CodeLlama-34b-Instruct`, `CodeLlama-70b-Instruct`. 
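To make the `chat_completion()` usage described above concrete, here is a minimal sketch mirroring example_instructions.py with this repository's `llama` package; the paths are placeholders, and the script is expected to be launched via torchrun with `--nproc_per_node` set to the MP value for the chosen model.

```python
# Minimal sketch mirroring example_instructions.py (launch via torchrun; paths are placeholders).
from llama import Llama

generator = Llama.build(
    ckpt_dir="CodeLlama-7b-Instruct/",
    tokenizer_path="CodeLlama-7b-Instruct/tokenizer.model",
    max_seq_len=512,
    max_batch_size=4,
)

instructions = [
    [{"role": "user", "content": "Write a Python function that checks whether a string is a palindrome."}],
]
# chat_completion() applies the instruct formatting (INST tags, BOS/EOS tokens) described above.
results = generator.chat_completion(instructions, max_gen_len=None, temperature=0.2, top_p=0.95)
for result in results:
    print(result["generation"]["content"])
```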
Code Llama is a new technology that carries potential risks with use. Testing conducted to date has not — and could not — cover all scenarios. In order to help developers address these risks, we have created the [Responsible Use Guide](https://github.com/facebookresearch/llama/blob/main/Responsible-Use-Guide.pdf). More details can be found in our research papers as well. Please report any software “bug”, or other problems with the models through one of the following means: - Reporting issues with the model: [github.com/facebookresearch/codellama](http://github.com/facebookresearch/codellama) - Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](http://developers.facebook.com/llama_output_feedback) - Reporting bugs and security concerns: [facebook.com/whitehat/info](http://facebook.com/whitehat/info) See [MODEL_CARD.md](MODEL_CARD.md) for the model card of Code Llama. Our model and weights are licensed for both researchers and commercial entities, upholding the principles of openness. Our mission is to empower individuals, and industry through this opportunity, while fostering an environment of discovery and ethical AI advancements. See the [LICENSE](https://github.com/facebookresearch/llama/blob/main/LICENSE) file, as well as our accompanying [Acceptable Use Policy](https://github.com/facebookresearch/llama/blob/main/USE_POLICY.md) 1. [Code Llama Research Paper](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) 2. [Code Llama Blog Post](https://ai.meta.com/blog/code-llama-large-language-model-coding/)
-----------
-Models on Hugging Face CyberSec Eval Paper Llama Guard Paper # Purple Llama Purple Llama is an umbrella project that over time will bring together tools and evals to help the community build responsibly with open generative AI models. The initial release will include tools and evals for Cyber Security and Input/Output safeguards but we plan to contribute more in the near future. ## Why purple? Borrowing a [concept](https://www.youtube.com/watch?v=ab_Fdp6FVDI) from the cybersecurity world, we believe that to truly mitigate the challenges which generative AI presents, we need to take both attack (red team) and defensive (blue team) postures. Purple teaming, composed of both red and blue team responsibilities, is a collaborative approach to evaluating and mitigating potential risks and the same ethos applies to generative AI and hence our investment in Purple Llama will be comprehensive. Components within the Purple Llama project will be licensed permissively enabling both research and commercial usage. We believe this is a major step towards enabling community collaboration and standardizing the development and usage of trust and safety tools for generative AI development. More concretely evals and benchmarks are licensed under the MIT license while any models use the Llama 2 Community license. See the table below: | **Component Type** |            **Components**            |                                          **License**                                           | | :----------------- | :----------------------------------: | :--------------------------------------------------------------------------------------------: | | Evals/Benchmarks   | Cyber Security Eval (others to come) |                                              MIT                                               | | Models             |             Llama Guard              | [Llama 2 Community License](https://github.com/facebookresearch/PurpleLlama/blob/main/LICENSE) | | Models             |             Llama Guard 2            | Llama 3 Community License | | Safeguard          |             Code Shield              | MIT | ## Evals & Benchmarks ### Cybersecurity #### CyberSec Eval v1 CyberSec Eval v1 was what we believe was the first industry-wide set of cybersecurity safety evaluations for LLMs. These benchmarks are based on industry guidance and standards (e.g., CWE and MITRE ATT&CK) and built in collaboration with our security subject matter experts. We aim to provide tools that will help address some risks outlined in the [White House commitments on developing responsible AI](https://www.whitehouse.gov/briefing-room/statements-releases/2023/07/21/fact-sheet-biden-harris-administration-secures-voluntary-commitments-from-leading-artificial-intelligence-companies-to-manage-the-risks-posed-by-ai/), including: * Metrics for quantifying LLM cybersecurity risks. * Tools to evaluate the frequency of insecure code suggestions. * Tools to evaluate LLMs to make it harder to generate malicious code or aid in carrying out cyberattacks. We believe these tools will reduce the frequency of LLMs suggesting insecure AI-generated code and reduce their helpfulness to cyber adversaries. Our initial results show that there are meaningful cybersecurity risks for LLMs, both with recommending insecure code and for complying with malicious requests. See our [Cybersec Eval paper](https://ai.meta.com/research/publications/purple-llama-cyberseceval-a-benchmark-for-evaluating-the-cybersecurity-risks-of-large-language-models/) for more details. 
#### CyberSec Eval 2 CyberSec Eval 2 expands on its predecessor by measuring an LLM’s propensity to abuse a code interpreter, offensive cybersecurity capabilities, and susceptibility to prompt injection. You can read the paper [here](https://ai.meta.com/research/publications/cyberseceval-2-a-wide-ranging-cybersecurity-evaluation-suite-for-large-language-models/). You can also check out the 🤗 leaderboard [here](https://huggingface.co/spaces/facebook/CyberSecEval). ## System-Level Safeguards As we outlined in Llama 3’s [Responsible Use Guide](https://ai.meta.com/llama/responsible-use-guide/), we recommend that all inputs and outputs to the LLM be checked and filtered in accordance with content guidelines appropriate to the application. ### Llama Guard To support this, and empower the community, we released Llama Guard, an openly-available model that performs competitively on common open benchmarks and provides developers with a pretrained model to help defend against generating potentially risky outputs. As part of our ongoing commitment to open and transparent science, we also released our methodology and an extended discussion of model performance in our [Llama Guard paper](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/). We are happy to share an updated version, Meta Llama Guard 2. Llama Guard 2 was optimized to support the newly [announced](https://mlcommons.org/2024/04/mlc-aisafety-v0-5-poc/) policy published by MLCommons, expanding its coverage to a more comprehensive set of safety categories, out-of-the-box. It also comes with better classification performance than Llama Guard 1 and improved zero-shot and few shot adaptability. Ultimately, our vision is to enable developers to customize this model to support relevant use cases and to make it easier to adopt best practices and improve the open ecosystem. ### Code Shield Code Shield adds support for inference-time filtering of insecure code produced by LLMs. Code Shield offers mitigation of insecure code suggestions risk, code interpreter abuse prevention, and secure command execution. [CodeShield Example Notebook](https://github.com/meta-llama/PurpleLlama/blob/main/CodeShield/notebook/CodeShieldUsageDemo.ipynb). To get started and learn how to use Purple Llama components with Llama models, see the getting started guide [here](https://ai.meta.com/llama/get-started/). The guide provides information and resources to help you set up Llama, including how to access the model, hosting how-to information and integration guides. Additionally, you will find supplemental materials to further assist you while responsibly building with Llama. The guide will be updated as more Purple Llama components get released. ## FAQ For a running list of frequently asked questions, for not only Purple Llama components but also generally for Llama models, see the FAQ [here](https://ai.meta.com/llama/faq/). ## Join the Purple Llama community See the [CONTRIBUTING](CONTRIBUTING.md) file for how to help out.
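For illustration, the sketch below scans model-generated code with Code Shield. It assumes the interface shown in the linked example notebook (a `codeshield` package exposing an async `CodeShield.scan_code` call whose result carries `is_insecure` and `recommended_treatment`); treat these names as assumptions and defer to the notebook for authoritative usage.

```python
# Sketch only: assumes the codeshield API from the linked CodeShield notebook.
import asyncio

from codeshield.cs import CodeShield

async def scan_llm_output(code: str) -> None:
    result = await CodeShield.scan_code(code)      # scan generated code for insecure patterns
    if result.is_insecure:
        # Block, rewrite, or warn depending on the recommended treatment.
        print("Insecure code detected:", result.recommended_treatment)
    else:
        print("No issues found.")

asyncio.run(scan_llm_output('eval(input("Enter an expression: "))'))
```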
-----------
-# Model Details Meta Llama Guard 2 is an 8B parameter Llama 3-based [1] LLM safeguard model. Similar to [Llama Guard](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/), it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated. Below is a response classification example input and output for Llama Guard 2. In order to produce classifier scores, we look at the probability for the first token, and use that as the “unsafe” class probability. We can then apply score thresholding to make binary decisions. # Harm Taxonomy and Policy The model is trained to predict safety labels on the 11 categories shown below, based on the [MLCommons taxonomy](https://mlcommons.org/2024/04/mlc-aisafety-v0-5-poc/) of hazards. Harm categories S1: Violent Crimes S2: Non-Violent Crimes S3: Sex-Related Crimes S4: Child Sexual Exploitation S5: Specialized Advice S6: Privacy S7: Intellectual Property S8: Indiscriminate Weapons S9: Hate S10: Suicide & Self-Harm S11: Sexual Content **S1: Violent Crimes** Responses that enable, encourage, or endorse the commission of violent crimes, including: (1) unlawful violence toward people (ex: terrorism, genocide, murder, hate-motivated violent crimes, child abuse, assault, battery, kidnapping) and (2) unlawful violence toward animals (ex: animal abuse) **S2: Non-Violent Crimes** Responses that enable, encourage, or endorse the commission of non-violent crimes, including: (1) personal crimes (ex: labor trafficking, threats, intimidation, hate-motivated non-violent crimes), (2) financial crimes (ex: fraud, scams, money laundering), (3) property crimes (ex: theft, arson, vandalism), (4) drug crimes (ex: creating or using regulated substances), (5) weapons crimes (ex: producing unlicensed firearms), and (6) cyber crimes (ex: hacking) **S3: Sex-Related Crimes** Responses that enable, encourage, or endorse the commission of sex-related crimes, including: (1) sex trafficking, (2) sexual assault (ex: rape), (3) sexual harassment (physical, verbal, or visual), and (4) prostitution **S4: Child Sexual Exploitation** Responses that contain, describe, enable, encourage, or endorse the sexual abuse of children **S5: Specialized Advice** Responses that contain specialized financial, medical, or legal advice, or that indicate dangerous activities or objects are safe **S6: Privacy** Responses that contain sensitive, nonpublic personal information that could undermine someone's physical, digital, or financial security **S7: Intellectual Property** Responses that may violate the intellectual property rights of any third party **S8: Indiscriminate Weapons** Responses that enable, encourage, or endorse the creation of indiscriminate weapons, including: (1) chemical weapons (ex: nerve gas), (2) biological weapons (ex: anthrax), (3) radiological weapons (ex: cobalt bombs), (4) nuclear weapons (ex: fission bombs), and (5) high-yield explosive weapons (ex: cluster munitions) **S9: Hate** Responses that demean or dehumanize people on the basis of their sensitive, personal characteristics (i.e., race, color, ethnicity, national origin, disability, religious affiliation, caste, sexual orientation, sex, gender identity, and/or serious disease) **S10: Suicide & Self-Harm** Responses that enable, 
encourage, or endorse acts of intentional self-harm, including: (1) suicide, (2) self-injury (ex: cutting), and (3) disordered eating **S11: Sexual Content** Responses that contain erotica # Training and Evaluation Data We start with the training set of Llama Guard (see [model card](https://github.com/meta-llama/PurpleLlama/blob/main/Llama-Guard/MODEL_CARD.md#training-data)), and obtain labels on the Harm Taxonomy described above. To improve adaptability of the model to different prompts, we train on hard samples, which are obtained by taking an existing sample and prompting Llama2 70B to produce an alternate policy description that will flip the label of the given sample. We report metrics for various models and APIs on our validation set, which is obtained from combining the validation set of Llama Guard v1 and held-out samples from the additional Llama 3 safety data. We compare performance on our internal test set, as well as on open datasets like [XSTest](https://github.com/paul-rottger/exaggerated-safety?tab=readme-ov-file#license), [OpenAI moderation](https://github.com/openai/moderation-api-release), and [BeaverTails](https://github.com/PKU-Alignment/beavertails). We find that there is overlap between our training set and the BeaverTails-30k test split. Since both our internal test set and BeaverTails use prompts from the Anthropic's [hh-rlhf dataset](https://github.com/anthropics/hh-rlhf) as a starting point for curating data, it is possible that different splits of Anthropic were used while creating the two datasets. Therefore to prevent leakage of signal between our train set and the BeaverTails-30k test set, we create our own BeaverTails-30k splits based on the Anthropic train-test splits used for creating our internal sets. *Note on evaluations*: As discussed in the Llama Guard [paper](https://arxiv.org/abs/2312.06674), comparing model performance is not straightforward as each model is built on its own policy and is expected to perform better on an evaluation dataset with a policy aligned to the model. This highlights the need for industry standards. By aligning Llama Guard 2 with the Proof of Concept MLCommons taxonomy, we hope to drive adoption of industry standards like this and facilitate collaboration and transparency in the LLM safety and content evaluation space. # Model Performance We evaluate the performance of Llama Guard 2 and compare it with Llama Guard and popular content moderation APIs such as Azure, OpenAI Moderation, and Perspective. We use the token probability of the first output token (i.e. safe/unsafe) as the score for classification. For obtaining a binary classification decision from the score, we use a threshold of 0.5. Llama Guard 2 improves over Llama Guard, and outperforms other approaches on our internal test set. Note that we manage to achieve great performance while keeping a low false positive rate as we know that over-moderation can impact user experience when building LLM-applications. 
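A minimal sketch of the scoring procedure described above (taking the probability of the first output token as the "unsafe" score and thresholding it at 0.5) is shown below. It assumes the Hugging Face transformers library and the `meta-llama/Meta-Llama-Guard-2-8B` checkpoint id, whose chat template is assumed to wrap the conversation in the Llama Guard prompt; exact tokenization details may differ from the internal evaluation code.

```python
# Sketch of first-token "unsafe" scoring with a 0.5 threshold (checkpoint id is an assumption).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-Guard-2-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

chat = [
    {"role": "user", "content": "How do I tie a bowline knot?"},
    {"role": "assistant", "content": "Make a small loop, pass the end through it, around the standing line, and back down."},
]
input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids).logits[0, -1]        # distribution over the first generated token

probs = torch.softmax(logits.float(), dim=-1)
unsafe_id = tokenizer.encode("unsafe", add_special_tokens=False)[0]
score = probs[unsafe_id].item()                    # used as the "unsafe" class probability
print("unsafe probability:", round(score, 3), "->", "unsafe" if score > 0.5 else "safe")
```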
| **Model**                | **F1 ↑** | **AUPRC ↑** | **False Positive Rate ↓** | |--------------------------|:------:|:---------:|:-----------------------:| | Llama Guard\*             |  0.665 | 0.854 |          0.027          | | Llama Guard 2            |  **0.915** |   **0.974**   |          0.040          | | GPT4                     | 0.796 |    N/A    |          0.151          | | OpenAI Moderation API    |  0.347 |   0.669   |          0.030          | | Azure Content Safety API |  0.519 |    N/A    |          0.245          | | Perspective API          |  0.265 |   0.586   |          0.046          | Table 1: Comparison of performance of various approaches measured on our internal test set. *The performance of Llama Guard is lower on our new test set due to expansion of the number of harm categories from 6 to 11, which is not aligned to what Llama Guard was trained on. | **Category**           | **False Negative Rate\* ↓** | **False Positive Rate ↓** | |------------------------|:--------------------------:|:-------------------------:| | Violent Crimes         |            0.042           |           0.002           | | Privacy                |            0.057           |           0.004           | | Non-Violent Crimes     |            0.082           |           0.009           | | Intellectual Property  |            0.099           |           0.004           | | Hate                   |            0.190           |           0.005           | | Specialized Advice     |            0.192           |           0.009           | | Sexual Content         |            0.229           |           0.004           | | Indiscriminate Weapons |            0.263           |           0.001           | | Child Exploitation     |            0.267           |           0.000           | | Sex Crimes             |            0.275           |           0.002           | | Self-Harm              |            0.277           |           0.002           | Table 2: Category-wise breakdown of false negative rate and false positive rate for Llama Guard 2 on our internal benchmark for response classification with safety labels from the ML Commons taxonomy. *The binary safe/unsafe label is used to compute categorical FNR by using the true categories. We do not penalize the model while computing FNR for cases where the model predicts the correct overall label but an incorrect categorical label. We also report performance on OSS safety datasets, though we note that the policy used for assigning safety labels is not aligned with the policy used while training Llama Guard 2. Still, Llama Guard 2 provides a superior tradeoff between F1 score and False Positive Rate on the XSTest and OpenAI Moderation datasets, demonstrating good adaptability to other policies. The BeaverTails dataset has a lower bar for a sample to be considered unsafe compared to Llama Guard 2's policy. The policy and training data of MDJudge [4] is more aligned with this dataset and we see that it performs better on them as expected (at the cost of a higher FPR). GPT-4 achieves high recall on all of the sets but at the cost of very high FPR (9-25%), which could hurt its ability to be used as a safeguard for practical applications. 
(F1 ↑ / False Positive Rate ↓) False Refusals (XSTest) OpenAI policy (OpenAI Mod) BeaverTails policy (BeaverTails-30k) Llama Guard 0.737 / 0.079 0.599 / 0.035 Llama Guard 2 0.884 / 0.084 0.807 / 0.060 0.736 / 0.059 MDJudge 0.856 / 0.172 0.768 / 0.212 0.849 / 0.098 GPT4 0.895 / 0.128 0.842 / 0.092 0.802 / 0.256 OpenAI Mod API 0.576 / 0.040 0.788 / 0.156 0.284 / 0.056 Table 3: Comparison of performance of various approaches measured on our internal test set for response classification. NOTE: The policy used for training Llama Guard does not align with those used for labeling these datasets. Still, Llama Guard 2 provides a superior tradeoff between F1 score and False Positive Rate across these datasets, demonstrating strong adaptability to other policies. We hope to provide developers with a high-performing moderation solution for most use cases by aligning Llama Guard 2 taxonomy with MLCommons standard. But as outlined in our Responsible Use Guide, each use case requires specific safety considerations and we encourage developers to tune Llama Guard 2 for their own use case to achieve better moderation for their custom policies. As an example of how Llama Guard 2's performance may change, we train on the BeaverTails training dataset and compare against MDJudge (which was trained on BeaverTails among others). |          **Model**          | **F1 ↑** | **False Positive Rate ↓** | |:---------------------------:|:--------:|:-------------------------:| | Llama Guard 2               |   0.736  |           0.059           | | MDJudge                     | 0.849 |           0.098           | | Llama Guard 2 + BeaverTails |   **0.852**  |           0.101           | Table 4: Comparison of performance on BeaverTails-30k. # Limitations There are some limitations associated with Llama Guard 2. First, Llama Guard 2 itself is an LLM fine-tuned on Llama 3. Thus, its performance (e.g., judgments that need common sense knowledge, multilingual capability, and policy coverage) might be limited by its (pre-)training data. Second, Llama Guard 2 is finetuned for safety classification only (i.e. to generate "safe" or "unsafe"), and is not designed for chat use cases. However, since it is an LLM, it can still be prompted with any text to obtain a completion. Lastly, as an LLM, Llama Guard 2 may be susceptible to adversarial attacks or prompt injection attacks that could bypass or alter its intended use. However, with the help of external components (e.g., KNN, perplexity filter), recent work (e.g., [3]) demonstrates that Llama Guard is able to detect harmful content reliably. **Note on Llama Guard 2's policy** Llama Guard 2 supports 11 out of the 13 categories included in the [MLCommons AI Safety](https://mlcommons.org/working-groups/ai-safety/ai-safety/) taxonomy. The Election and Defamation categories are not addressed by Llama Guard 2 as moderating these harm categories requires access to up-to-date, factual information sources and the ability to determine the veracity of a particular output. To support the additional categories, we recommend using other solutions (e.g. Retrieval Augmented Generation) in tandem with Llama Guard 2 to evaluate information correctness. 
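For developers who tune Llama Guard 2 to their own policy as suggested above and want to evaluate it the same way, the reported metrics (F1, AUPRC, false positive rate) can be computed from per-example scores; the toy sketch below uses scikit-learn with made-up labels and scores, not Meta's evaluation data.

```python
# Toy example of computing F1, AUPRC and false positive rate from classifier scores.
from sklearn.metrics import average_precision_score, confusion_matrix, f1_score

y_true = [1, 0, 1, 0, 0, 1, 0, 1]                            # 1 = unsafe, 0 = safe
scores = [0.91, 0.12, 0.67, 0.44, 0.05, 0.88, 0.60, 0.72]    # P(unsafe) per example
y_pred = [int(s > 0.5) for s in scores]                      # decision at the 0.5 threshold

f1 = f1_score(y_true, y_pred)
auprc = average_precision_score(y_true, scores)
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
fpr = fp / (fp + tn)
print(f"F1={f1:.3f}  AUPRC={auprc:.3f}  FPR={fpr:.3f}")
```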
# Citation

@misc{metallamaguard2,
  author =       {Llama Team},
  title =        {Meta Llama Guard 2},
  howpublished = {\url{https://github.com/meta-llama/PurpleLlama/blob/main/Llama-Guard2/MODEL_CARD.md}},
  year =         {2024}
}

# References

[1] [Llama 3 Model Card](https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md)

[2] [Llama Guard Model Card](https://github.com/meta-llama/PurpleLlama/blob/main/Llama-Guard/MODEL_CARD.md)

[3] [RigorLLM: Resilient Guardrails for Large Language Models against Undesired Content](https://arxiv.org/pdf/2403.13031.pdf)

[4] [MDJudge for Salad-Bench](https://huggingface.co/OpenSafetyLab/MD-Judge-v0.1)
-----------
-# Meta Llama Guard 2

Llama Guard 2 is a model that provides input and output guardrails for LLM deployments, based on the MLCommons policy.

# Download

In order to download the model weights and tokenizer, please visit the [Meta website](https://llama.meta.com/llama-downloads) and accept our License. Once your request is approved, you will receive a signed URL over email. Then run the download.sh script, passing the URL provided when prompted to start the download. Pre-requisites: make sure you have wget and md5sum installed. Then run the script: `./download.sh`. Keep in mind that the links expire after 24 hours and a certain number of downloads. If you start seeing errors such as `403: Forbidden`, you can always re-request a link.

# Quick Start

Since Llama Guard 2 is a fine-tuned Llama3 model (see our [model card](MODEL_CARD.md) for more information), the same quick start steps outlined in our [README file](https://github.com/meta-llama/llama3/blob/main/README.md) for Llama3 apply here. In addition to that, we added examples using Llama Guard 2 in the [Llama recipes repository](https://github.com/facebookresearch/llama-recipes).

# Issues

Please report any software bug, or other problems with the models, through one of the following means:

- Reporting issues with the Llama Guard model: [github.com/meta-llama/PurpleLlama](https://github.com/meta-llama/PurpleLlama)
- Reporting issues with Llama in general: [github.com/meta-llama/llama3](https://github.com/meta-llama/llama3)
- Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](https://developers.facebook.com/llama_output_feedback)
- Reporting bugs and security concerns: [facebook.com/whitehat/info](https://facebook.com/whitehat/info)

# License

Our model and weights are licensed for both researchers and commercial entities, upholding the principles of openness. Our mission is to empower individuals and industry through this opportunity while fostering an environment of discovery and ethical AI advancements. The same license as Llama 3 applies: see the [LICENSE](../LICENSE) file, as well as our accompanying [Acceptable Use Policy](USE_POLICY.md).

# Citation

@misc{metallamaguard2,
  author =       {Llama Team},
  title =        {Meta Llama Guard 2},
  howpublished = {\url{https://github.com/meta-llama/PurpleLlama/blob/main/Llama-Guard2/MODEL_CARD.md}},
  year =         {2024}
}

[Research Paper](https://ai.facebook.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/)
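For reference, a minimal sketch of driving Llama Guard 2 as a safety classifier through the `transformers` API. This is not part of the README; the model id, the presence of a chat template, and the verdict-parsing details are assumptions that may need adjusting:

```python
# Minimal sketch (assumptions noted inline) of classifying a conversation with Llama Guard 2.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-Guard-2-8B"  # assumed Hugging Face id; requires accepting the license
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)

chat = [
    {"role": "user", "content": "How do I pick a lock?"},
    {"role": "assistant", "content": "I can't help with that."},
]

# Assumes the tokenizer ships a chat template that renders the moderation prompt
# with the default category list.
input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(device)
output = model.generate(input_ids=input_ids, max_new_tokens=20, pad_token_id=tokenizer.eos_token_id)

# The completion starts with "safe" or "unsafe" (optionally followed by violated category codes).
verdict = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(verdict)
```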
-----------
-Llama Guard is a 7B parameter [Llama 2](https://arxiv.org/abs/2307.09288)-based input-output safeguard model. It can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM: it generates text in its output that indicates whether a given prompt or response is safe/unsafe, and if unsafe based on a policy, it also lists the violating subcategories. Here is an example: In order to produce classifier scores, we look at the probability for the first token, and turn that into an “unsafe” class probability. Model users can then make binary decisions by applying a desired threshold to the probability scores. # Training and Evaluation We use a mix of prompts that come from the Anthropic [dataset](https://github.com/anthropics/hh-rlhf) and redteaming examples that we have collected in house, in a separate process from our production redteaming. In particular, we took the prompts only from the Anthropic dataset, and generated new responses from our in-house LLaMA models, using jailbreaking techniques to elicit violating responses. We then annotated Anthropic data (prompts & responses) in house, mapping labels according to the categories identified above. Overall we have ~13K training examples. ## Taxonomy of harms and Risk Guidelines As automated content risk mitigation relies on classifiers to make decisions about content in real time, a prerequisite to building these systems is to have the following components: - A **taxonomy** of risks that are of interest – these become the classes of a classifier. - A **risk guideline** that determines where we put the line between encouraged and discouraged outputs for each risk category in the taxonomy. Together with this model, we release an open taxonomy inspired by existing open taxonomies such as those employed by Google, Microsoft and OpenAI in the hope that it can be useful to the community. This taxonomy does not necessarily reflect Meta's own internal policies and is meant to demonstrate the value of our method to tune LLMs into classifiers that show high performance and high degrees of adaptability to different policies. ### The Llama Guard Safety Taxonomy & Risk Guidelines Below, we provide both the harm types themselves under this taxonomy and also examples of the specific kinds of content that would be considered harmful under each category: - **Violence & Hate** encompasses statements that encourage or could help people plan or engage in violence. Similarly, statements that advocate discrimination, contain slurs, or voice hateful sentiments against people based on their sensitive personal characteristics (ex: race, color, religion, national origin, sexual orientation, gender, gender identity, or disability) would also be considered inappropriate under this category. - **Sexual Content** encompasses statements encouraging someone (who could be underage) to engage in specific sex acts. Similarly, sexually explicit (i.e., erotic) statements would also be considered inappropriate under this category. - **Guns & Illegal Weapons** encompasses statements that specifically encourage, condone, or could help people plan or execute specific crimes involving the illegal acquisition, creation, or use of guns or illegal weapons (ex: explosives, biological agents, or chemical weapons), including instructions on how to create or use them. 
- **Regulated or Controlled Substances** encompasses statements that specifically encourage or could help people to illegally produce, transfer, or consume regulated or controlled substances like illegal drugs, tobacco, alcohol, or cannabis. - **Suicide & Self Harm** encompasses statements that encourage, condone, or enable people to harm themselves (ex: by providing instructions or information on methods of self-harm). When people express or imply an intent to harm themselves, any response other than one that provides links to appropriate health resources would be considered inappropriate (even if entirely supportive). - **Criminal Planning** encompasses miscellaneous statements that encourage, condone, or could help people plan or execute specific criminal activities, like arson, kidnapping, or theft. Items should only be considered harmful under this category when they could not be read as violating any of the other harm types above (ex: statements that encourage violence should be considered violating under Violence & Hate rather than this category). ## Evaluation results We compare the performance of the model against standard content moderation APIs in the industry, including [OpenAI](https://platform.openai.com/docs/guides/moderation/overview), [Azure Content Safety](https://learn.microsoft.com/en-us/azure/ai-services/content-safety/concepts/harm-categories), [PerspectiveAPI](https://developers.perspectiveapi.com/s/about-the-api-attributes-and-languages?language=en_US) from Google on both public and in-house benchmarks. The public benchmarks include [ToxicChat](https://huggingface.co/datasets/lmsys/toxic-chat) and [OpenAI Moderation](https://github.com/openai/moderation-api-release). Note: comparisons are not exactly apples-to-apples due to mismatches in each taxonomy. The interested reader can find a more detailed discussion about this in our [paper](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/). |                 | Our Test Set (Prompt) | OpenAI Mod | ToxicChat | Our Test Set (Response) | | --------------- | --------------------- | ---------- | --------- | ----------------------- | | Llama Guard     | **0.945**             | 0.847      | **0.626** | **0.953**               | | OpenAI API      | 0.764                 | **0.856**  | 0.588     | 0.769                   | | Perspective API | 0.728                 | 0.787      | 0.532     | 0.699                   |
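The first-token scoring described earlier (turning the probability of the first generated token into an "unsafe" class probability and thresholding it) can be sketched as follows. This is an illustrative reconstruction, not the official inference code; the checkpoint id, the single-sub-token approximation, and the placeholder prompt are assumptions:

```python
# Illustrative sketch: score a formatted Llama Guard prompt by the probability
# mass the model puts on "unsafe" vs. "safe" for its first generated token.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/LlamaGuard-7b"  # assumed checkpoint id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

def unsafe_probability(guard_prompt: str) -> float:
    """P(first generated token starts 'unsafe') for an already-formatted Llama Guard prompt."""
    inputs = tokenizer(guard_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        next_token_logits = model(**inputs).logits[0, -1]
    probs = torch.softmax(next_token_logits, dim=-1)
    # First sub-token of each verdict word; a rough approximation of the class score.
    safe_id = tokenizer.encode("safe", add_special_tokens=False)[0]
    unsafe_id = tokenizer.encode("unsafe", add_special_tokens=False)[0]
    return (probs[unsafe_id] / (probs[safe_id] + probs[unsafe_id])).item()

guard_prompt = "..."  # placeholder: must be rendered with the Llama Guard prompt template
score = unsafe_probability(guard_prompt)
decision = "unsafe" if score > 0.5 else "safe"  # threshold is chosen by the deployer
```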
-----------
-Hamel’s Blog - Optimizing latency Subscribe for updates Summary Below is a summary of my findings: 🏁 mlc is the fastest . This is so fast that I’m skeptical and am now motivated to measure quality (if I have time). When checking the outputs manually, they didn’t seem that different than other approaches. ❤️ CTranslate2 is my favorite tool, which is among the fastest but is also the easiest to use . The documentation is the best out of all of the solutions I tried. Furthermore, I think that the ergonomics are excellent for the models that they support. Unlike vLLM, CTranslate doesn’t seem to support distributed inference just yet. 🛠️ is really fast, but CTranslate can be much faster. On other hand, vLLM supports distributed inference , which is something you will need for larger models. vLLM might be the sweet spot for serving very large models. 😐 Text Generation Inference is an ok option (but nowhere near as fast as ) if you want to deploy HuggingFace LLMs in a standard way . TGI has some nice features like telemetry baked in ( via OpenTelemetry ) and integration with the HF ecosystem like inference endpoints . One thing to note that as of 7/28/2023, the license for TGI was changed to be more restrictive that may interfere with certain commercial uses . I am personally not a fan of the license. Rough Benchmarks This study focuses on various approaches to optimizing latency . Specifically, I want to know which tools are the most effective at optimizing latency for open source LLMs. In order to focus on latency, I hold the following variables constant: batch size of n = 1 for all prediction requests (holding throughput constant). All experiments were conducted on a Nvidia A6000 GPU, unless otherwise noted. Max output tokens were always set to 200 All numbers are calculated as an average over a fixed set of 9 prompts. The model used is meta-llama/Llama-2-7b-hf on the HuggingFace Hub In addition to batch size of and using a A6000 GPU (unless noted otherwise), I also made sure I warmed up the model by sending an initial inference request before measuring latency. Llama-v2-7b benchmark: batch size = 1, max output tokens = 200 avg tok/sec avg time (seconds) avg output token count platform options gpu float16 quantization 44.8 4.5 200.0 int8 quantization 62.6 3.2 HF Hosted Inference Endpoint A10G 30.4 6.6 202.0 HuggingFace Transformers (no server) 24.6 7.5 181.4 nf4 4bit quantization bitsandbytes 24.3 7.6 21.1 9.5 quantized w/ GPTQ 23.6 8.8 quantized w/ bitsandbytes 1.9 103.0 q4f16 117.1 1.3 153.9 text-generation-webui exllama 77.0 1.7 134.0 vllm A100 (on Modal Labs) 41.5 3.4 143.1 46.4 178.0 In some cases I did not use an b/c the platform didn’t have that particular GPU available. You can ignore these rows if you like, but I still think it is valuable information. I had access to a A6000, so I just used what I had. I noticed that the output of the LLM was quite different (less tokens) when using . I am not sure if I did something wrong here, or it changes the behavior of the LLM. Furthermore, the goal was not to be super precise on these benchmarks but rather to get a general sense of how things work and how they might compare to each other out of the box. Some of the tools above are inference servers which perform logging, tracing etc. in addition to optimizing models which effect latency. The idea is to see where there are significant differences between tools. 
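The measurement loop implied by the setup above (warm-up request, batch size 1, 200 max output tokens, averaging over a fixed prompt set) can be sketched roughly as follows; `generate_fn` is a hypothetical callable standing in for whichever serving stack is under test:

```python
# Sketch of the latency measurement loop described above. `generate_fn(prompt, max_new_tokens)`
# is a hypothetical adapter returning (text, n_output_tokens) for the tool being benchmarked.
import time
from statistics import mean

def benchmark(generate_fn, prompts, max_new_tokens=200):
    generate_fn("warmup", max_new_tokens)          # warm-up request, not measured
    rows = []
    for prompt in prompts:
        start = time.perf_counter()
        _, n_tokens = generate_fn(prompt, max_new_tokens)
        elapsed = time.perf_counter() - start
        rows.append({"prompt": prompt, "seconds": elapsed, "tok_per_sec": n_tokens / elapsed})
    return {
        "avg tok/sec": mean(r["tok_per_sec"] for r in rows),
        "avg time (seconds)": mean(r["seconds"] for r in rows),
    }
```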
I discussed this more Background One capability you need to be successful with open source LLMs is the ability to serve models efficiently. There are two categories of tools for model inference: Inference servers: these help with providing a web server that can provide a REST/grpc or other interface to interact with your model as a service. These inference servers usually have parameters to help you make trade-offs between throughput and latency . Additionally, some inference servers come with additional features like telemetry, model versioning and more. You can learn more about this topic the serving section of these notes. For LLMs, popular inference servers are the Text Generation Inference (TGI) Model Optimization : These modify your model to make them faster for inference. Examples include quantization Paged Attention Exllama and more. It is common to use both Inference servers techniques in conjunction. Some inference servers like even help you apply optimization techniques. Notes On Tools Other than benchmarking, an important goal of this study was to understand how to use different platforms & tools. Start with compiling the model as shown in these docs After installing MLC , you can compile meta-llama/Llama-2-7b-chat-hf like so: python3 -m mlc_llm.build \ --hf-path meta-llama/Llama-2-7b-chat-hf --target cuda --quantization q4f16_1 The arguments for the compliation are documented . This puts the model in the ./dist/ folder with the name Llama-2-7b-chat-hf-q4f16_1 You can use their python client to interact with the compiled model: from mlc_chat import ChatModule, ChatConfig cfg = ChatConfig(max_gen_len cm ChatModule(model "Llama-2-7b-chat-hf-q4f16_1" , chat_config cfg) output cm.generate(prompt prompt) You can see the full benchmarking code Warning I wasn’t able to get to run correctly with the supplied python client so I am using the chat variant ( Llama-2-7b-chat-hf ) as a proxy. I asked the kind folks who work on the mlc project and they said the python client is currently designed for chat, such that they have this system prompt that is hard coded for llama models: conv.system = ("[INST] <>\n\nYou are a helpful, respectful and honest assistant. " "Always answer as helpfully as possible, while being safe. " "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, " "or illegal content. " "Please ensure that your responses are socially unbiased and positive in nature.\n\n" "If a question does not make any sense, or is not factually coherent, explain why instead " "of answering something not correct. " "If you don't know the answer to a question, please don't share false " "information.\n<>\n\n "); If you want to fix this, you must edit mlc-chat-config.json , changing conv_template LM These docs say more about the config.json The config file is located in ./dist//params/mlc-chat-config.json . For example: > cat ./dist/Llama-2-7b-hf-q4f16_1/params/mlc-chat-config.json "model_lib": "Llama-2-7b-hf-q4f16_1", "local_id": "Llama-2-7b-hf-q4f16_1", "conv_template": "llama-2", "temperature": 0.7, "repetition_penalty": 1.0, "top_p": 0.95, "mean_gen_len": 128, "max_gen_len": 512, "shift_fill_factor": 0.3, "tokenizer_files": [ "tokenizer.json", "tokenizer.model" "model_category": "llama", "model_name": "Llama-2-7b-hf" is an optimization tool that can make models ridiculously fast. h/t to Anton . The documentation for CTranslate2 contains specific instructions for llama models To optimize llama v2 , we first need to quantize the model. 
This can be done like so: ct2-transformers-converter --model int8 --output_dir llama-2-7b-ct2 --force refers to the HuggingFace repo for this model . The benchmarking code is as follows (can also be found ): time ctranslate2 sys sys.path.append( '../common/' questions pandas as pd generator ctranslate2.Generator( "llama-2-7b-ct2" , device "cuda" tokenizer transformers.AutoTokenizer.from_pretrained( "meta-llama/Llama-2-7b-hf" def predict(prompt: str "Generate text give a prompt" start time.perf_counter() tokens tokenizer.convert_ids_to_tokens(tokenizer.encode(prompt)) results generator.generate_batch([tokens], sampling_topk , max_length , include_prompt_in_result False results[ 0 ].sequences_ids[ ] tokenizer.decode(tokens) request_time return 'tok_count' len (tokens), 'time' : request_time, 'question' : prompt, 'answer' : output, 'note' 'CTranslate2 int8 quantization' if __name__ == '__main__' counter responses [] for q in questions: >= : responses.append(predict(q)) += df pd.DataFrame(responses) df.to_csv( 'bench-ctranslate-int8.csv' , index Text Generation Inference (TGI) License Restrictions The license for TGI was recently changed away from Apache 2.0 to be more restrictive. Be careful when using TGI in commercial applications. Text generation inference which is often referred to as “TGI” was easy to use without any optimization. You can run it like this: “start_server.sh” #!/bin/bash [ -z " $HUGGING_FACE_HUB_TOKEN then echo "HUGGING_FACE_HUB_TOKEN is not set. Please set it before running this script." exit fi "TheBloke/Llama-2-7B-GPTQ" volume $PWD /data docker run --gpus all -e HUGGING_FACE_HUB_TOKEN= GPTQ_BITS=4 GPTQ_GROUPSIZE=128 --shm-size 5g -p 8081:80 -v $volume :/data ghcr.io/huggingface/text-generation-inference --max-best-of $@ We can then run the server with this command: bash start_server.sh --model-id Help You can see all the options for the TGI container with the help flag like so: run ghcr.io/huggingface/text-generation-inference --help less Quantization was very difficult to get working. There is a —quantize flag with accepts gptq . The approach makes inference much slower, which others have reported To make work for llama v2 models requires a bunch of work, you have to install the text-generation-server which can take a while and is very brittle to get right. I had to step through the Makefile carefully. After that you have to download the weights with: text-generation-server download-weights meta-llama/Llama-2-7b-hf You can run the following command to perform the quantization (the last argument is the destination directory where the weights are stored). quantize data/quantized/ However, this step is not needed for the most popular models, as someone will likely already have quantized and uploaded them to the Hub. Pre-Quantized Models Alternatively, you can use a pre-quantized model that has been uploaded to the Hub. TheBloke/Llama-2-7B-GPTQ is a good example of one. To get this to work, you have to be careful to set the GPTQ_BITS GPTQ_GROUPSIZE environment variables to match the config. For example This config necessitates setting These are already set in shown above. This PR will eventually fix that. To use the with TGI, I can use the same bash script with the following arguments: --quantize Comparison Without TGI Server When I first drafted this study I got the following response on twitter: Based on your code ( https://t.co/hSYaPTsEaK ) it seems like you measure the full HTTP request, which is like comparing trees to an apple. 
— Philipp Schmid ( @_philschmid July 29, 2023 Phillip certainly has a point! I am indeed testing both! I’m looking for big differences in tools here, and since some inference servers have optimization tools, and some optimization tools do not have an inference server I cannot do a true apples to apples comparison. However, I think its still useful to try different things as advertised to see what is possible, and also take note of really significant gaps in latency between tools. Therefore, I ran the following tests to perform the similar optimizations as TGI, but without the server to see what happened: HuggingFace Transformers I was able to get slightly better performance without the TGI server as predicted by Phillip, but it did not account for the the massive gap between some tools (which is exactly the kind of thing I was looking for). To benchmark quantization with bitsandbytes, I followed this blog post and wrote this benchmarking code . I quantized the model by loading it like this: model_id AutoTokenizer.from_pretrained(model_id) nf4_config BitsAndBytesConfig( load_in_4bit bnb_4bit_quant_type "nf4" bnb_4bit_compute_dtype torch.bfloat16 model_nf4 AutoModelForCausalLM.from_pretrained(model_id, quantization_config nf4_config) Unlike TGI, I was able to get bitsandbytes to work properly here, but just like TGI it didn’t speed anything up for me with respect to inference latency. As reflected in the benchmark table, I got nearly the same results with transformers without any optimizations I also quantized the model using without an inference server to compare against TGI. The code for that is The results were so bad ~ 5 tok/sec that I decided not to put this in the table, because it seemed quite off to me. Text Generation WebUI Aman let me know about text-generation-web-ui , and also these instructions for quickly experimenting with ExLlama ggml . I wasn’t able to get the variant to work properly, unfortunately. If you are really serious about using exllama, I recommend trying to use it without the text generation UI and look at the repo, specifically at test_benchmark_inference.py . (I didn’t have time for this, but if I was going to use exllama for anything serious I would go this route). From the root of the repo, you can run the following commands to start an inference server optimized with download-model.py TheBloke/Llama-2-7B-GPTQ server.py --listen --extensions openai --loader exllama_hf TheBloke_Llama-2-7B-GPTQ After the server was started, I used to conduct the benchmark. Overall, I didn’t like this particular piece of software much. It’s bit bloated because its trying to do too many things at once (An inference server, Web UIs, and other optimizations). That being said, the documentation is good and it is easy to use. I don’t think there is any particular reason to use this unless you want an end-to-end solution that also comes with a web user-interface (which many people want!). only works with CUDA 11.8, which I configured using this approach . After configuring CUDA and installing the right version of PyTorch, you need to install the bleeding edge from git: pip install -U git+https://github.com/vllm-project/vllm.git A good recipe to use for vLLM can be find on these Modal docs . Surprisingly, I had much lower latency when running on a local vs. a hosted A100 on Modal Labs. It’s possible that I did something wrong here. Currently, is the fastest solution for when you need distributed inference (i.e. when your model doesn’t fit on a single GPU). 
offers a server , but I benchmarked the model locally using their tools instead. The code for the benchmarking can be found here SamplingParams, LLM #from https://modal.com/docs/guide/ex/vllm_inference # Coding questions "Implement a Python function to compute the Fibonacci numbers." "Write a Rust function that performs binary exponentiation." "What are the differences between Javascript and Python?" # Literature "Write a story in the style of James Joyce about a trip to the Australian outback in 2083, to see robots in the beautiful desert." "Who does Harry turn into a balloon?" "Write a tale about a time-traveling historian who's determined to witness the most significant events in human history." # Math "What is the product of 9 and 8?" "If a train travels 120 kilometers in 2 hours, what is its average speed?" "Think through this step by step. If the sequence a_n is defined by a_1 = 3, a_2 = 5, and a_n = a_(n-1) + a_(n-2) for n > 2, find a_6." MODEL_DIR "/home/ubuntu/hamel-drive/vllm-models" download_model_to_folder(): huggingface_hub snapshot_download os snapshot_download( local_dir MODEL_DIR, token os.environ[ "HUGGING_FACE_HUB_TOKEN" LLM(MODEL_DIR) generate(question, llm, note None response : question, : note} sampling_params SamplingParams( temperature 1.0 top_p max_tokens result llm.generate(question, sampling_params) result: response[ (output.outputs[ ].token_ids) output.outputs[ ].text llm download_model_to_folder() generate(question q, llm llm, note 'vLLM' responses.append(response) 'bench-vllm.csv' HuggingFace Inference Endpoint I deployed an inference endpoint on HuggingFace for , on a Nvidia A10G GPU. I didn’t try to turn on any optimizations like quantization and wanted to see what the default performance would be like. The documentation for these interfaces can be found . There is also a python client Their documentation says they are using TGI under the hood. However, my latency was significantly faster on their hosted inference platform than using TGI locally. This could be due to the fact that I used a with them but only a locally. It’s worth looking into why this discrepancy exists further. The code for this benchmark can be found Footnotes It is common to explore the inference vs throughput frontier when conducting inference benchmarks. I did not do this, since I was most interested in latency. Here is an example of how to conduct inference benchmarks that consider both throughput and latency. ↩︎ For Llama v2 models , you must be careful to use the models ending in -hf as those are the ones that are compatible with the transformers library. The Modular Inference Engine is another example of an inference server that also applies optimization techniques. At the time of this writing, this is proprietary technology, but its worth keeping an eye on this in the future. Edit this page
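A cleaned-up sketch of the local (no-server) vLLM benchmarking flow described above. The sampling values, model directory, and one-question stand-in list are assumptions for illustration, not the blog's exact script:

```python
# Hedged sketch: benchmark Llama-2-7b locally with vLLM, recording tokens and wall time.
import os
import time

from huggingface_hub import snapshot_download
from vllm import LLM, SamplingParams

MODEL_DIR = "/home/ubuntu/vllm-models"   # assumed local path

def download_model_to_folder() -> LLM:
    snapshot_download(
        "meta-llama/Llama-2-7b-hf",
        local_dir=MODEL_DIR,
        token=os.environ["HUGGING_FACE_HUB_TOKEN"],
    )
    return LLM(MODEL_DIR)

def generate(question: str, llm: LLM, note: str = "vLLM") -> dict:
    start = time.perf_counter()
    sampling_params = SamplingParams(temperature=1.0, top_p=1.0, max_tokens=200)  # assumed values
    result = llm.generate([question], sampling_params)[0]
    elapsed = time.perf_counter() - start
    output = result.outputs[0]
    return {
        "question": question,
        "answer": output.text,
        "tok_count": len(output.token_ids),
        "time": elapsed,
        "note": note,
    }

if __name__ == "__main__":
    llm = download_model_to_folder()
    questions = ["What is the capital of France?"]   # stand-in for the 9 benchmark prompts
    responses = [generate(q, llm) for q in questions]
    print(responses)
```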
-----------
-Achieve 23x LLM Inference Throughput & Reduce p50 Latency Anyscale Preview is now available! Login today to get free $50 compute credit 🚀 Home Blog Detail How continuous batching enables 23x throughput in LLM inference while reducing p50 latency By Cade Daniel Chen Shen Eric Liang Richard Liaw June 22, 2023 In this blog, we’ll cover the basics of large language model (LLM) inference and highlight inefficiencies in traditional batching policies. We’ll introduce continuous batching and discuss benchmark results for existing batching systems such as HuggingFace’s text-generation-inference and vLLM. By leveraging vLLM, users can achieve 23x LLM inference throughput while reducing p50 latency. Update June 2024: Anyscale Endpoints (Anyscale's LLM API Offering) and Private Endpoints (self-hosted LLMs) are now available as part of the Anyscale Platform.  Click to get started on the Anyscale platform. Due to the large GPU memory footprint and compute cost of LLMs , serving dominates the compute cost for most real world applications. ML engineers often treat LLMs like "black boxes" that can only be optimized with internal changes such as quantization and custom CUDA kernels. However, this is not entirely the case. Because LLMs iteratively generate their output, and because LLM inference is often memory and not compute bound, there are surprising system-level batching optimizations that make 10x or more differences in real-world workloads. One recent such proposed optimization is , also known as dynamic batching , or batching with iteration-level scheduling . We wanted to see how this optimization performs. We will get into details below, including how we simulate a production workload, but to summarize our findings: Up to 23x throughput improvement using continuous batching and continuous batching-specific memory optimizations (using ). 8x throughput over naive batching by using continuous batching (both on Ray Serve Hugging Face’s text-generation-inference 4x throughput over naive batching by using an optimized model implementation ( NVIDIA’s FasterTransformer You can try out continuous batching today: see this example to run vLLM on Ray Serve The remainder of this blog is structured as follows: We’ll cover the basics of how LLM inference works and highlight inefficiencies in traditional request-based dynamic batching policies. We’ll introduce continuous batching and how it answers many of the inefficiencies of request-based dynamic batching. We then discuss our benchmarks and the implications this has on how to serve LLM models cost-effectively. Link The basics of LLM inference There is a lot to know about LLM inference, and we refer users to Efficient Inference on a Single GPU Optimization story: Bloom inference for more detail. However, at a high level, LLM inference is pretty straightforward. For each request: You start with a sequence of tokens (called the "prefix" or "prompt"). The LLM produces a sequence of completion tokens, stopping only after producing a stop token or reaching a maximum sequence length. This is an iterative process. You get one additional completion token for each new forward pass of the model. For example, suppose you prompt with a sentence "What is the capital of California: ", it would take ten forward pass iterations to get back the full response of ["S", "a", "c", "r", “a”, "m", "e", "n", "t", "o"]. 
This example simplifies things a little bit because in actuality tokens do not map 1:1 to ASCII characters (a popular token encoding technique is Byte-Pair Encoding which is beyond the scope of this blog post), but the iterative nature of generation is the same regardless of how you tokenize your sequences. Simplified LLM inference. This toy example shows a hypothetical model which supports a maximum sequence length of 8 tokens (T1, T2, …, T8). Starting from the prompt tokens (yellow), the iterative process generates a single token at a time (blue). Once the model generates an end-of-sequence token (red), the generation loop stops. This example shows a batch of only one input sequence, so the batch size is 1. Now that we understand the simplicity of the iterative process, let’s dive deeper with some things you may not know about LLM inference: The initial ingestion (“prefill”) of the prompt "What is the capital of California: " takes about as much time as the generation of each subsequent token. This is because the prefill phase pre-computes some inputs of the attention mechanism that remain constant over the lifetime of the generation. This prefill phase efficiently uses the GPU’s parallel compute because these inputs can be computed independently of each other. LLM inference is memory-IO bound , not compute bound. In other words, it currently takes more time to load 1MB of data to the GPU’s compute cores than it does for those compute cores to perform LLM computations on 1MB of data. This means that LLM inference throughput is largely determined by how large a batch you can fit into high-bandwidth GPU memory . See this page in the NVIDIA docs for more details. The amount of GPU memory consumed scales with the base model size + the length of the token sequence. In Numbers every LLM developer should know , it’s estimated that a 13B parameter model consumes nearly 1MB of state for each token in a sequence. On a higher-end A100 GPU with 40GB RAM, back-of-the-envelope math suggests that since 14 GB are left after storing the 26GB of model parameters, ~14k tokens can be held in memory at once. This may seem high but is actually quite limiting; if we limit our sequence lengths to 512, we can process at most ~28 sequences in a batch. The problem is worse for higher sequence lengths; a sequence length of 2048 means our batch size is limited to 7 sequences. Note that this is an upper bound since it doesn’t leave room for storing intermediate computations. What this all means is that there is substantial “room on the table” so to speak if you can optimize memory usage. This is why approaches such as model quantization strategies such as are potentially so powerful; if you could halve the memory usage by moving from 16-bit to 8-bit representations, you could double the space available for larger batch sizes. However, not all strategies require modifications to the model weights. For example, FlashAttention found significant throughput improvements by reorganizing the attention computation to require less memory-IO. Continuous batching is another memory optimization technique which does not require modification of the model. We next explain how naive batching works (and is inefficient), and how continuous batching increases the memory-efficiency of LLM generation. LLM batching explained GPUs are massively-parallel compute architectures, with compute rates (measured in floating-point operations per second, or flops) in the teraflop ( ) or even petaflop ( H100 ) range. 
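Before moving on to batching, the back-of-the-envelope memory math above can be made concrete in a few lines (the ~1MB-per-token figure is the cited estimate, the weights are assumed to be fp16):

```python
# Rough arithmetic behind the batch-size limits discussed above.
gpu_ram_gb = 40          # A100 with 40GB RAM
model_params_b = 13      # 13B parameter model
bytes_per_param = 2      # fp16 weights -> ~26 GB
state_per_token_mb = 1   # ~1 MB of per-token state (cited estimate)

weights_gb = model_params_b * bytes_per_param                  # 26 GB
free_gb = gpu_ram_gb - weights_gb                              # ~14 GB left for token state
max_tokens_in_flight = free_gb * 1024 / state_per_token_mb     # ~14k tokens

for seq_len in (512, 2048):
    print(seq_len, int(max_tokens_in_flight // seq_len), "sequences per batch (upper bound)")
# -> 512: ~28 sequences, 2048: ~7 sequences
```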
Despite these staggering amounts of compute, LLMs struggle to achieve saturation because so much of the chip’s memory bandwidth is spent loading model parameters. Batching is one way to improve the situation; instead of loading new model parameters each time you have an input sequence, you can load the model parameters once and then use them to process many input sequences. This more efficiently uses the chip’s memory bandwidth, leading to higher compute utilization, higher throughput, and cheaper LLM inference. Naive batching / static batching We call this traditional approach to batching static batching , because the size of the batch remains constant until the inference is complete. Here’s an illustration of static batching in context of LLM inference: Completing four sequences using static batching. On the first iteration (left), each sequence generates one token (blue) from the prompt tokens (yellow). After several iterations (right), the completed sequences each have different sizes because each emits their end-of-sequence-token (red) at different iterations. Even though sequence 3 finished after two iterations, static batching means that the GPU will be underutilized until the last sequence in the batch finishes generation (in this example, sequence 2 after six iterations). Unlike traditional deep learning models, batching for LLMs can be tricky due to the iterative nature of their inference. Intuitively, this is because requests can "finish" earlier in a batch, but it is tricky to release their resources and add new requests to the batch that may be at different completion states. This means that as the GPU is underutilized as generation lengths of different sequences in a batch differ from the largest generation length of the batch. In the figure on the right above, this is illustrated by the white squares after end-of-sequence tokens for sequences 1, 3, and 4. How often does static batching under-utilize the GPU? It depends on the generation lengths of sequences in a batch. For example, one could use LLM inference to emit a single token as a classification task (there are better ways to do this but let’s use this as an example). In this case, every output sequence is the same size (1 token). If the input sequences are also the same size (say, 512 tokens), then each static batch will achieve the best possible GPU utilization. On the other hand, a LLM-powered chatbot service cannot assume fixed-length input sequences, nor assume fixed-length output sequences. Proprietary models offer maximum context lengths in excess of 8K tokens at the time of writing. With static batching, variance in generation output could cause massive underutilization of GPUs. It’s no wonder OpenAI CEO Sam Altman described the compute costs as eye-watering Without restrictive assumptions on user input and model output, unoptimized production-grade LLM systems simply can’t serve traffic without underutilizing GPUs and incurring unnecessarily high costs. We need to optimize how we serve LLMs for their power to be broadly accessible. Continuous batching The industry recognized the inefficiency and came up with a better approach. Orca: A Distributed Serving System for Transformer-Based Generative Models is a paper presented in OSDI ‘22 which is the first to our knowledge to tackle this problem. Instead of waiting until every sequence in a batch has completed generation, Orca implements iteration-level scheduling where the batch size is determined per iteration. 
The result is that once a sequence in a batch has completed generation, a new sequence can be inserted in its place, yielding higher GPU utilization than static batching. Completing seven sequences using continuous batching. Left shows the batch after a single iteration, right shows the batch after several iterations. Once a sequence emits an end-of-sequence token, we insert a new sequence in its place (i.e. sequences S5, S6, and S7). This achieves higher GPU utilization since the GPU does not wait for all sequences to complete before starting a new one. Reality is a bit more complicated than this simplified model: since the prefill phase takes compute and has a different computational pattern than generation, it cannot be easily batched with the generation of tokens. Continuous batching frameworks currently manage this via hyperparameter: waiting_served_ratio , or the ratio of requests waiting for prefill to those waiting end-of-sequence tokens. Speaking of frameworks, Hugging Face has productionized continuous batching in their Rust- and Python-based text-generation-inference LLM inference server . We use their implementation to understand the performance characteristics of continuous batching in our benchmarks below. : Continuous batching, dynamic batching, and iteration-level scheduling are all close enough in meaning that any one of them can be used to describe the batching algorithm. We chose to use continuous batching. Dynamic batching is fitting but can be confused with request-level batching, where an LLM inference server uses a static batch whose size is chosen when the current batch has completely finished generation. We feel that iteration-level scheduling is descriptive of the scheduling mechanism but not the process as a whole. PagedAttention and vLLM For this blog post, we want to showcase the differences between static batching and continuous batching. It turns out that continuous batching can unlock memory optimizations that are not possible with static batching by improving upon Orca’s design. PagedAttention is a new attention mechanism implemented in ( ). It takes inspiration from traditional OS concepts such as paging virtual memory . They allow the KV cache (what is computed in the “prefill” phase, discussed above) to be non-contiguous by allocating memory in fixed-size “pages”, or blocks. The attention mechanism can then be rewritten to operate on block-aligned inputs, allowing attention to be performed on non-contiguous memory ranges. This means that buffer allocation can happen just-in-time instead of ahead-of-time: when starting a new generation, the framework does not need to allocate a contiguous buffer of size maximum_context_length. Each iteration, the scheduler can decide if it needs more room for a particular generation, and allocate on the fly without any degradation to PagedAttention’s performance. This doesn’t guarantee perfect utilization of memory ( their blog says the wastage is now limited to under 4%, only in the last block), but it significantly improves upon wastage from ahead-of-time allocation schemes used widely by the industry today. Altogether, PagedAttention + vLLM enable massive memory savings as most sequences will not consume the entire context window. These memory savings translate directly into a higher batch size, which means higher throughput and cheaper serving. We include vLLM in our benchmarks below. Benchmarking setup We’ll discuss our experimental setup then dive into the results of our benchmarks. 
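To make the scheduling difference concrete before the benchmark details, here is a toy simulation (not from the blog) that counts decode iterations needed to finish a set of requests under static versus continuous batching, ignoring prefill cost:

```python
# Toy model of the two policies discussed above. Each request needs some number of
# decode iterations; the GPU runs up to `slots` sequences per iteration.
def static_batching_iters(lengths, slots):
    iters = 0
    for i in range(0, len(lengths), slots):
        batch = lengths[i:i + slots]
        iters += max(batch)          # the whole batch waits for its longest sequence
    return iters

def continuous_batching_iters(lengths, slots):
    queue = list(lengths)
    active = [queue.pop(0) for _ in range(min(slots, len(queue)))]
    iters = 0
    while active:
        iters += 1
        active = [n - 1 for n in active if n - 1 > 0]    # one decode step per active sequence
        while queue and len(active) < slots:
            active.append(queue.pop(0))                  # refill freed slots immediately
    return iters

lengths = [3, 8, 2, 7, 4, 6, 1, 5]    # generation lengths with high variance
print(static_batching_iters(lengths, slots=4))      # -> 14 iterations
print(continuous_batching_iters(lengths, slots=4))  # -> 12 iterations for the same work
```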
Experiments Our goal is to see how continuous batching performs versus static batching on a simulated real-world live-inference workload. Fundamentally, we care about cost. We break this down into throughput and latency since cost is directly downstream of how efficiently you can serve at a given latency. Benchmark goal Measurement Measure throughput Time-to-process a queue of 1000 requests, each with 512 input tokens and generation length sampled from an exponential distribution. Measure latency Request latencies for 100 requests, with varying input lengths, output lengths, and arrival times at a fixed average rate. We’ll discuss the datasets and other details of the experiments in their respective results section. Hardware/model We benchmark throughput and latency on a single NVIDIA A100 GPU provided by Anyscale . Our A100 has 40GB of GPU RAM. We selected Meta’s OPT-13B model because each framework under test had a readily-available integration with this model. We selected the 13B variant because it fits into our GPU without requiring tensor parallelism, yet is still large enough to present memory efficiency challenges. We opt not to use tensor parallelism, where each transformer block is split over multiple GPUs, to keep our experiments simple, although both static batching and continuous batching work with tensor parallelism. Frameworks We test two static batching frameworks and three continuous batching frameworks. Our static batching frameworks are: Hugging Face’s Pipelines This is the simplest inference solution. It provides static batching with an easy-to-use API that works with any model and supports more tasks than simple text-generation. We use this as our baseline. This is a library which provides optimized implementations of various transformer models. It currently only provides static batching (the Triton inference server provides request-level dynamic batching, but not continuous batching yet). This provides us with an idea of how far an extremely optimized implementation of our model can get us with static batching – it provides a more competitive baseline than the relatively unoptimized OPT-13B implementation available on Hugging Face Hub Our continuous batching frameworks are: This is the inference server Hugging Face uses to power their LLM live-inference APIs. It implements continuous batching. Continuous batching on Ray Serve leverages Ray’s serverless capabilities to provide seamless autoscaling, high-availability, and support for complex DAGs. We wanted to understand how continuous batching works, so we re-implemented text-generation-inference’s core continuous batching logic in pure-Python on Ray Serve. As you will see in our results, our implementation achieves the same performance as text-generation-inference, which validates our understanding. This is an open-source project recently released by folks at UC Berkeley ( ). It builds upon Orca’s continuous batching design by taking full control of dynamic memory allocations, allowing it to significantly reduce different forms of GPU memory fragmentation. We test this framework because it shows the impact of further optimizations made possible by iteration-level scheduling and continuous batching. Benchmarking results: Throughput Based on our understanding of static batching, we expect continuous batching to perform significantly better when there is higher variance in sequence lengths in each batch. 
To show this, we run our throughput benchmark four times for each framework, each time on a dataset with higher variance in sequence lengths. To do this, we create a dataset containing 1000 sequences each with 512 input tokens. We configure our model to always emit a per-sequence generation length by ignoring the end-of-sequence token and configuring max_tokens. We then generate 1000 generation lengths, one for each request, sampled from an exponential distribution with mean=128 tokens. We use an exponential distribution as it is a good approximation of the generation lengths that one may encounter while serving an application like ChatGPT. To vary the variance of each run, we select only samples from the exponential distribution that are less than or equal to 32, 128, 512, and 1536. The total output sequence length is then, at most, 512+32=544, 512+128=640, 512+512=1024, and 512+1536=2048 (the maximum sequence length of our model). We then use a simple asyncio Python benchmarking script to submit HTTP requests to our model server. The benchmarking script submits all requests in burst fashion, so that the compute is saturated. The results are as follows: Throughput in tokens per second of each framework as variance in sequence length increases. As expected, the static batchers and naive continuous batchers perform approximately identically for lower-variance generation lengths. However as the variance increases, naive static batching’s performance plummets to 81 token/s. FasterTransformers improves upon naive static batching significantly, nearly keeping up with the naive continuous batchers until generation length limit of 1536. Continuous batching on Ray Serve and text-generation-inference achieves about the same performance, which is what we expect since they use the same batching algorithm. What is most impressive here is vLLM. For each dataset, vLLM more than doubles performance compared to naive continuous batching. We have not analyzed what optimization contributes the most to vLLM performance the most, but we suspect vLLM’s ability to reserve space dynamically instead of ahead-of-time allows vLLM to dramatically increase the batch size. We plot these performance results relative to naive static batching: Our throughput benchmark results presented as improvement multiples over naive static batching, log scale. It’s important to note how impressive even FasterTransformer’s 4x improvement is; we’re very interested in benchmarking FasterTransformers plus continuous batching when NVIDIA implements it. However, continuous batching is clearly a significant improvement over static batching even with an optimized model. The performance gap becomes gigantic when you include further memory optimization enabled by continuous batching and iteration-level scheduling as vLLM does. Benchmarking results: Latency Live-inference endpoints often face latency-throughput tradeoffs that must be optimized based on user needs. We benchmark latency on a realistic workload and measure how the cumulative distribution function of latencies changes with each framework. Similar to the throughput benchmark, we configure the model to always emit a specified amount of tokens specified per-request. We prepare 100 randomly-generated prompts by sampling lengths from a uniform distribution between 1 token and 512 tokens. We sample 100 output lengths from a capped exponential distribution with mean=128 and maximum size of 1536. 
These numbers were chosen because they are reasonably realistic and allow the generation to use up the full context-length of our model (512+1536=2048). Instead of submitting all requests at the same time as done in the throughput benchmark, we delay each request by a predetermined number of seconds. We sample a Poisson distribution to determine how long each request waits after the previously submitted request. The Poisson distribution is parameterized by λ, the expected rate, which in our case is how many queries per second (QPS) hit our model endpoint. We measure latencies at both QPS=1 and QPS=4 to see how the latency distribution changes as load changes. Median generation request latency for each framework, under average load of 1 QPS and 4 QPS. Continuous batching systems improve median latency. We see that while improving throughput, continuous batching systems also improve median latency. This is because continuous batching systems allow for new requests to be added to an existing batch if there is room, each iteration. But how about other percentiles? In fact, we find that they improve latency across all percentiles: Cumulative distribution function of generation request latencies for each framework with QPS=1. Static batchers and continuous batchers have distinct curve shapes caused by the presence of iteration-level batch scheduling in continuous batchers. All continuous batchers perform approximately equally under this load; FasterTransformers performs noticeably better than static batching on a naive model implementation. The reason why continuous batching improves latency at all percentiles is the same as why it improves latency at p50: new requests can be added regardless of how far into generation other sequences in the batch are. However, like static batching, continuous batching is still limited by how much space is available on the GPU. As your serving system becomes saturated with requests, meaning a higher on-average batch size, there are less opportunities to inject new requests immediately when they are received. We can see this as we increase the average QPS to 4: Cumulative distribution function of generation request latencies for each framework with QPS=4. Compared to QPS=1, FasterTransformer’s distribution of latencies becomes more similar to static batching on a naive model. Both Ray Serve and text-generation-inference’s continuous batching implementations perform similarly, but noticeably worse than vLLM. We observe that FasterTransformer becomes more similar to naive static batching, and that both text-generation-inference and Ray Serve’s implementation of continuous batching are on their way to look like FasterTransformer’s curve with QPS=1. That is, as the systems become saturated there are less opportunities to inject new requests immediately, so request latency goes up. This lines up with the vLLM curve – it remains mostly unchanged between QPS=1 and QPS=4. This is because due to its advanced memory optimizations, it has a higher maximum batch size. Anecdotally, we observe that vLLM becomes saturated around QPS=8 with a throughput near 1900 token/s. To compare these numbers apples-to-apples to the other serving systems requires more experimentation; however we have shown that continuous batching significantly improves over static batching by 1) reducing latency by injecting new requests immediately when possible, and 2) enable advanced memory optimizations (in vLLM’s case) that increase the QPS that the serving system can handle before becoming saturated. 
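A small sketch of how such a workload can be generated; the blog samples capped exponential generation lengths and schedules arrivals at a target QPS, and using exponential inter-arrival gaps to realize the Poisson arrival process is an implementation choice made here, not necessarily the authors' exact script:

```python
# Sketch of the benchmark workload described above: uniform prompt lengths,
# capped-exponential generation lengths, and arrival times at a target QPS.
import numpy as np

rng = np.random.default_rng(0)

def make_workload(n_requests=100, mean_gen_len=128, max_gen_len=1536, qps=1.0):
    prompt_lens = rng.integers(1, 513, size=n_requests)                         # 1..512 input tokens
    gen_lens = np.clip(rng.exponential(mean_gen_len, n_requests), 1, max_gen_len).astype(int)
    inter_arrival = rng.exponential(1.0 / qps, n_requests)                      # Poisson arrival process
    arrival_times = np.cumsum(inter_arrival)
    return list(zip(arrival_times, prompt_lens, gen_lens))

for t, p, g in make_workload(qps=4.0)[:3]:
    print(f"t={t:6.2f}s  prompt_tokens={p:3d}  max_new_tokens={g:4d}")
```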
Conclusion

LLMs present some amazing capabilities, and we believe their impact is still mostly undiscovered. We have shared how a new serving technique, continuous batching, works and how it outperforms static batching. It improves throughput by wasting fewer opportunities to schedule new requests, and improves latency by being capable of immediately injecting new requests into the compute stream. We are excited to see what people can do with continuous batching, and where the industry goes from here.

Try out continuous batching for yourself

We have a vLLM + Ray Serve example that allows you to try out continuous batching. We are integrating continuous batching systems into Aviary, a webapp that allows you to compare the outputs of different LLMs in parallel, and will release it within the week.

Acknowledgements. We'd like to thank the following people for assisting in benchmarking and/or reviewing our results. Anyscale: Stephanie Wang, Antoni Baum, Edward Oakes, and Amog Kamsetty; UC Berkeley: Zhuohan Li and Woosuk Kwon.

Get involved with Ray

The code used for the experiments in the blog post is here. To connect with the Ray community, join the Ray Slack or ask questions on the Discuss forum. If you are interested in hosting LLMs, check out our managed Ray offering. If you are interested in learning more about Ray, see ray.io and docs.ray.io. See our earlier blog series on solving Generative AI infrastructure and using LangChain with Ray. Ray Summit 2023: If you are interested to learn much more about how Ray can be used to build performant and scalable LLM applications and fine-tune/train/serve LLMs on Ray, join Ray Summit on September 18-20th! We have a set of great keynote speakers including John Schulman from OpenAI and Aidan Gomez from Cohere, community and tech talks about Ray, as well as practical training focused on LLMs.
-----------
-GitHub - huggingface/peft: 🤗 PEFT: State-of-the-art Parameter-Efficient Fine-Tuning. (huggingface.co/docs/peft, Apache-2.0 license)

🤗 PEFT: State-of-the-art Parameter-Efficient Fine-Tuning (PEFT) methods

Fine-tuning large pretrained models is often prohibitively costly due to their scale. Parameter-Efficient Fine-Tuning (PEFT) methods enable efficient adaptation of large pretrained models to various downstream applications by only fine-tuning a small number of (extra) model parameters instead of all the model's parameters. This significantly decreases the computational and storage costs. Recent state-of-the-art PEFT techniques achieve performance comparable to fully fine-tuned models.

PEFT is integrated with Transformers for easy model training and inference, Diffusers for conveniently managing different adapters, and Accelerate for distributed training and inference for really big models.

Tip: Visit the PEFT organization to read about the PEFT methods implemented in the library and to see notebooks demonstrating how to apply these methods to a variety of downstream tasks. Click the "Watch repos" button on the organization page to be notified of newly implemented methods and notebooks! Check the PEFT Adapters API Reference section for a list of supported PEFT methods, and read the Adapters, Soft prompts, and IA3 conceptual guides to learn more about how these methods work.

Quickstart

Install PEFT from pip:

  pip install peft

Prepare a model for training with a PEFT method such as LoRA by wrapping the base model and PEFT configuration with get_peft_model. For the bigscience/mt0-large model, you're only training 0.19% of the parameters!

  from transformers import AutoModelForSeq2SeqLM
  from peft import get_peft_model, LoraConfig, TaskType

  model_name_or_path = "bigscience/mt0-large"
  tokenizer_name_or_path = "bigscience/mt0-large"

  peft_config = LoraConfig(
      task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1
  )

  model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
  model = get_peft_model(model, peft_config)
  model.print_trainable_parameters()
  # "trainable params: 2359296 || all params: 1231940608 || trainable%: 0.19151053100118282"

To load a PEFT model for inference:

  from peft import AutoPeftModelForCausalLM
  from transformers import AutoTokenizer
  import torch

  model = AutoPeftModelForCausalLM.from_pretrained("ybelkada/opt-350m-lora")
  tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

  model.eval()
  inputs = tokenizer("Preheat the oven to 350 degrees and place the cookie dough", return_tensors="pt")

  outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=50)
  print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

  # "Preheat the oven to 350 degrees and place the cookie dough in the center of the oven. In a large bowl, combine the flour, baking powder, baking soda, salt, and cinnamon. In a separate bowl, combine the egg yolks, sugar, and vanilla."
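The adapter produced by a quickstart like the one above can be saved and re-attached on its own, which is where the small checkpoint sizes come from. A minimal sketch, not from the README; the output directory name is an arbitrary example:

```python
# Persist only the LoRA adapter (a few MB) and attach it back to the base model later.
from transformers import AutoModelForSeq2SeqLM
from peft import LoraConfig, PeftModel, TaskType, get_peft_model

base = AutoModelForSeq2SeqLM.from_pretrained("bigscience/mt0-large")
peft_model = get_peft_model(
    base, LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, r=8, lora_alpha=32, lora_dropout=0.1)
)

# ... train, then write only the adapter weights + config (not the full base model).
peft_model.save_pretrained("mt0-large-lora-adapter")   # arbitrary output directory

# Later / elsewhere: reload the base model and attach the saved adapter.
base = AutoModelForSeq2SeqLM.from_pretrained("bigscience/mt0-large")
restored = PeftModel.from_pretrained(base, "mt0-large-lora-adapter")
```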
Why you should use PEFT

There are many benefits of using PEFT but the main one is the huge savings in compute and storage, making PEFT applicable to many different use cases.

High performance on consumer hardware

Consider the memory requirements for training the following models on the ought/raft/twitter_complaints dataset with an A100 80GB GPU with more than 64GB of CPU RAM.

| Model | Full Finetuning | PEFT-LoRA PyTorch | PEFT-LoRA DeepSpeed with CPU Offloading |
| --- | --- | --- | --- |
| bigscience/T0_3B (3B params) | 47.14GB GPU / 2.96GB CPU | 14.4GB GPU / 2.96GB CPU | 9.8GB GPU / 17.8GB CPU |
| bigscience/mt0-xxl (12B params) | OOM GPU | 56GB GPU / 3GB CPU | 22GB GPU / 52GB CPU |
| bigscience/bloomz-7b1 (7B params) | OOM GPU | 32GB GPU / 3.8GB CPU | 18.1GB GPU / 35GB CPU |

With LoRA you can fully finetune a 12B parameter model that would've otherwise run out of memory on the 80GB GPU, and comfortably fit and train a 3B parameter model. When you look at the 3B parameter model's performance, it is comparable to a fully finetuned model at a fraction of the GPU memory.

| Submission Name | Accuracy |
| --- | --- |
| Human baseline (crowdsourced) | 0.897 |
| Flan-T5 | 0.892 |
| lora-t0-3b | 0.863 |

The bigscience/T0_3B model performance isn't optimized in the table above. You can squeeze even more performance out of it by playing around with the input instruction templates, LoRA hyperparameters, and other training related hyperparameters. The final checkpoint size of this model is just 19MB compared to 11GB of the full bigscience/T0_3B model. Learn more about the advantages of finetuning with PEFT in this blog post.

Quantization is another method for reducing the memory requirements of a model by representing the data in a lower precision. It can be combined with PEFT methods to make it even easier to train and load LLMs for inference. Learn how to finetune with QLoRA and the TRL library on a 16GB GPU in the Finetune LLMs on your own consumer hardware using tools from PyTorch and Hugging Face ecosystem blog post. Learn how to finetune an openai/whisper-large-v2 model for multilingual automatic speech recognition with LoRA and 8-bit quantization in this notebook (see this notebook instead for an example of streaming a dataset).

Save compute and storage

PEFT can help you save storage by avoiding full finetuning of models on each of your downstream tasks or datasets. In many cases, you're only finetuning a very small fraction of a model's parameters and each checkpoint is only a few MBs in size (instead of GBs). These smaller PEFT adapters demonstrate performance comparable to a fully finetuned model. If you have many datasets, you can save a lot of storage with a PEFT model and not have to worry about catastrophic forgetting or overfitting the backbone or base model.

PEFT integrations

PEFT is widely supported across the Hugging Face ecosystem because of the massive efficiency it brings to training and inference.

Diffusers

The iterative diffusion process consumes a lot of memory which can make it difficult to train. PEFT can help reduce the memory requirements and reduce the storage size of the final model checkpoint. For example, consider the memory required for training a Stable Diffusion model with LoRA on an A100 80GB GPU with more than 64GB of CPU RAM. The final model checkpoint size is only 8.8MB!
    Model                          | Full Finetuning          | PEFT-LoRA                | PEFT-LoRA with Gradient Checkpointing
    CompVis/stable-diffusion-v1-4  | 27.5GB GPU / 3.97GB CPU  | 15.5GB GPU / 3.84GB CPU  | 8.12GB GPU / 3.77GB CPU

Take a look at the examples/lora_dreambooth/train_dreambooth.py training script to try training your own Stable Diffusion model with LoRA, and play around with the smangrul/peft-lora-sd-dreambooth Space which is running on a T4 instance. Learn more about the PEFT integration in Diffusers in this tutorial.

Accelerate

Accelerate is a library for distributed training and inference on various training setups and hardware (GPUs, TPUs, Apple Silicon, etc.). PEFT models work with Accelerate out of the box, making it really convenient to train really large models or use them for inference on consumer hardware with limited resources.

TRL

PEFT can also be applied to training LLMs with RLHF components such as the ranker and policy. Get started by reading:

* Fine-tune a Mistral-7b model with Direct Preference Optimization with PEFT and the TRL library to learn more about the Direct Preference Optimization (DPO) method and how to apply it to an LLM.
* Fine-tuning 20B LLMs with RLHF on a 24GB consumer GPU with PEFT and the TRL library, and then try out the gpt2-sentiment_peft.ipynb notebook to optimize GPT2 to generate positive movie reviews.
* StackLLaMA: A hands-on guide to train LLaMA with RLHF with PEFT, and then try out the stack_llama/scripts for supervised finetuning, reward modeling, and RL finetuning.

Model support

Use this Space or check out the docs to find which models officially support a PEFT method out of the box. Even if you don't see a model listed below, you can manually configure the model config to enable PEFT for a model. Read the New transformers architecture guide to learn how.

Contribute

If you would like to contribute to PEFT, please check out our contribution guide.

Citing 🤗 PEFT

To use 🤗 PEFT in your publication, please cite it by using the following BibTeX entry.

    @Misc{peft,
      title =        {PEFT: State-of-the-art Parameter-Efficient Fine-Tuning methods},
      author =       {Sourab Mangrulkar and Sylvain Gugger and Lysandre Debut and Younes Belkada and Sayak Paul and Benjamin Bossan},
      howpublished = {\url{https://github.com/huggingface/peft}},
      year =         {2022}
    }
-----------
-llama-recipes/docs/LLM_finetuning.md at main · meta-llama/llama-recipes · GitHub
-----------
-llama-recipes/recipes/finetuning/datasets/README.md at main · meta-llama/llama-recipes · GitHub
-----------
-Efficient Fine-Tuning with LoRA: A Guide to Optimal Parameter Selection for Large Language Models | Databricks Blog Skip to main content Share this post With the rapid advancement of neural network-based techniques and Large Language Model (LLM) research, businesses are increasingly interested in AI applications for value generation. They employ various machine learning approaches, both generative and non-generative, to address text-related challenges such as classification, summarization, sequence-to-sequence tasks, and controlled text generation. Organizations can opt for third-party APIs, but fine-tuning models with proprietary data offers domain-specific and pertinent results, enabling cost-effective and independent solutions deployable across different environments in a secure manner. Ensuring efficient resource utilization and cost-effectiveness is crucial when choosing a strategy for fine-tuning. This blog explores arguably the most popular and effective variant of such parameter efficient methods, Low Rank Adaptation (LoRA), with a particular emphasis on QLoRA (an even more efficient variant of LoRA). The approach here will be to take an open large language model and fine-tune it to generate fictitious product descriptions when prompted with a product name and a category. The model chosen for this exercise is OpenLLaMA-3b-v2 , an open large language model with a permissive license (Apache 2.0), and the dataset chosen is Red Dot Design Award Product Descriptions , both of which can be downloaded from the HuggingFace Hub at the links provided. Fine-Tuning, LoRA and QLoRA In the realm of language models, fine tuning an existing language model to perform a specific task on specific data is a common practice. This involves adding a task-specific head, if necessary, and updating the weights of the neural network through backpropagation during the training process. It is important to note the distinction between this finetuning process and training from scratch. In the latter scenario, the model's weights are randomly initialized, while in finetuning, the weights are already optimized to a certain extent during the pre-training phase. The decision of which weights to optimize or update, and which ones to keep frozen, depends on the chosen technique. Full finetuning involves optimizing or training all layers of the neural network. While this approach typically yields the best results, it is also the most resource-intensive and time-consuming. Fortunately, there exist parameter-efficient approaches for fine-tuning that have proven to be effective. Although most such approaches have yielded less performance, Low Rank Adaptation (LoRA) has bucked this trend by even outperforming full finetuning in some cases, as a consequence of avoiding catastrophic forgetting (a phenomenon which occurs when the knowledge of the pretrained model is lost during the fine-tuning process). LoRA is an improved finetuning method where instead of finetuning all the weights that constitute the weight matrix of the pre-trained large language model, two smaller matrices that approximate this larger matrix are fine-tuned. These matrices constitute the LoRA adapter. This fine-tuned adapter is then loaded to the pretrained model and used for inference. QLoRA is an even more memory efficient version of LoRA where the pretrained model is loaded to GPU memory as quantized 4-bit weights (compared to 8-bits in the case of LoRA), while preserving similar effectiveness to LoRA. 
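To make the low-rank idea described above concrete, here is a small numerical sketch (an editor's illustration, not code from the blog) of how the two small matrices stand in for a full weight update:

```
import torch

# Toy illustration of LoRA: instead of learning a full d_out x d_in update to W,
# learn B (d_out x r) and A (r x d_in) and add their scaled product to the frozen W.
d_in, d_out, r, alpha = 768, 768, 8, 16

W = torch.randn(d_out, d_in)       # frozen pretrained weight
A = 0.01 * torch.randn(r, d_in)    # trainable low-rank factor
B = torch.zeros(d_out, r)          # trainable; starts at zero, so the adapted
                                   # model initially matches the pretrained one

x = torch.randn(d_in)
h = W @ x + (alpha / r) * (B @ (A @ x))   # adapted forward pass

print(f"full-update params: {W.numel():,}")              # 589,824
print(f"LoRA params:        {A.numel() + B.numel():,}")  # 12,288
```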
Probing this method, comparing the two methods when necessary, and figuring out the best combination of QLoRA hyperparameters to achieve optimal performance with the quickest training time will be the focus here. LoRA is implemented in the Hugging Face Parameter Efficient Fine-Tuning (PEFT) library, offering ease of use and QLoRA can be leveraged by using together. HuggingFace Transformer Reinforcement Learning (TRL) library offers a convenient trainer for supervised finetuning with seamless integration for LoRA. These three libraries will provide the necessary tools to finetune the chosen pretrained model to generate coherent and convincing product descriptions once prompted with an instruction indicating the desired attributes. Prepping the data for supervised fine-tuning To probe the effectiveness of QLoRA for fine tuning a model for instruction following, it is essential to transform the data to a format suited for supervised fine-tuning. Supervised fine-tuning in essence, further trains a pretrained model to generate text conditioned on a provided prompt. It is supervised in that the model is finetuned on a dataset that has prompt-response pairs formatted in a consistent manner. An example observation from our chosen dataset from the Hugging Face hub looks as follows: product category description text "Biamp Rack Products" "Digital Audio Processors" "“High recognition value, uniform aesthetics and practical scalability – this has been impressively achieved with the Biamp brand language …" "Product Name: Biamp Rack Products; Product Category: Digital Audio Processors; Product Description: “High recognition value, uniform aesthetics and practical scalability – this has been impressively achieved with the Biamp brand language … As useful as this dataset is, this is not well formatted for fine-tuning of a language model for instruction following in the manner described above. The following code snippet loads the dataset from the Hugging Face hub into memory, transforms the necessary fields into a consistently formatted string representing the prompt, and inserts the response( i.e. the description), immediately afterwards. This format is known as the ‘Alpaca format’ in large language model research circles as it was the format used to finetune the original LlaMA model from Meta to result in the Alpaca model, one of the first widely distributed instruction-following large language models (although not licensed for commercial use). datasets load_dataset Dataset #Load the dataset from the HuggingFace Hub rd_ds = load_dataset( "xiyuez/red-dot-design-award-product-description" #Convert to pandas dataframe for convenient processing rd_df = pd.DataFrame(rd_ds[ 'train' #Combine the two attributes into an instruction string rd_df[ 'instruction' ] = 'Create a detailed description for the following product: ' + rd_df[ 'product' ]+ ', belonging to category: ' 'category' rd_df = rd_df[[ 'description' ]] #Get a 5000 sample subset for fine-tuning purposes rd_df_sample = rd_df.sample(n= 5000 , random_state= 42 #Define template and format data into the template for supervised fine-tuning template = """Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: {} ### Response:\n""" rd_df_sample[ 'prompt' ] = rd_df_sample[ "instruction" ].apply( lambda x: template. 
format (x)) rd_df_sample.rename(columns={ 'response' }, inplace= ] + "\n### End" rd_df_sample = rd_df_sample[[ 'text' ] = rd_df[ "prompt" ] + rd_df[ "response" rd_df.drop(columns=[ ], inplace= The resulting prompts are then loaded into a hugging face dataset for supervised finetuning. Each such prompt has the following format. Below is an instruction that describes a task. Write a response that appropriately completes the request. Create a detailed description the following product: Beseye Pro, belonging to category: Cloud-Based Home Security Camera ### Response: Beseye Pro combines intelligent home monitoring with decorative art. The camera, whose form reminiscent of a water drop, secured the mounting a neodymium magnet can be rotated by 360 degrees. This allows it to be easily positioned the desired direction. The camera also houses modern technologies, such infrared LEDs, cloud-based intelligent video analyses SSL encryption. ### End To facilitate quick experimentation, each fine-tuning exercise will be done on a 5000 observation subset of this data. Testing model performance before fine-tuning Before any fine-tuning, it’s a good idea to check how the model performs without any fine-tuning to get a baseline for pre-trained model performance. The model can be loaded in 8-bit as follows and prompted with the format specified in the model card on Hugging Face LlamaTokenizer, LlamaForCausalLM model_path = 'openlm-research/open_llama_3b_v2' tokenizer = LlamaTokenizer.from_pretrained(model_path) model = LlamaForCausalLM.from_pretrained( model_path, load_in_8bit= , device_map= 'auto' #Pass in a prompt and infer with the model prompt = 'Q: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse\nA:' input_ids = tokenizer(prompt, return_tensors= ).input_ids generation_output = model.generate( input_ids=input_ids, max_new_tokens= 128 (tokenizer.decode(generation_output[ ])) The output obtained is not quite what we want. Q: Create a detailed description the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse A: The Corelogic Smooth Mouse a wireless optical mouse that has a 1000 dpi resolution. It has a 2.4 GHz wireless connection a 12 -month warranty. Q: What the price of the Corelogic Smooth Mouse? A: The Corelogic Smooth Mouse priced at $ 29.99 . Q: What the weight of the Corelogic Smooth Mouse? A: The Corelogic Smooth Mouse weighs pounds. Q: What the dimensions of the Corelogic Smooth Mouse? A: The Corelogic Smooth Mouse has a dimension The first part of the result is actually satisfactory, but the rest of it is more of a rambling mess. Similarly, if the model is prompted with the input text in the ‘Alpaca format’ as discussed before, the output is expected to be just as sub-optimal: prompt= """Below is an instruction that describes a task. Write a response that appropriately completes the request. Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse ### Response:""" input_ids = tokenizer(prompt, return_tensors= And sure enough, it is: Corelogic Smooth Mouse a mouse that designed to be used by people disabilities. 
It a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by people a wireless mouse that designed to be used by The model performs what it was trained to do, predicts the next most probable token. The point of supervised fine-tuning in this context is to generate the desired text in a controllable manner. Please note that in the subsequent experiments, while QLoRA leverages a model loaded in 4-bit with the weights frozen, the inference process to examine output quality is done once the model has been loaded in 8-bit as shown above for consistency. The Turnable Knobs When using PEFT to train a model with LoRA or QLoRA (note that, as mentioned before, the primary difference between the two is that in the latter, the pretrained models are frozen in 4-bit during the fine-tuning process), the hyperparameters of the low rank adaptation process can be defined in a LoRA config as shown below: ... #If only targeting attention blocks of the model target_modules = [ "q_proj" "v_proj" #If targeting all linear layers 'q_proj' 'k_proj' 'v_proj' 'o_proj' 'gate_proj' 'down_proj' 'up_proj' 'lm_head' lora_config = LoraConfig( r= 16 target_modules = target_modules, lora_alpha= lora_dropout= 0.05 bias= "none" task_type= "CAUSAL_LM" ,} Two of these hyperparameters, r and target_modules are empirically shown to affect adaptation quality significantly and will be the focus of the tests that follow. The other hyperparameters are kept constant at the values indicated above for simplicity. represents the rank of the low rank matrices learned during the finetuning process. As this value is increased, the number of parameters needed to be updated during the low-rank adaptation increases. Intuitively, a lower r may lead to a quicker, less computationally intensive training process, but may affect the quality of the model thus produced. However, increasing r beyond a certain value may not yield any discernible increase in quality of model output. How the value of r affects adaptation (fine-tuning) quality will be put to the test shortly. When fine-tuning with LoRA, it is possible to target specific modules in the model architecture. The adaptation process will target these modules and apply the update matrices to them. Similar to the situation with " ," targeting more modules during LoRA adaptation results in increased training time and greater demand for compute resources. Thus, it is a common practice to only target the attention blocks of the transformer. However, recent work as shown in the QLoRA paper by Dettmers et al. suggests that targeting all linear layers results in better adaptation quality. This will be explored here as well. Names of the linear layers of the model can be conveniently appended to a list with the following code snippet: re model_modules = (model.modules) pattern = r'\((\w+)\): Linear' linear_layer_names = re.findall(pattern, model_modules) names = [] # Print the names of the Linear layers name linear_layer_names: names.append(name) target_modules = list set (names)) Tuning the finetuning with LoRA The developer experience of fine tuning large language models in general have improved dramatically over the past year or so. The latest high level abstraction from Hugging Face is the SFTTrainer class in the TRL library. 
To perform QLoRA, all that is needed is the following: 1.  Load the model to GPU memory in 4-bit (bitsandbytes enables this process). 2.  Define the LoRA configuration as discussed above. 3.  Define the train and test splits of the prepped instruction following data into Hugging Face Dataset objects. 4. Define training arguments. These include the number of epochs, batch size and other training hyperparameters which will be kept constant during this exercise. 5. Pass these arguments into an instance of SFTTrainer. These steps are clearly indicated in the source file in the associated with this blog. The actual training logic is abstracted away nicely as follows: trainer = SFTTrainer( model, train_dataset=dataset[ eval_dataset = dataset[ 'test' dataset_text_field= "text" max_seq_length= 256 args=training_args, # Initiate the training process mlflow.start_run(run_name= ‘run_name_of_choice’): trainer.train() If MLFlow autologging is enabled in the Databricks workspace, which is highly recommended, all the training parameters and metrics are automatically tracked and logged with the MLFlow tracking server. This functionality is invaluable in monitoring long-running training tasks. Needless to say, the fine-tuning process is performed using a compute cluster (in this case, a single node with a single A100 GPU) created using the latest Databricks Machine runtime with GPU support. Hyperparameter Combination #1: QLoRA with r=8 and targeting “q_proj”, “v_proj” The first combination of QLoRA hyperparameters attempted is r=8 and targets only the attention blocks, namely “q_proj” and “v_proj” for adaptation. The following code snippets gives the number of trainable parameters: model = get_peft_model(model, lora_config) model.print_trainable_parameters() These choices result in 2,662,400 parameters being updated during the fine-tuning process (~2.6 million) from a total of ~3.2 billion parameters the model consists of. This is less than 0.1% of the model parameters. The entire finetuning process on a single Nvidia A100 with 80 GBs of GPU for 3 epochs only takes roughly 12 minutes. The GPU utilization metrics can be conveniently viewed at the metrics tab of the cluster configurations. At the end of the training process, the fine-tuned model is obtained by loading the adapter weights to the pre-trained model as follows: peft_model = PeftModel.from_pretrained(model, adapter_location) This model can now be used for inference as any other model. Qualitative Evaluation A couple of example prompt-response pairs are listed below Prompt (passed to the model in the Alpaca format, not shown for conciseness here): Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse Response: The Corelogic Smooth Mouse a wireless optical mouse a smooth surface. The mouse equipped DPI sensor Hz polling rate. The mouse available black white. Prompt: Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner The Hoover Lightspeed a cordless vacuum cleaner that a lithium-ion battery. The battery charged via a USB cable. The vacuum cleaner brush a turbo brush. The brush suitable cleaning carpets hard floors. The turbo brush hard floors. The vacuum cleaner a dust container that can be emptied via a dust container. The model has clearly been adapted for generating more consistent descriptions. 
However the response to the first prompt about the optical mouse is quite short and the following phrase “ The vacuum cleaner is equipped with a dust container that can be emptied via a dust container ” is logically flawed. Hyperparameter Combination #2: QLoRA with r=16 and targeting all linear layers Surely, things can be improved here. It is worth exploring increasing the rank of low rank matrices learned during adaptation to 16, i.e. double the value of r to 16 and keep all else  the same. This doubles the number of trainable parameters to 5,324,800 (~5.3 million). The quality of output, however, remains unchanged for the same exact prompts. Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse The Corelogic Smooth Mouse a high-quality optical mouse a smooth surface. The mouse Hz polling rate. The mouse Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner a cordless vacuum cleaner that a lithium-ion battery. The battery charged via a USB cable. The vacuum cleaner a motorized brush. The brush used to clean hard floors the motorized brush used to clean carpets. The vacuum cleaner a dust container that can be emptied via a dust container. The vacuum cleaner a LED display that shows the remaining battery capacity. The same lack of detail and logical flaws in detail where details are available persists. If this fine tuned model is used for product description generation in a real-world scenario, this is not acceptable output. Hyperparameter Combination #3: QLoRA with r=8 and targeting all linear layers Given that doubling r does not seemingly result in any perceivable increase in output quality, it is worth changing the other important knob. i.e. targeting all linear layers instead of just the attention blocks. Here, the LoRA hyperparameters are r=8 and target_layers are  'q_proj','k_proj','v_proj','o_proj','gate_proj','down_proj','up_proj' and 'lm_head'. This increases the number of parameters updated to 12,994,560 and increases the training time to roughly 15.5 minutes. Prompting the model with the same prompts yield the following: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse The Corelogic Smooth Mouse a wireless optical mouse a smooth surface. The mouse Hz polling rate. The mouse a USB receiver. The mouse Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner The Hoover Lightspeed cordless vacuum cleaner a high-performance device that easy to use. The device a lithium-ion battery that provides up to 40 minutes of runtime. The device a high-performance motor that provides a suction power of up to watts. The device dusting brush a crevice tool. The device a LED display that shows the remaining battery charge. Now it is possible to see a somewhat longer coherent description of the fictitious optical mouse and there are no logical flaws in the description of the vacuum cleaner. The product descriptions are not only logical, but relevant. Just as a reminder, these relatively high-quality results are obtained by fine-tuning less than a 1% of the model’s weights with a total dataset of 5000 such prompt-description pairs formatted in a consistent manner. 
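For reference, a minimal sketch of what the winning combination above (r=8, all linear layers) looks like as a PEFT LoraConfig. The module names are the ones listed in the article; the lora_alpha value is an assumption for illustration:

```
from peft import LoraConfig

# Module names taken from the combination described above; lora_alpha is assumed.
target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "down_proj", "up_proj", "lm_head",
]

lora_config = LoraConfig(
    r=8,
    target_modules=target_modules,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
```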
Hyperparameter Combination #4: LoRA with  r=8 and targeting all linear transformer layers It is also worth exploring whether the quality of output from the model improves if the pretrained model is frozen in 8-bit instead of 4-bit. In other words, replicating the exact finetuning process using LoRA instead of QLoRA. Here, the LoRA hyperparameters are kept the same as before, in the new-found optimal configuration, i.e. r=8 and targeting all linear transformer layers during the adaptation process. The results for the two prompts used throughout the article are as given below: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse The Corelogic Smooth Mouse a high-quality optical mouse a smooth surface. The mouse Hz polling rate. The mouse three colours: black, white red. The mouse a USB receiver compatible Windows Mac. Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner The Hoover Lightspeed cordless vacuum cleaner a compact lightweight device that easy to use. The device a lithium-ion battery that provides up to minutes of cleaning time. The vacuum cleaner a high-performance filter that ensures that the air cleaned of dust allergens. The device a crevice tool that can be used to clean hard-to-reach areas. Again, there isn’t much of an improvement in the quality of the output text. Key Observations Based on the above set of trials, and further evidence detailed in the excellent publication presenting QLoRA, it can be deduced that the value of r (the rank of matrices updated during adaptation) does not improve adaptation quality beyond a certain point. The biggest improvement is observed in targeting all linear layers in the adaptation process, as opposed to just the attention blocks, as commonly documented in technical literature detailing LoRA and QLoRA. The trials executed above and other empirical evidence suggest that QLoRA does not indeed suffer from any discernible reduction in quality of text generated, compared to LoRA. Further Considerations for using LoRA adapters in deployment It's important to optimize the usage of adapters and understand the limitations of the technique. The size of the LoRA adapter obtained through finetuning is typically just a few megabytes, while the pretrained base model can be several gigabytes in memory and on disk. During inference, both the adapter and the pretrained LLM need to be loaded, so the memory requirement remains similar. Furthermore, if the weights of the pre-trained LLM and the adapter aren’t merged, there will be a slight increase in inference latency. Fortunately, with the PEFT library, the process of merging the weights with the adapter can be done with a single line of code as shown here: merged_model = peft_model.merge_and_unload() The figure below outlines the process from fine-tuning an adapter to model deployment. While the adapter pattern offers significant benefits, merging adapters is not a universal solution. One advantage of the adapter pattern is the ability to deploy a single large pretrained model with task-specific adapters. This allows for efficient inference by utilizing the pretrained model as a backbone for different tasks. However, merging weights makes this approach impossible. The decision to merge weights depends on the specific use case and acceptable inference latency. 
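As a minimal sketch of the deployment choice discussed above (an illustration, not the blog's exact code; the adapter and output paths are placeholders):

```
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the frozen base model and attach a previously trained LoRA adapter.
base_model = AutoModelForCausalLM.from_pretrained("openlm-research/open_llama_3b_v2")
peft_model = PeftModel.from_pretrained(base_model, "path/to/adapter")  # placeholder path

# Option A: serve base + adapter as-is (one backbone can host many task adapters,
#           at the cost of a small inference-latency overhead).
# Option B: merge the adapter into the base weights for single-task, lowest-latency serving.
merged_model = peft_model.merge_and_unload()
merged_model.save_pretrained("path/to/merged-model")  # placeholder path
```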
Nonetheless, LoRA/QLoRA continues to be a highly effective method for parameter efficient fine-tuning and is widely used. Low Rank Adaptation is a powerful fine-tuning technique that can yield great results if used with the right configuration. Choosing the correct value of rank and the layers of the neural network architecture to target during adaptation could decide the quality of the output from the fine-tuned model. QLoRA results in further memory savings while preserving the adaptation quality. Even after the fine-tuning is performed, there are several important engineering considerations to ensure the adapted model is deployed in the correct manner. In summary, a concise table indicating the different combinations of LoRA parameters attempted, text quality output and number of parameters updated when fine-tuning OpenLLaMA-3b-v2 for 3 epochs on 5000 observations on a single A100 is shown below.

    r  | target_modules    | Base model weights | Quality of output | Number of parameters updated (in millions)
    8  | Attention blocks  | 4-bit              | low               | 2.662
    16 | Attention blocks  | 4-bit              | low               | 5.324
    8  | All linear layers | 4-bit              | high              | 12.995
    8  | All linear layers | 8-bit              | high              | 12.995

Try this on Databricks! Clone the GitHub repository associated with the blog into a Databricks Repo to get started. More thoroughly documented examples to finetune models on Databricks are available.
-----------
-Training LLMs Course: Discover Fine-Tuning Techniques (W&B AI Academy)

Course outline:
* Foundations: Course Introduction; NeurIPS LLM Efficiency Challenge; NeurIPS LLM Efficiency Challenge Q&A; Hands On LLM Fine-tuning; Start Your Experiments!
* Evaluation: Introduction to LLM Evaluation; Demystifying Perplexity; HumanEval and LLM Performance Analysis; LLM Benchmarks; Deep Dive into HELM; Chatbot Arena; Use Case Specific Benchmarks; Evaluating LLM Apps; Conclusions; LLM Evaluation Q&A
* Data: Introduction to Data for Training LLMs; Find Out More about MosaicML; Friendly Advice; How Much Data?; Data Sources & Cost Q&A; Which Data?; Logistics of Data Loading
* Training & Fine-tuning Techniques: Introduction to Training & Fine-tuning Techniques; Hardware Requirements; Memory Usage; What Should You Train?; Training Observability
* Course Assessment & Next Steps: Course Assessment; Resources for Further Learning

About this course: free, 37 lessons, 4 hours of video content.
* Learn the fundamentals of large language models: find out about the types of LLMs, model architectures, parameter sizes and scaling laws.
* Curate a dataset and establish an evaluation approach: learn how to find or curate a dataset for LLM training, dive into the evaluation metrics for various LLM tasks, and compare their performance across a range of benchmarks.
* Master training and fine-tuning techniques: learn hands-on advanced training strategies like LoRA, prefix tuning, prompt tuning, and Reinforcement Learning through Human Feedback (RLHF).

Prerequisites: working knowledge of machine learning, intermediate Python experience, familiarity with DL frameworks (PyTorch/TensorFlow).
-----------
-loaded 51
diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py
index 4ae6b71a6..af5174ac9 100644
--- a/examples/E2E-RAG-App/rag_main.py
+++ b/examples/E2E-RAG-App/rag_main.py
@@ -9,6 +9,7 @@
 import base64
 import mimetypes
 import os
+import uuid
 
 import fire
 import pandas as pd
@@ -84,16 +85,16 @@ def build_index(client: LlamaStackClient, file_dir: str, bank_id: str) -> str:
     return bank_id
 
 
-async def get_response_row(agent: Agent, input_query: str) -> str:
-    # single turn, each prompt is a new session
-    session_id = agent.create_session(f"session-{input_query}")
+async def get_response_row(agent: Agent, input_query: str, session_id) -> str:
+    messages = [
+        {
+            "role": "user",
+            "content": input_query,
+        }
+    ]
+    print("messages", messages)
     response = agent.create_turn(
-        messages=[
-            {
-                "role": "user",
-                "content": input_query,
-            }
-        ],
+        messages=messages,
         session_id=session_id,
     )
 
@@ -101,6 +102,8 @@ async def get_response_row(agent: Agent, input_query: str) -> str:
         event = chunk.event
         event_type = event.payload.event_type
         if event_type == "turn_complete":
+            print("----input_query-------", input_query)
+            print(event.payload.turn)
             return event.payload.turn.output_message.content
 
 
@@ -120,7 +123,7 @@ async def run_main(host: str, port: int, docs_dir: str):
     assert model_name is not None, "No model found"
     agent_config = AgentConfig(
         model=model_name,
-        instructions="You are a helpful assistant",
+        instructions="You are a helpful assistant that can answer questions with the provided documents. Read the documents carefully and answer the question based on the documents. If you don't know the answer, just say that you don't know.",
         sampling_params={
             "strategy": "greedy",
             "temperature": 1.0,
@@ -132,28 +135,32 @@ async def run_main(host: str, port: int, docs_dir: str):
                 "type": "memory",
                 "memory_bank_configs": [{"bank_id": bank_id, "type": "vector"}],
                 "query_generator_config": {"type": "default", "sep": " "},
-                "max_tokens_in_context": 4096,
-                "max_chunks": 50,
+                "max_tokens_in_context": 1024,
+                "max_chunks": 5,
+                "score_threshold": 0.8,
             }
         ],
         tool_choice="auto",
         tool_prompt_format="json",
         input_shields=[],
         output_shields=[],
-        enable_session_persistence=False,
+        enable_session_persistence=True,
     )
 
     agent = Agent(client, agent_config)
 
     # load dataset and generate responses for the RAG agent
-    user_prompts = ["What methods are best for finetuning llama models?"]
+    user_prompts = [
+        "What is the name of the llama model released on October 24, 2024?",
+        "What about Llama 3.1 model, what is the release date for it?",
+    ]
 
     llamastack_generated_responses = []
-
+    session_id = agent.create_session(f"session-{uuid.uuid4()}")
     for prompt in tqdm(user_prompts):
         print(f"Generating response for: {prompt}")
         try:
-            generated_response = await get_response_row(agent, prompt)
+            generated_response = await get_response_row(agent, prompt, session_id)
             llamastack_generated_responses.append(generated_response)
         except Exception as e:
             print(f"Error generating response for {prompt}: {e}")

From c0e7b88a0de4cda979dc391cd1e2d6334466600b Mon Sep 17 00:00:00 2001
From: Justin Lee 
Date: Wed, 13 Nov 2024 14:56:10 -0800
Subject: [PATCH 04/99] added persistent memory

---
 examples/E2E-RAG-App/rag_main.py | 109 ++++++++++++++++++++-----------
 1 file changed, 70 insertions(+), 39 deletions(-)

diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py
index af5174ac9..376e1a71e 100644
--- a/examples/E2E-RAG-App/rag_main.py
+++ b/examples/E2E-RAG-App/rag_main.py
@@ -5,11 +5,11 @@
 # the root directory of this source tree.
 
 import asyncio
-
 import base64
 import mimetypes
 import os
 import uuid
+import json
 
 import fire
 import pandas as pd
@@ -23,6 +23,18 @@
 from tqdm import tqdm
 
 
+def save_memory_bank(bank_id: str, memory_bank_data: dict, file_path: str):
+    with open(file_path, 'w') as f:
+        json.dump(memory_bank_data, f)
+
+
+def load_memory_bank(file_path: str):
+    if os.path.exists(file_path):
+        with open(file_path, 'r') as f:
+            return json.load(f)
+    return None
+
+
 def data_url_from_file(file_path: str) -> str:
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"File not found: {file_path}")
@@ -38,50 +50,69 @@ def data_url_from_file(file_path: str) -> str:
     return data_url
 
 
-def build_index(client: LlamaStackClient, file_dir: str, bank_id: str) -> str:
+def build_index(client: LlamaStackClient, file_dir: str, bank_id: str, memory_bank_file: str) -> str:
     """Build a memory bank from a directory of pdf files"""
-    # 1. create memory bank
-    providers = client.providers.list()
-    client.memory_banks.register(
-        memory_bank={
+    # Check if a saved memory bank exists
+    memory_bank_data = load_memory_bank(memory_bank_file)
+    if memory_bank_data:
+        # Load the memory bank from the file
+        print(f"Loaded memory bank from {memory_bank_file}")
+        # Assuming you have a method to register the loaded memory bank
+        client.memory_banks.register(memory_bank=memory_bank_data)
+    else:
+        # 1. create memory bank
+        providers = client.providers.list()
+        client.memory_banks.register(
+            memory_bank={
+                "identifier": bank_id,
+                "embedding_model": "all-MiniLM-L6-v2",
+                "chunk_size_in_tokens": 512,
+                "overlap_size_in_tokens": 64,
+                "provider_id": providers["memory"][0].provider_id,
+            }
+        )
+
+        # 2. load pdf,text,md from directory as raw text
+        paths = []
+        documents = []
+        for filename in os.listdir(file_dir):
+            if filename.endswith(".pdf"):
+                file_path = os.path.join(file_dir, filename)
+                paths.append(file_path)
+
+                documents.append(
+                    Document(
+                        document_id=os.path.basename(file_path),
+                        content=data_url_from_file(file_path),
+                        mime_type="application/pdf",
+                    )
+                )
+            elif filename.endswith(".txt") or filename.endswith(".md"):
+                file_path = os.path.join(file_dir, filename)
+                paths.append(file_path)
+                documents.append(
+                    Document(
+                        document_id=os.path.basename(file_path),
+                        content=data_url_from_file(file_path),
+                        mime_type="text/plain",
+                    )
+                )
+
+        # insert some documents
+        client.memory.insert(bank_id=bank_id, documents=documents)
+        print(f"Inserted {len(documents)} documents into bank: {bank_id}")
+
+        # Save the memory bank to a file after building it
+        memory_bank_data = {
             "identifier": bank_id,
             "embedding_model": "all-MiniLM-L6-v2",
             "chunk_size_in_tokens": 512,
             "overlap_size_in_tokens": 64,
             "provider_id": providers["memory"][0].provider_id,
         }
-    )
-
-    # 2. load pdf,text,md from directory as raw text
-    paths = []
-    documents = []
-    for filename in os.listdir(file_dir):
-        if filename.endswith(".pdf"):
-            file_path = os.path.join(file_dir, filename)
-            paths.append(file_path)
-
-            documents.append(
-                Document(
-                    document_id=os.path.basename(file_path),
-                    content=data_url_from_file(file_path),
-                    mime_type="application/pdf",
-                )
-            )
-        elif filename.endswith(".txt") or filename.endswith(".md"):
-            file_path = os.path.join(file_dir, filename)
-            paths.append(file_path)
-            documents.append(
-                Document(
-                    document_id=os.path.basename(file_path),
-                    content=data_url_from_file(file_path),
-                    mime_type="text/plain",
-                )
-            )
+        save_memory_bank(bank_id, memory_bank_data, memory_bank_file)
+        print(f"Saved memory bank to {memory_bank_file}")
 
-    # insert some documents
-    client.memory.insert(bank_id=bank_id, documents=documents)
-    print(f"Inserted {len(documents)} documents into bank: {bank_id}")
-    # TODO: add a way to check if the bank is created successfully
     return bank_id
 
 
@@ -111,7 +142,8 @@ async def run_main(host: str, port: int, docs_dir: str):
     client = LlamaStackClient(base_url=f"http://{host}:{port}")
 
     bank_id = "rag_agent_docs"
-    build_index(client, docs_dir, bank_id)
+    memory_bank_file = 'path/to/memory_bank.json'  # Define the path to your memory bank file
+    build_index(client, docs_dir, bank_id, memory_bank_file)
     print(f"Created bank: {bank_id}")
     models_response = client.models.list()
     print(f"Found {len(models_response)} models", models_response)
@@ -130,7 +162,6 @@ async def run_main(host: str, port: int, docs_dir: str):
             "top_p": 0.9,
         },
         tools=[
-            # TODO: save the bank to local file and load it from local file
             {
                 "type": "memory",
                 "memory_bank_configs": [{"bank_id": bank_id, "type": "vector"}],

From c1bc6952f31e4d332a21f7293823f87127a16cae Mon Sep 17 00:00:00 2001
From: Justin Lee 
Date: Wed, 13 Nov 2024 15:31:47 -0800
Subject: [PATCH 05/99] removed faiss

---
 examples/E2E-RAG-App/rag_main.py | 109 +++++++++++--------------------
 1 file changed, 39 insertions(+), 70 deletions(-)

diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py
index 376e1a71e..af5174ac9 100644
--- a/examples/E2E-RAG-App/rag_main.py
+++ b/examples/E2E-RAG-App/rag_main.py
@@ -5,11 +5,11 @@
 # the root directory of this source tree.
 
 import asyncio
+
 import base64
 import mimetypes
 import os
 import uuid
-import json
 
 import fire
 import pandas as pd
@@ -23,18 +23,6 @@
 from tqdm import tqdm
 
 
-def save_memory_bank(bank_id: str, memory_bank_data: dict, file_path: str):
-    with open(file_path, 'w') as f:
-        json.dump(memory_bank_data, f)
-
-
-def load_memory_bank(file_path: str):
-    if os.path.exists(file_path):
-        with open(file_path, 'r') as f:
-            return json.load(f)
-    return None
-
-
 def data_url_from_file(file_path: str) -> str:
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"File not found: {file_path}")
@@ -50,69 +38,50 @@ def data_url_from_file(file_path: str) -> str:
     return data_url
 
 
-def build_index(client: LlamaStackClient, file_dir: str, bank_id: str, memory_bank_file: str) -> str:
+def build_index(client: LlamaStackClient, file_dir: str, bank_id: str) -> str:
     """Build a memory bank from a directory of pdf files"""
-    # Check if a saved memory bank exists
-    memory_bank_data = load_memory_bank(memory_bank_file)
-    if memory_bank_data:
-        # Load the memory bank from the file
-        print(f"Loaded memory bank from {memory_bank_file}")
-        # Assuming you have a method to register the loaded memory bank
-        client.memory_banks.register(memory_bank=memory_bank_data)
-    else:
-        # 1. create memory bank
-        providers = client.providers.list()
-        client.memory_banks.register(
-            memory_bank={
-                "identifier": bank_id,
-                "embedding_model": "all-MiniLM-L6-v2",
-                "chunk_size_in_tokens": 512,
-                "overlap_size_in_tokens": 64,
-                "provider_id": providers["memory"][0].provider_id,
-            }
-        )
-
-        # 2. load pdf,text,md from directory as raw text
-        paths = []
-        documents = []
-        for filename in os.listdir(file_dir):
-            if filename.endswith(".pdf"):
-                file_path = os.path.join(file_dir, filename)
-                paths.append(file_path)
-
-                documents.append(
-                    Document(
-                        document_id=os.path.basename(file_path),
-                        content=data_url_from_file(file_path),
-                        mime_type="application/pdf",
-                    )
-                )
-            elif filename.endswith(".txt") or filename.endswith(".md"):
-                file_path = os.path.join(file_dir, filename)
-                paths.append(file_path)
-                documents.append(
-                    Document(
-                        document_id=os.path.basename(file_path),
-                        content=data_url_from_file(file_path),
-                        mime_type="text/plain",
-                    )
-                )
-
-        # insert some documents
-        client.memory.insert(bank_id=bank_id, documents=documents)
-        print(f"Inserted {len(documents)} documents into bank: {bank_id}")
-
-        # Save the memory bank to a file after building it
-        memory_bank_data = {
+    # 1. create memory bank
+    providers = client.providers.list()
+    client.memory_banks.register(
+        memory_bank={
             "identifier": bank_id,
             "embedding_model": "all-MiniLM-L6-v2",
             "chunk_size_in_tokens": 512,
             "overlap_size_in_tokens": 64,
             "provider_id": providers["memory"][0].provider_id,
         }
-        save_memory_bank(bank_id, memory_bank_data, memory_bank_file)
-        print(f"Saved memory bank to {memory_bank_file}")
+    )
+
+    # 2. load pdf,text,md from directory as raw text
+    paths = []
+    documents = []
+    for filename in os.listdir(file_dir):
+        if filename.endswith(".pdf"):
+            file_path = os.path.join(file_dir, filename)
+            paths.append(file_path)
+
+            documents.append(
+                Document(
+                    document_id=os.path.basename(file_path),
+                    content=data_url_from_file(file_path),
+                    mime_type="application/pdf",
+                )
+            )
+        elif filename.endswith(".txt") or filename.endswith(".md"):
+            file_path = os.path.join(file_dir, filename)
+            paths.append(file_path)
+            documents.append(
+                Document(
+                    document_id=os.path.basename(file_path),
+                    content=data_url_from_file(file_path),
+                    mime_type="text/plain",
+                )
+            )
 
+    # insert some documents
+    client.memory.insert(bank_id=bank_id, documents=documents)
+    print(f"Inserted {len(documents)} documents into bank: {bank_id}")
+    # TODO: add a way to check if the bank is created successfully
     return bank_id
 
 
@@ -142,8 +111,7 @@ async def run_main(host: str, port: int, docs_dir: str):
     client = LlamaStackClient(base_url=f"http://{host}:{port}")
 
     bank_id = "rag_agent_docs"
-    memory_bank_file = 'path/to/memory_bank.json'  # Define the path to your memory bank file
-    build_index(client, docs_dir, bank_id, memory_bank_file)
+    build_index(client, docs_dir, bank_id)
     print(f"Created bank: {bank_id}")
     models_response = client.models.list()
     print(f"Found {len(models_response)} models", models_response)
@@ -162,6 +130,7 @@ async def run_main(host: str, port: int, docs_dir: str):
             "top_p": 0.9,
         },
         tools=[
+            # TODO: save the bank to local file and load it from local file
             {
                 "type": "memory",
                 "memory_bank_configs": [{"bank_id": bank_id, "type": "vector"}],

From d1d954dd87eafbbe21fd88d9b964206b79bf5a41 Mon Sep 17 00:00:00 2001
From: Justin Lee 
Date: Thu, 14 Nov 2024 14:46:46 -0800
Subject: [PATCH 06/99] included external chromadb

---
 examples/E2E-RAG-App/rag_main.py | 231 ++++++++++++++-----------------
 1 file changed, 107 insertions(+), 124 deletions(-)

diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py
index af5174ac9..605d13062 100644
--- a/examples/E2E-RAG-App/rag_main.py
+++ b/examples/E2E-RAG-App/rag_main.py
@@ -1,176 +1,159 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
 import asyncio
-
-import base64
-import mimetypes
+import json
 import os
 import uuid
+from typing import List, Optional
 
+import chromadb
 import fire
-import pandas as pd
-
+import requests
+from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
+from dotenv import load_dotenv
 from llama_stack_client import LlamaStackClient
 from llama_stack_client.lib.agents.agent import Agent
-from llama_stack_client.lib.agents.event_logger import EventLogger
 from llama_stack_client.types.agent_create_params import AgentConfig
-from llama_stack_client.types.memory_insert_params import Document
 from termcolor import cprint
 from tqdm import tqdm
 
+# Initialization
+load_dotenv()
+embedding_function = SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
+chroma_client = chromadb.PersistentClient(path="chroma")
+
 
-def data_url_from_file(file_path: str) -> str:
-    if not os.path.exists(file_path):
-        raise FileNotFoundError(f"File not found: {file_path}")
+def chunk_text(content: str, chunk_size: int = 500) -> List[str]:
+    """Splits content into chunks of approximately `chunk_size` characters."""
+    chunks = []
+    current_chunk = []
 
-    with open(file_path, "rb") as file:
-        file_content = file.read()
+    for paragraph in content.split("\n\n"):
+        if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size:
+            current_chunk.append(paragraph)
+        else:
+            chunks.append("\n\n".join(current_chunk).strip())
+            current_chunk = [paragraph]
 
-    base64_content = base64.b64encode(file_content).decode("utf-8")
-    mime_type, _ = mimetypes.guess_type(file_path)
+    if current_chunk:
+        chunks.append("\n\n".join(current_chunk).strip())
 
-    data_url = f"data:{mime_type};base64,{base64_content}"
+    return chunks
 
-    return data_url
 
+def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 500) -> None:
+    """Inserts text documents from a directory into ChromaDB."""
+    collection_name = "documents"
+    existing_collections = chroma_client.list_collections()
+    collection_names = [col.name for col in existing_collections]
 
-def build_index(client: LlamaStackClient, file_dir: str, bank_id: str) -> str:
-    """Build a memory bank from a directory of pdf files"""
-    # 1. create memory bank
-    providers = client.providers.list()
-    client.memory_banks.register(
-        memory_bank={
-            "identifier": bank_id,
-            "embedding_model": "all-MiniLM-L6-v2",
-            "chunk_size_in_tokens": 512,
-            "overlap_size_in_tokens": 64,
-            "provider_id": providers["memory"][0].provider_id,
-        }
+    if collection_name in collection_names:
+        print(
+            f"Collection '{collection_name}' already exists. Skipping document insertion."
+        )
+        return
+
+    collection = chroma_client.create_collection(
+        name=collection_name, embedding_function=embedding_function
     )
+    print(f"Collection '{collection_name}' created.")
 
-    # 2. load pdf,text,md from directory as raw text
-    paths = []
-    documents = []
     for filename in os.listdir(file_dir):
-        if filename.endswith(".pdf"):
-            file_path = os.path.join(file_dir, filename)
-            paths.append(file_path)
-
-            documents.append(
-                Document(
-                    document_id=os.path.basename(file_path),
-                    content=data_url_from_file(file_path),
-                    mime_type="application/pdf",
-                )
-            )
-        elif filename.endswith(".txt") or filename.endswith(".md"):
+        if filename.endswith((".txt", ".md")):
             file_path = os.path.join(file_dir, filename)
-            paths.append(file_path)
-            documents.append(
-                Document(
-                    document_id=os.path.basename(file_path),
-                    content=data_url_from_file(file_path),
-                    mime_type="text/plain",
-                )
-            )
-
-    # insert some documents
-    client.memory.insert(bank_id=bank_id, documents=documents)
-    print(f"Inserted {len(documents)} documents into bank: {bank_id}")
-    # TODO: add a way to check if the bank is created successfully
-    return bank_id
-
-
-async def get_response_row(agent: Agent, input_query: str, session_id) -> str:
+            with open(file_path, "r", encoding="utf-8") as file:
+                content = file.read()
+                chunks = chunk_text(content, chunk_size=chunk_size)
+
+                for i, chunk in enumerate(chunks):
+                    chunk_id = f"{filename}_chunk_{i}"
+                    collection.add(
+                        documents=[chunk],
+                        ids=[chunk_id],
+                        metadatas=[
+                            {"filename": filename, "chunk_index": i, "content": chunk}
+                        ],
+                    )
+
+    print(f"Inserted documents from {file_dir} into ChromaDB.")
+
+
+def query_chromadb(query: str) -> Optional[dict]:
+    """Queries ChromaDB for relevant context based on input query."""
+    collection = chroma_client.get_collection(
+        name="documents", embedding_function=embedding_function
+    )
+    results = collection.query(query_texts=[query], n_results=1)
+    return results if results else None
+
+
+async def get_response_with_context(
+    agent: Agent, input_query: str, session_id: str
+) -> str:
+    """Fetches response from the agent with context from ChromaDB."""
+    results = query_chromadb(input_query)
+    context = (
+        "No relevant context found."
+        if not results or not results["metadatas"][0]
+        else "\n".join(metadata["content"] for metadata in results["metadatas"][0])
+    )
+
     messages = [
-        {
-            "role": "user",
-            "content": input_query,
-        }
+        {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"}
     ]
-    print("messages", messages)
-    response = agent.create_turn(
-        messages=messages,
-        session_id=session_id,
-    )
+    print("Sending messages to agent:", messages)
+
+    response = agent.create_turn(messages=messages, session_id=session_id)
 
     async for chunk in response:
-        event = chunk.event
-        event_type = event.payload.event_type
-        if event_type == "turn_complete":
+        if chunk.event.payload.event_type == "turn_complete":
             print("----input_query-------", input_query)
-            print(event.payload.turn)
-            return event.payload.turn.output_message.content
+            return chunk.event.payload.turn.output_message.content
+
+    return "No response generated."
 
 
-async def run_main(host: str, port: int, docs_dir: str):
+async def run_main(host: str, port: int, docs_dir: str) -> None:
+    """Main async function to register model, insert documents, and generate responses."""
     client = LlamaStackClient(base_url=f"http://{host}:{port}")
 
-    bank_id = "rag_agent_docs"
-    build_index(client, docs_dir, bank_id)
-    print(f"Created bank: {bank_id}")
-    models_response = client.models.list()
-    print(f"Found {len(models_response)} models", models_response)
-    model_name = None
-    for model in models_response:
-        if not model_name and model.identifier.endswith("Instruct"):
-            model_name = model.llama_model
-            print(f"Use model: {model_name}")
-    assert model_name is not None, "No model found"
+    insert_documents_to_chromadb(docs_dir)
+
+    model_name = "Llama3.2-3B-Instruct"
+    url = "http://localhost:5000/models/register"
+    headers = {"Content-Type": "application/json"}
+    data = {
+        "model_id": model_name,
+        "provider_model_id": None,
+        "provider_id": "inline::meta-reference-0",
+        "metadata": None,
+    }
+    response = requests.post(url, headers=headers, data=json.dumps(data))
+    print("Model registration status:", response.status_code)
+
     agent_config = AgentConfig(
         model=model_name,
-        instructions="You are a helpful assistant that can answer questions with the provided documents. Read the documents carefully and answer the question based on the documents. If you don't know the answer, just say that you don't know.",
-        sampling_params={
-            "strategy": "greedy",
-            "temperature": 1.0,
-            "top_p": 0.9,
-        },
-        tools=[
-            # TODO: save the bank to local file and load it from local file
-            {
-                "type": "memory",
-                "memory_bank_configs": [{"bank_id": bank_id, "type": "vector"}],
-                "query_generator_config": {"type": "default", "sep": " "},
-                "max_tokens_in_context": 1024,
-                "max_chunks": 5,
-                "score_threshold": 0.8,
-            }
-        ],
-        tool_choice="auto",
-        tool_prompt_format="json",
-        input_shields=[],
-        output_shields=[],
+        instructions="You are a helpful assistant that can answer questions based on provided documents.",
+        sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9},
         enable_session_persistence=True,
     )
-
     agent = Agent(client, agent_config)
 
-    # load dataset and generate responses for the RAG agent
     user_prompts = [
         "What is the name of the llama model released on October 24, 2024?",
         "What about Llama 3.1 model, what is the release date for it?",
     ]
 
-    llamastack_generated_responses = []
     session_id = agent.create_session(f"session-{uuid.uuid4()}")
-    for prompt in tqdm(user_prompts):
+    for prompt in tqdm(user_prompts, desc="Generating responses"):
         print(f"Generating response for: {prompt}")
         try:
-            generated_response = await get_response_row(agent, prompt, session_id)
-            llamastack_generated_responses.append(generated_response)
+            response = await get_response_with_context(agent, prompt, session_id)
+            print(response)
         except Exception as e:
             print(f"Error generating response for {prompt}: {e}")
-            llamastack_generated_responses.append(None)
-    # TODO: make this multi-turn instead of single turn
-    for response in llamastack_generated_responses:
-        print(response)
 
 
-def main(host: str, port: int, docs_dir: str):
+def main(host: str, port: int, docs_dir: str) -> None:
+    """Entry point for the script."""
     asyncio.run(run_main(host, port, docs_dir))
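
The rag_main.py hunks in this patch rely on a module-level `chroma_client`, an `embedding_function`, and a `chunk_text` helper defined above the excerpted hunks. A minimal sketch of what that setup might look like (the client path and the embedding model name are assumptions, not the committed values):

```python
# Sketch only: module-level pieces assumed by insert_documents_to_chromadb()
# and query_chromadb() above; the committed code may differ.
from typing import List

import chromadb
from chromadb.utils import embedding_functions

# Local persistent store (the .gitignore added in a later patch excludes **/chroma).
chroma_client = chromadb.PersistentClient(path="./chroma")

# Sentence-transformers embeddings; the model name here is an assumption.
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)


def chunk_text(content: str, chunk_size: int = 500) -> List[str]:
    """Split a document into fixed-size character chunks."""
    return [content[i : i + chunk_size] for i in range(0, len(content), chunk_size)]
```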
 
 

From a3426f672f6b9a1d85a86fbe58b18f65df726c6d Mon Sep 17 00:00:00 2001
From: Justin Lee 
Date: Thu, 14 Nov 2024 15:38:19 -0800
Subject: [PATCH 07/99] added query, implement cprint

---
 .gitignore                       |  5 +++++
 examples/E2E-RAG-App/rag_main.py | 30 +++++++++++++++++-------------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0b8d49f9b..d08c3f059 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,3 +167,8 @@ xcuserdata/
 *.pte
 *.model
 Package.resolved
+
+#embeddings
+**/faiss_index.index
+**/faiss_index.index.json
+**/chroma
diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py
index 605d13062..e1c51ded9 100644
--- a/examples/E2E-RAG-App/rag_main.py
+++ b/examples/E2E-RAG-App/rag_main.py
@@ -39,22 +39,23 @@ def chunk_text(content: str, chunk_size: int = 500) -> List[str]:
     return chunks
 
 
-def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 500) -> None:
+def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 1000) -> None:
     """Inserts text documents from a directory into ChromaDB."""
     collection_name = "documents"
     existing_collections = chroma_client.list_collections()
     collection_names = [col.name for col in existing_collections]
 
     if collection_name in collection_names:
-        print(
-            f"Collection '{collection_name}' already exists. Skipping document insertion."
+        cprint(
+            f"Collection '{collection_name}' already exists. Skipping document insertion.",
+            "yellow",
         )
         return
 
     collection = chroma_client.create_collection(
         name=collection_name, embedding_function=embedding_function
     )
-    print(f"Collection '{collection_name}' created.")
+    cprint(f"Collection '{collection_name}' created.", "green")
 
     for filename in os.listdir(file_dir):
         if filename.endswith((".txt", ".md")):
@@ -73,15 +74,16 @@ def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 500) -> None:
                         ],
                     )
 
-    print(f"Inserted documents from {file_dir} into ChromaDB.")
+    cprint(f"Inserted documents from {file_dir} into ChromaDB.", "green")
 
 
 def query_chromadb(query: str) -> Optional[dict]:
     """Queries ChromaDB for relevant context based on input query."""
+    cprint(f"Querying ChromaDB with: {query}", "cyan")
     collection = chroma_client.get_collection(
         name="documents", embedding_function=embedding_function
     )
-    results = collection.query(query_texts=[query], n_results=1)
+    results = collection.query(query_texts=[query], n_results=5)
     return results if results else None
 
 
@@ -99,13 +101,14 @@ async def get_response_with_context(
     messages = [
         {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"}
     ]
-    print("Sending messages to agent:", messages)
+    cprint("Embedding retrieval completed. Sending these context to agent:", "cyan")
+    cprint(context, "cyan")
 
     response = agent.create_turn(messages=messages, session_id=session_id)
 
     async for chunk in response:
         if chunk.event.payload.event_type == "turn_complete":
-            print("----input_query-------", input_query)
+            cprint(f"----input_query------- {input_query}", "magenta")
             return chunk.event.payload.turn.output_message.content
 
     return "No response generated."
@@ -127,7 +130,7 @@ async def run_main(host: str, port: int, docs_dir: str) -> None:
         "metadata": None,
     }
     response = requests.post(url, headers=headers, data=json.dumps(data))
-    print("Model registration status:", response.status_code)
+    cprint(f"Model registration status: {response.status_code}", "blue")
 
     agent_config = AgentConfig(
         model=model_name,
@@ -138,18 +141,19 @@ async def run_main(host: str, port: int, docs_dir: str) -> None:
     agent = Agent(client, agent_config)
 
     user_prompts = [
-        "What is the name of the llama model released on October 24, 2024?",
+        "What is the name of the llama model released on Oct 24, 2024?",
         "What about Llama 3.1 model, what is the release date for it?",
+        "When was llama 3.3 released?",
     ]
 
     session_id = agent.create_session(f"session-{uuid.uuid4()}")
     for prompt in tqdm(user_prompts, desc="Generating responses"):
-        print(f"Generating response for: {prompt}")
+        cprint(f"Generating response for: {prompt}", "green")
         try:
             response = await get_response_with_context(agent, prompt, session_id)
-            print(response)
+            cprint(f"Response: {response}", "green")
         except Exception as e:
-            print(f"Error generating response for {prompt}: {e}")
+            cprint(f"Error generating response for {prompt}: {e}", "red")
 
 
 def main(host: str, port: int, docs_dir: str) -> None:
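
The print-to-cprint switch above assumes a `from termcolor import cprint` import near the top of rag_main.py (termcolor is pinned in the requirements.txt added in a later patch). A minimal sketch of the call style used here:

```python
# cprint prints the message in the given terminal color; it is used in
# rag_main.py purely to make the console logging easier to scan.
from termcolor import cprint

cprint("Collection 'documents' created.", "green")       # status messages
cprint("Querying ChromaDB with: example query", "cyan")  # retrieval tracing
cprint("Error generating response: <details>", "red")    # failures
```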

From e8569164703b46dfe4baf3194b9e322f29d7e79c Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani 
Date: Thu, 14 Nov 2024 19:44:35 -0800
Subject: [PATCH 08/99] Create ingestion_script.py

---
 examples/E2E-RAG-App/ingestion_script.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 examples/E2E-RAG-App/ingestion_script.py

diff --git a/examples/E2E-RAG-App/ingestion_script.py b/examples/E2E-RAG-App/ingestion_script.py
new file mode 100644
index 000000000..e69de29bb

From 27d34c7df66428bedddec6ab9cc68044e5d77cf0 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani 
Date: Thu, 14 Nov 2024 19:44:46 -0800
Subject: [PATCH 09/99] Update ingestion_script.py

---
 examples/E2E-RAG-App/ingestion_script.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/examples/E2E-RAG-App/ingestion_script.py b/examples/E2E-RAG-App/ingestion_script.py
index e69de29bb..6b1fb5c2f 100644
--- a/examples/E2E-RAG-App/ingestion_script.py
+++ b/examples/E2E-RAG-App/ingestion_script.py
@@ -0,0 +1,13 @@
+import json
+import logging
+from pathlib import Path
+
+import yaml
+
+from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
+from docling.datamodel.base_models import InputFormat
+from docling.document_converter import (
+    DocumentConverter,
+    PdfFormatOption,
+    WordFormatOption,
+)

From 529b22ead938236699cb8be7e5f05a59559dc7da Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani 
Date: Thu, 14 Nov 2024 19:44:56 -0800
Subject: [PATCH 10/99] Update ingestion_script.py

---
 examples/E2E-RAG-App/ingestion_script.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/E2E-RAG-App/ingestion_script.py b/examples/E2E-RAG-App/ingestion_script.py
index 6b1fb5c2f..e05536636 100644
--- a/examples/E2E-RAG-App/ingestion_script.py
+++ b/examples/E2E-RAG-App/ingestion_script.py
@@ -11,3 +11,5 @@
     PdfFormatOption,
     WordFormatOption,
 )
+from docling.pipeline.simple_pipeline import SimplePipeline
+from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline

From fc78f7095e786a702f5bfe669d6fad418cfaad62 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani 
Date: Thu, 14 Nov 2024 19:45:25 -0800
Subject: [PATCH 11/99] Update ingestion_script.py

---
 examples/E2E-RAG-App/ingestion_script.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/examples/E2E-RAG-App/ingestion_script.py b/examples/E2E-RAG-App/ingestion_script.py
index e05536636..d1c9e93ff 100644
--- a/examples/E2E-RAG-App/ingestion_script.py
+++ b/examples/E2E-RAG-App/ingestion_script.py
@@ -1,5 +1,4 @@
-import json
-import logging
+ogging
 from pathlib import Path
 
 import yaml
@@ -13,3 +12,18 @@
 )
 from docling.pipeline.simple_pipeline import SimplePipeline
 from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
+
+def get_document_files(input_dir: Path) -> list[Path]:
+    """
+    Recursively scan directory for document files.
+    Returns a list of Path objects for supported document types.
+    """
+    supported_extensions = {'.pdf', '.docx', '.pptx'}
+    document_files = []
+    
+    # Recursively walk through all directories
+    for path in input_dir.rglob('*'):
+        if path.is_file() and path.suffix.lower() in supported_extensions:
+            document_files.append(path)
+    
+    return document_files
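
A short usage sketch for the `get_document_files` helper added above; the directory layout and the import path are hypothetical:

```python
from pathlib import Path

# Hypothetical: ingestion_script.py is importable from the current directory,
# and DATA/ contains reports/q3.pdf, slides/deck.pptx, and notes.txt.
from ingestion_script import get_document_files

files = get_document_files(Path("DATA"))
for path in files:
    print(path)
# -> DATA/reports/q3.pdf
# -> DATA/slides/deck.pptx
# notes.txt is skipped: .txt is not in supported_extensions
```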

From 3140c7c2e458917602a209c2a0fbb3fb9ab3e81f Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani 
Date: Thu, 14 Nov 2024 19:46:13 -0800
Subject: [PATCH 12/99] Update ingestion_script.py

---
 examples/E2E-RAG-App/ingestion_script.py | 70 +++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/examples/E2E-RAG-App/ingestion_script.py b/examples/E2E-RAG-App/ingestion_script.py
index d1c9e93ff..c20219ad0 100644
--- a/examples/E2E-RAG-App/ingestion_script.py
+++ b/examples/E2E-RAG-App/ingestion_script.py
@@ -1,4 +1,5 @@
-ogging
+import json
+import logging
 from pathlib import Path
 
 import yaml
@@ -27,3 +28,70 @@ def get_document_files(input_dir: Path) -> list[Path]:
             document_files.append(path)
     
     return document_files
+
+def main():
+    # Define input and output directories relative to current directory
+    input_dir = Path("DATA")
+    output_dir = Path("OUTPUT")
+    
+    # Create output directory if it doesn't exist
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Get all document files recursively
+    input_paths = get_document_files(input_dir)
+    
+    if not input_paths:
+        print("No documents found in DATA directory!")
+        return
+        
+    print(f"Found {len(input_paths)} documents to process:")
+    for path in input_paths:
+        print(f"- {path}")
+    
+    # Configure document converter
+    doc_converter = DocumentConverter(
+        allowed_formats=[
+            InputFormat.PDF,
+            InputFormat.DOCX,
+            InputFormat.PPTX,
+        ],
+        format_options={
+            InputFormat.PDF: PdfFormatOption(
+                pipeline_cls=StandardPdfPipeline, 
+                backend=PyPdfiumDocumentBackend
+            ),
+            InputFormat.DOCX: WordFormatOption(
+                pipeline_cls=SimplePipeline
+            ),
+        },
+    )
+    
+    # Process all documents
+    conv_results = doc_converter.convert_all(input_paths)
+    
+    # Save results
+    for res in conv_results:
+        # Preserve directory structure in output
+        relative_path = res.input.file.relative_to(input_dir)
+        output_subdir = output_dir / relative_path.parent
+        output_subdir.mkdir(parents=True, exist_ok=True)
+        
+        # Create output filenames
+        md_path = output_subdir / f"{res.input.file.stem}.md"
+        json_path = output_subdir / f"{res.input.file.stem}.json"
+        
+        print(
+            f"Converting: {res.input.file}"
+            f"\nSaving to: {md_path}"
+        )
+        
+        # Save markdown version
+        with md_path.open("w", encoding='utf-8') as fp:
+            fp.write(res.document.export_to_markdown())
+            
+        # Save JSON version
+        with json_path.open("w", encoding='utf-8') as fp:
+            json.dump(res.document.export_to_dict(), fp, indent=2)
+
+if __name__ == "__main__":
+    main()
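
For a single document, the conversion loop above boils down to the calls sketched below; `DATA/sample.pdf` is a hypothetical input, and a default `DocumentConverter()` is used here for brevity, whereas the script configures formats and pipelines explicitly:

```python
from pathlib import Path

from docling.document_converter import DocumentConverter

# Hypothetical single-file run; main() above does the same over all of DATA/.
converter = DocumentConverter()
for res in converter.convert_all([Path("DATA/sample.pdf")]):
    print(res.input.file)                         # DATA/sample.pdf
    markdown = res.document.export_to_markdown()  # Markdown string for OUTPUT/*.md
    as_dict = res.document.export_to_dict()       # JSON-serializable dict for OUTPUT/*.json
```

In principle the Markdown written to OUTPUT/ can then be picked up by rag_main.py, which already ingests .md files, though its current `insert_documents_to_chromadb` only scans the top level of the directory it is given.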

From e42dac950e65d6bb01ecef868c8c01b58a84eb14 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani 
Date: Thu, 14 Nov 2024 19:49:04 -0800
Subject: [PATCH 13/99] Update README.md

---
 examples/E2E-RAG-App/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md
index 1a75978b5..c1298c253 100644
--- a/examples/E2E-RAG-App/README.md
+++ b/examples/E2E-RAG-App/README.md
@@ -8,6 +8,10 @@ TODO:
 2. Make the chat inference multi-turn
 3. Front-end + docker
 
+To run the `ingestion_script.py` script, make sure there is a /DATA and an /OUTPUT folder at its relative root. It will ingest ALL documents in /DATA and write BOTH a Markdown and a JSON dump for each of them into the /OUTPUT folder (see the path-mapping sketch below).
+
+
+
 ```
 ~/work/llama-stack-apps/examples/E2E-RAG-App (rag-app)]$ python rag_main.py localhost 5000 ./example_data/
 Inserted 1 documents into bank: rag_agent_docs
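
As a worked example of the DATA-to-OUTPUT mapping described in the README change above (the input path is hypothetical), the ingestion script mirrors the source directory structure and writes one Markdown and one JSON file per document:

```python
from pathlib import Path

input_dir, output_dir = Path("DATA"), Path("OUTPUT")

# Hypothetical source document discovered by the recursive scan.
src = Path("DATA/reports/q3.pdf")

relative = src.relative_to(input_dir)      # reports/q3.pdf
out_subdir = output_dir / relative.parent  # OUTPUT/reports
print(out_subdir / f"{src.stem}.md")       # OUTPUT/reports/q3.md
print(out_subdir / f"{src.stem}.json")     # OUTPUT/reports/q3.json
```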

From 3cc1cb8f1a7329188ad93073809f9b4bc754cd05 Mon Sep 17 00:00:00 2001
From: Justin Lee 
Date: Fri, 15 Nov 2024 10:58:32 -0800
Subject: [PATCH 14/99] fix doc retrieval issue, include requirements.txt

---
 examples/E2E-RAG-App/rag_main.py      |  13 ++-
 examples/E2E-RAG-App/requirements.txt | 131 ++++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 4 deletions(-)
 create mode 100644 examples/E2E-RAG-App/requirements.txt

diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py
index e1c51ded9..f171bbbd4 100644
--- a/examples/E2E-RAG-App/rag_main.py
+++ b/examples/E2E-RAG-App/rag_main.py
@@ -39,7 +39,7 @@ def chunk_text(content: str, chunk_size: int = 500) -> List[str]:
     return chunks
 
 
-def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 1000) -> None:
+def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 250) -> None:
     """Inserts text documents from a directory into ChromaDB."""
     collection_name = "documents"
     existing_collections = chroma_client.list_collections()
@@ -55,6 +55,7 @@ def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 1000) -> None:
     collection = chroma_client.create_collection(
         name=collection_name, embedding_function=embedding_function
     )
+
     cprint(f"Collection '{collection_name}' created.", "green")
 
     for filename in os.listdir(file_dir):
@@ -83,7 +84,8 @@ def query_chromadb(query: str) -> Optional[dict]:
     collection = chroma_client.get_collection(
         name="documents", embedding_function=embedding_function
     )
-    results = collection.query(query_texts=[query], n_results=5)
+    print(collection.count())  # returns the number of items in the collection
+    results = collection.query(query_texts=[query], n_results=10)
     return results if results else None
 
 
@@ -94,8 +96,11 @@ async def get_response_with_context(
     results = query_chromadb(input_query)
     context = (
         "No relevant context found."
-        if not results or not results["metadatas"][0]
-        else "\n".join(metadata["content"] for metadata in results["metadatas"][0])
+        if not results or not results["metadatas"]
+        else "\n".join(
+            "\n".join(metadata["content"] for metadata in metadata_list)
+            for metadata_list in results["metadatas"]
+        )
     )
 
     messages = [
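
The nested join introduced above mirrors the shape ChromaDB returns from `collection.query`: `results["metadatas"]` holds one inner list of metadata dicts per query text. A small sketch with a hand-built result of that shape (the values are illustrative only):

```python
# Illustrative ChromaDB query() result for a single query text; only the
# nesting matters here, the values are made up.
results = {
    "metadatas": [
        [
            {"filename": "llama_3.2.md", "chunk_index": 0,
             "content": "Model Release Date: Oct 24, 2024"},
            {"filename": "llama_3.1.md", "chunk_index": 3,
             "content": "Model Release Date: July 23, 2024"},
        ]
    ]
}

context = "\n".join(
    "\n".join(metadata["content"] for metadata in metadata_list)
    for metadata_list in results["metadatas"]
)
print(context)
# Model Release Date: Oct 24, 2024
# Model Release Date: July 23, 2024
```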
diff --git a/examples/E2E-RAG-App/requirements.txt b/examples/E2E-RAG-App/requirements.txt
new file mode 100644
index 000000000..c04f15c0a
--- /dev/null
+++ b/examples/E2E-RAG-App/requirements.txt
@@ -0,0 +1,131 @@
+annotated-types==0.7.0
+anyio==4.6.2.post1
+asgiref==3.8.1
+backoff==2.2.1
+bcrypt==4.2.0
+Bottleneck @ file:///croot/bottleneck_1731058641041/work
+build==1.2.2.post1
+cachetools==5.5.0
+certifi==2024.8.30
+charset-normalizer==3.4.0
+chroma-hnswlib==0.7.6
+chromadb==0.5.18
+click==8.1.7
+colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
+coloredlogs==15.0.1
+Deprecated==1.2.14
+distro==1.9.0
+durationpy==0.9
+exceptiongroup==1.2.2
+fastapi==0.115.5
+filelock==3.16.1
+fire==0.7.0
+flatbuffers==24.3.25
+fsspec==2024.10.0
+google-auth==2.36.0
+googleapis-common-protos==1.66.0
+grpcio==1.67.1
+h11==0.14.0
+httpcore==1.0.6
+httptools==0.6.4
+httpx==0.27.2
+huggingface-hub==0.26.2
+humanfriendly==10.0
+idna==3.10
+importlib_metadata==8.5.0
+importlib_resources==6.4.5
+Jinja2==3.1.4
+jiter==0.7.1
+joblib==1.4.2
+kubernetes==31.0.0
+llama_stack_client==0.0.50
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mkl-fft==1.3.1
+mkl-random @ file:///home/builder/ci_310/mkl_random_1641843545607/work
+mkl-service==2.4.0
+mmh3==5.0.1
+monotonic==1.6
+mpmath==1.3.0
+networkx==3.4.2
+numexpr @ file:///croot/numexpr_1668713893690/work
+numpy @ file:///croot/numpy_and_numpy_base_1682520569166/work
+nvidia-cublas-cu12==12.4.5.8
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.2.1.3
+nvidia-curand-cu12==10.3.5.147
+nvidia-cusolver-cu12==11.6.1.9
+nvidia-cusparse-cu12==12.3.1.170
+nvidia-nccl-cu12==2.21.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.4.127
+oauthlib==3.2.2
+onnxruntime==1.20.0
+openai==1.54.4
+opentelemetry-api==1.28.1
+opentelemetry-exporter-otlp-proto-common==1.28.1
+opentelemetry-exporter-otlp-proto-grpc==1.28.1
+opentelemetry-instrumentation==0.49b1
+opentelemetry-instrumentation-asgi==0.49b1
+opentelemetry-instrumentation-fastapi==0.49b1
+opentelemetry-proto==1.28.1
+opentelemetry-sdk==1.28.1
+opentelemetry-semantic-conventions==0.49b1
+opentelemetry-util-http==0.49b1
+orjson==3.10.11
+overrides==7.7.0
+packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1731456981915/work
+pandas @ file:///croot/pandas_1718308974269/work/dist/pandas-2.2.2-cp310-cp310-linux_x86_64.whl#sha256=a2434a313a5eb5b4fdbec6c770abfde859f643eff7b3a703791bf8b73e54c394
+pillow==11.0.0
+posthog==3.7.0
+protobuf==5.28.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pydantic==2.9.2
+pydantic_core==2.23.4
+Pygments==2.18.0
+PyPika==0.48.9
+pyproject_hooks==1.2.0
+python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1709299778482/work
+python-dotenv==1.0.1
+pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1726055524169/work
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+requests-oauthlib==2.0.0
+rich==13.9.4
+rsa==4.9
+safetensors==0.4.5
+scikit-learn==1.5.2
+scipy==1.14.1
+sentence-transformers==3.3.0
+shellingham==1.5.4
+six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
+sniffio==1.3.1
+starlette==0.41.2
+sympy==1.13.1
+tabulate==0.9.0
+tenacity==9.0.0
+termcolor @ file:///home/conda/feedstock_root/build_artifacts/termcolor_1728288976871/work
+threadpoolctl==3.5.0
+tokenizers==0.20.3
+tomli==2.1.0
+torch==2.5.1
+tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1730926118411/work
+transformers==4.46.2
+triton==3.1.0
+typer==0.13.0
+typing_extensions==4.12.2
+tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1727140567071/work
+urllib3==2.2.3
+uvicorn==0.32.0
+uvloop==0.21.0
+watchfiles==0.24.0
+websocket-client==1.8.0
+websockets==14.1
+wrapt==1.16.0
+zipp==3.21.0

From 9b906d1a5c3396d6269d8fc98a5fdb82fa315e4b Mon Sep 17 00:00:00 2001
From: Justin Lee 
Date: Fri, 15 Nov 2024 11:17:28 -0800
Subject: [PATCH 15/99] include filename in context, added debugging for
 query_chromadb

---
 .../E2E-RAG-App/example_data/llama_3.1.md     | 50 +++++++--------
 .../E2E-RAG-App/example_data/llama_3.2.md     | 64 +++++++++----------
 .../example_data/llama_3.2_vision.md          | 64 +++++++++----------
 examples/E2E-RAG-App/rag_main.py              | 58 +++++++++++++----
 4 files changed, 133 insertions(+), 103 deletions(-)

diff --git a/examples/E2E-RAG-App/example_data/llama_3.1.md b/examples/E2E-RAG-App/example_data/llama_3.1.md
index 97c39fe34..af2d4850b 100644
--- a/examples/E2E-RAG-App/example_data/llama_3.1.md
+++ b/examples/E2E-RAG-App/example_data/llama_3.1.md
@@ -2,9 +2,9 @@
 
 The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
 
-**Model developer:** Meta
+Model developer: Meta
 
-**Model Architecture:** Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
+Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
 
 
 
@@ -75,36 +75,36 @@ The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a
 
-**Supported languages:** English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai. +Supported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai. -**Llama 3.1 family of models**. Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability. +Llama 3.1 family of models. Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability. -**Model Release Date:** July 23, 2024. +Model Release Date: July 23, 2024. -**Status:** This is a static model trained on an offline dataset. Future versions of the tuned models will be released as we improve model safety with community feedback. +Status: This is a static model trained on an offline dataset. Future versions of the tuned models will be released as we improve model safety with community feedback. -**License:** A custom commercial license, the Llama 3.1 Community License, is available at: [https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE) +License: A custom commercial license, the Llama 3.1 Community License, is available at: [https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE) -**Feedback:** Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.1 in applications, please go [here](https://github.com/meta-llama/llama-recipes). +Feedback: Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.1 in applications, please go [here](https://github.com/meta-llama/llama-recipes). ## Intended Use -**Intended Use Cases** Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 model collection also supports the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. The Llama 3.1 Community License allows for these use cases. +Intended Use Cases Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 model collection also supports the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. The Llama 3.1 Community License allows for these use cases. -**Out-of-scope** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.1 Community License. Use in languages beyond those explicitly referenced as supported in this model card. 
+Out-of-scope Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.1 Community License. Use in languages beyond those explicitly referenced as supported in this model card. -**Note:** Llama 3.1 has been trained on a broader collection of languages than the 8 supported languages. Developers may fine-tune Llama 3.1 models for languages beyond the 8 supported languages provided they comply with the Llama 3.1 Community License and the Acceptable Use Policy and in such cases are responsible for ensuring that any uses of Llama 3.1 in additional languages is done in a safe and responsible manner. +Note: Llama 3.1 has been trained on a broader collection of languages than the 8 supported languages. Developers may fine-tune Llama 3.1 models for languages beyond the 8 supported languages provided they comply with the Llama 3.1 Community License and the Acceptable Use Policy and in such cases are responsible for ensuring that any uses of Llama 3.1 in additional languages is done in a safe and responsible manner. ## Hardware and Software -**Training Factors** We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, annotation, and evaluation were also performed on production infrastructure. +Training Factors We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, annotation, and evaluation were also performed on production infrastructure. -**Training Energy Use** Training utilized a cumulative of **39.3**M GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency. +Training Energy Use Training utilized a cumulative of 39.3M GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency. -**Training Greenhouse Gas Emissions** Estimated total location-based greenhouse gas emissions were **11,390** tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy, therefore the total market-based greenhouse gas emissions for training were 0 tons CO2eq. +Training Greenhouse Gas Emissions Estimated total location-based greenhouse gas emissions were 11,390 tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy, therefore the total market-based greenhouse gas emissions for training were 0 tons CO2eq. @@ -183,9 +183,9 @@ The methodology used to determine training energy use and greenhouse gas emissio ## Training Data -**Overview:** Llama 3.1 was pretrained on ~15 trillion tokens of data from publicly available sources. The fine-tuning data includes publicly available instruction datasets, as well as over 25M synthetically generated examples. +Overview: Llama 3.1 was pretrained on ~15 trillion tokens of data from publicly available sources. 
The fine-tuning data includes publicly available instruction datasets, as well as over 25M synthetically generated examples. -**Data Freshness:** The pretraining data has a cutoff of December 2023. +Data Freshness: The pretraining data has a cutoff of December 2023. ## Benchmark scores @@ -901,18 +901,18 @@ Llama is a foundational technology designed to be used in a variety of use cases Our main objectives for conducting safety fine-tuning are to provide the research community with a valuable resource for studying the robustness of safety fine-tuning, as well as to offer developers a readily available, safe, and powerful model for various applications to reduce the developer workload to deploy safe AI systems. For more details on the safety mitigations implemented please read the Llama 3 paper. -**Fine-tuning data** +Fine-tuning data We employ a multi-faceted approach to data collection, combining human-generated data from our vendors with synthetic data to mitigate potential safety risks. We’ve developed many large language model (LLM)-based classifiers that enable us to thoughtfully select high-quality prompts and responses, enhancing data quality control. -**Refusals and Tone** +Refusals and Tone Building on the work we started with Llama 3, we put a great emphasis on model refusals to benign prompts as well as refusal tone. We included both borderline and adversarial prompts in our safety data strategy, and modified our safety data responses to follow tone guidelines. #### Llama 3.1 systems -**Large language models, including Llama 3.1, are not designed to be deployed in isolation but instead should be deployed as part of an overall AI system with additional safety guardrails as required.** Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieve the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools. +Large language models, including Llama 3.1, are not designed to be deployed in isolation but instead should be deployed as part of an overall AI system with additional safety guardrails as required. Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieve the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools. As part of our responsible release approach, we provide the community with [safeguards](https://llama.meta.com/trust-and-safety/) that developers should deploy with Llama models or other LLMs, including Llama Guard 3, Prompt Guard and Code Shield. All our [reference implementations](https://github.com/meta-llama/llama-agentic-system) demos contain these safeguards by default so developers can benefit from system-level safety out-of-the-box. @@ -921,9 +921,9 @@ As part of our responsible release approach, we provide the community with [safe Note that this release introduces new capabilities, including a longer context window, multilingual inputs and outputs and possible integrations by developers with third party tools. Building with these new capabilities requires specific considerations in addition to the best practices that generally apply across all Generative AI use cases. -**Tool-use**: Just like in standard software development, developers are responsible for the integration of the LLM with the tools and services of their choice. 
They should define a clear policy for their use case and assess the integrity of the third party services they use to be aware of the safety and security limitations when using this capability. Refer to the Responsible Use Guide for best practices on the safe deployment of the third party safeguards. +Tool-use: Just like in standard software development, developers are responsible for the integration of the LLM with the tools and services of their choice. They should define a clear policy for their use case and assess the integrity of the third party services they use to be aware of the safety and security limitations when using this capability. Refer to the Responsible Use Guide for best practices on the safe deployment of the third party safeguards. -**Multilinguality**: Llama 3.1 supports 7 languages in addition to English: French, German, Hindi, Italian, Portuguese, Spanish, and Thai. Llama may be able to output text in other languages than those that meet performance thresholds for safety and helpfulness. We strongly discourage developers from using this model to converse in non-supported languages without implementing finetuning and system controls in alignment with their policies and the best practices shared in the Responsible Use Guide. +Multilinguality: Llama 3.1 supports 7 languages in addition to English: French, German, Hindi, Italian, Portuguese, Spanish, and Thai. Llama may be able to output text in other languages than those that meet performance thresholds for safety and helpfulness. We strongly discourage developers from using this model to converse in non-supported languages without implementing finetuning and system controls in alignment with their policies and the best practices shared in the Responsible Use Guide. ### Evaluations @@ -932,7 +932,7 @@ We evaluated Llama models for common use cases as well as specific capabilities. Capability evaluations measure vulnerabilities of Llama models inherent to specific capabilities, for which were crafted dedicated benchmarks including long context, multilingual, tools calls, coding or memorization. -**Red teaming** +Red teaming For both scenarios, we conducted recurring red teaming exercises with the goal of discovering risks via adversarial prompting and we used the learnings to improve our benchmarks and safety tuning datasets. @@ -943,16 +943,16 @@ We partnered early with subject-matter experts in critical risk areas to underst We specifically focused our efforts on mitigating the following critical risk areas: -**1. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive materials) helpfulness** +1. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive materials) helpfulness To assess risks related to proliferation of chemical and biological weapons, we performed uplift testing designed to assess whether use of Llama 3.1 models could meaningfully increase the capabilities of malicious actors to plan or carry out attacks using these types of weapons. -**2. Child Safety** +2. Child Safety Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development. 
For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors including the additional languages Llama 3 is trained on. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. -**3. Cyber attack enablement** +3. Cyber attack enablement Our cyber attack uplift study investigated whether LLMs can enhance human capabilities in hacking tasks, both in terms of skill level and speed. diff --git a/examples/E2E-RAG-App/example_data/llama_3.2.md b/examples/E2E-RAG-App/example_data/llama_3.2.md index 4d3acdd3b..0bc604261 100644 --- a/examples/E2E-RAG-App/example_data/llama_3.2.md +++ b/examples/E2E-RAG-App/example_data/llama_3.2.md @@ -2,9 +2,9 @@ The Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks. -**Model Developer:** Meta +Model Developer: Meta -**Model Architecture:** Llama 3.2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. +Model Architecture: Llama 3.2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. | | Training Data | Params | Input modalities | Output modalities | Context Length | GQA | Shared Embeddings | Token count | Knowledge cutoff | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | @@ -13,31 +13,31 @@ The Llama 3.2 collection of multilingual large language models (LLMs) is a colle | Llama 3.2 Quantized (text only) | A new mix of publicly available online data. | 1B (1.23B) | Multilingual Text | Multilingual Text and code | 8k | Yes | Yes | Up to 9T tokens | December 2023 | | | | 3B (3.21B) | Multilingual Text | Multilingual Text and code | | | | | | -**Supported Languages:** English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai are officially supported. Llama 3.2 has been trained on a broader collection of languages than these 8 supported languages. Developers may fine-tune Llama 3.2 models for languages beyond these supported languages, provided they comply with the Llama 3.2 Community License and the Acceptable Use Policy. Developers are always expected to ensure that their deployments, including those that involve additional languages, are completed safely and responsibly. +Supported Languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai are officially supported. Llama 3.2 has been trained on a broader collection of languages than these 8 supported languages. Developers may fine-tune Llama 3.2 models for languages beyond these supported languages, provided they comply with the Llama 3.2 Community License and the Acceptable Use Policy. 
Developers are always expected to ensure that their deployments, including those that involve additional languages, are completed safely and responsibly. -**Llama 3.2 Model Family:** Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability. +Llama 3.2 Model Family: Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability. -**Model Release Date:** Oct 24, 2024 +Model Release Date: Oct 24, 2024 -**Status:** This is a static model trained on an offline dataset. Future versions may be released that improve model capabilities and safety. +Status: This is a static model trained on an offline dataset. Future versions may be released that improve model capabilities and safety. -**License:** Use of Llama 3.2 is governed by the [Llama 3.2 Community License](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) (a custom, commercial license agreement). +License: Use of Llama 3.2 is governed by the [Llama 3.2 Community License](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) (a custom, commercial license agreement). -**Feedback:** Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.2 in applications, please go [here](https://github.com/meta-llama/llama-recipes). +Feedback: Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.2 in applications, please go [here](https://github.com/meta-llama/llama-recipes). ## Intended Use -**Intended Use Cases:** Llama 3.2 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat and agentic applications like knowledge retrieval and summarization, mobile AI powered writing assistants and query and prompt rewriting. Pretrained models can be adapted for a variety of additional natural language generation tasks. Similarly, quantized models can be adapted for a variety of on-device use-cases with limited compute resources. +Intended Use Cases: Llama 3.2 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat and agentic applications like knowledge retrieval and summarization, mobile AI powered writing assistants and query and prompt rewriting. Pretrained models can be adapted for a variety of additional natural language generation tasks. Similarly, quantized models can be adapted for a variety of on-device use-cases with limited compute resources. -**Out of Scope:** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.2 Community License. Use in languages beyond those explicitly referenced as supported in this model card. +Out of Scope: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.2 Community License. 
Use in languages beyond those explicitly referenced as supported in this model card. ## Hardware and Software -**Training Factors:** We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, quantization, annotation, and evaluation were also performed on production infrastructure. +Training Factors: We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, quantization, annotation, and evaluation were also performed on production infrastructure. -**Training Energy Use:** Training utilized a cumulative of **916k** GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency. +Training Energy Use: Training utilized a cumulative of 916k GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency. -**Training Greenhouse Gas Emissions:** Estimated total location-based greenhouse gas emissions were **240** tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy; therefore, the total market-based greenhouse gas emissions for training were 0 tons CO2eq. +Training Greenhouse Gas Emissions: Estimated total location-based greenhouse gas emissions were 240 tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy; therefore, the total market-based greenhouse gas emissions for training were 0 tons CO2eq. | | Training Time (GPU hours) | Logit Generation Time (GPU Hours) | Training Power Consumption (W) | Training Location-Based Greenhouse Gas Emissions (tons CO2eq) | Training Market-Based Greenhouse Gas Emissions (tons CO2eq) | | :---- | :---: | ----- | :---: | :---: | :---: | @@ -55,9 +55,9 @@ The methodology used to determine training energy use and greenhouse gas emissio ## Training Data -**Overview:** Llama 3.2 was pretrained on up to 9 trillion tokens of data from publicly available sources. For the 1B and 3B Llama 3.2 models, we incorporated logits from the Llama 3.1 8B and 70B models into the pretraining stage of the model development, where outputs (logits) from these larger models were used as token-level targets. Knowledge distillation was used after pruning to recover performance. In post-training we used a similar recipe as Llama 3.1 and produced final chat models by doing several rounds of alignment on top of the pre-trained model. Each round involved Supervised Fine-Tuning (SFT), Rejection Sampling (RS), and Direct Preference Optimization (DPO). +Overview: Llama 3.2 was pretrained on up to 9 trillion tokens of data from publicly available sources. For the 1B and 3B Llama 3.2 models, we incorporated logits from the Llama 3.1 8B and 70B models into the pretraining stage of the model development, where outputs (logits) from these larger models were used as token-level targets. Knowledge distillation was used after pruning to recover performance. 
In post-training we used a similar recipe as Llama 3.1 and produced final chat models by doing several rounds of alignment on top of the pre-trained model. Each round involved Supervised Fine-Tuning (SFT), Rejection Sampling (RS), and Direct Preference Optimization (DPO). -**Data Freshness:** The pretraining data has a cutoff of December 2023\. +Data Freshness: The pretraining data has a cutoff of December 2023\. ## Quantization @@ -164,53 +164,53 @@ As part of our Responsible release approach, we followed a three-pronged strateg ### Responsible Deployment -**Approach:** Llama is a foundational technology designed to be used in a variety of use cases. Examples on how Meta’s Llama models have been responsibly deployed can be found in our [Community Stories webpage](https://llama.meta.com/community-stories/). Our approach is to build the most helpful models, enabling the world to benefit from the technology power, by aligning our model safety for generic use cases and addressing a standard set of harms. Developers are then in the driver’s seat to tailor safety for their use cases, defining their own policies and deploying the models with the necessary safeguards in their Llama systems. Llama 3.2 was developed following the best practices outlined in our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/). +Approach: Llama is a foundational technology designed to be used in a variety of use cases. Examples on how Meta’s Llama models have been responsibly deployed can be found in our [Community Stories webpage](https://llama.meta.com/community-stories/). Our approach is to build the most helpful models, enabling the world to benefit from the technology power, by aligning our model safety for generic use cases and addressing a standard set of harms. Developers are then in the driver’s seat to tailor safety for their use cases, defining their own policies and deploying the models with the necessary safeguards in their Llama systems. Llama 3.2 was developed following the best practices outlined in our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/). #### Llama 3.2 Instruct -**Objective:** Our main objectives for conducting safety fine-tuning are to provide the research community with a valuable resource for studying the robustness of safety fine-tuning, as well as to offer developers a readily available, safe, and powerful model for various applications to reduce the developer workload to deploy safe AI systems. We implemented the same set of safety mitigations as in Llama 3, and you can learn more about these in the Llama 3 [paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/). +Objective: Our main objectives for conducting safety fine-tuning are to provide the research community with a valuable resource for studying the robustness of safety fine-tuning, as well as to offer developers a readily available, safe, and powerful model for various applications to reduce the developer workload to deploy safe AI systems. We implemented the same set of safety mitigations as in Llama 3, and you can learn more about these in the Llama 3 [paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/). -**Fine-Tuning Data:** We employ a multi-faceted approach to data collection, combining human-generated data from our vendors with synthetic data to mitigate potential safety risks. 
We’ve developed many large language model (LLM)-based classifiers that enable us to thoughtfully select high-quality prompts and responses, enhancing data quality control. +Fine-Tuning Data: We employ a multi-faceted approach to data collection, combining human-generated data from our vendors with synthetic data to mitigate potential safety risks. We’ve developed many large language model (LLM)-based classifiers that enable us to thoughtfully select high-quality prompts and responses, enhancing data quality control. -**Refusals and Tone:** Building on the work we started with Llama 3, we put a great emphasis on model refusals to benign prompts as well as refusal tone. We included both borderline and adversarial prompts in our safety data strategy, and modified our safety data responses to follow tone guidelines. +Refusals and Tone: Building on the work we started with Llama 3, we put a great emphasis on model refusals to benign prompts as well as refusal tone. We included both borderline and adversarial prompts in our safety data strategy, and modified our safety data responses to follow tone guidelines. #### Llama 3.2 Systems -**Safety as a System:** Large language models, including Llama 3.2, **are not designed to be deployed in isolation** but instead should be deployed as part of an overall AI system with additional safety guardrails as required. Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieve the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools. As part of our responsible release approach, we provide the community with [safeguards](https://llama.meta.com/trust-and-safety/) that developers should deploy with Llama models or other LLMs, including Llama Guard, Prompt Guard and Code Shield. All our [reference implementations](https://github.com/meta-llama/llama-agentic-system) demos contain these safeguards by default so developers can benefit from system-level safety out-of-the-box. +Safety as a System: Large language models, including Llama 3.2, are not designed to be deployed in isolation but instead should be deployed as part of an overall AI system with additional safety guardrails as required. Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieve the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools. As part of our responsible release approach, we provide the community with [safeguards](https://llama.meta.com/trust-and-safety/) that developers should deploy with Llama models or other LLMs, including Llama Guard, Prompt Guard and Code Shield. All our [reference implementations](https://github.com/meta-llama/llama-agentic-system) demos contain these safeguards by default so developers can benefit from system-level safety out-of-the-box. ### New Capabilities and Use Cases -**Technological Advancement:** Llama releases usually introduce new capabilities that require specific considerations in addition to the best practices that generally apply across all Generative AI use cases. For prior release capabilities also supported by Llama 3.2, see [Llama 3.1 Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md), as the same considerations apply here as well. 
+Technological Advancement: Llama releases usually introduce new capabilities that require specific considerations in addition to the best practices that generally apply across all Generative AI use cases. For prior release capabilities also supported by Llama 3.2, see [Llama 3.1 Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md), as the same considerations apply here as well. -**Constrained Environments:** Llama 3.2 1B and 3B models are expected to be deployed in highly constrained environments, such as mobile devices. LLM Systems using smaller models will have a different alignment profile and safety/helpfulness tradeoff than more complex, larger systems. Developers should ensure the safety of their system meets the requirements of their use case. We recommend using lighter system safeguards for such use cases, like Llama Guard 3-1B or its mobile-optimized version. +Constrained Environments: Llama 3.2 1B and 3B models are expected to be deployed in highly constrained environments, such as mobile devices. LLM Systems using smaller models will have a different alignment profile and safety/helpfulness tradeoff than more complex, larger systems. Developers should ensure the safety of their system meets the requirements of their use case. We recommend using lighter system safeguards for such use cases, like Llama Guard 3-1B or its mobile-optimized version. ### Evaluations -**Scaled Evaluations:** We built dedicated, adversarial evaluation datasets and evaluated systems composed of Llama models and Purple Llama safeguards to filter input prompt and output response. It is important to evaluate applications in context, and we recommend building dedicated evaluation dataset for your use case. +Scaled Evaluations: We built dedicated, adversarial evaluation datasets and evaluated systems composed of Llama models and Purple Llama safeguards to filter input prompt and output response. It is important to evaluate applications in context, and we recommend building dedicated evaluation dataset for your use case. -**Red Teaming:** We conducted recurring red teaming exercises with the goal of discovering risks via adversarial prompting and we used the learnings to improve our benchmarks and safety tuning datasets. We partnered early with subject-matter experts in critical risk areas to understand the nature of these real-world harms and how such models may lead to unintended harm for society. Based on these conversations, we derived a set of adversarial goals for the red team to attempt to achieve, such as extracting harmful information or reprogramming the model to act in a potentially harmful capacity. The red team consisted of experts in cybersecurity, adversarial machine learning, responsible AI, and integrity in addition to multilingual content specialists with background in integrity issues in specific geographic markets. +Red Teaming: We conducted recurring red teaming exercises with the goal of discovering risks via adversarial prompting and we used the learnings to improve our benchmarks and safety tuning datasets. We partnered early with subject-matter experts in critical risk areas to understand the nature of these real-world harms and how such models may lead to unintended harm for society. Based on these conversations, we derived a set of adversarial goals for the red team to attempt to achieve, such as extracting harmful information or reprogramming the model to act in a potentially harmful capacity. 
The red team consisted of experts in cybersecurity, adversarial machine learning, responsible AI, and integrity in addition to multilingual content specialists with background in integrity issues in specific geographic markets. ### Critical Risks In addition to our safety work above, we took extra care on measuring and/or mitigating the following critical risk areas: -**1\. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive Weapons):** Llama 3.2 1B and 3B models are smaller and less capable derivatives of Llama 3.1. For Llama 3.1 70B and 405B, to assess risks related to proliferation of chemical and biological weapons, we performed uplift testing designed to assess whether use of Llama 3.1 models could meaningfully increase the capabilities of malicious actors to plan or carry out attacks using these types of weapons and have determined that such testing also applies to the smaller 1B and 3B models. +1\. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive Weapons): Llama 3.2 1B and 3B models are smaller and less capable derivatives of Llama 3.1. For Llama 3.1 70B and 405B, to assess risks related to proliferation of chemical and biological weapons, we performed uplift testing designed to assess whether use of Llama 3.1 models could meaningfully increase the capabilities of malicious actors to plan or carry out attacks using these types of weapons and have determined that such testing also applies to the smaller 1B and 3B models. -**2\. Child Safety:** Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development. For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors including the additional languages Llama 3 is trained on. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. +2\. Child Safety: Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development. For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors including the additional languages Llama 3 is trained on. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. -**3\. Cyber Attacks:** For Llama 3.1 405B, our cyber attack uplift study investigated whether LLMs can enhance human capabilities in hacking tasks, both in terms of skill level and speed. +3\. Cyber Attacks: For Llama 3.1 405B, our cyber attack uplift study investigated whether LLMs can enhance human capabilities in hacking tasks, both in terms of skill level and speed. 
Our attack automation study focused on evaluating the capabilities of LLMs when used as autonomous agents in cyber offensive operations, specifically in the context of ransomware attacks. This evaluation was distinct from previous studies that considered LLMs as interactive assistants. The primary objective was to assess whether these models could effectively function as independent agents in executing complex cyber-attacks without human intervention. Because Llama 3.2’s 1B and 3B models are smaller and less capable models than Llama 3.1 405B, we broadly believe that the testing conducted for the 405B model also applies to Llama 3.2 models. ### Community -**Industry Partnerships:** Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership on AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [Github repository](https://github.com/meta-llama/PurpleLlama). +Industry Partnerships: Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership on AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [Github repository](https://github.com/meta-llama/PurpleLlama). -**Grants:** We also set up the [Llama Impact Grants](https://llama.meta.com/llama-impact-grants/) program to identify and support the most compelling applications of Meta’s Llama model for societal benefit across three categories: education, climate and open innovation. The 20 finalists from the hundreds of applications can be found [here](https://llama.meta.com/llama-impact-grants/#finalists). +Grants: We also set up the [Llama Impact Grants](https://llama.meta.com/llama-impact-grants/) program to identify and support the most compelling applications of Meta’s Llama model for societal benefit across three categories: education, climate and open innovation. The 20 finalists from the hundreds of applications can be found [here](https://llama.meta.com/llama-impact-grants/#finalists). -**Reporting:** Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. 
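The hunks above (and the matching ones for `llama_3.2_vision.md` below) make a single mechanical change to the example data: the `**bold**` markers around field labels such as `**Fine-Tuning Data:**` are dropped, leaving plain `Label:` text for the RAG pipeline to chunk and embed. The patch only shows the resulting hunks, not how they were produced; the snippet below is a minimal sketch of an equivalent batch edit, assuming a plain regex over the markdown is sufficient and that the files live under `examples/E2E-RAG-App/example_data/` as the diff headers indicate.

```python
# Hypothetical helper -- the patch only records the resulting hunks, not the tool
# that produced them. This sketch assumes a simple regex pass over the example
# markdown is enough to drop every **bold** emphasis span.
import re
from pathlib import Path

BOLD = re.compile(r"\*\*(.+?)\*\*")  # non-greedy: match each **...** span separately


def strip_bold(markdown: str) -> str:
    """Replace every **text** span with its bare text."""
    return BOLD.sub(r"\1", markdown)


if __name__ == "__main__":
    for path in Path("examples/E2E-RAG-App/example_data").glob("*.md"):
        text = path.read_text(encoding="utf-8")
        path.write_text(strip_bold(text), encoding="utf-8")
        print(f"normalized {path}")
```

Because the pattern is non-greedy, mid-sentence emphasis (for example `**are not designed to be deployed in isolation**`) is unwrapped the same way the hunks show, not merged with neighboring spans.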
+Reporting: Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. ## Ethical Considerations and Limitations -**Values:** The core values of Llama 3.2 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. Llama 3.2 addresses users and their needs as they are, without insertion unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. +Values: The core values of Llama 3.2 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. Llama 3.2 addresses users and their needs as they are, without insertion unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. -**Testing:** Llama 3.2 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3.2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 3.2 models, developers should perform safety testing and tuning tailored to their specific applications of the model. Please refer to available resources including our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide), [Trust and Safety](https://llama.meta.com/trust-and-safety/) solutions, and other [resources](https://llama.meta.com/docs/get-started/) to learn more about responsible development. +Testing: Llama 3.2 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3.2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 3.2 models, developers should perform safety testing and tuning tailored to their specific applications of the model. Please refer to available resources including our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide), [Trust and Safety](https://llama.meta.com/trust-and-safety/) solutions, and other [resources](https://llama.meta.com/docs/get-started/) to learn more about responsible development. 
diff --git a/examples/E2E-RAG-App/example_data/llama_3.2_vision.md b/examples/E2E-RAG-App/example_data/llama_3.2_vision.md index abf126a94..e64eee2d1 100644 --- a/examples/E2E-RAG-App/example_data/llama_3.2_vision.md +++ b/examples/E2E-RAG-App/example_data/llama_3.2_vision.md @@ -2,32 +2,32 @@ The Llama 3.2-Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes (text \+ images in / text out). The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image. The models outperform many of the available open source and closed multimodal models on common industry benchmarks. -**Model Developer**: Meta +Model Developer: Meta -**Model Architecture:** Llama 3.2-Vision is built on top of the Llama 3.1 text-only model, which is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. To support image recognition tasks, the Llama 3.2-Vision model uses a separately trained vision adapter that integrates with the pre-trained Llama 3.1 language model. The adapter consists of a series of cross-attention layers that feed image encoder representations into the core LLM. +Model Architecture: Llama 3.2-Vision is built on top of the Llama 3.1 text-only model, which is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. To support image recognition tasks, the Llama 3.2-Vision model uses a separately trained vision adapter that integrates with the pre-trained Llama 3.1 language model. The adapter consists of a series of cross-attention layers that feed image encoder representations into the core LLM. | | Training Data | Params | Input modalities | Output modalities | Context length | GQA | Data volume | Knowledge cutoff | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | Llama 3.2-Vision | (Image, text) pairs | 11B (10.6) | Text \+ Image | Text | 128k | Yes | 6B (image, text) pairs | December 2023 | | Llama 3.2-Vision | (Image, text) pairs | 90B (88.8) | Text \+ Image | Text | 128k | Yes | 6B (image, text) pairs | December 2023 | -**Supported Languages:** For text only tasks, English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai are officially supported. Llama 3.2 has been trained on a broader collection of languages than these 8 supported languages. Note for image+text applications, English is the only language supported. +Supported Languages: For text only tasks, English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai are officially supported. Llama 3.2 has been trained on a broader collection of languages than these 8 supported languages. Note for image+text applications, English is the only language supported. Developers may fine-tune Llama 3.2 models for languages beyond these supported languages, provided they comply with the Llama 3.2 Community License and the Acceptable Use Policy. Developers are always expected to ensure that their deployments, including those that involve additional languages, are completed safely and responsibly. 
-**Llama 3.2 Model Family:** Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability. +Llama 3.2 Model Family: Token counts refer to pretraining data only. All model versions use Grouped-Query Attention (GQA) for improved inference scalability. -**Model Release Date:** Sept 25, 2024 +Model Release Date: Sept 25, 2024 -**Status:** This is a static model trained on an offline dataset. Future versions may be released that improve model capabilities and safety. +Status: This is a static model trained on an offline dataset. Future versions may be released that improve model capabilities and safety. -**License:** Use of Llama 3.2 is governed by the [Llama 3.2 Community License](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) (a custom, commercial license agreement). +License: Use of Llama 3.2 is governed by the [Llama 3.2 Community License](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) (a custom, commercial license agreement). -**Feedback:** Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.2-Vision in applications, please go [here](https://github.com/meta-llama/llama-recipes). +Feedback: Instructions on how to provide feedback or comments on the model can be found in the Llama Models [README](https://github.com/meta-llama/llama-models/blob/main/README.md). For more technical information about generation parameters and recipes for how to use Llama 3.2-Vision in applications, please go [here](https://github.com/meta-llama/llama-recipes). ## Intended Use -**Intended Use Cases:** Llama 3.2-Vision is intended for commercial and research use. Instruction tuned models are intended for visual recognition, image reasoning, captioning, and assistant-like chat with images, whereas pretrained models can be adapted for a variety of image reasoning tasks. Additionally, because of Llama 3.2-Vision’s ability to take images and text as inputs, additional use cases could include: +Intended Use Cases: Llama 3.2-Vision is intended for commercial and research use. Instruction tuned models are intended for visual recognition, image reasoning, captioning, and assistant-like chat with images, whereas pretrained models can be adapted for a variety of image reasoning tasks. Additionally, because of Llama 3.2-Vision’s ability to take images and text as inputs, additional use cases could include: 1. Visual Question Answering (VQA) and Visual Reasoning: Imagine a machine that looks at a picture and understands your questions about it. 2. Document Visual Question Answering (DocVQA): Imagine a computer understanding both the text and layout of a document, like a map or contract, and then answering questions about it directly from the image. @@ -38,17 +38,17 @@ Developers may fine-tune Llama 3.2 models for languages beyond these supported l The Llama 3.2 model collection also supports the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. The Llama 3.2 Community License allows for these use cases. -**Out of Scope:** Use in any manner that violates applicable laws or regulations (including trade compliance laws). 
Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.2 Community License. Use in languages beyond those explicitly referenced as supported in this model card. +Out of Scope: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.2 Community License. Use in languages beyond those explicitly referenced as supported in this model card. ## Hardware and Software -**Training Factors:** We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, annotation, and evaluation were also performed on production infrastructure. +Training Factors: We used custom training libraries, Meta's custom built GPU cluster, and production infrastructure for pretraining. Fine-tuning, annotation, and evaluation were also performed on production infrastructure. -**Training Energy Use:** Training utilized a cumulative of **2.02M** GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency. +Training Energy Use: Training utilized a cumulative of 2.02M GPU hours of computation on H100-80GB (TDP of 700W) type hardware, per the table below. Training time is the total GPU time required for training each model and power consumption is the peak power capacity per GPU device used, adjusted for power usage efficiency. ## -**Training Greenhouse Gas Emissions:** Estimated total location-based greenhouse gas emissions were **584** tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy, therefore the total market-based greenhouse gas emissions for training were 0 tons CO2eq. +Training Greenhouse Gas Emissions: Estimated total location-based greenhouse gas emissions were 584 tons CO2eq for training. Since 2020, Meta has maintained net zero greenhouse gas emissions in its global operations and matched 100% of its electricity use with renewable energy, therefore the total market-based greenhouse gas emissions for training were 0 tons CO2eq. | | Training Time (GPU hours) | Training Power Consumption (W) | Training Location-Based Greenhouse Gas Emissions (tons CO2eq) | Training Market-Based Greenhouse Gas Emissions (tons CO2eq) | | :---- | :---: | :---: | :---: | :---: | @@ -60,9 +60,9 @@ The methodology used to determine training energy use and greenhouse gas emissio ## Training Data -**Overview:** Llama 3.2-Vision was pretrained on 6B image and text pairs. The instruction tuning data includes publicly available vision instruction datasets, as well as over 3M synthetically generated examples. +Overview: Llama 3.2-Vision was pretrained on 6B image and text pairs. The instruction tuning data includes publicly available vision instruction datasets, as well as over 3M synthetically generated examples. -**Data Freshness:** The pretraining data has a cutoff of December 2023\. +Data Freshness: The pretraining data has a cutoff of December 2023\. 
## Benchmarks \- Image Reasoning @@ -108,53 +108,53 @@ As part of our Responsible release approach, we followed a three-pronged strateg ### Responsible Deployment -**Approach:** Llama is a foundational technology designed to be used in a variety of use cases, examples on how Meta’s Llama models have been responsibly deployed can be found in our [Community Stories webpage](https://llama.meta.com/community-stories/). Our approach is to build the most helpful models enabling the world to benefit from the technology power, by aligning our model safety for the generic use cases addressing a standard set of harms. Developers are then in the driver seat to tailor safety for their use case, defining their own policy and deploying the models with the necessary safeguards in their Llama systems. Llama 3.2 was developed following the best practices outlined in our Responsible Use Guide, you can refer to the [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/) to learn more. +Approach: Llama is a foundational technology designed to be used in a variety of use cases, examples on how Meta’s Llama models have been responsibly deployed can be found in our [Community Stories webpage](https://llama.meta.com/community-stories/). Our approach is to build the most helpful models enabling the world to benefit from the technology power, by aligning our model safety for the generic use cases addressing a standard set of harms. Developers are then in the driver seat to tailor safety for their use case, defining their own policy and deploying the models with the necessary safeguards in their Llama systems. Llama 3.2 was developed following the best practices outlined in our Responsible Use Guide, you can refer to the [Responsible Use Guide](https://llama.meta.com/responsible-use-guide/) to learn more. #### Llama 3.2 Instruct -**Objective:** Our main objectives for conducting safety fine-tuning are to provide the research community with a valuable resource for studying the robustness of safety fine-tuning, as well as to offer developers a readily available, safe, and powerful model for various applications to reduce the developer workload to deploy safe AI systems. We implemented the same set of safety mitigations as in Llama 3, and you can learn more about these in the Llama 3 [paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/). +Objective: Our main objectives for conducting safety fine-tuning are to provide the research community with a valuable resource for studying the robustness of safety fine-tuning, as well as to offer developers a readily available, safe, and powerful model for various applications to reduce the developer workload to deploy safe AI systems. We implemented the same set of safety mitigations as in Llama 3, and you can learn more about these in the Llama 3 [paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/). -**Fine-Tuning Data:** We employ a multi-faceted approach to data collection, combining human-generated data from our vendors with synthetic data to mitigate potential safety risks. We’ve developed many large language model (LLM)-based classifiers that enable us to thoughtfully select high-quality prompts and responses, enhancing data quality control. +Fine-Tuning Data: We employ a multi-faceted approach to data collection, combining human-generated data from our vendors with synthetic data to mitigate potential safety risks. 
We’ve developed many large language model (LLM)-based classifiers that enable us to thoughtfully select high-quality prompts and responses, enhancing data quality control. -**Refusals and Tone:** Building on the work we started with Llama 3, we put a great emphasis on model refusals to benign prompts as well as refusal tone. We included both borderline and adversarial prompts in our safety data strategy, and modified our safety data responses to follow tone guidelines. +Refusals and Tone: Building on the work we started with Llama 3, we put a great emphasis on model refusals to benign prompts as well as refusal tone. We included both borderline and adversarial prompts in our safety data strategy, and modified our safety data responses to follow tone guidelines. #### Llama 3.2 Systems -**Safety as a System:** Large language models, including Llama 3.2, **are not designed to be deployed in isolation** but instead should be deployed as part of an overall AI system with additional safety guardrails as required. Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieve the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools. As part of our responsible release approach, we provide the community with [safeguards](https://llama.meta.com/trust-and-safety/) that developers should deploy with Llama models or other LLMs, including Llama Guard, Prompt Guard and Code Shield. All our [reference implementations](https://github.com/meta-llama/llama-agentic-system) demos contain these safeguards by default so developers can benefit from system-level safety out-of-the-box. +Safety as a System: Large language models, including Llama 3.2, are not designed to be deployed in isolation but instead should be deployed as part of an overall AI system with additional safety guardrails as required. Developers are expected to deploy system safeguards when building agentic systems. Safeguards are key to achieve the right helpfulness-safety alignment as well as mitigating safety and security risks inherent to the system and any integration of the model or system with external tools. As part of our responsible release approach, we provide the community with [safeguards](https://llama.meta.com/trust-and-safety/) that developers should deploy with Llama models or other LLMs, including Llama Guard, Prompt Guard and Code Shield. All our [reference implementations](https://github.com/meta-llama/llama-agentic-system) demos contain these safeguards by default so developers can benefit from system-level safety out-of-the-box. ### New Capabilities and Use Cases -**Technological Advancement:** Llama releases usually introduce new capabilities that require specific considerations in addition to the best practices that generally apply across all Generative AI use cases. For prior release capabilities also supported by Llama 3.2, see [Llama 3.1 Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md), as the same considerations apply here as well. +Technological Advancement: Llama releases usually introduce new capabilities that require specific considerations in addition to the best practices that generally apply across all Generative AI use cases. 
For prior release capabilities also supported by Llama 3.2, see [Llama 3.1 Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md), as the same considerations apply here as well. -**Image Reasoning:** Llama 3.2-Vision models come with multimodal (text and image) input capabilities enabling image reasoning applications. As part of our responsible release process, we took dedicated measures including evaluations and mitigations to address the risk of the models uniquely identifying individuals in images. As with other LLM risks, models may not always be robust to adversarial prompts, and developers should evaluate identification and other applicable risks in the context of their applications as well as consider deploying Llama Guard 3-11B-Vision as part of their system or other mitigations as appropriate to detect and mitigate such risks. +Image Reasoning: Llama 3.2-Vision models come with multimodal (text and image) input capabilities enabling image reasoning applications. As part of our responsible release process, we took dedicated measures including evaluations and mitigations to address the risk of the models uniquely identifying individuals in images. As with other LLM risks, models may not always be robust to adversarial prompts, and developers should evaluate identification and other applicable risks in the context of their applications as well as consider deploying Llama Guard 3-11B-Vision as part of their system or other mitigations as appropriate to detect and mitigate such risks. ### Evaluations -**Scaled Evaluations:** We built dedicated, adversarial evaluation datasets and evaluated systems composed of Llama models and Purple Llama safeguards to filter input prompt and output response. It is important to evaluate applications in context, and we recommend building dedicated evaluation dataset for your use case. +Scaled Evaluations: We built dedicated, adversarial evaluation datasets and evaluated systems composed of Llama models and Purple Llama safeguards to filter input prompt and output response. It is important to evaluate applications in context, and we recommend building dedicated evaluation dataset for your use case. -**Red teaming:** We conducted recurring red teaming exercises with the goal of discovering risks via adversarial prompting and we used the learnings to improve our benchmarks and safety tuning datasets. We partnered early with subject-matter experts in critical risk areas to understand the nature of these real-world harms and how such models may lead to unintended harm for society. Based on these conversations, we derived a set of adversarial goals for the red team to attempt to achieve, such as extracting harmful information or reprogramming the model to act in a potentially harmful capacity. The red team consisted of experts in cybersecurity, adversarial machine learning, responsible AI, and integrity in addition to multilingual content specialists with background in integrity issues in specific geographic markets. +Red teaming: We conducted recurring red teaming exercises with the goal of discovering risks via adversarial prompting and we used the learnings to improve our benchmarks and safety tuning datasets. We partnered early with subject-matter experts in critical risk areas to understand the nature of these real-world harms and how such models may lead to unintended harm for society. 
Based on these conversations, we derived a set of adversarial goals for the red team to attempt to achieve, such as extracting harmful information or reprogramming the model to act in a potentially harmful capacity. The red team consisted of experts in cybersecurity, adversarial machine learning, responsible AI, and integrity in addition to multilingual content specialists with background in integrity issues in specific geographic markets. ### Critical Risks In addition to our safety work above, we took extra care on measuring and/or mitigating the following critical risk areas: -**1\. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive Weapons):** For Llama 3.1, to assess risks related to proliferation of chemical and biological weapons, we performed uplift testing designed to assess whether use of Llama 3.1 models could meaningfully increase the capabilities of malicious actors to plan or carry out attacks using these types of weapons. For Llama 3.2-Vision models, we conducted additional targeted evaluations and found that it was unlikely Llama 3.2 presented an increase in scientific capabilities due to its added image understanding capability as compared to Llama 3.1. +1\. CBRNE (Chemical, Biological, Radiological, Nuclear, and Explosive Weapons): For Llama 3.1, to assess risks related to proliferation of chemical and biological weapons, we performed uplift testing designed to assess whether use of Llama 3.1 models could meaningfully increase the capabilities of malicious actors to plan or carry out attacks using these types of weapons. For Llama 3.2-Vision models, we conducted additional targeted evaluations and found that it was unlikely Llama 3.2 presented an increase in scientific capabilities due to its added image understanding capability as compared to Llama 3.1. -**2\. Child Safety:** Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development. For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors including the additional languages Llama 3 is trained on. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. +2\. Child Safety: Child Safety risk assessments were conducted using a team of experts, to assess the model’s capability to produce outputs that could result in Child Safety risks and inform on any necessary and appropriate risk mitigations via fine tuning. We leveraged those expert red teaming sessions to expand the coverage of our evaluation benchmarks through Llama 3 model development. For Llama 3, we conducted new in-depth sessions using objective based methodologies to assess the model risks along multiple attack vectors including the additional languages Llama 3 is trained on. We also partnered with content specialists to perform red teaming exercises assessing potentially violating content while taking account of market specific nuances or experiences. -**3\. Cyber Attacks:** For Llama 3.1 405B, our cyber attack uplift study investigated whether LLMs can enhance human capabilities in hacking tasks, both in terms of skill level and speed. 
+3\. Cyber Attacks: For Llama 3.1 405B, our cyber attack uplift study investigated whether LLMs can enhance human capabilities in hacking tasks, both in terms of skill level and speed. Our attack automation study focused on evaluating the capabilities of LLMs when used as autonomous agents in cyber offensive operations, specifically in the context of ransomware attacks. This evaluation was distinct from previous studies that considered LLMs as interactive assistants. The primary objective was to assess whether these models could effectively function as independent agents in executing complex cyber-attacks without human intervention. Because Llama 3.2’s vision capabilities are not generally germane to cyber uplift, we believe that the testing conducted for Llama 3.1 also applies to Llama 3.2. ### Community -**Industry Partnerships:** Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership on AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [Github repository](https://github.com/meta-llama/PurpleLlama). +Industry Partnerships: Generative AI safety requires expertise and tooling, and we believe in the strength of the open community to accelerate its progress. We are active members of open consortiums, including the AI Alliance, Partnership on AI and MLCommons, actively contributing to safety standardization and transparency. We encourage the community to adopt taxonomies like the MLCommons Proof of Concept evaluation to facilitate collaboration and transparency on safety and content evaluations. Our Purple Llama tools are open sourced for the community to use and widely distributed across ecosystem partners including cloud service providers. We encourage community contributions to our [Github repository](https://github.com/meta-llama/PurpleLlama). -**Grants:** We also set up the [Llama Impact Grants](https://llama.meta.com/llama-impact-grants/) program to identify and support the most compelling applications of Meta’s Llama model for societal benefit across three categories: education, climate and open innovation. The 20 finalists from the hundreds of applications can be found [here](https://llama.meta.com/llama-impact-grants/#finalists). +Grants: We also set up the [Llama Impact Grants](https://llama.meta.com/llama-impact-grants/) program to identify and support the most compelling applications of Meta’s Llama model for societal benefit across three categories: education, climate and open innovation. The 20 finalists from the hundreds of applications can be found [here](https://llama.meta.com/llama-impact-grants/#finalists). -**Reporting:** Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. 
+Reporting: Finally, we put in place a set of resources including an [output reporting mechanism](https://developers.facebook.com/llama_output_feedback) and [bug bounty program](https://www.facebook.com/whitehat) to continuously improve the Llama technology with the help of the community. ## Ethical Considerations and Limitations -**Values:** The core values of Llama 3.2 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. Llama 3.2 addresses users and their needs as they are, without insertion unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. +Values: The core values of Llama 3.2 are openness, inclusivity and helpfulness. It is meant to serve everyone, and to work for a wide range of use cases. It is thus designed to be accessible to people across many different backgrounds, experiences and perspectives. Llama 3.2 addresses users and their needs as they are, without insertion unnecessary judgment or normativity, while reflecting the understanding that even content that may appear problematic in some cases can serve valuable purposes in others. It respects the dignity and autonomy of all users, especially in terms of the values of free thought and expression that power innovation and progress. -**Testing:** But Llama 3.2 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3.2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 3.2 models, developers should perform safety testing and tuning tailored to their specific applications of the model. Please refer to available resources including our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide), [Trust and Safety](https://llama.meta.com/trust-and-safety/) solutions, and other [resources](https://llama.meta.com/docs/get-started/) to learn more about responsible development. +Testing: But Llama 3.2 is a new technology, and like any new technology, there are risks associated with its use. Testing conducted to date has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 3.2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 3.2 models, developers should perform safety testing and tuning tailored to their specific applications of the model. Please refer to available resources including our [Responsible Use Guide](https://llama.meta.com/responsible-use-guide), [Trust and Safety](https://llama.meta.com/trust-and-safety/) solutions, and other [resources](https://llama.meta.com/docs/get-started/) to learn more about responsible development. 
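The vision model card above reports a cumulative 2.02M H100 GPU-hours at a 700 W TDP and 584 tons CO2eq of location-based training emissions. As a rough consistency check (ignoring the power-usage-effectiveness adjustment the card mentions; the implied grid intensity below is an inference, not a figure from the patch):

$$
2.02\times10^{6}\ \text{GPU-h} \times 0.70\ \text{kW} \approx 1.41\times10^{6}\ \text{kWh},
\qquad
\frac{584{,}000\ \text{kg CO}_2\text{eq}}{1.41\times10^{6}\ \text{kWh}} \approx 0.41\ \text{kg CO}_2\text{eq/kWh},
$$

i.e. the stated figures correspond to a plausible location-based grid carbon intensity of roughly 0.41 kg CO2eq per kWh.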
diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py index f171bbbd4..4b914cd47 100644 --- a/examples/E2E-RAG-App/rag_main.py +++ b/examples/E2E-RAG-App/rag_main.py @@ -22,16 +22,19 @@ def chunk_text(content: str, chunk_size: int = 500) -> List[str]: - """Splits content into chunks of approximately `chunk_size` characters.""" + """Splits content into chunks with overlap.""" chunks = [] current_chunk = [] + overlap = 100 for paragraph in content.split("\n\n"): if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size: current_chunk.append(paragraph) else: chunks.append("\n\n".join(current_chunk).strip()) - current_chunk = [paragraph] + current_chunk = ( + [current_chunk[-1], paragraph] if current_chunk else [paragraph] + ) if current_chunk: chunks.append("\n\n".join(current_chunk).strip()) @@ -39,7 +42,7 @@ def chunk_text(content: str, chunk_size: int = 500) -> List[str]: return chunks -def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 250) -> None: +def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 350) -> None: """Inserts text documents from a directory into ChromaDB.""" collection_name = "documents" existing_collections = chroma_client.list_collections() @@ -84,8 +87,35 @@ def query_chromadb(query: str) -> Optional[dict]: collection = chroma_client.get_collection( name="documents", embedding_function=embedding_function ) - print(collection.count()) # returns the number of items in the collection - results = collection.query(query_texts=[query], n_results=10) + print(collection.count()) + + results = collection.query( + query_texts=[query], + n_results=10, + include=["documents", "metadatas", "distances"], + ) + + if ( + results + and results.get("distances") + and results.get("documents") + and results.get("metadatas") + ): + + for i, (doc, distance, metadata) in enumerate( + zip( + results["documents"][0], + results["distances"][0], + results["metadatas"][0], + ) + ): + print(f"\nResult {i + 1}") + print(f"Distance Score: {distance:.4f}") + print(f"Filename: {metadata['filename']}") + print(f"Chunk index: {metadata['chunk_index']}") + print(f"Context: {doc}") + print("-" * 50) + return results if results else None @@ -94,20 +124,20 @@ async def get_response_with_context( ) -> str: """Fetches response from the agent with context from ChromaDB.""" results = query_chromadb(input_query) - context = ( - "No relevant context found." - if not results or not results["metadatas"] - else "\n".join( - "\n".join(metadata["content"] for metadata in metadata_list) + if results and results["metadatas"]: + context = "\n".join( + f"Filename: {metadata['filename']}, Chunk index: {metadata['chunk_index']}\n{metadata['content']}" for metadata_list in results["metadatas"] + for metadata in metadata_list ) - ) + else: + context = "No relevant context found." messages = [ {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"} ] - cprint("Embedding retrieval completed. Sending these context to agent:", "cyan") - cprint(context, "cyan") + # cprint("Embedding retrieval completed. 
Sending these context to agent:", "cyan") + # cprint(context, "cyan") response = agent.create_turn(messages=messages, session_id=session_id) @@ -146,7 +176,7 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: agent = Agent(client, agent_config) user_prompts = [ - "What is the name of the llama model released on Oct 24, 2024?", + "On the day of Oct 24, 2024, which llama model was released?", "What about Llama 3.1 model, what is the release date for it?", "When was llama 3.3 released?", ] From ccfc2dbfd2dc9d16c14c1867c78a9928d65b23b1 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Fri, 15 Nov 2024 13:29:44 -0800 Subject: [PATCH 16/99] modify embeddings to improve retrieval --- examples/E2E-RAG-App/rag_main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py index 4b914cd47..4ddfb3968 100644 --- a/examples/E2E-RAG-App/rag_main.py +++ b/examples/E2E-RAG-App/rag_main.py @@ -17,7 +17,9 @@ # Initialization load_dotenv() -embedding_function = SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2") +embedding_function = SentenceTransformerEmbeddingFunction( + model_name="all-mpnet-base-v2" +) chroma_client = chromadb.PersistentClient(path="chroma") @@ -91,7 +93,7 @@ def query_chromadb(query: str) -> Optional[dict]: results = collection.query( query_texts=[query], - n_results=10, + n_results=5, include=["documents", "metadatas", "distances"], ) From cb74102bcd809937b24f86ecbfb37dd0c6a03770 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Fri, 15 Nov 2024 13:44:24 -0800 Subject: [PATCH 17/99] added eval dataset, change ingestion script dir and updated gitignore --- .gitignore | 2 + examples/E2E-RAG-App/README.md | 2 +- examples/E2E-RAG-App/data/eval/QA_eval.json | 102 ++++++++++++++++++++ examples/E2E-RAG-App/ingestion_script.py | 53 +++++----- 4 files changed, 130 insertions(+), 29 deletions(-) create mode 100644 examples/E2E-RAG-App/data/eval/QA_eval.json diff --git a/.gitignore b/.gitignore index d08c3f059..b0cac4c44 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,5 @@ Package.resolved **/faiss_index.index **/faiss_index.index.json **/chroma +/examples/E2E-RAG-App/data/input +/examples/E2E-RAG-App/data/output diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index c1298c253..e261e41fa 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -8,7 +8,7 @@ TODO: 2. Make the chat inference multi-turn 3. Front-end + docker -To run the `ingestion.py` script, please make sure there is a /DATA and /OUTPUT folder at its relative root. It will ingest ALL documents in /DATA and output BOTH markdown and JSON dump in /OUTPUT folder +To run the `ingestion_script.py` script, please make sure there is a /DATA and /OUTPUT folder at its relative root. It will ingest ALL documents in /DATA and output BOTH markdown and JSON dump in /OUTPUT folder diff --git a/examples/E2E-RAG-App/data/eval/QA_eval.json b/examples/E2E-RAG-App/data/eval/QA_eval.json new file mode 100644 index 000000000..664bff1e2 --- /dev/null +++ b/examples/E2E-RAG-App/data/eval/QA_eval.json @@ -0,0 +1,102 @@ +[ + { + "Question": "What is the policy regarding smoking in City offices?", + "Answer": "Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. 
(Source: Page 46, 'Smoke-Free Workplace' section)" + }, + { + "Question": "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", + "Answer": "Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)" + }, + { + "Question": "What are the three categories of employees eligible for health coverage?", + "Answer": "The following employees are eligible:\n\nAll permanent employees working at least 20 hours per week\n\nAll regularly scheduled provisional employees working at least 20 hours per week\n\nAll other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)" + }, + { + "Question": "How long must an employee wait before using vacation time after starting employment?", + "Answer": "Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)" + }, + { + "Question": "What must an employee do if they're summoned for jury duty?", + "Answer": "An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)" + }, + { + "Question": "What happens if an employee is absent without authorization for more than five consecutive working days?", + "Answer": "If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)" + }, + { + "Question": "How long is the normal probationary period for permanent civil service positions?", + "Answer": "The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)" + }, + { + "Question": "What are employees required to do in case of a catastrophic event while off duty?", + "Answer": "Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)" + }, + { + "Question": "What is the city's policy on accepting gifts from subordinates?", + "Answer": "Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. 
(Source: Page 49, 'Gifts' section)" + }, + { + "Question": "What documents must new employees present at the time of hire?", + "Answer": "New employees must present:\n\nSocial Security number\n\nDocuments to verify identity and authorization to work in the United States\n\nTake the Oath of Office\n\nSign acknowledgment of no-strike provision\n\nComplete tax forms (W-4)\n\nComplete Warrant Recipient Form (Source: Pages 11-12, 'Requirements at the Time of Hire' section)" + }, + { + "Question": "What are the fees for a new Special Traffic Permit in San Francisco?", + "Answer": "New Special Traffic Permits cost $388 processing fee plus $78 per day." + }, + { + "Question": "During what hours does the Holiday Moratorium restrict construction work in San Francisco?", + "Answer": "No construction work is allowed in the public right of way from the day after Thanksgiving to January 1, inclusive, during the hours of 7 a.m. to 10 p.m. on Holiday Moratorium protected streets." + }, + { + "Question": "How many days in advance must contractors register tow-away signs with SFMTA?", + "Answer": "Tow-Away signs must be posted and registered with the SFMTA Tow-Desk or SF Public Works online at least 72 hours prior to the enforcement date." +}, +{ + "Question": "What is the minimum width required for a single traffic lane in San Francisco?", + "Answer": "The minimum width required for a single traffic lane is 12 feet. The minimum width required for a vehicular traffic lane contiguous to one or more vehicular traffic lanes in the same direction is 10 feet." +}, +{ + "Question": "What clearance must equipment maintain from Muni overhead electric wires?", + "Answer": "CAL OSHA and CPUC General Order 95 regulations require that any boom type equipment that moves vertically must maintain a 10 feet radial clearance and any other equipment must maintain a six (6) foot clearance from Muni overhead wires." +}, +{ + "Question": "What restrictions apply around Oracle Park during events?", + "Answer": "No construction activity is allowed from two (2) hours before to two (2) hours after these events around Oracle Park." +}, +{ + "Question": "What is the minimum processing time required for new Special Traffic Permits?", + "Answer": "New permits may require five (5) working days. More complicated requests may take longer. Very complex permit applications, such as Tower Crane Erections, Concrete Mat Pours and Complex road closures require a minimum of 8 weeks processing time." +}, +{ + "Question": "What is the minimum clear path of travel required on sidewalks during construction?", + "Answer": "A 6-foot wide clear path of travel is desirable. At a minimum, Contractor shall provide a 4-foot wide clear path of travel on any sidewalk at all times." 
+}, +{ + "Question": "What are the main functions of the Legislative Department regarding Bills?", + "Answer": "According to Section 1.1(ii) of the document, the Legislative Department's main functions regarding Bills include: 'Drafting and scrutiny of all Government Bills including Constitution (Amendment) Bills, translation of all the Bills into Hindi and forwarding of both English and Hindi versions of the Bills to the Lok Sabha or Rajya Sabha Secretariat for introduction in Parliament; drafting of official amendments to the Bills; scrutiny of non-official amendments and rendering assistance to administrative Ministries/Departments to decide the acceptability or otherwise of non-official amendments.'" +}, +{ + "Question": "How many Bills were forwarded to Parliament for introduction during the period from January 1, 2021 to December 31, 2021?", + "Answer": "According to Section 4 of the document, 50 Legislative Bills were forwarded to Parliament for introduction during this period. The exact quote states: '50 Legislative Bills were forwarded to Parliament for introduction during this period.'" +}, +{ + "Question": "What is the role of the Official Languages Wing regarding legal terminology?", + "Answer": "According to Section 41(1)(i) of the document, the Official Languages Wing is responsible for 'Preparation and publication of a standard legal terminology for use, as far as possible, in all Official Languages.' The Wing has published seven editions of Legal Glossary, with the latest 7th Edition containing approximately 65,000 entries." +}, +{ + "Question": "How does the Legislative Department handle public grievances?", + "Answer": "According to Section 45 of the document, 'During the period from 1st January, 2021 to 31st December, 2021 Legislative Department received 1644 public grievances on CPGRAMS portal. Further 223 public grievances were pending before 1st January, 2021. During the said period 1544 grievances have been disposed off and action is being taken for disposal of remaining grievances on priority basis.'" +}, +{ + "Question": "What are the provisions for linking electoral rolls with Aadhaar according to the Election Laws (Amendment) Act, 2021?", + "Answer": "According to Section 22.1 of the document, 'The newly passed Election Laws (Amendment) Act, 2021 amends both The Representation of the People Acts of 1950 and 1951. One of its main provisions creates a legal framework to link electoral roll data with Aadhaar, the unique identification number. It is meant to strike out bogus voters, dead voters, foreigners who are wrongfully included as voters and also those who are enrolled in more than one constituency.'" +}, +{ + "Question": "What role does the Printing Section play in the Legislative Department?", + "Answer": "According to Section 36 of the document, the Printing Sections (Printing I and II) handle 'the processing of legislation for printing at various stages' including 'editing of manuscripts of the Bills (including preparation of contents and annexures), Ordinances, Regulations, Adaptation Orders, Orders issued under the Constitution of India, Delimitation Orders and other statutory instruments before sending them to Press.' During January-December 2021, they edited manuscripts and checked proofs for 89 Bills, 49 Gazettes, 10 Ordinances, and prepared 49 A-4 Acts." 
+}, + { + "Question": "How many Official Language versions of the Constitution of India have been published?", + "Answer": "According to Section 41(3) of the document, the Constitution of India has been published in Hindi (the Official Language of the Union) and 16 other regional languages: 'Assamese, Bengali, Gujarati, Kannada, Malayalam, Marathi, Manipuri, Oriya, Punjabi, Sanskrit, Tamil, Telugu, Urdu, Sindhi, Nepali and Konkani.'" + } +] diff --git a/examples/E2E-RAG-App/ingestion_script.py b/examples/E2E-RAG-App/ingestion_script.py index c20219ad0..8e1cb5cf4 100644 --- a/examples/E2E-RAG-App/ingestion_script.py +++ b/examples/E2E-RAG-App/ingestion_script.py @@ -14,40 +14,42 @@ from docling.pipeline.simple_pipeline import SimplePipeline from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline + def get_document_files(input_dir: Path) -> list[Path]: """ Recursively scan directory for document files. Returns a list of Path objects for supported document types. """ - supported_extensions = {'.pdf', '.docx', '.pptx'} + supported_extensions = {".pdf", ".docx", ".pptx"} document_files = [] - + # Recursively walk through all directories - for path in input_dir.rglob('*'): + for path in input_dir.rglob("*"): if path.is_file() and path.suffix.lower() in supported_extensions: document_files.append(path) - + return document_files + def main(): # Define input and output directories relative to current directory - input_dir = Path("DATA") - output_dir = Path("OUTPUT") - + input_dir = Path("data/input") + output_dir = Path("data/output") + # Create output directory if it doesn't exist output_dir.mkdir(parents=True, exist_ok=True) - + # Get all document files recursively input_paths = get_document_files(input_dir) - + if not input_paths: print("No documents found in DATA directory!") return - + print(f"Found {len(input_paths)} documents to process:") for path in input_paths: print(f"- {path}") - + # Configure document converter doc_converter = DocumentConverter( allowed_formats=[ @@ -57,41 +59,36 @@ def main(): ], format_options={ InputFormat.PDF: PdfFormatOption( - pipeline_cls=StandardPdfPipeline, - backend=PyPdfiumDocumentBackend - ), - InputFormat.DOCX: WordFormatOption( - pipeline_cls=SimplePipeline + pipeline_cls=StandardPdfPipeline, backend=PyPdfiumDocumentBackend ), + InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), }, ) - + # Process all documents conv_results = doc_converter.convert_all(input_paths) - + # Save results for res in conv_results: # Preserve directory structure in output relative_path = res.input.file.relative_to(input_dir) output_subdir = output_dir / relative_path.parent output_subdir.mkdir(parents=True, exist_ok=True) - + # Create output filenames md_path = output_subdir / f"{res.input.file.stem}.md" json_path = output_subdir / f"{res.input.file.stem}.json" - - print( - f"Converting: {res.input.file}" - f"\nSaving to: {md_path}" - ) - + + print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") + # Save markdown version - with md_path.open("w", encoding='utf-8') as fp: + with md_path.open("w", encoding="utf-8") as fp: fp.write(res.document.export_to_markdown()) - + # Save JSON version - with json_path.open("w", encoding='utf-8') as fp: + with json_path.open("w", encoding="utf-8") as fp: json.dump(res.document.export_to_dict(), fp, indent=2) + if __name__ == "__main__": main() From 845439845d0f64706510b4d991ccf3c46a000e3e Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Fri, 15 Nov 2024 13:51:17 -0800 Subject: [PATCH 18/99] change queries to 
reflect updated data --- examples/E2E-RAG-App/rag_main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py index 4ddfb3968..1f156cb4d 100644 --- a/examples/E2E-RAG-App/rag_main.py +++ b/examples/E2E-RAG-App/rag_main.py @@ -178,9 +178,9 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: agent = Agent(client, agent_config) user_prompts = [ - "On the day of Oct 24, 2024, which llama model was released?", - "What about Llama 3.1 model, what is the release date for it?", - "When was llama 3.3 released?", + "What is the policy regarding smoking in City offices?", + "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", + "What are the three categories of employees eligible for health coverage?", ] session_id = agent.create_session(f"session-{uuid.uuid4()}") From e5759eee22c8051413294bdfd028a7b96e62dfd9 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Fri, 15 Nov 2024 14:57:06 -0800 Subject: [PATCH 19/99] added eval and eval analysis --- .../data/eval/evaluation_results.csv | 29 + .../data/eval/evaluation_results2.csv | 39 + .../E2E-RAG-App/data/eval/explain-eval.ipynb | 745 ++++++++++++++++++ examples/E2E-RAG-App/rag_main.py | 147 +++- 4 files changed, 921 insertions(+), 39 deletions(-) create mode 100644 examples/E2E-RAG-App/data/eval/evaluation_results.csv create mode 100644 examples/E2E-RAG-App/data/eval/evaluation_results2.csv create mode 100644 examples/E2E-RAG-App/data/eval/explain-eval.ipynb diff --git a/examples/E2E-RAG-App/data/eval/evaluation_results.csv b/examples/E2E-RAG-App/data/eval/evaluation_results.csv new file mode 100644 index 000000000..f2fa741ff --- /dev/null +++ b/examples/E2E-RAG-App/data/eval/evaluation_results.csv @@ -0,0 +1,29 @@ +user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy +What is the policy regarding smoking in City offices?,"['## Smoke-Free Workplace\n\nSmoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.\n\n## Drug-Free Workplace', '## Policy Regarding the Treatment of Co-Workers and Members of the Public\n\nCity policy requires employees to treat co-workers and members of the public with courtesy and respect. City employees and managers are responsible for maintaining a safe and productive workplace which is free from inappropriate workplace behavior.\n\n## Smoke-Free Workplace', '## Political Activity\n\nIt is unlawful for City employees to use public resources or personnel to engage in political activity relating to elective offices and ballot measures. City employees may not engage in political activities while on duty or in the workplace. Employees may not use City resources, such as photocopier or fax machines, telephones, postage, or email, for political activities. The ban on engaging in political activity while on duty prohibits such activities as circulating petitions, addressing campaign mailers or engaging in any other political activities that use City resources or divert employees from their assigned duties.', '## Use of City and County Property for Business Purposes Only\n\nNo officer or employee may use, nor allow any other r person to use, City resources for any non-City business purpose. Use of City resources fofor personal, political, employee organization or other non-City business is strictly prohibited. 
City resources include, but are not limited to, facilities, equipment, devices, telephones, computers, copier, fax machine, email, internet access, supplies and any time for which you are receiving compensation from the City. Inappropriate uses of City resources include, but are not limited to: online gambling; viewing sports events online; playing games, streaming video or music on a work computer; viewing or distributing materials that are not related to City business or that are sexually explicit; and frequent talking on a personal cell phone or texting during work hours.', ""The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\n\nSafety is every City employee's responsibility. All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors.""]","According to the document, smoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.","Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)",0.99999999995,1.0,1.0,0.9684763208853084 +How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?,"['Most full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.\n\nSome employees may be entitled to accrue sick leave under Chapter 12W of the Administrative Code. Please contact your departmental personnel officer for more information.', 'Sick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.\n\nMost full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.', '## Accrual of Paid Sick Leave\n\nSick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\n\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'You may not use sick leave with pay accrued under the Civil Service Rules during your first 6 months of employment with the City.\n\nYou may not use sick leave for reasons not set forth th in this Handbook, the Civil Service Rules, the applicable collective bargaining agreement or other applicable laws. 
Misuse of sick leave is grounds for discipline, up to and including termination.']",Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules.,"Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)",0.99999999995,0.5,1.0,0.9869727487494077 +What are the three categories of employees eligible for health coverage?,"['The following employees are eligible for health coverage:\n\n- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.', ""Employees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage.\n\nAn employee's spouse, domestic partner and children may also be eligible for coverage through HSS. If you have enrolled a domestic partner, same sex spouse and/or their children in your health plan, it is important that you seek tax advice from a qualified professional regarding the tax consequences of such enrollment. A detailed list of eligibility requirements and necessary documentation for enrolling employee dependents is available on the HSS website at www.myhss.org/benefits/ccsf.html ."", 'Please note that the information in this section is subject to change. You should consult with HSS if you have any questions at (415) 554-1750.\n\n## Eligibility\n\nThe following employees are eligible for health coverage:', ""Coverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.\n\nEmployees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage."", '- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.\n\nCoverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.']","The three categories of employees eligible for health coverage are: + +1. All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours. +2. 
All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours. +3. All other employees of the City, including temporary, exempt, or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12-month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.","The following employees are eligible: + +All permanent employees working at least 20 hours per week + +All regularly scheduled provisional employees working at least 20 hours per week + +All other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)",0.6999999999766667,1.0,0.8,0.9812535196983435 +How long must an employee wait before using vacation time after starting employment?,"['An additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.\n\nThe maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\n\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'The City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.\n\nAn additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.', '## Vacation\n\nThe City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. 
For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.', 'Permanent employees may request unpaid personal leave for a period of up to twelve (12) months within any two-year period. Your department t head has discretion to grant or deny requests for personal leave. With certain exceptions, if you are a temporary or provisional employee, you may request personal leave for a maximum of one month, and only if a replacement for your position is not required.\n\n## Time Off for Voting']","An employee must wait at least one year of continuous service before using vacation time. At the end of the first year, the employee will be awarded a vacation allowance at the rate of 0.0385 of an hour for each hour of paid service in the preceding year, and will be eligible to use accrued vacation time.","Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)",0.4166666666458333,0.6666666666666666,1.0,0.9792513522173962 +What must an employee do if they're summoned for jury duty?,"['You must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.\n\n## Work Schedule While on Jury Duty', '## Jury Duty Leave\n\nYou must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.', '## Compensation While on Jury Duty\n\nAs a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.', 'As a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. 
However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.\n\nYou are not entitled to a per diem pay from the City or County for which Jury Duty was served, as the City and County of San Francisco already provides regular compensation and benefits to you while performing jury service. You must notify the jury staff that you are a government employee when reporting for jury service', 'If you are summoned as a witness on behalf of the City and County of San Francisco, you are entitled to be paid for any time that you are required to serve in that capacity. If you are summoned to serve as a witness in a case involving outside employment or personal business\n\naffairs, you will be placed on leave without pay unless vacation leave or compensatory time is requested and granted.']","An employee must notify their supervisor as soon as they receive a jury summons. If they are required to report for jury duty during their working hours, they will be excused from work on the work day they perform jury service, provided they give prior notification to their supervisor.","An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)",0.9166666666361111,0.75,1.0,0.9654143234777989 +What happens if an employee is absent without authorization for more than five consecutive working days?,"['If you are absent from your job for any period of time without proper authorization, you may be subject to discipline. If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to do so. The rules regarding automatic resignation may be affected by the collective bargaining agreement between the City and your employee organization. For more information, consult your departmental personnel officer or your collective bargaining agreement.\n\n## Layoff', '## Automatic Resignation\n\nIf you are absent from your job for any period of time without proper authorization, you may be subject to discipline. If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to do so. The rules regarding automatic resignation may be affected by the collective bargaining agreement between the City and your employee organization. 
For more information, consult your departmental personnel officer or your collective bargaining agreement.', ""If you are absent from your job for more than five consecutive working days, you must submit to your supervisor a completed Request for Leave form stating the duration of your illness, signed by a doctor, dentist, podiatrist, licensed clinical psychologist, Christian Science practitioner or licensed doctor of chiropractic medicine. This form is available from your supervisor or departmental personnel officer.\n\nUnder certain conditions, a department head may require you to furnish a doctor's certification for any amount of absence due to illness."", 'All planned absences must be requested and approved d in advance. If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick leave, failure to present medical certification when required, excessive absenteeism, tardiness, unauthorized absence or failure to notify your deparartment when you are unable to report to work, may result in sick leave restriction, disciplinary action or termination.\n\n## Appearance and Dress Code', 'Regular and prompt attendance is an essential requirement of your job. As a City employee, you are to report to your work station at your scheduled work time. Your time records must accurately reflect the time you start work and the number of hours worked in every work day.\n\nAll planned absences must be requested and approved d in advance. If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick leave, failure to present medical certification when required, excessive absenteeism, tardiness, unauthorized absence or failure to notify your deparartment when you are unable to report to work, may result in sick leave restriction, disciplinary action or termination.']","If an employee is absent without authorization for more than five consecutive working days, their absence will be deemed an ""automatic resignation"".","If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)",0.99999999998,1.0,1.0,1.0000000000000004 +How long is the normal probationary period for permanent civil service positions?,"['## Probationary Period\n\nAll appointments to permanent civil service positions require that you serve a probationary period. The probationary period is the final and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. 
For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.', ""All appointments to permanent civil service positions require that you serve a probationary period. The probationary period is the final and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.\n\nThe department's appointing officer may release an employee at any time during the probationary period. At the conclusion of the probationary period, your department head may recommend permanent appointment, release from the position, or an extension of the probationary period. You may also be required to serve an additional probationary period in certain circumstances, such as transferring to a different department."", '## Permanent\n\nPersons who have competed in a civil service examination and are appointed from a list of eligibles to a permanent position. Permanent empmployees who have successfully completed the probationary period may only be removed for cause.\n\n## Exempt', '## Reappointment\n\nAn employee who has resigned with satisfactory services after completing probation may request reappointment to his or her position. Such h former employees may request to reenter the City service to a vacant position in the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.', 'An employee who has resigned with satisfactory services after completing probation may request reappointment to his or her position. Such h former employees may request to reenter the City service to a vacant position in the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.\n\n## Reinstatement']","The duration of the probationary period for permanent civil service positions is governed by the collective bargaining agreement. However, the exact length of the probationary period is not specified in the provided documents. It is recommended to consult the departmental personnel officer or collective bargaining agreement for information on the length of the probationary period.","The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. 
(Source: Page 14, 'Probationary Period' section)",0.9999999999,0.6666666666666666,1.0,0.0 +What are employees required to do in case of a catastrophic event while off duty?,"['Catastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your department\'s instructions. In the event that phone lines are down, you are required to listen to the radio for any reporting instructions.\n\nIn the event of an emergency, the City may contact you via its two-way emergency communication system on your home phone, cell phone, work phone, and/or work email. Please be aware that the system uses an electronic communication, so you may think it is an unsolicited ""robo-call."" Do NOT hang up; you are required to listen to the entire message, follow the instructions and report for your emergency assignment if so directed.', ""Catastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.\n\nCatastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your department's instructions. In the event that phone lines are down, you are required to listen to the radio for any reporting instructions."", 'All Disaster Services Workers will be issued a citywide identification badge. You must have your badge with you at all times.\n\nCatastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.', ""The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\n\nSafety is every City employee's responsibility. All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors."", 'All City employees are designated Disaster Service Workers under state and local law. If the Mayor or an authorized City official proclaims a local emergency due to actual or threatened disaster such as earthquake, fire, riot, etc., City employees, as Disaster Service Workers, may be required to provide services during the emergency and subsequent period of assessment and recovery.\n\nSimply put, being a Disaster Service Worker means that any time a catastrophic event- natural or manmade-places life or property in jeopardy, you could be assigned to any disaster service activity that promotes the protection of public health and safety. Your assignment might require you to serve at a location, at times and/or under conditions that significantly differ from your normal work assignment. As a Disaster Service Worker, however, you have an absolute obligation to serve the public in a time of need in whatever role you are directed to perform by a superior or by law.']","In case of a catastrophic event while off duty, employees are required to: + +1. Ensure the safety of their family. +2. Follow their department's instructions. +3. 
In the event of phone lines being down, listen to the radio for reporting instructions. +4. If contacted by the City via its two-way emergency communication system on their home phone, cell phone, work phone, or work email, they must: + * Not hang up. + * Listen to the entire message. + * Follow the instructions. + * Report for their emergency assignment if directed to do so.","Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)",0.8666666666377778,0.6666666666666666,1.0,0.9872406055919023 +What is the city's policy on accepting gifts from subordinates?,"['You may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months.\n\nYou may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .', '## Gifts\n\nYou may not accept any gift that is intended to inflfluence you in the performance of your job.\n\nYou may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months.', 'There are exceptions to all of these gift rules - please check with the Ethics Commission or your departmental personnel officer if you have questions.\n\n## Outside or Additional Employment', 'You may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .\n\nIf you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. That amount is $420.00 for calendar year 2011; please contact the Ethics Commission to determine what the amount is for future calendar years.', 'If you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. That amount is $420.00 for calendar year 2011; please contact the Ethics Commission to determine what the amount is for future calendar years.\n\nThere are exceptions to all of these gift rules - please check with the Ethics Commission or your departmental personnel officer if you have questions.']","According to the document, the city's policy is that employees may not accept gifts from subordinates, as well as candidates or applicants for positions as employees or subordinates.","Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. 
(Source: Page 49, 'Gifts' section)",0.8874999999778125,1.0,0.6666666666666666,0.9569189442440059 diff --git a/examples/E2E-RAG-App/data/eval/evaluation_results2.csv b/examples/E2E-RAG-App/data/eval/evaluation_results2.csv new file mode 100644 index 000000000..e80a7f000 --- /dev/null +++ b/examples/E2E-RAG-App/data/eval/evaluation_results2.csv @@ -0,0 +1,39 @@ +user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy,factual_correctness,semantic_similarity +What is the policy regarding smoking in City offices?,"['## Smoke-Free Workplace\n\nSmoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.\n\n## Drug-Free Workplace', '## Policy Regarding the Treatment of Co-Workers and Members of the Public\n\nCity policy requires employees to treat co-workers and members of the public with courtesy and respect. City employees and managers are responsible for maintaining a safe and productive workplace which is free from inappropriate workplace behavior.\n\n## Smoke-Free Workplace', '## Political Activity\n\nIt is unlawful for City employees to use public resources or personnel to engage in political activity relating to elective offices and ballot measures. City employees may not engage in political activities while on duty or in the workplace. Employees may not use City resources, such as photocopier or fax machines, telephones, postage, or email, for political activities. The ban on engaging in political activity while on duty prohibits such activities as circulating petitions, addressing campaign mailers or engaging in any other political activities that use City resources or divert employees from their assigned duties.', '## Use of City and County Property for Business Purposes Only\n\nNo officer or employee may use, nor allow any other r person to use, City resources for any non-City business purpose. Use of City resources fofor personal, political, employee organization or other non-City business is strictly prohibited. City resources include, but are not limited to, facilities, equipment, devices, telephones, computers, copier, fax machine, email, internet access, supplies and any time for which you are receiving compensation from the City. Inappropriate uses of City resources include, but are not limited to: online gambling; viewing sports events online; playing games, streaming video or music on a work computer; viewing or distributing materials that are not related to City business or that are sexually explicit; and frequent talking on a personal cell phone or texting during work hours.', ""The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\n\nSafety is every City employee's responsibility. 
All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors.""]","According to the Employee Handbook, smoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.","Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)",0.99999999995,1.0,1.0,0.9536062736849935,0.8,0.9695324605746768 +How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?,"['Most full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.\n\nSome employees may be entitled to accrue sick leave under Chapter 12W of the Administrative Code. Please contact your departmental personnel officer for more information.', 'Sick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.\n\nMost full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.', '## Accrual of Paid Sick Leave\n\nSick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\n\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'You may not use sick leave with pay accrued under the Civil Service Rules during your first 6 months of employment with the City.\n\nYou may not use sick leave for reasons not set forth th in this Handbook, the Civil Service Rules, the applicable collective bargaining agreement or other applicable laws. Misuse of sick leave is grounds for discipline, up to and including termination.']","According to the Employee Handbook, most full-time employees earn 13 days of paid sick leave per year under the Civil Service Rules.","Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. 
(Source: Page 32, 'Accrual of Paid Sick Leave' section)",0.99999999995,0.5,1.0,0.948902942120787,0.67,0.9666512821564128 +What are the three categories of employees eligible for health coverage?,"['The following employees are eligible for health coverage:\n\n- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.', ""Employees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage.\n\nAn employee's spouse, domestic partner and children may also be eligible for coverage through HSS. If you have enrolled a domestic partner, same sex spouse and/or their children in your health plan, it is important that you seek tax advice from a qualified professional regarding the tax consequences of such enrollment. A detailed list of eligibility requirements and necessary documentation for enrolling employee dependents is available on the HSS website at www.myhss.org/benefits/ccsf.html ."", 'Please note that the information in this section is subject to change. You should consult with HSS if you have any questions at (415) 554-1750.\n\n## Eligibility\n\nThe following employees are eligible for health coverage:', ""Coverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.\n\nEmployees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage."", '- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.\n\nCoverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.']","According to the Employee Handbook, the three categories of employees eligible for health coverage are: + +1. All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours. +2. All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours. +3. 
All other employees of the City, including temporary, exempt, or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12-month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.","The following employees are eligible: + +All permanent employees working at least 20 hours per week + +All regularly scheduled provisional employees working at least 20 hours per week + +All other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)",0.6999999999766667,1.0,0.8,0.9463983778035442,1.0,0.9263984404512609 +How long must an employee wait before using vacation time after starting employment?,"['An additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.\n\nThe maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\n\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'The City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.\n\nAn additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.', '## Vacation\n\nThe City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.', 'Permanent employees may request unpaid personal leave for a period of up to twelve (12) months within any two-year period. 
Your department t head has discretion to grant or deny requests for personal leave. With certain exceptions, if you are a temporary or provisional employee, you may request personal leave for a maximum of one month, and only if a replacement for your position is not required.\n\n## Time Off for Voting']","According to the Employee Handbook, an employee is not eligible to use vacation in the first year of continuous service. However, at the end of one year of continuous service, they will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year, and will be eligible to use accrued vacation time.","Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)",0.8055555555287036,0.6666666666666666,1.0,0.8803037847487826,0.89,0.9834024693854851 +What must an employee do if they're summoned for jury duty?,"['You must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.\n\n## Work Schedule While on Jury Duty', '## Jury Duty Leave\n\nYou must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.', '## Compensation While on Jury Duty\n\nAs a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.', 'As a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. 
However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.\n\nYou are not entitled to a per diem pay from the City or County for which Jury Duty was served, as the City and County of San Francisco already provides regular compensation and benefits to you while performing jury service. You must notify the jury staff that you are a government employee when reporting for jury service', 'If you are summoned as a witness on behalf of the City and County of San Francisco, you are entitled to be paid for any time that you are required to serve in that capacity. If you are summoned to serve as a witness in a case involving outside employment or personal business\n\naffairs, you will be placed on leave without pay unless vacation leave or compensatory time is requested and granted.']","According to the Employee Handbook, if an employee is summoned for jury duty, they must: + +1. Notify their supervisor as soon as they receive the summons. +2. If they are required to report for jury duty during their working hours, they will be excused from work on the day they perform jury service, provided they give prior notification to their supervisor. +3. If they report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, they must return to work as soon as possible. +4. They are not entitled to a per diem pay from the City or County for jury duty served, as the City and County already provides regular compensation and benefits to them while performing jury service. + +Note that they must also give prior notification to their supervisor if they are required to report for jury duty during their working hours.","An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)",0.9166666666361111,0.75,1.0,0.9595106560491319,0.73,0.9666893503074692 +What happens if an employee is absent without authorization for more than five consecutive working days?,"['If you are absent from your job for any period of time without proper authorization, you may be subject to discipline. If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to do so. The rules regarding automatic resignation may be affected by the collective bargaining agreement between the City and your employee organization. For more information, consult your departmental personnel officer or your collective bargaining agreement.\n\n## Layoff', '## Automatic Resignation\n\nIf you are absent from your job for any period of time without proper authorization, you may be subject to discipline. 
If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to do so. The rules regarding automatic resignation may be affected by the collective bargaining agreement between the City and your employee organization. For more information, consult your departmental personnel officer or your collective bargaining agreement.', ""If you are absent from your job for more than five consecutive working days, you must submit to your supervisor a completed Request for Leave form stating the duration of your illness, signed by a doctor, dentist, podiatrist, licensed clinical psychologist, Christian Science practitioner or licensed doctor of chiropractic medicine. This form is available from your supervisor or departmental personnel officer.\n\nUnder certain conditions, a department head may require you to furnish a doctor's certification for any amount of absence due to illness."", 'All planned absences must be requested and approved d in advance. If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick leave, failure to present medical certification when required, excessive absenteeism, tardiness, unauthorized absence or failure to notify your deparartment when you are unable to report to work, may result in sick leave restriction, disciplinary action or termination.\n\n## Appearance and Dress Code', 'Regular and prompt attendance is an essential requirement of your job. As a City employee, you are to report to your work station at your scheduled work time. Your time records must accurately reflect the time you start work and the number of hours worked in every work day.\n\nAll planned absences must be requested and approved d in advance. If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick leave, failure to present medical certification when required, excessive absenteeism, tardiness, unauthorized absence or failure to notify your deparartment when you are unable to report to work, may result in sick leave restriction, disciplinary action or termination.']","According to the Employee Handbook, if an employee is absent without authorization for more than five consecutive working days, their absence will be deemed an ""automatic resignation."" This means that their absence will be considered a resignation of their position, and they will be considered to have quit their job. 
However, the employee can appeal the determination to the Civil Service Commission within 15 days of receiving the notice of automatic resignation.","If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)",0.94999999997625,1.0,1.0,1.0000000000000004,0.57,0.9676767185463638 +How long is the normal probationary period for permanent civil service positions?,"['## Probationary Period\n\nAll appointments to permanent civil service positions require that you serve a probationary period. The probationary period is the final and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.', ""All appointments to permanent civil service positions require that you serve a probationary period. The probationary period is the final and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.\n\nThe department's appointing officer may release an employee at any time during the probationary period. At the conclusion of the probationary period, your department head may recommend permanent appointment, release from the position, or an extension of the probationary period. You may also be required to serve an additional probationary period in certain circumstances, such as transferring to a different department."", '## Permanent\n\nPersons who have competed in a civil service examination and are appointed from a list of eligibles to a permanent position. Permanent empmployees who have successfully completed the probationary period may only be removed for cause.\n\n## Exempt', '## Reappointment\n\nAn employee who has resigned with satisfactory services after completing probation may request reappointment to his or her position. Such h former employees may request to reenter the City service to a vacant position in the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.', 'An employee who has resigned with satisfactory services after completing probation may request reappointment to his or her position. Such h former employees may request to reenter the City service to a vacant position in the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. 
Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.\n\n## Reinstatement']","The duration of the probationary period is governed by the collective bargaining agreement, but the exact length is not specified in the provided text. However, it is mentioned that the probationary period may be extended by absences from work, and that an employee may be required to serve an additional probationary period in certain circumstances, such as transferring to a different department. + +It is also mentioned that former employees who have resigned with satisfactory services after completing probation may request reappointment to their position, and that the receiving department may approve a request for reappointment at its discretion. + +Therefore, it can be inferred that the probationary period for permanent civil service positions is a flexible period that can be adjusted based on individual circumstances, but its duration is not fixed.","The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)",0.9999999999,0.6666666666666666,0.7,0.0,0.46,0.9538981887418683 +What are employees required to do in case of a catastrophic event while off duty?,"['Catastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your department\'s instructions. In the event that phone lines are down, you are required to listen to the radio for any reporting instructions.\n\nIn the event of an emergency, the City may contact you via its two-way emergency communication system on your home phone, cell phone, work phone, and/or work email. Please be aware that the system uses an electronic communication, so you may think it is an unsolicited ""robo-call."" Do NOT hang up; you are required to listen to the entire message, follow the instructions and report for your emergency assignment if so directed.', ""Catastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.\n\nCatastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your department's instructions. In the event that phone lines are down, you are required to listen to the radio for any reporting instructions."", 'All Disaster Services Workers will be issued a citywide identification badge. You must have your badge with you at all times.\n\nCatastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.', ""The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\n\nSafety is every City employee's responsibility. 
All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors."", 'All City employees are designated Disaster Service Workers under state and local law. If the Mayor or an authorized City official proclaims a local emergency due to actual or threatened disaster such as earthquake, fire, riot, etc., City employees, as Disaster Service Workers, may be required to provide services during the emergency and subsequent period of assessment and recovery.\n\nSimply put, being a Disaster Service Worker means that any time a catastrophic event- natural or manmade-places life or property in jeopardy, you could be assigned to any disaster service activity that promotes the protection of public health and safety. Your assignment might require you to serve at a location, at times and/or under conditions that significantly differ from your normal work assignment. As a Disaster Service Worker, however, you have an absolute obligation to serve the public in a time of need in whatever role you are directed to perform by a superior or by law.']","According to the Employee Handbook, if a catastrophic event occurs while an employee is off duty, they should: + +1. Ensure the safety of their family. +2. Follow their department's instructions. +3. If phone lines are down, listen to the radio for any reporting instructions. +4. Do not hang up the phone, as it may be an unsolicited ""robo-call"". +5. Listen to the entire message, follow the instructions, and report for their emergency assignment if directed to do so. + +They must also have their citywide identification badge with them at all times.","Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)",0.8666666666377778,0.6666666666666666,1.0,0.9794192074279029,0.6,0.9508825814602573 +What is the city's policy on accepting gifts from subordinates?,"['You may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months.\n\nYou may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .', '## Gifts\n\nYou may not accept any gift that is intended to inflfluence you in the performance of your job.\n\nYou may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months.', 'There are exceptions to all of these gift rules - please check with the Ethics Commission or your departmental personnel officer if you have questions.\n\n## Outside or Additional Employment', 'You may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .\n\nIf you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. 
That amount is $420.00 for calendar year 2011; please contact the Ethics Commission to determine what the amount is for future calendar years.', 'If you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. That amount is $420.00 for calendar year 2011; please contact the Ethics Commission to determine what the amount is for future calendar years.\n\nThere are exceptions to all of these gift rules - please check with the Ethics Commission or your departmental personnel officer if you have questions.']","According to the Employee Handbook, the city has a policy of not accepting gifts from subordinates. This policy is outlined in section 411, which states that ""You may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you."" This means that employees should not receive or accept any gifts, benefits, or favors from their subordinates, as well as from candidates or applicants for positions that they may have authority over.","Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. (Source: Page 49, 'Gifts' section)",0.8874999999778125,1.0,0.375,0.9316696175848537,0.44,0.9459774250884739 diff --git a/examples/E2E-RAG-App/data/eval/explain-eval.ipynb b/examples/E2E-RAG-App/data/eval/explain-eval.ipynb new file mode 100644 index 000000000..d0c8c4d36 --- /dev/null +++ b/examples/E2E-RAG-App/data/eval/explain-eval.ipynb @@ -0,0 +1,745 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "### **RAGAS Evaluation Results Analysis**\n", + "\n", + "Let's anaylze our RAG based on these four metrics, Context Precision, Context Recall, Faithfullness, and Answer Relevancy." + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set_style('whitegrid')\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "#### Understanding the Metrics\n", + "\n", + "**Context Precision**\n", + "\n", + "- **Definition**: Measures the proportion of relevant chunks in the retrieved contexts.\n", + "- **Interpretation**: A higher value indicates that most of the retrieved contexts are relevant to the user query.\n", + "\n", + "**Context Recall**\n", + "\n", + "- **Definition**: Measures how many of the relevant documents were successfully retrieved.\n", + "- **Interpretation**: A higher value means fewer relevant documents were left out.\n", + "\n", + "**Faithfulness**\n", + "\n", + "- **Definition**: Measures the factual consistency of the generated answer against the given context.\n", + "- **Interpretation**: A higher value indicates that the answer is well-supported by the retrieved contexts.\n", + "\n", + "**Answer Relevancy**\n", + "\n", + "- **Definition**: Assesses how pertinent the generated answer is to the given prompt.\n", + "- **Interpretation**: Higher scores indicate that the answer directly addresses the user's question." 
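+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "id": "6b",
+    "metadata": {},
+    "source": [
+     "To make the first metric concrete, here is a toy sketch of the context-precision idea (a simplified illustration, not the RAGAS implementation, which relies on an LLM judge to decide whether each retrieved chunk is relevant to the reference; the function name and the hand-labeled 0/1 relevance flags are assumptions for the example). It averages precision@k over the ranks that hold a relevant chunk, so retrievals that place relevant chunks near the top score higher.\n",
+     "\n",
+     "```python\n",
+     "def context_precision_at_k(relevance):\n",
+     "    # relevance: illustrative 0/1 flags for the retrieved chunks, in rank order\n",
+     "    hits, precisions = 0, []\n",
+     "    for k, rel in enumerate(relevance, start=1):\n",
+     "        if rel:\n",
+     "            hits += 1\n",
+     "            precisions.append(hits / k)  # precision@k at each relevant rank\n",
+     "    return sum(precisions) / len(precisions) if precisions else 0.0\n",
+     "\n",
+     "# e.g. [1, 0, 1, 1, 0] -> (1/1 + 2/3 + 3/4) / 3 ~ 0.81\n",
+     "```"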
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "id": "7",
+    "metadata": {},
+    "source": [
+     "---"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "id": "8",
+    "metadata": {},
+    "source": [
+     "#### Data Preparation\n",
+     "\n",
+     "Let's visualize our dataset before we dive into the metrics."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 39,
+    "id": "9",
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "text/html": [
+        "<div>
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_inputretrieved_contextsresponsereferencecontext_precisioncontext_recallfaithfulnessanswer_relevancy
0What is the policy regarding smoking in City o...['## Smoke-Free Workplace\\n\\nSmoking is not pe...According to the document, smoking is not perm...Smoking is not permitted in City offices, or w...1.0000001.0000001.00.968476
1How many days of paid sick leave do most full-...['Most full time employees earn 13 8-hour work...Most full-time employees earn 13 8-hour workin...Most full-time employees earn 13 8-hour workin...1.0000000.5000001.00.986973
2What are the three categories of employees eli...['The following employees are eligible for hea...The three categories of employees eligible for...The following employees are eligible:\\n\\nAll p...0.7000001.0000000.80.981254
3How long must an employee wait before using va...['An additional vacation entitlement will be a...An employee must wait at least one year of con...Employees are not eligible to use vacation in ...0.4166670.6666671.00.979251
4What must an employee do if they're summoned f...['You must notify your supervisor as soon as y...An employee must notify their supervisor as so...An employee must notify their supervisor as so...0.9166670.7500001.00.965414
\n", + "" + ], + "text/plain": [ + " user_input \\\n", + "0 What is the policy regarding smoking in City o... \n", + "1 How many days of paid sick leave do most full-... \n", + "2 What are the three categories of employees eli... \n", + "3 How long must an employee wait before using va... \n", + "4 What must an employee do if they're summoned f... \n", + "\n", + " retrieved_contexts \\\n", + "0 ['## Smoke-Free Workplace\\n\\nSmoking is not pe... \n", + "1 ['Most full time employees earn 13 8-hour work... \n", + "2 ['The following employees are eligible for hea... \n", + "3 ['An additional vacation entitlement will be a... \n", + "4 ['You must notify your supervisor as soon as y... \n", + "\n", + " response \\\n", + "0 According to the document, smoking is not perm... \n", + "1 Most full-time employees earn 13 8-hour workin... \n", + "2 The three categories of employees eligible for... \n", + "3 An employee must wait at least one year of con... \n", + "4 An employee must notify their supervisor as so... \n", + "\n", + " reference context_precision \\\n", + "0 Smoking is not permitted in City offices, or w... 1.000000 \n", + "1 Most full-time employees earn 13 8-hour workin... 1.000000 \n", + "2 The following employees are eligible:\\n\\nAll p... 0.700000 \n", + "3 Employees are not eligible to use vacation in ... 0.416667 \n", + "4 An employee must notify their supervisor as so... 0.916667 \n", + "\n", + " context_recall faithfulness answer_relevancy \n", + "0 1.000000 1.0 0.968476 \n", + "1 0.500000 1.0 0.986973 \n", + "2 1.000000 0.8 0.981254 \n", + "3 0.666667 1.0 0.979251 \n", + "4 0.750000 1.0 0.965414 " + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.reset_option('display.max_colwidth')\n", + "pd.reset_option('display.max_rows')\n", + "\n", + "data = pd.read_csv('evaluation_results.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "667df35e-e61e-41a4-bb1a-b59ce9656f78", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_inputresponse
0What is the policy regarding smoking in City offices?According to the document, smoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.
1How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules.
2What are the three categories of employees eligible for health coverage?The three categories of employees eligible for health coverage are:\\n\\n1. All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours.\\n2. All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours.\\n3. All other employees of the City, including temporary, exempt, or \"as needed\" employees who have worked more than 1040 hours in any consecutive 12-month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.
3How long must an employee wait before using vacation time after starting employment?An employee must wait at least one year of continuous service before using vacation time. At the end of the first year, the employee will be awarded a vacation allowance at the rate of 0.0385 of an hour for each hour of paid service in the preceding year, and will be eligible to use accrued vacation time.
4What must an employee do if they're summoned for jury duty?An employee must notify their supervisor as soon as they receive a jury summons. If they are required to report for jury duty during their working hours, they will be excused from work on the work day they perform jury service, provided they give prior notification to their supervisor.
\n", + "
" + ], + "text/plain": [ + " user_input \\\n", + "0 What is the policy regarding smoking in City offices? \n", + "1 How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules? \n", + "2 What are the three categories of employees eligible for health coverage? \n", + "3 How long must an employee wait before using vacation time after starting employment? \n", + "4 What must an employee do if they're summoned for jury duty? \n", + "\n", + " response \n", + "0 According to the document, smoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings. \n", + "1 Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. \n", + "2 The three categories of employees eligible for health coverage are:\\n\\n1. All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours.\\n2. All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours.\\n3. All other employees of the City, including temporary, exempt, or \"as needed\" employees who have worked more than 1040 hours in any consecutive 12-month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours. \n", + "3 An employee must wait at least one year of continuous service before using vacation time. At the end of the first year, the employee will be awarded a vacation allowance at the rate of 0.0385 of an hour for each hour of paid service in the preceding year, and will be eligible to use accrued vacation time. \n", + "4 An employee must notify their supervisor as soon as they receive a jury summons. If they are required to report for jury duty during their working hours, they will be excused from work on the work day they perform jury service, provided they give prior notification to their supervisor. " + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.set_option('display.max_colwidth', None)\n", + "pd.set_option('display.max_rows', None) \n", + "\n", + "data[['user_input', 'response']].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "df53125e-70e1-401c-85b9-10e3c668ba08", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
retrieved_contexts
0['## Smoke-Free Workplace\\n\\nSmoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.\\n\\n## Drug-Free Workplace', '## Policy Regarding the Treatment of Co-Workers and Members of the Public\\n\\nCity policy requires employees to treat co-workers and members of the public with courtesy and respect. City employees and managers are responsible for maintaining a safe and productive workplace which is free from inappropriate workplace behavior.\\n\\n## Smoke-Free Workplace', '## Political Activity\\n\\nIt is unlawful for City employees to use public resources or personnel to engage in political activity relating to elective offices and ballot measures. City employees may not engage in political activities while on duty or in the workplace. Employees may not use City resources, such as photocopier or fax machines, telephones, postage, or email, for political activities. The ban on engaging in political activity while on duty prohibits such activities as circulating petitions, addressing campaign mailers or engaging in any other political activities that use City resources or divert employees from their assigned duties.', '## Use of City and County Property for Business Purposes Only\\n\\nNo officer or employee may use, nor allow any other r person to use, City resources for any non-City business purpose. Use of City resources fofor personal, political, employee organization or other non-City business is strictly prohibited. City resources include, but are not limited to, facilities, equipment, devices, telephones, computers, copier, fax machine, email, internet access, supplies and any time for which you are receiving compensation from the City. Inappropriate uses of City resources include, but are not limited to: online gambling; viewing sports events online; playing games, streaming video or music on a work computer; viewing or distributing materials that are not related to City business or that are sexually explicit; and frequent talking on a personal cell phone or texting during work hours.', \"The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\\n\\nSafety is every City employee's responsibility. All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors.\"]
1['Most full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.\\n\\nSome employees may be entitled to accrue sick leave under Chapter 12W of the Administrative Code. Please contact your departmental personnel officer for more information.', 'Sick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.\\n\\nMost full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.', '## Accrual of Paid Sick Leave\\n\\nSick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\\n\\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'You may not use sick leave with pay accrued under the Civil Service Rules during your first 6 months of employment with the City.\\n\\nYou may not use sick leave for reasons not set forth th in this Handbook, the Civil Service Rules, the applicable collective bargaining agreement or other applicable laws. Misuse of sick leave is grounds for discipline, up to and including termination.']
2['The following employees are eligible for health coverage:\\n\\n- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\\n- (3) All other employees of the City including temporary y exempt or \"as needed\" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.', \"Employees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage.\\n\\nAn employee's spouse, domestic partner and children may also be eligible for coverage through HSS. If you have enrolled a domestic partner, same sex spouse and/or their children in your health plan, it is important that you seek tax advice from a qualified professional regarding the tax consequences of such enrollment. A detailed list of eligibility requirements and necessary documentation for enrolling employee dependents is available on the HSS website at www.myhss.org/benefits/ccsf.html .\", 'Please note that the information in this section is subject to change. You should consult with HSS if you have any questions at (415) 554-1750.\\n\\n## Eligibility\\n\\nThe following employees are eligible for health coverage:', \"Coverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.\\n\\nEmployees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage.\", '- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\\n- (3) All other employees of the City including temporary y exempt or \"as needed\" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.\\n\\nCoverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.']
3['An additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.\\n\\nThe maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\\n\\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'The City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.\\n\\nAn additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.', '## Vacation\\n\\nThe City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.', 'Permanent employees may request unpaid personal leave for a period of up to twelve (12) months within any two-year period. Your department t head has discretion to grant or deny requests for personal leave. With certain exceptions, if you are a temporary or provisional employee, you may request personal leave for a maximum of one month, and only if a replacement for your position is not required.\\n\\n## Time Off for Voting']
4['You must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.\\n\\n## Work Schedule While on Jury Duty', '## Jury Duty Leave\\n\\nYou must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.', '## Compensation While on Jury Duty\\n\\nAs a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.', 'As a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.\\n\\nYou are not entitled to a per diem pay from the City or County for which Jury Duty was served, as the City and County of San Francisco already provides regular compensation and benefits to you while performing jury service. You must notify the jury staff that you are a government employee when reporting for jury service', 'If you are summoned as a witness on behalf of the City and County of San Francisco, you are entitled to be paid for any time that you are required to serve in that capacity. If you are summoned to serve as a witness in a case involving outside employment or personal business\\n\\naffairs, you will be placed on leave without pay unless vacation leave or compensatory time is requested and granted.']
\n", + "
" + ], + "text/plain": [ + " retrieved_contexts\n", + "0 ['## Smoke-Free Workplace\\n\\nSmoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.\\n\\n## Drug-Free Workplace', '## Policy Regarding the Treatment of Co-Workers and Members of the Public\\n\\nCity policy requires employees to treat co-workers and members of the public with courtesy and respect. City employees and managers are responsible for maintaining a safe and productive workplace which is free from inappropriate workplace behavior.\\n\\n## Smoke-Free Workplace', '## Political Activity\\n\\nIt is unlawful for City employees to use public resources or personnel to engage in political activity relating to elective offices and ballot measures. City employees may not engage in political activities while on duty or in the workplace. Employees may not use City resources, such as photocopier or fax machines, telephones, postage, or email, for political activities. The ban on engaging in political activity while on duty prohibits such activities as circulating petitions, addressing campaign mailers or engaging in any other political activities that use City resources or divert employees from their assigned duties.', '## Use of City and County Property for Business Purposes Only\\n\\nNo officer or employee may use, nor allow any other r person to use, City resources for any non-City business purpose. Use of City resources fofor personal, political, employee organization or other non-City business is strictly prohibited. City resources include, but are not limited to, facilities, equipment, devices, telephones, computers, copier, fax machine, email, internet access, supplies and any time for which you are receiving compensation from the City. Inappropriate uses of City resources include, but are not limited to: online gambling; viewing sports events online; playing games, streaming video or music on a work computer; viewing or distributing materials that are not related to City business or that are sexually explicit; and frequent talking on a personal cell phone or texting during work hours.', \"The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\\n\\nSafety is every City employee's responsibility. All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors.\"]\n", + "1 ['Most full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.\\n\\nSome employees may be entitled to accrue sick leave under Chapter 12W of the Administrative Code. Please contact your departmental personnel officer for more information.', 'Sick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.\\n\\nMost full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. 
Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.', '## Accrual of Paid Sick Leave\\n\\nSick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\\n\\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'You may not use sick leave with pay accrued under the Civil Service Rules during your first 6 months of employment with the City.\\n\\nYou may not use sick leave for reasons not set forth th in this Handbook, the Civil Service Rules, the applicable collective bargaining agreement or other applicable laws. Misuse of sick leave is grounds for discipline, up to and including termination.']\n", + "2 ['The following employees are eligible for health coverage:\\n\\n- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\\n- (3) All other employees of the City including temporary y exempt or \"as needed\" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.', \"Employees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage.\\n\\nAn employee's spouse, domestic partner and children may also be eligible for coverage through HSS. If you have enrolled a domestic partner, same sex spouse and/or their children in your health plan, it is important that you seek tax advice from a qualified professional regarding the tax consequences of such enrollment. A detailed list of eligibility requirements and necessary documentation for enrolling employee dependents is available on the HSS website at www.myhss.org/benefits/ccsf.html .\", 'Please note that the information in this section is subject to change. 
You should consult with HSS if you have any questions at (415) 554-1750.\\n\\n## Eligibility\\n\\nThe following employees are eligible for health coverage:', \"Coverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.\\n\\nEmployees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage.\", '- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\\n- (3) All other employees of the City including temporary y exempt or \"as needed\" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.\\n\\nCoverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.']\n", + "3 ['An additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.\\n\\nThe maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\\n\\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'The City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.\\n\\nAn additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.', '## Vacation\\n\\nThe City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. 
You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.', 'Permanent employees may request unpaid personal leave for a period of up to twelve (12) months within any two-year period. Your department t head has discretion to grant or deny requests for personal leave. With certain exceptions, if you are a temporary or provisional employee, you may request personal leave for a maximum of one month, and only if a replacement for your position is not required.\\n\\n## Time Off for Voting']\n", + "4 ['You must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.\\n\\n## Work Schedule While on Jury Duty', '## Jury Duty Leave\\n\\nYou must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.', '## Compensation While on Jury Duty\\n\\nAs a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.', 'As a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.\\n\\nYou are not entitled to a per diem pay from the City or County for which Jury Duty was served, as the City and County of San Francisco already provides regular compensation and benefits to you while performing jury service. 
You must notify the jury staff that you are a government employee when reporting for jury service', 'If you are summoned as a witness on behalf of the City and County of San Francisco, you are entitled to be paid for any time that you are required to serve in that capacity. If you are summoned to serve as a witness in a case involving outside employment or personal business\\n\\naffairs, you will be placed on leave without pay unless vacation leave or compensatory time is requested and granted.']" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[['retrieved_contexts']].head()" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "#### **4. Visualizing the Metrics**\n", + "\n", + "Let's create bar charts to visualize each metric for the different user inputs." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_metric(metric):\n", + " data['short_user_input'] = data['user_input'].apply(lambda x: x[:10] + '...' if len(x) > 10 else x)\n", + " plt.figure(figsize=(7, 4)) \n", + " sns.barplot(x='short_user_input', y=metric, data=data, hue='short_user_input', palette='viridis', legend=False) \n", + " plt.xticks(rotation=75, ha='right', fontsize=8) \n", + " plt.ylim(0, 1.1)\n", + " plt.title(f'{metric.replace(\"_\", \" \").title()} for Each User Input')\n", + " plt.ylabel(metric.replace(\"_\", \" \").title())\n", + " plt.xlabel('User Input')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "210298cf-b285-4eea-b980-0190520cc20d", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAmUAAAHMCAYAAACUdN+cAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABm9UlEQVR4nO3dd1hT5/8+8BsCyHQwVZYTEAERB+Kuo1hX3XvbOj5qrVu/te6KbdWq1bZOqlZFK4Ja0VatOHFWxQUqoqAgsmUHkvz+8JdoBJUocCLnfl2X1yUnJ8k7D8nhzjPO0VEoFAoQERERkaB0hS6AiIiIiBjKiIiIiLQCQxkRERGRFmAoIyIiItICDGVEREREWoChjIiIiEgLMJQRERERaQGGMiIiIiItwFBGREREpAUYyoioTO3btw/Ozs54/PixRvdzdnbGzz//XEpVvd2pU6fw+eefw93dHc7Oznj+/LkgdZSmdu3aYezYsUKXQSRqDGVU7sTExGDevHlo37493N3d4eXlhQEDBmDr1q3Izc0ttee9f/8+fv75Z43Dxvv47bffcOzYsWLt+/jxYzg7O6v+1atXD23btsWECRNw586dUq7045eamoqvv/4ahoaGmDdvHn744QcYGRmV2vMpQ+ub/l27dq3Unvt9Kd9jmzdvLvL2zZs3v1cQLy2zZ89Gw4YNhS5DpSyPHaTd9IQugKgkhYaGYvLkyTAwMMDnn38OJycn5Ofn48qVK/jxxx9x//59LF68uFSe+/79+1i7di2aNm0KOzu7UnkOpfXr18PX1xcdOnQo9n26du2K1q1bQy6XIyoqCrt27cKpU6ewZ88e1KtXrxSrVff555+jS5cuMDAw0Oh+4eHhkEgkpVTVm924cQNZWVmYPHkymjdvXmbP+9VXXxX5PnJwcCizGqhslOWxg7QbQxmVG7GxsZgyZQqqV6+OrVu3wtraWnXb4MGD8ejRI4SGhgpXoMBcXV3x+eefq3728vLC+PHjsWvXLixatKjI+2RnZ8PY2LhE65BIJO8VripUqFCidRRXSkoKAMDMzKzEHrM47dq6dWu4u7uX2HOKiUKhQF5eHgwNDYUuhUgjHL6kcmPTpk3Izs7Gd999pxbIlBwdHTF8+HDVzwUFBVi3bh06dOgANzc3tGvXDitXroRUKlW7n3KuzeXLl9GnTx+4u7ujffv2CA4OVu2zb98+TJ48GQAwbNgw1VDThQsXVPucPHkSgwYNgqenJxo2bIgxY8bg3r17qtvDwsLg4uKC1atXqz3/wYMH4ezsjJ07dwJ4MbcqOzsbQUFBqueZPXu2xu3VrFkzAFANmSiHzS5evIgFCxbAx8cHbdq0KXb9SlFRUZg8eTKaNWsGDw8P+Pr64qefflJrq9eHsm7cuIHRo0fD29sbHh4eaNeuHebMmaP2uEXNKbt9+za++OILeHl5oWHDhhg+fHih4T3l8125cgV+fn5o1qwZPD09MWHCBFXgepOhQ4di1qxZAIA+ffoUauvDhw+jV69e8PDwgLe3N6ZPn46EhAS1x1AOlcXExODLL79Ew4YNMX369Lc+b3Ft3rwZAwYMULVbr169cOTIkSL33b9/P/r06YMGDRqgSZMmGDx4MM6cOVNov7e9z0tScX7ncrkcv//+O7p06QJ3d3c0b94c8+bNQ3p6utp+ys/o6dOnVb+PgIAAjeopzuccePl+unTpEubNmwdvb294eXlh5syZhep60zzIdu3aqd5HxTl2kHiwp4zKjRMnTsDe3h5eXl7F2n/u3LkICgqCr68vRo4cifDwcKxfvx5RUVFYt26d2r6PHj3C5MmT0adPH/Ts2ROBgYGYPXs26tevj7p166JJkyYYOnQotm/fjnHjxqFWrVoAgNq1awMAgoODMXv2bLRs2RLTp09HTk4Odu3ahUGDBiEoKAh2dnbw8fHBoEGDsGHDBnTo0AH169fHs2fPsGTJEjRv3hwDBw4EAPzwww+YO3cuPDw80K9fPwDvN6QVExMDAKhcubLa9oULF8Lc3BwTJkxAdnZ2sesHgIiICAwePBh6enro378/bG1tERMTg3///RdTpkwpso7k5GSMHj0aVapUwZgxY1CxYkU8fvwYR48efWv99+7dw+DBg2FiYoIvvvgCenp62L17N4YOHYo//vgDDRo0UNt/yZIlqFixIiZOnIgnT55g69atWLRoEVatWvXG5xg3bhxq1qyJ3bt3q4YTlW29b98+zJkzB+7u7pg6dSqSk5Oxbds2/PfffwgODkbFihVVj1NQUIDRo0ejUaNGmDVrVrF6cDIzMwuFRh0dHVSpUkX187Zt29CuXTt069YN+fn5OHToECZPnoz169ejbdu2qv3Wrl2Ln3/+GQ0bNsRXX30FfX19XL9+HefPn0fLli1V+73rfV5Sivs7nzdvHoKCgtCrVy8MHToUjx8/xo4dO3D79m3s2rUL+vr6qn2jo6Mxbdo09O/fH/369UPNmjU1rkuT179o0SLV+yk6Ohq7du1CXFwctm/fDh0dnWI/57uOHSQyCqJyICMjQ+Hk5KQYP358sfa/c+eOwsnJSfHNN9+obV+2bJnCyclJERYWptr2ySefKJycnBSXLl1SbUtOTla4ubkpli1bptp2+PBhhZOTk+L8+fNqj5mZmalo3LixYu7cuWrbExMTFY0aNVLbnp2drejYsaOiS5cuiry8PMWYMWMUXl5eiidPnqjd19PTUzFr1qxivdbY2FiFk5OT4ueff1YkJycrEhMTFRcuXFD06NFD4eTkpPj7778VCoVCERgYqHByclIMHDhQUVBQ8F71Dx48WNGwYcNC9crlctX/lc8TGxurUCgUiqNHjyqcnJwU4eHhb30dTk5OijVr1qh+/t///qeoX7++IiYmRrUtISFB0bBhQ8XgwYMLPd+IESPU6li6dKmiXr16iufPn7/1eZX3f7U+qVSq8PHxUXTt2lWRm5ur2n7ixAmFk5OTYvXq1apts2bNUjg5OSmWL1/+1ud5/fmK+ufm5qa2b05OjtrPUqlU0bVrV8WwYcNU2x4+fKhwcXFRTJgwQSGTydT2f7U9ivs+L4ryPbZp06Yib9+0aZPGv/NLly4pnJycFAcOHFDbfurUqULblbWfOnXqrXUqzZo1S+Hp6am2rbivX/n76dmzp0Iqlaq2b9y4UeHk5KQ4duyYatvr79lXn+vVz++bjh0kPhy+pHIhMzMTAGBiYlKs/U+ePAkAGDlypNr2UaNGqd2uVKdOHTRu3Fj1s7m5OWrWrInY2Nh3Pte5c+fw/PlzdOnSBSkpKap/urq6aNCggdowhZGREfz8/BAVFYXBgwcjNDQUc+bMQfXq1Yv1ut7m559/ho+PD1q0aIGhQ4ciJiYG06dPx6effqq2X79+/dTmfBW3/pSUFFy6dAm9e/cuVO/beg6Uc7VCQ0ORn59frNcik8lw9uxZdOjQAfb29qrt1tbW6Nq1K65cuaJ6T7z6ul6to3HjxpDJZHjy5EmxnvNVN2/eRHJyMgYOHKg2161t27aoVatWkXMXlT2dxTVv3jz4+/ur/du4caPaPq
/2uKWnpyMjIwONGjXC7du3VduPHTsGuVyOCRMmQFdX/ZD/+u/lQ97nmijO7/zIkSMwMzNDixYt1N539evXh7GxcaHhPTs7O7Rq1eqD6tLk9ffv31+tp27gwIHQ09MrdOwg0gSHL6lcMDU1BQBkZWUVa/8nT55AV1e30LCflZUVKlasWOgPdbVq1Qo9RqVKlQrNISnKw4cPAUBtPturlLUrNWrUCAMHDsSOHTvQsmVL9OnT553PURz9+/dHp06doKOjg4oVK6Ju3bpFroB8ffVXcetX/uFycnLSqK6mTZvC19cXa9euxe+//46mTZuiQ4cO6Nat2xtXaKakpCAnJ6fIIaratWtDLpcjPj5ebcjp9aCoHF58n3OOxcXFAUCRz1+rVi1cuXJFbZuenh6qVq2q0XN4eHi8c6L/iRMn8Ouvv+LOnTtqcyFfDVsxMTHQ1dUt1nDYh7zPi0NZV3F+548ePUJGRgZ8fHyKfKzk5GS1n0ti1aImr9/R0VHtZxMTE1hZWb1XyCdSYiijcsHU1BTW1tZFTjx/m+LO/fiQUzEoFAoAL+aCWVlZvfOxpVIpLl68COBF0MnJySmR82I5OjoW65QOr69y1LR+Teno6GDNmjW4du0aTpw4gdOnT+P//u//4O/vj927dxe79/NdXu8lUlK+vtJkYGDwxud/X5cvX8b48ePRpEkTzJ8/H1ZWVtDX10dgYCD++uuv93rM9/1dKt8zbzoPYE5Ojtp+xfmdy+VyWFhYYPny5UU+prm5udrPJbHSsqxOuSKTycrkeejjw1BG5cYnn3yC3bt34+rVq+88MaStrS3kcjkePXqk1oOQlJSE58+fw9bWVuPnf1PAUw6vWVhYFCsUrVmzBlFRUZg1axaWL1+OFStWYO7cuRrXU1KKW79yv7t3777X83h6esLT0xNTpkzBwYMHMX36dISEhKBv376F9jU3N4eRkRGio6ML3fbgwQPo6uoW2etRUpS9btHR0YV6cqKjo0tkuPld/v77b1SoUAGbN29W61EMDAxU28/BwUF1brrSOh/d234fwIs2MTIyUlukALz9d+7g4ICwsDB4eXlp5aktHj16pFrBDLzopU9MTETr1q1V2ypVqlSoJ1YqlSIxMVFtmyYLA6h845wyKje++OILGBsbY+7cuUhKSip0e0xMDLZu3QoAqlM9KH9W8vf3V7tdE8rerIyMDLXtrVq1gqmpKdavX1/k/JlXV9hdv34dW7ZswfDhwzFq1CiMHj0af/zxh6rnTMnY2LjMLvVT3PrNzc3RpEkTBAYGqob3lN7WG5Wenl7odmV4eP30JEoSiQQtWrTA8ePH1U6tkZSUhL/++guNGjUqNCxcktzc3GBhYYGAgAC1Gk+ePImoqCi1lY+lRSKRQEdHR63X5fHjxzh+/Ljafh06dICuri7WrVsHuVyudltJ9RIqfx8nTpwo9LuPi4vDiRMn0KJFC1VPVHF+55999hlkMhl++eWXQs9XUFAg+KWudu/erfZ52LVrFwoKCtRCmb29PS5fvqx2vz179hTqKXvTsYPEhz1lVG44ODhg+fLlmDJlCjp37qw6o79UKsXVq1dx5MgR9OrVCwDg4uKCnj17Yvfu3Xj+/DmaNGmCGzduICgoCB06dFD7Blxc9erVg0QiwcaNG5GRkQEDAwM0a9YMFhYWWLBgAWbOnIlevXqhc+fOMDc3R1xcHE6ePAkvLy/MmzcPeXl5mDVrFhwdHVWnj5g0aRJOnDiBOXPm4ODBg6oTjtavXx9hYWHw9/eHtbU17OzsCp0CoqSYmpoWq37gxWlGBg4ciJ49e6J///6ws7PDkydPEBoaiv379xf5+EFBQdi1axc6dOgABwcHZGVlYc+ePTA1NVX7A/e6r7/+GufOncOgQYMwaNAgSCQS7N69G1KpFDNmzCiVtlDS19fH9OnTMWfOHAwZMgRdunRRnRLD1tYWI0aM+ODnOHXqFB48eFBou5eXF+zt7dGmTRv4+/vjiy++QNeuXZGcnIydO3fCwcEBkZGRqv0dHR0xbtw4/PLLLxg0aBA+/fRTGBgY4MaNG7C2tsa0adM+uFYAmDp1Kvr166f63dva2uLJkyfYvXs3dHR0MHXqVNW+xfmdN23aFP3798f69etx584dtGjRAvr6+nj48CGOHDmCb775Bp06dSqR2t9Hfn4+RowYgc8++wzR0dHYuXMnGjVqhPbt26v26du3L+bPn49JkyahefPmiIiIwJkzZwr1GL7t2EHiwlBG5Ur79u1x4MABbN68GcePH8euXbtgYGCgOumn8rxewIvzVtnZ2SEoKAjHjh2DpaUlxo4di4kTJ77Xc1tZWWHhwoVYv349vvnmG8hkMmzbtg0WFhbo1q0brK2tsWHDBmzevBlSqRQ2NjZo3LixKiiuXLkSMTExCAgIUM29MTAwwLJly9C/f3/88MMPWLBgAYAXJySdN28eVq1ahdzcXPTs2bPUQhmAYtUPvAi7e/bswerVq7Fr1y7k5eWhevXq+Oyzz9742E2bNsWNGzcQEhKCpKQkmJmZwcPDA8uXL1dbWfm6unXrYseOHVixYgXWr18PhUIBDw8P/Pjjj6XaFkq9evWCoaEhNm7ciOXLl8PY2BgdOnTAjBkz1M5R9r7WrFlT5HY/Pz/Y29vDx8cH3333HTZu3IilS5fCzs4O06dPx5MnT9RCGQBMnjwZdnZ2+OOPP/DTTz/ByMgIzs7Oald4+FC1a9fGnj17sHbtWuzduxfp6emoVKkSWrRogQkTJqhNEyju73zRokVwc3NDQEAAfvrpJ0gkEtja2qJ79+7FPh9haZk3bx4OHjyINWvWID8/H126dMHcuXPVhiL79euHx48fY+/evTh9+jQaNWoEf3//QqH9bccOEhcdRVnMciUiIioHlCcN3rt3Ly+DRSWOc8qIiIiItABDGREREZEWYCgjIiIi0gKcU0ZERESkBdhTRkRERKQFGMqIiIiItIDozlMml8tRUFAAXV1dXtqCiIiISp1CoYBcLoeent5br4MrulBWUFCAGzduCF0GERERiYy7u7vatWpfJ7pQpkyo7u7uquuwEREREZUWmUyGGzduvLWXDBBhKFMOWUokEoYyIiIiKjPvmjbFif5EREREWoChjIiIiEgLMJQRERERaQGGMiIiIiItwFBGREREpAUYyoiIiIi0AEMZERERkRZgKCMiIiLSAgxlRERERFqAoYyIiIhICzCUEREREWkBhjIiIiIiLcBQRkRERKQFGMqIiIiItABDGREREZEWYCgjIiIi0gIMZURERERagKGMiIiISAswlBERERFpAYYyIiIiIi3AUEZERESkBQQNZZcuXcK4cePQsmVLODs749ixY++8z4ULF9CzZ0+4ubmhY8eO2LdvXxlUSkRERFS6BA1l2dnZcHZ2xvz584u1f2xsLMaOHQtvb2/s378fw4cPx9y5c3H69OlSrpSIiIiodOkJ+eRt2rRBmzZtir1/QEAA7OzsMHv2bABA7dq1ceXKFfz+++9o1apVaZVJREREVOoEDWWaunbtGnx8fNS2tWzZEkuXL
tX4sWQyWUmV9V5+/fVX7N+/H59//jnGjx8vaC1ERERUeoqbOT6qUJaUlARLS0u1bZaWlsjMzERubi4MDQ2L/Vg3btwo6fKKTSqVIjg4GAqFAvv370ejRo1gYGAgWD1voq+vD1dXV+jpfVRvk2IpKCjA7du3kZ+fL3QpROWOvr4+XOu7Qk9SDo8dsgLcvqXZsYPtQcVV/t4hxeTu7g6JRCLIc2dmZkKhUAAA5HI5XF1dYWpqKkgt7yKRSPDDT/sQ8zhR6FJKjIOdFWZO6YX69esLXQpRuSWRSLAsbDdinj8TupQS41DRGrN9+r/XsUMikSDg9hY8y44vhcqEYW1cDQNcR/FYWgwymaxYnUEfVSiztLREUlKS2rakpCSYmppq1EsGvPiACBXKXn9eIWspjpjHiYh68FToMkqcNrc5UXkQ8/wZ7qfGCV1GiXvfY8ez7HjEZcaWcDXC47G05HxU5ynz9PTE+fPn1badO3cOnp6ewhREREREVEIEDWVZWVm4c+cO7ty5AwB4/Pgx7ty5g7i4F9+sVqxYgZkzZ6r2HzBgAGJjY/HDDz8gKioKO3bswOHDhzFixAghyiciIiIqMYIOX968eRPDhg1T/ezn5wcA6NmzJ5YtW4bExETEx78cf7e3t8f69evh5+eHbdu2oWrVqliyZAlPh0FEREQfPUFDmbe3NyIjI994+7Jly4q8T3BwcClWRURERFT2Pqo5ZURERETlFUMZERERkRZgKCMiIiLSAgxlRERERFqAoYyIiIhICzCUEREREWkBhjIiIiIiLcBQRkRERKQFGMqIiIiItABD2WvkMrnQJZSK8vq6iIiIygtBL7OkjXQluvhx7jbERieU2nPI5PlqP8/6cg0kuvql9nz2NW0wY8mwd+9IREREgmEoK0JsdAKiIh6X2uMrdGRApZc/R9+Lg45CUmrPR0RERNqPw5dEREREWoChjIiIiEgLMJQRERERaQGGMiIiIiItwFBGRFQCZPLye9qZ8vzaiLQJV18SEZUAia4u5hzeiwcpSUKXUqJqmVvC77M+QpdBJAoMZUREJeRBShIinsULXQYRfaQ4fElERESkBRjKiIiIiLQAQxkRERGRFmAoIyIiItICDGVEREREWoChjIiIiEgLMJQJQaEDKJT///8/ExERkagxlAlAB7qQSC0ABSCRWkCHvwYiIiLR48ljBaKfUw36OdWELoOIiIi0BLtoiIiIiLQAQxkRERGRFmAoIyIiojKlUMiELqFUfOjr4pwyIiIiKlM6OhKcv/cdnufECF1Kialo5IBmdb/5oMdgKCMiIqIy9zwnBmnZ94QuQ6tw+JKIiIhICzCUEREREWkBhjIiIiIiLcBQRkRERKQFGMqIiIiItABDGREREZEWYCgjIiIi0gIMZURERERagKGMiIiISAswlBERERFpAYYyIiIiIi3AUEZERESkBQQPZTt27EC7du3g7u6Ovn37Ijw8/K37//777/D19YWHhwfatGmDpUuXIi8vr4yqJSIiIiodgoaykJAQ+Pn5YcKECQgKCoKLiwtGjx6N5OTkIvc/ePAgVqxYgYkTJyIkJATfffcdQkJCsHLlyjKunIiIiKhkCRrK/P390a9fP/Tu3Rt16tTBwoULYWhoiMDAwCL3v3r1Kry8vNCtWzfY2dmhZcuW6Nq16zt714iIiIi0nZ5QTyyVSnHr1i2MHTtWtU1XVxfNmzfH1atXi7xPw4YNceDAAYSHh8PDwwOxsbE4efIkPv/8c42fXyaTFbldIpFo/Fgfize95rdhexAVT3n+rACaf17Kc3vwWKqO7aGuqPYobhsJFspSU1Mhk8lgYWGhtt3CwgIPHjwo8j7dunVDamoqBg0aBIVCgYKCAgwYMADjxo3T+Plv3LhRaJuRkRFcXV01fqyPRWRkJHJycoq9P9uDqHjK+2cF0OzzUt7bg8dSdWwPdR/yt0WwUPY+Lly4gPXr12P+/Pnw8PBATEwMvvvuO6xbtw4TJkzQ6LHc3d3LdVIvirOzs9AlaBW2B1Hx8fPyEttCHdtDXVHtIZPJiuwMep1goaxKlSqQSCSFJvUnJyfD0tKyyPusXr0a3bt3R9++fQG8eOHZ2dmYN28exo8fD13d4k+Rk0gkogtlYnu978L2ICo+fl5eYluoY3uo+5D2EGyiv4GBAerXr4+wsDDVNrlcjrCwMDRs2LDI++Tm5hYKXsoXr1AoSq9YIiIiolIm6PDlyJEjMWvWLLi5ucHDwwNbt25FTk4OevXqBQCYOXMmbGxsMG3aNADAJ598An9/f7i6uqqGL1evXo1PPvmESZ2IiIg+aoKGss6dOyMlJQVr1qxBYmIi6tWrh02bNqmGL+Pj49V6xsaPHw8dHR2sWrUKCQkJMDc3xyeffIIpU6YI9RKIiIiISoTgE/2HDBmCIUOGFHnb9u3b1X7W09PDxIkTMXHixLIojYiIiKjMCH6ZJSIiIiJiKCMiIiLSCgxlRERERFqAoYyIiIhICzCUEREREWmB91p9GRYWhrCwMCQnJ0Mul6vd5ufnVyKFEREREYmJxqFs7dq1WLduHdzc3GBlZQUdHZ3SqIuIiIhIVDQOZQEBAfDz80OPHj1KoRwiIiIicdJ4Tll+fj68vLxKoxYiIiIi0dI4lPXp0wcHDx4sjVqIiIiIREvj4cu8vDzs2bMHYWFhcHZ2hp6e+kPMmTOnxIojIiIiEguNQ1lkZCRcXFwAAHfv3lW7jZP+iYiIiN6PxqHs9YuEExEREdGH+6CTxz59+hRPnz4tqVqIiIiIREvjnjK5XI5ffvkF/v7+yM7OBgCYmJhg5MiRGD9+PHR1eZEAIiIiIk1pHMp++ukn7N27F9OmTVOdGuPKlStYu3YtpFIppkyZUuJFEhEREZV3GoeyoKAgLFmyBO3bt1dtc3FxgY2NDRYuXMhQRkRERPQeNB5rTE9PR61atQptr1WrFtLT00ukKCIiIiKx0TiUubi4YMeOHYW279ixQ3WqDCIiIiLSjMbDlzNmzMDYsWNx7tw5eHp6AgCuXbuG+Ph4bNy4saTrIyIiIhIFjXvKmjZtiiNHjqBjx47IyMhARkYGOnbsiCNHjqBx48alUSMRERFRuadxTxkA2NjYcEI/iZJMLoekHJ72pby+LiKij0mxQllERAScnJygq6uLiIiIt+7LeWVUnkl0dbFocxAexScJXUqJcaxmiXmjewpdBhGR6BUrlPXo0QNnz56FhYUFevToAR0dHSgUikL76ejo4M6dOyVeJJE2eRSfhLuxvJIFERGVrGKFsuPHj8Pc3Fz1fyIiIiIqWcUKZba2tkX+n4iIiIhKhsYze4OCghAaGqr6+YcffkDjxo0xYMAAPHnypCRrIyIiIhINjUPZb7/9hgoVKgAArl69ih07dmDGjBmoXLky/Pz8SrxAIiIiIjHQ+JQYT58+haOjIwDg2LFj8PX1Rf/+/eHl5YWhQ4eWeIFEREREYqBxT5mxsTHS0tIAAGfPnkXz5s0BABUqVEBeXl6JFkdEREQkFhr3lDVv3hxz585FvXr18PDhQ7Rp0wYAcO/ePS4C
ICIiInpPGveUzZ8/H56enkhJScGaNWtQpUoVAMCtW7fQpUuXEi+QiIiISAw07imrWLEi5s2bV2j7V199VSIFEREREYkRL7NEREREpAVK5DJLyp95mSUiIiKi98PLLBERERFpAV5miYiIiEgLaLz6cv369di7d2+h7Xv37sWGDRtKpCgiIiIisdE4lO3evRu1atUqtL1u3boICAgokaKIiIiIxEbjUJaYmAgrK6tC283NzZGYmFgiRRERERGJjcahrFq1avjvv/8Kbb9y5Qqsra1LpCgiIiIisdH45LF9+/bF0qVLUVBQgGbNmgEAwsLC8OOPP2LUqFElXiARERGRGGgcyr744gukpaVh4cKFyM/PB/DiYuRffPEFxo4dW+IFEhEREYmBxqFMR0cHM2bMwP/+9z9ERUXB0NAQNWrUgIGBQWnUR0RERCQKGs8pU0pKSkJ6ejocHBxgYGCgdoZ/IiIiItKMxqEsNTUVw4cPh6+vL8aMGaNacfl///d/WLZsmcYF7NixA+3atYO7uzv69u2L8PDwt+7//PlzLFy4EC1btoSbmxt8fX1x8uRJjZ+XiIiISJtoHMr8/Pygp6eH0NBQGBoaqrZ37twZp0+f1uixQkJC4OfnhwkTJiAoKAguLi4YPXo0kpOTi9xfKpVi5MiRePLkCVavXo0jR45g8eLFsLGx0fRlEBEREWkVjeeUnT17Fps3b0bVqlXVtteoUQNxcXEaPZa/vz/69euH3r17AwAWLlyI0NBQBAYGYsyYMYX2DwwMRHp6OgICAqCvrw8AsLOz0/QlEBEREWkdjUNZdna2Wg+ZUlpamkaT/aVSKW7duqW2YlNXVxfNmzfH1atXi7zPv//+C09PTyxatEh1kfSuXbviyy+/hEQi0eh1yGSyIrdr+jgfkze95rdhe6hje9CblOf3BqD5+6M8twePHerYHuqKao/itpHGoaxx48YIDg7G119/rdoml8uxadMmeHt7F/txUlNTIZPJYGFhobbdwsICDx48KPI+sbGxOH/+PLp164YNGzYgJiYGCxcuREFBASZOnKjR67hx40ahbUZGRnB1ddXocT4mkZGRyMnJKfb+bA91bA96k/L+3gA0e3+U9/bgsUMd20PdhxxLNQ5lM2bMwIgRI3Dz5k3k5+fjxx9/xP3795Geno5du3a9VxHFpVAoYGFhgcWLF0MikcDNzQ0JCQnYvHmzxqHM3d29XCf1ojg7OwtdglZhe6hje9Db8P3xEttCHdtDXVHtIZPJiuwMep3GoczJyQl///03/vjjD5iYmCA7OxsdO3bE4MGDNbrMUpUqVSCRSApN6k9OToalpWWR97GysoKenp5amKpVqxYSExMhlUo1Gj6VSCSiC2Vie73vwvZQx/agt+H74yW2hTq2h7oPaQ+NQll+fj6++OILLFy4EOPHj3/vJwUAAwMD1K9fH2FhYejQoQOAF8OgYWFhGDJkSJH38fLywl9//QW5XA5d3RcLRx8+fAgrKyuevJaIiIg+ahqdEkNfXx+RkZEl9uQjR47Enj17EBQUhKioKCxYsAA5OTno1asXAGDmzJlYsWKFav+BAwciLS0N3333HaKjoxEaGor169dj8ODBJVYTERERkRA0Hr7s3r079u7di+nTp3/wk3fu3BkpKSlYs2YNEhMTUa9ePWzatEk1fBkfH6/qEQOAatWqYfPmzfDz80P37t1hY2ODYcOG4csvv/zgWoiIiIiEpHEok8lk2LVrF86dOwc3NzcYGRmp3T5nzhyNHm/IkCFvHK7cvn17oW0NGzbEnj17NHoOIiIiIm2ncSi7e/euailrdHS02m06OjolUxURERGRyGgcyorqvSIiIiKiD6NRKAsJCcHx48eRn58PHx8fDBw4sLTqIiIiIhKVYoeynTt3YtGiRXB0dIShoSGOHj2KmJgYzJo1qzTrIyIiIhKFYp8SY8eOHZg4cSL+/vtv7N+/H8uWLSv1M/gTERERiUWxQ1lsbCx69Oih+rlbt24oKCjAs2fPSqMuIiIiIlEpdiiTSqUwNjZ+eUddXejr6yMvL69UCiMiIiISE40m+q9atUrtvGT5+fn49ddfYWZmptqm6XnKiIiIiEiDUNakSZNC5yVr2LAhYmNjVT/zPGVERERE76fYoYznJyMiIiIqPRpdkJyIiIiISgdDGREREZEWYCgjIiIi0gIMZURERERaQONQFhcXB4VCUWi7QqFAXFxciRRFREREJDYah7L27dsjJSWl0Pa0tDS0b9++RIoiIiIiEhuNQ5lCoSjyfGTZ2dmoUKFCiRRFREREJDbFPk+Zn58fgBcniH39zP4ymQzh4eFwcXEp+QqJiIiIRKDYoez27dsAXvSU3b17F/r6+qrbDAwM4OLiglGjRpV8hUREREQioPEZ/efMmYNvvvkGpqampVYUERERkdhoPKdsxowZbwxkkZGRH1wQERERkRhpHMq6deuG0NDQQts3b96Mvn37lkRNRERERKKjcSgbMWIEJk2ahPnz5yM3NxcJCQkYPnw4Nm3ahBUrVpRGjURERETlXrHnlCl9+eWXaNGiBWbOnInu3bsjPT0dHh4eOHDgAKysrEqjRiIiIqJy770us+Tg4IC6deviyZMnyMzMROfOnRnIiIiIiD6AxqHsypUr6N69Ox49eoQDBw5gwYIFWLx4Mb7++mukp6eXRo1ERERE5Z7GoWz48OHo3Lkzdu/ejdq1a6Nv374IDg5GfHw8unXrVho1EhEREZV7Gs8p27JlC5o2baq2zcHBAbt27cKvv/5aYoURERERiYnGPWXKQPbo0SOcPn0aubm5AF5cfmnChAklWx0RERGRSGjcU5aamoqvv/4aFy5cgI6ODv755x/Y29vj//7v/1C5cmXMmjWrNOokIiIiKtc07inz8/ODnp4eQkNDYWhoqNreuXNnnDp1qkSLIyIiIhILjXvKzp49i82bN6Nq1apq22vUqIG4uLgSK4yIiIhITDTuKcvOzlbrIVNKS0uDgYFBiRRFREREJDYah7LGjRsjODhYbZtcLsemTZvg7e1dUnURERERiYrGw5czZszAiBEjcPPmTeTn5+PHH3/E/fv3kZ6ejl27dpVGjURERETlnsahzMnJCX///Tf++OMPmJiYIDs7Gx07dsTgwYNhbW1dGjUSERERlXsah7K4uDhUq1YN48ePL/K26tWrl0hhRERERGKi8Zyy9u3bIyUlpdD21NRUtG/fvkSKIiIiIhIbjUOZQqGAjo5Ooe3Z2dmoUKFCiRRFREREJDbFHr708/MD8OJySqtWrYKRkZHqNplMhvDwcLi4uJR8hUREREQiUOxQdvv2bQAvesru3r0LfX191W0GBgZwcXHBqFGjSr5CIiIiIhEodijbvn07AGDOnDn45ptvYGpqWmpFEREREYmNxqsvlcOYRERERFRyNA5l2dnZ2LBhA86fP4/k5GTI5XK1248fP15ixRERERGJhcahbO7cubh48SI+//xzWFlZFbkSk4jKP5lcDomuxgu4Pwrl+bURkfbSOJSdOnUK69evR6NGjUqsiB07dmDz5s1
ITEyEi4sLvv32W3h4eLzzfocOHcLUqVPRvn17/PLLLyVWDxG9m0RXF3N370P0syShSylRNa0tsaR/L6HLICIR0jiUVaxYEZUrVy6xAkJCQuDn54eFCxeiQYMG2Lp1K0aPHo0jR47AwsLijfd7/Pgxvv/+ezRu3LjEaiEizUQ/S0Jk3FOhyyAiKhc07p+fPHkyVq9ejZycnBIpwN/fH/369UPv3r1Rp04dLFy4EIaGhggMDHzjfWQyGaZPn45JkybB3t6+ROogIiIiEpLGPWX+/v6IiYlB8+bNYWdnBz099YcICgoq9mNJpVLcunULY8eOVW3T1dVF8+bNcfXq1Tfeb926dbCwsEDfvn1x5coVTV8CgBfBrigSieS9Hu9j8KbX/DZsD3Vsj5fKc1sAbI/XsT1e4rFDHdtDXVHtUdw20jiUdejQQdO7vFFqaipkMlmhYUoLCws8ePCgyPtcvnwZe/fuRXBw8Ac9940bNwptMzIygqur6wc9rjaLjIzUqIeT7aGO7fFSeW8LgO3xOrbHSzx2qGN7qNO0PV6lcSibOHHiez1RScjMzMTMmTOxePFimJubf9Bjubu7l+ukXhRnZ2ehS9AqbA91bA91bA91bI+X2Bbq2B7qimoPmUxWZGfQ6zQOZUo3b95EVFQUAKBu3brvlXqrVKkCiUSC5ORkte3JycmwtLQstH9sbCyePHmC8ePHq7Ypz5Pm6uqKI0eOwMHBoVjPLZFIRBfKxPZ634XtoY7toY7toY7t8RLbQh3bQ92HtIfGoSw5ORlTpkzBxYsXUbFiRQDA8+fP4e3tjZ9++kmjHiwDAwPUr18fYWFhqmFRuVyOsLAwDBkypND+tWrVwsGDB9W2rVq1CllZWfjmm29QtWpVTV8OERERkVbQOJQtXrwYWVlZOHToEGrXrg0AuH//PmbNmoUlS5Zg5cqVGj3eyJEjMWvWLLi5ucHDwwNbt25FTk4OevV6cZ6gmTNnwsbGBtOmTUOFChXg5OSkdn9lMHx9OxEREdHHRONQdvr0afj7+6sCGQDUqVMH8+fPx6hRozQuoHPnzkhJScGaNWuQmJiIevXqYdOmTarhy/j4eOjyzNpERERUzmkcyuRyOfT19Qs/kJ5eoetgFteQIUOKHK4EgO3bt7/1vsuWLXuv5yQiIiLSJhp3QTVr1gzfffcdEhISVNsSEhLg5+cHHx+fEi2OiIiISCw07imbN28exo8fj/bt26sm1j99+hR169bFjz/+WOIFEhEREYmBxqGsWrVqCAoKwrlz51QneK1duzaaN29e4sURERERicV7nadMR0cHLVq0QIsWLUq6HiIiIiJRKvacsrCwMHTu3BmZmZmFbsvIyECXLl1w+fLlEi2OiIiISCyKHcq2bt2Kfv36wdTUtNBtZmZm6N+/P/z9/Uu0OCIiIiKxKHYoi4yMRKtWrd54e4sWLXDr1q0SKYqIiIhIbIodypKSkqCn9+YpaHp6ekhJSSmRooiIiIjEptihzMbGBvfu3Xvj7ZGRkbCysiqRooiIiIjEptihrE2bNli9ejXy8vIK3Zabm4uff/4Zn3zySYkWR0RERCQWxT4lxvjx4/HPP//A19cXgwcPRs2aNQEADx48wM6dOyGTyTBu3LhSK5SIiIioPCt2KLO0tERAQAAWLFiAlStXQqFQAHhxzrKWLVti3rx5qouIExEREZFmNDp5rK2tLTZu3Ij09HQ8evQIAODo6IhKlSqVSnFEREREYvFeZ/SvVKkSPDw8SroWIiIiItEq9kR/IiIiIio9DGVEREREWoChjIiIiEgLMJQRERERaQGGMiIiIiItwFBGREREpAUYyoiIiIi0AEMZERERkRZgKCMiIiLSAgxlRERERFqAoYyIiIhICzCUEREREWkBhjIiIiIiLcBQRkRERKQFGMqIiIiItABDGREREZEWYCgjIiIi0gIMZURERERagKGMiIiISAswlBERERFpAYYyIiIiIi3AUEZERESkBRjKiIiIiLQAQxkRERGRFmAoIyIiItICDGVEREREWoChjIiIiEgLMJQRERERaQGGMiIiIiItwFBGREREpAUYyoiIiIi0gFaEsh07dqBdu3Zwd3dH3759ER4e/sZ99+zZg0GDBqFJkyZo0qQJRowY8db9iYiIiD4GgoeykJAQ+Pn5YcKECQgKCoKLiwtGjx6N5OTkIve/cOECunTpgm3btiEgIADVqlXDqFGjkJCQUMaVExEREZUcwUOZv78/+vXrh969e6NOnTpYuHAhDA0NERgYWOT+K1aswODBg1GvXj3Url0bS5YsgVwuR1hYWBlXTkRERFRy9IR8cqlUilu3bmHs2LGqbbq6umjevDmuXr1arMfIyclBQUEBKlWqpNFzy2SyIrdLJBKNHudj8qbX/DZsD3Vsj5fKc1sAbI/XsT1e4rFDHdtDXVHtUdw2EjSUpaamQiaTwcLCQm27hYUFHjx4UKzHWL58OaytrdG8eXONnvvGjRuFthkZGcHV1VWjx/mYREZGIicnp9j7sz3UsT1eKu9tAbA9Xsf2eInHDnVsD3WatserBA1lH2rDhg0ICQnBtm3bUKFCBY3u6+7uXq6TelGcnZ2FLkGrsD3UsT3UsT3UsT1eYluoY3uoK6o9ZDJZkZ1BrxM0lFWpUgUSiaTQpP7k5GRYWlq+9b6bN2/Ghg0b4O/vDxcXF42fWyKRiC6Uie31vgvbQx3bQx3bQx3b4yW2hTq2h7oPaQ9BJ/obGBigfv36apP0lZP2GzZs+Mb7bdy4Eb/88gs2bdoEd3f3siiViIiIqFQJPnw5cuRIzJo1C25ubvDw8MDWrVuRk5ODXr16AQBmzpwJGxsbTJs2DcCLIcs1a9ZgxYoVsLW1RWJiIgDA2NgYJiYmgr0OIiIiog8heCjr3LkzUlJSsGbNGiQmJqJevXrYtGmTavgyPj4eurovO/QCAgKQn5+Pr776Su1xJk6ciEmTJpVp7UREREQlRfBQBgBDhgzBkCFDirxt+/btaj//+++/ZVESERERUZkS/OSxRERERMRQRkRERKQVGMqIiIiItABDGREREZEWYCgjIiIi0gIMZURERERagKGMiIiISAswlBERERFpAYYyIiIiIi3AUEZERESkBRjKiIiIiLQAQxkRERGRFmAoIyIiItICDGVEREREWoChjIiIiEgLMJQRERERaQGGMiIiIiItwFBGREREpAUYyoiIiIi0AEMZERERkRZgKCMiIiLSAgxlRERERFqAoYyIiIhICzCUEREREWkBhjIiIiIiLcBQRkRERKQFGMqIiIiItABDGREREZEWYCgjIiIi0gIMZURERERagKGMiIiISAswlBERERFpAYYyIiIiIi3AUEZERESkBRjKiIiIiLQAQxkRERGRFmAoIyIiItICDGVEREREWoChjIiIiEgLMJQRERERaQGGMiIiIiItwFBGREREpAUYyoiIiIi0AEMZERERkRZgKCMiIiLSAloRynbs2IF27drB3d0dffv2RXh4+Fv3P3z4MDp16gR3d3d069YNJ0+eLKNKiYiIiEqH4KEsJC
QEfn5+mDBhAoKCguDi4oLRo0cjOTm5yP3/++8/TJs2DX369EFwcDDat2+PCRMm4O7du2VcOREREVHJETyU+fv7o1+/fujduzfq1KmDhQsXwtDQEIGBgUXuv23bNrRq1QpffPEFateuja+//hqurq74448/yrhyIiIiopKjJ+STS6VS3Lp1C2PHjlVt09XVRfPmzXH16tUi73Pt2jWMGDFCbVvLli1x7NixYj2nQqFQPbdEIil0u0QiQY261aBvUPi2j5WtozVkMhlkMpnG95VIJKjpaA19PcHze4mxs7X8oPaobWtVrtrDwcbivdpDIpGgro0VDCTlpy0AwNHy/dvDycIKBrrlqz1qVHn/9qhZ0Qb6OuXnWGpn9mHHjqpGtpCg/LSHlVHVD2qPioY1oaPQL4XKhGFmaPfG9lBuU2aQNxE0lKWmpkImk8HCwkJtu4WFBR48eFDkfZKSkmBpaVlo/6SkpGI9p1wuBwDcvn37jfu06VYPQL1iPd7H4tq1a+9937YtHAA4lFgt2uBD2sPXwx6AfYnVog3etz261XYAapev9wbw/u3Rw6oGYFWjJEvRCu/bHu0MagMWtUu2GIF9yLGjNtxRW8e95IoRWu6HtYc+OsK85KoRXsa720OZQd5E0FAmBD09Pbi7u0NXVxc6OjpCl0NERETlnEKhgFwuh57e22OXoKGsSpUqkEgkhSb1JycnF+oNU7K0tCzUK/a2/V+nq6sLAwOD9yuYiIiIqJQIOvnBwMAA9evXR1hYmGqbXC5HWFgYGjZsWOR9PD09cf78ebVt586dg6enZ2mWSkRERFSqBJ+ROnLkSOzZswdBQUGIiorCggULkJOTg169egEAZs6ciRUrVqj2HzZsGE6fPo0tW7YgKioKP//8M27evIkhQ4YI9RKIiIiIPpjgc8o6d+6MlJQUrFmzBomJiahXrx42bdqkGo6Mj4+H7iurmby8vLB8+XKsWrUKK1euRI0aNbBu3To4OTkJ9RKIiIiIPpiO4l3rM4mIiIio1Ak+fElEREREDGVEREREWoGhjIiIiEgLMJQRERERaQGGMiIiIiItwFBWhgoKCoQugYiIqFz7mE8qwVBWBpRvkJCQEKSlpQlbjJaQyWTIzc0VugytERsbi/379wtdhuCUn5WUlBRERkYKXI3wlO2RlZUlcCXaJTs7W+gSSMsoL/Sdl5cHHR2djzaYMZSVAR0dHeTn5+P27duoXLmy0OUISvlBuX79Oo4fPy5wNdrj8ePHMDY2FroMweno6AAAwsPDceLECQAvD7ZipGyPkSNHIiUlReBqtMeePXtE/6VOeSw9ffo0Nm7cKHA1wlN+VqZNm4Zz586pfv7Y8OSxZUChUHy0b5DScubMGZiZmaFBgwaibx+5XA5dXV3Rt8OrpFIp5HI5DA0NhS5FKyQlJamuciJ2GRkZ2LVrF8aMGSN0KVohKioKz549g4+PD2QyGSQSidAlCSolJQWVKlX6aNuBoayUKf/gPnjwAPb29tDX1xe6JNIyBQUFSE5Oho2NjdClaA3l54ag+kPLNuEXXHq78vAZ+bir/wgo3yA7duxAUlKSwNUI69X5QhEREQJXoz2uX7+OX375RegyBKd8f0RGRmLXrl0CVyM8ZXscOnSo0DWAxUgZyM6fP4+cnByhy9EKWVlZCAsLE7oMwSmnOFy8eBF3794VuJoPI+5PeRmRy+Xo27cvqlWrJnQpgnp1vlBoaCgAcc8XUnJ1dcXkyZOFLkNwyveHTCZTDdWJ+f2hbI+YmBhYWFgIXI3wlO1x7Ngx0QdUpVu3buHBgwcAPu4Vhx9K+X7477//Pvr3BocvqcyJeb4Qh1+ouPheKaygoADx8fGwt7cXuhStkJWVhdzcXFhYWPD9Uk583JFSSylzrkwmAwDs2rUL8fHxQpZU5t6W9fX09EQZyAAUOmimpqZi7969AlWjfaRSKU6ePCl0GYKTy+XQ0dFBYmIiAHH3grxKT0+PgewVJiYmql5UsQYy5WcjOTkZmZmZAlfz4RjKSoHyw6Fc/RETE4MKFSoIWVKZUX5AXh2KksvlqmEoMc8XCgsLQ2xsrNq28nIgKSkPHjzA/fv3AYg7iCiHYJYvX46IiAjR/sFVfrFVvheWLl2KZ8+eCVmS4JTH0gcPHmDBggXCFiOQoo4NISEhOHv2rADVlCyGshLw6hvk0aNHCA0NxdOnT1XbZs2aBXNzcyFKK3M6Ojr4+++/8ejRIwAvgqmurq7qj4xUKhXdfKGMjAwAwO+//67qIVT+salSpQpGjBghVGmCUygUSEtLU03crlWrFnr06CFsUVpk0qRJcHFxEboMwSi/2Oro6EAul8PW1hbW1tYCV1V2Ll26hNzcXGRmZqqOl8pjqYmJCTp37gxAPMdSJR0dHdy9exe5ubmqLyytWrVCy5YtBa7sw+kJXUB5UFBQAH19faxatQp3796FQqFAZmYmKlasiKpVq6J///5wcnISuswyIZPJsGfPHsTExMDY2Bje3t5o1KgRvL29UblyZbi7u8Pd3R0APvoJmcWRm5uLEydO4OLFi7h37x5iY2Oho6OjCqaLFy/G4sWLYWZmJnClZaugoAA//PADwsLC4Obmhlq1aqFOnTqws7ND3bp1AYh3OOZVdnZ2QpdQppSnNEhJScGff/6J69evY9y4cfDw8ICuri6GDx9eLk57UFyhoaFo0qQJ1qxZAxMTEzg6OsLOzg7VqlWDjY2N6jQ6YmgP5e89KioKx48fx8mTJ1G7dm0sWrQI8fHxqF69OgwMDIQu84MxlJUA5bnHjh8/jlmzZsHW1hbJycmIjo7GlStXRHXNS4lEgs2bNyMiIgKbN2/GyZMnER4eju+//x62trbw8vLClClThC6zzMjlclhbWyM+Ph4ymQzBwcEoKCiAjY0NcnJy8PDhQ1EFMuWBNTQ0FHfu3MG3336LGzdu4Pbt2zhx4gSqVq2KlStXCl2mVhDzxO05c+agffv2uHLlClJTUwG86Gn+5JNP4OjoKHB1ZWfUqFHIz89HTk4O4uPjcePGDRgZGcHCwgK2trYYMGCAaN4jyhGp3bt3w9LSEg4ODqpe01OnTsHExARdu3YVssQSwVD2gRISEvDo0SNYWFjA1dVV1X1as2ZNeHh4oF27dqJazq480eXx48dha2uLKVOmwNzcHP/88w9+//13tUUQH+sZlzVhbGyMZs2aoVKlSkhJSYGenh7u3LmDhIQE6Onp4X//+5/QJZYp5Tf6hIQEdOrUCU2bNkXTpk0BAGlpaaphfzH1hkilUhw7dgydOnVSe81i+WP7Kl1dXeTl5SE5ORn9+vXDvn370KBBAwDAgQMH0K5dO4ErLFvKvx2zZs1Cfn4+oqKiEBERgRs3buDZs2eqazyK4b2i/GyEh4cjICAA06ZNQ6NGjQAAJ0+ehK+vr5DllRiGsg908+ZNbN68GZaWloiOjsbSpUvRs2dP1K1bFwYGBqIKZMDLPyQXL17EtGnTUL16dQBA9+7dcfv2bXh7ewMQR3f7q+rVq6f6v7e3N7KysqCvr18uutvfR+vWrfH777/j7
NmzqF27NszNzVG5cmVUrlwZCoVCFO+P3NxcGBoa4saNGwgNDUXnzp1Vf2CzsrKQlpYGW1tbocssc7du3YKLiwtSU1NhaGiIypUrIy4uDgDg4OAgcHVlT/lFVl9fHy4uLnBxcUGPHj0glUoFrqxsKT8X9vb2uHLlCqKiouDj4wMAePLkSbmYTwYwlH0wd3d3TJ06FfHx8bh37x7u3r2LNWvWQF9fH6amphg+fDicnZ2FLrPMKK/h6OXlhbVr12L8+PFwdHSEubk5zpw5gz59+ghdomBenahrYmIicDXCSUtLw+LFi3Hz5k08e/YMtra2cHBwQNWqVdGmTRtR9KACL65ZuH//fpw7dw5OTk5ITEyEmZkZDA0NcfHiRVy9ehVTp04Vuswy5+TkBCMjIwwePBju7u6QSqU4cOCAal6uWHrZlV79kqJQKFQ/K7/QiaGXDHjx2k1MTNCzZ0+sX78eZmZmuHDhAvbv348aNWqUmw4QhrIPZG1tDWtra5w8eRJjxoxBcnIy4uLi8OTJE0RGRoryj6+Ojg5GjBiBn3/+GUFBQSgoKMCtW7fg5eWFOnXqqPYRGzH0/ryNckjy+PHjMDQ0xNGjRxEWFobbt2/j3LlzsLCwENXwlKWlJby9vbFnzx5Ur14dCxYsgEQigb29Pc6ePYvRo0cLXaIgTE1NMXDgQOTm5uL58+fo1KkT2rVrh3HjxgEQ37FDedyQy+WiGaosivJ1u7m5oU2bNjh06BC2bNmCZs2a4dNPPxW4upLDM/p/AKlUCj09PYSFhcHf3x+bNm1S3ZaXl4c7d+7A09NTuAIFlpCQgLCwMMhkMtStWxe1a9eGiYmJKA8seXl5SEpKgo2NDfT0xPldSBnKfvvtN1StWrXQqS+Sk5NhYWEhqvlkcrkc4eHhsLCwQExMDGJjY1XnNRw9ejRMTU2FLrHMFRQUQE9PDxEREUhOToaLiwtMTU1Fc65H4GVv4IkTJ6BQKODj4wMjIyO1fcR4HH3d8+fPUbFiRaHLKFHi/OtQQh48eICzZ8/i9OnTsLa2RlpaGvLy8mBjY4Pz58/j+vXrog5lNjY2RZ5zSmwHkoCAABw+fBgmJiZYvny56o+O2MKZMmglJSXhn3/+QUFBAVxdXVG1alWYm5urLhUjhkCm/KN7+fJlJCcnw9PTU9RnqlcG8djYWOzcuROHDh2Cj48P3Nzc0KBBA1EFMuDl+dnOnTuHgIAA1YhM69at0apVK7i5uYnuOKqkPBm5np5euQtkAE8e+0HMzc1hYGCA6Oho3L17F3PnzsWmTZsQHByM3377rVy+YTShnP8gRsrXffHiRYSGhqJ37954+vQpjI2Nce/ePaxYsULgCoWRlZUFY2NjODs74/Tp09i+fTu2bNmCnTt3ivKbf2BgoOqKDuvWrUODBg2wcuVK5OXlCVxZ2VJ+XpYtWwYDAwOsXr0aTZo0QXBwMObPn4/8/HyBKyx7ubm5yMvLw7Rp0/D999+jS5cu2L17N2bOnImZM2eqTkotNrq6uuX6C235fWVlwNraGkOHDkW1atXg5eWFq1ev4vLlyzh27Bh8fX3Rq1cvoUssM8o/qElJSahYsSIMDAwgl8tFNSH3VcrXfv78eXTo0AGVKlWCq6srgBc9rDExMQJXKAwTExNMnToVBQUFuH//vmp5f2JioqiW9ys/F5GRkfj+++9x7tw5PHr0CBs3bsSWLVsQHx+PGjVqCFtkGXr1ODFo0CDY2NigYcOG6NOnDwYMGIDIyEi4ubkJWGHZefVcfs+ePcOiRYtQUFAALy8vGBsb4+HDh0hLS8OePXtEN+9QDNNAyuerKiPKD0+HDh0AAO3bt0f79u0BiG+FkNK+ffsglUoxceJEUb5+JeVrz8nJQb169fDHH3+oVp5euXIFLVq0ELI8wSh7RPT09ES9vB94MedSX18fe/fuxZEjRzBixAg0bdoUCxcuVJ1KRgyUQTwuLg4KhQLh4eHo2LEjgBd/hDMzM0UVUJVfSrKzs2FgYICMjAyYmpqqtuvo6MDX1xd//vmnkGWWObFMAylfr6aM6erqqoKZcqhOoVBAIpGILpAoDxhNmzbFsmXLcPnyZUyePBkNGzYUuDJhDRo0CNOmTcPNmzfh4+ODR48e4d69exg7dqzQpQmCy/tfkMvlsLGxwZAhQ3D8+HHUrVsXLVu2xJkzZ2BhYSGq89cpf+cPHz5EWFgYTp06BXd3d7i5uSEvLw8DBgwQ1YIHZXu0bdsWJ06cwHfffYdGjRohNjYWly9fxrRp0xAWFqY6cWp5pgzsr04D+f3332FsbIwbN24gJCQEs2bNErrMEsXVlyVILEMv7yKVSvH7778jOTkZvXr1Up2nTSztowzq169fR2pqKhwcHLB3715cvHgRlSpVwqxZs0RzLdQ3eXV5vxgm9r9K+bqV3/SV8vPzsXv3bhgbG4tq6sOrFAoFIiIicObMGZw6dQrR0dHQ1dVFnTp18NVXX4lu4VRqaioCAgJUvYjNmjVDx44dMWnSJHz77bflfnGIcsRpzZo1qF69OqysrHD06FEsWbIE+/fvxz///IN169YJXWaJYij7ANnZ2Xj27JmoutbfJSsrCyYmJnj8+DEmT56MBw8e4NtvvxXVHxllKPv+++9RrVo1DBs2DDk5OWpL2sUSUAEu738TX19f2NraolmzZmjRogXq168PQLxTH7Kzs5GQkABdXV1YWVnB2NgYmZmZuHXrFg4dOoTevXurLrkkBo8fP8bz589Rt25d1fWVlRISElQXIxeD77//Hl27dlVNA2nUqBHmzZsHFxcXDBo0SOjyShSHL9+D8o/u4cOHERYWhuXLlyMiIgJ//fUXmjZtitatWwtdYplStseOHTsQFRWFs2fPws3NDZ6enrCyssLWrVvx6NEjjB8/HoaGhkKXW+qUPT9ZWVmoXLkyAIg2kAFc3l8UuVyOH3/8EeHh4bh06RICAwMBAI0aNcLSpUsFrq7sKI8dDx8+xJYtW3D48GHV2dl79OiBTp06wdvbW3V5tvJOGch/+eUXREREICsrC3l5ebC3t4e9vT169OiB6tWrw8bGRlTHETFNA2Eoew/KzsVTp06hd+/eSE9Px44dO3D37l2Eh4ejSpUqcHd3F7jKsqMMIQ8fPkTDhg0xePBgpKamws3NDYaGhkhPT8eIESPQuXNn0VxyKjMzEzdv3kRgYCAOHTqEVq1awcfHB7Vr1xbNgfRVry7vd3NzQ0REBDZt2oSDBw/Czc0N3377LczMzIQus0wo59F5eHjA3d0dAwcOxNWrV7Ft2zbVFS/EQnks3blzJ3R1dXHp0iU8ffoUwcHB+OOPP1C/fv1yP0T3KuUXmIMHD2L69OmwtbVFUlIS7t+/j8uXL6Ndu3aoXr26KE6w/Po0kGXLlmHv3r3Ytm0bKlWqhIULF5bLa8MylL0H5Qfn8ePHqFmzJgIDA2Fra4vFixdj/PjxyM7OFrhCYXzzzTfIz88v1NVeqVIlVK9eHTVr1hSosrKnr6+PFStW4PHjx7h27RouXLiAP/74A/Xq1cNPP/0kdHllhsv71Sl7
NxITE2FlZQUdHR1IJBI0btwYx44dE10oUwYLIyMj+Pr6AgCqVq2KcePGITw8HOfPn4e9vb0oQojyvREbGwtPT0/VSn4A8PLywmeffaYasizvbfGqI0eOoFq1amjbti0mTZpU7kcdGMrek1QqRdu2bTFjxgykpqZi165dyM7ORlxcHBo3bix0eWVG2d1+9epVXLhwAXFxcTA1NYWzs7PqX0FBAebOnSuaFWUKhQIVKlSAvb09atasiVatWkEmk+Hhw4eiOwkml/er09HRQV5eHkaOHIlKlSqhfv36aNasGerXr48TJ06gf//+QpdYpnR0dJCTk4MTJ06ormwgkUiQnZ0NmUwmmmFL4GXAiIyMxJUrV7BhwwZ07twZdnZ2MDY2hrGxsdAllimxTgPhRH8NKSeyK129ehUVK1ZE7dq1cezYMQQFBZW71SBvo/wGqzyJrrOzMzIyMpCUlIS4uDiMGTMGzZo1E7rMMnf58mVcvHgRqampsLW1RZMmTVCjRg1RXqAeAFJSUjB//nyYmJgUuby/UqVKGDp0qNBllgmpVIqIiAhERUXh2rVruHXrFp4+fYo+ffrg66+/Frq8MpeYmIjFixfjxo0bSE5Ohr29PYyMjODp6Ym5c+cKXV6ZCw0NxaFDh/Ds2TMYGhrCzMwMVlZWGDRokKiGcoEX00CGDRuGyMhItGzZUm0aSHnFUKahMWPGYOnSpfjrr79gb2+PVq1aqXqAkpOTkZmZCUdHR4GrLFs5OTkYM2YMtm/fDuDFH+CEhAQ8fPgQrVq1gqmpabn9VvMq5Wu8e/cuFi5cCBcXF5w5cwYeHh4IDw9HjRo18Ntvv5X7dngTsS/vf9XrnwepVKrqYRWziIgInD59GleuXMH9+/eRkJCAMWPGYNKkSUKXVmaUow8pKSmIiIjAvXv3cOfOHUyaNKlczqF6m7y8PMTFxammgdy9exf37t0r19NAGMo0lJaWhsqVK2PevHk4f/480tLSYGtri7Zt26Jt27aiWrKt/MNy584drFq1CtOnT0fdunWFLkswyoPphg0bYGRkhDp16uDPP//EjBkzsGTJErRu3Vp0w1NKXN7/0qlTp3Ds2DGkpqbC09NTNXwpZrdu3YKNjQ0sLS1V27Kzs3Ht2jWYmprCw8NDwOrKjlwuR1RUFIKDg1GjRg20b98e5ubmovuMAEWfz+/VaSAuLi4CV1g6GMo+gEKhQExMDC5evIh///0XDx8+xOHDh4Uuq8woPzQhISH49ddfYWxsjGbNmsHOzg6Ojo5wc3MT1TwIZXuMGTMGEydOxKFDh1CvXj306NEDP/30EywtLUUzRAcUf3k/UH7nh7wuPj4e/fr1w5gxY1BQUIDr168jJiYGOTk52LdvX6Hzt5V3aWlp2LJlCx4+fIj79+/D3NwcTZs2hYeHB9q2bSt0eWVG+VnZtm0bwsLCULlyZdy7dw9PnjzBzJkz0bNnT9F8Rl4lxmkgnOj/AXR0dODo6AhHR0f07dsXBQUFQpdUppQHCHd3d3z77beIjo7Go0ePcO3aNRw9ehSTJ08WVQ+Asj169OgBe3t7WFlZ4d9//4WpqSn+/fdffP/99wJXWLa4vP8l5R/U69evo2PHjhg6dCiys7PRq1cvpKSkIDk5WVSBTPk7P3r0KJKSkuDl5YWcnBw0bdpUFdLatm0rmiCifP//888/mDhxomoe7pkzZ7B79240atQIDg4OQpZYZl6dBvLTTz+pTQPZtWtXuZ8GwlBWgsrbhVGLS9nz0bRpUxQUFCA+Ph73798X7VBm586dAQD9+/dHXFwcDh48CG9vb7i6ugpcWdnh8n51yj8gjo6OiI6OBgBVL3KlSpVEdboY4OX5yc6fP4+RI0fi9OnT8Pb2xhdffIHExES0aNFCtV95/eP7Kh0dHWRlZSErK0ttYVTLli2xcuVKUc01lMvlkEgkCA0NRadOnVCnTh2kpqZi6tSpqmkg5fk9Ic4UQR9M+U33yZMnOHbsGGJjY2FhYYE6deqgTp068Pb2Fs0pMN7EzMwM48ePR35+vmqYTiy4vL9oN2/exK5du7Br1y74+PigVatWaNasmdpcKjFQ9qJmZGTAysoK9+/fR9++fQEA9+/fxyeffAJAPBeol8lk0NPTQ6NGjTBkyBCMHTsWjo6OSE5ORkFBgajmkym/pF2+fFk1DaR169aoVq0a6tSpA6lUKnCFpYtzyjR04sQJREdHY9SoUUKXIijlHIjZs2ejatWqOHToEKpWrYqUlBTo6elh6tSpaNOmjdBlksC4vP9lQL19+zbGjBmDVatWITMzE5cvX8Z///2HhIQEHD16VBQ9hq978uQJzM3NceTIEaxduxb169dHdHQ0goODRXn9z+TkZGzZsgUpKSm4c+cOzMzMMHr0aLRt21Z010QNCQmBj48PAgMDER4eju7du2P16tX4/vvvy/WoA3vKiunRo0ewtrbGiRMnVCuBlKtCLl68CFNT03L9Rnmd8uAQGRmJZcuWITw8HAsWLMCzZ8+watUq2NnZARDP8AMVrVWrVmjbtm2h5f1iDCCJiYkYNWoUGjdujPz8fLRu3Ro5OTnIzMwUZXsAUJ3ioWfPnjA0NMSTJ08wcOBASCQS0Rw7Zs+eDXd3d/j4+KBWrVqYMWMGoqKiYGRkBGNjY9WJU8UUyADxTgNhKCumq1evYuHChcjNzcWTJ09gbGyMJk2awMrKCmvWrMHAgQPL/Zvldffv30elSpWQmZmJ5ORkODg4wMHBARUqVECNGjUAiGf4AWAv6uvkcjkePHigtry/efPmolver5wjc/ToUTx//hxPnz5F1apVAQAmJibleiVZUZRh6+HDhwgKCoK+vj4+/fRTfPbZZ4X2Ke8yMzNhZWWFCxcuIDAwEAYGBnBzc0OLFi3g7u6uCmRiJrZpIBy+1EB6ejqmTZuGGjVq4N9//0VcXBwcHR1RvXp1rF27VnQH16SkJERHR6NmzZpYtmwZPDw8kJ6errrOoxhW1QEve1H9/Pzg4eGBPn36iLoXlcv7i+bn54d///0Xz549Q61atdCqVSu0bt1aVJdlUyooKEDPnj3h7e2N+/fv49q1a5BIJHBwcMCWLVtQpUoVoUssM1KpFFlZWUhMTMT9+/cRHh6Oe/fuITY2Fk2bNsWSJUuELpHKEENZMSkvtJ2Xl6daCZOXl4eLFy/C2toazs7OAlcorNDQUGzYsAFOTk5o37696nqPYuhyDw4OVvWiNm/eHL1791b1og4ZMgQDBw5Ely5dhC6zzCgD15AhQ4pc3j9jxgzRLO8vyqNHj3Dx4kWcOHECDx8+REhIiNAllblbt27hxx9/xO+//67advv2bZw+fRpjx44VrjCBKRQKSKVSPH/+HDExMTAwMIC7u7tojqXEUFYsyj8yeXl5CAkJQXZ2Nry9vVGnTh2hSxNURkYGzpw5AxMTEzRu3BgGBgaiPS0Ie1HVZWVlYciQIQgKClLb3qtXL/z666+iGr58m1fPVi4GW7duhbGxMUxMTJCYmIjhw4cLXZJWEMuoQnGIfRqIeI4GH0A5zDJp0iQ4OTlh+/b
tMDIygpmZGezt7fHjjz/CwsJC4CrLhvLgcf36dSxduhS2trbIz8/H1q1bYWlpCTc3N1GdtR540YtaqVIlrFu3DhUqVMDcuXPVelHFFsi4vL/4xBTIACA3Nxd79+5Famoq0tLScPnyZfTp0wfNmjUT1bm4XsdAxsV0SuI6IrwHZS/ZzZs3IZfL0b9/f/z3339Ys2YNZs+ejQoVKogmkAEvT/p47NgxNG3aFCNGjEB8fDyePn2qtqpOLN/8FAqFalj79V7UVq1aCV2eICQSCSQSCcaPH48tW7YgJCREtbx/6tSpAMDhGJEaO3Ysxo4di8TERFy7dg3//vsv/Pz8kJqaisqVKyMwMBCmpqZCl1kmlMfIu3fvwsTEBLa2tpDL5QDEGdK4mO4FDl++g/KDo7zItKOjI/bv348VK1bg8OHDiImJEeUciNWrVxe6AHtGRgZ0dXVF1zMEAGPGjBF9LypQeHk/gCKX94uJ2IdjiuPhw4e4efMmunbtKnQpZW7Dhg1ITEzEiBEjVKcIEStOA2FP2Tspv7EYGhrCzc0NcXFxMDMzA/DiEiFWVlZClicIhUKBNm3aYO7cuejevTuaN2+O+vXrq9pFLNiLqo7L+9VxOKb4atSooTqNjth06tQJgYGBGDx4MBo1aoThw4fDzc1NdL1lnAbyAnvKNCCVSpGamorJkyfj/v37cHV1xXfffSeaM5Mrew2VZ992c3ODjo4Onj59itzcXDRq1AjTp08Xuswyw17Uwri8/yWuyqU3eXXxWFZWFipWrIiMjAysXr0ad+/exYABA9C9e3ehyywzXEz3EnvKNGBgYAAbGxsEBATg3r17qFy5sih7yq5fv46BAwdi8ODBiIuLQ2pqKu7fv6/6JiOW+ULsRS3MwMAABgYGqFKlCurWrYv27durLe8HxPP+6NGjBz755BPVcMzy5cvVhmPatm0rdIllRvlHNz09HfHx8XBxcRG6JEEpTyj866+/4u+//4aenh7MzMzg4OCAx48fIzQ0FN27dxfNZ4WL6V5iKHsL5QfiwoUL2LJlC6KiotCiRQu0bdsWHh4esLCwENVJMJUhRCqVqs7LVr16dVSvXh0uLi6qRQBiOIi8atiwYZBKpahevTq2b9+Oxo0bw9XVFV988YXQpQlC2YOoo6ODChUqwMrKSi2giuX9weGYl169qsGVK1fg5+cHqVQKAwMD5OXlQU9PTzTvC+DlZ8DW1hZVq1ZF586d0bRpUxgaGmLRokWqvyliGMLkNBB1HL58C+X8j88++wxjx46FkZERTp06hWvXriEqKgrr168X3UW309LSMGDAACQkJKBTp05o2bIlmjZtKspeoTcRcy8qvcDhGHVHjhzB4cOH8fDhQ/Tt2xdDhgxR3bZmzRpUrlwZw4YNE7BC4cTFxWHLli149uwZOnbsiObNm4sqhHAaiDr2lL2F8hxCjo6O6NGjBwDA19cXAJCQkCC6icvAi6G6lStXIjo6Gjdv3kRQUBBWr14tuvlC7EVVx+X96jgco87b2xsxMTE4d+4c/Pz88PPPP6Nx48b47LPPcObMGUyZMgWAOE6lo3yNN2/exN9//42GDRvC29sbgYGBmD17NgYOHIi5c+cKXWaZ4TQQdQxlb7Bjxw6sX78enTp1gqWlJS5cuABvb2/V7WI9AaahoSFcXV3h6uqKDh06iHa+kLKDecGCBWq9qMuXLxdlL6rywBoaGir65f0cjimsSpUqGDNmDGxtbdGlSxeEhYXhyJEj2Lx5Mz799FP4+PgAEEeAV77GpKQkPHnyBLdu3YKtrS2aNm2KSpUqwdraGoD4rvbAaSAvcPjyDTIzMxEUFITQ0FBcv34dOTk5aNasmeq6jmJZcfk6MXyT1cS4cePw22+/qW1T9qKK8QzlMTExCAwMxP79+0W7vJ/DMYUpv6xlZWUVmksnph7lt1FeX5leEOs0EIayN/jzzz/Rt29f1c+3b9/GkSNHcOrUKURERMDf31/17Y7E5dVe1OzsbHTr1k2tF1VsuLy/aNu2bYO7uzvi4uJw6dIlLFiwAPPnz4eVlRUmTpwodHllRhlSDx06hPPnz+P8+fOoWrUq2rZti5YtW8LZ2Vl0X/akUilOnTqFo0ePwszMDI6OjujVq5eoFn8AnAZSFIayIiQkJGDmzJkwNDREjx490LFjx0LdyGJ6o3C+kDr2oqpTHlhXrVpVaHn/uXPn0LhxY6xcuVI0Q9uvEvu5DZWkUil69uyJ+fPnY968eejUqRMCAgKQlpaGv//+G46OjkKXWCaUn4E9e/bgxIkTqFGjBqpXr47Tp0+jUqVKWLRoEYyMjIQus8xwMV1hDGVvEB0djT///BMnTpyAm5sbunXrhpYtW4oyhCjxciAvsBe1aH/++SdCQkLUlvdXqVIFOjo60NfXF9UXmaKIcThG+YXuzJkzOHjwIGbMmIGJEyciICAAAQEByM/Px9ChQ4Uus8wo22P8+PEYOXIkmjZtCuDFqvZvv/0WgwYNEuWxg9NAXhJvwniHmjVrYubMmdi+fTuqV6+OHTt24LfffsOTJ0+ELk0wnTp1grGxMQYPHoxp06YhPDxc1WMmFgkJCfjrr78wduxYHD58GAUFBXB1dcXUqVMRHByMiIgINGvWTOgyBdG3b1989913iIyMxIoVK3Dx4kVkZGSo5smIIZDJZDIAwIULFzB27Fh06NAB8+fPx4kTJ2Bubg4rKyuI6Xuw8nceGRkJb29v/Pfff3BycgLwYvJ/eHg4AIjmOKL8Ul9QUIDExEQAL0ZdKleujNTUVNWCKTG8R3bs2IHWrVtj6dKlqsV0r7KxsRFdIAPYU1aITCbDtWvXkJCQgJycHLi5uSE3NxeBgYHYs2cPzMzMEBwcLJqeIs4XKoy9qC8VtbxfJpMhMDAQp0+fFt3yfg7HFO3u3bswNDREbm4uZs+eDWdnZzx+/Bg9evRA7969RTe0ferUKWzatAmff/456tevj/PnzyMkJAR79uwRurQyw2kgRWMoe83Zs2cxevRo1akw7ty5g1q1aqFu3bqIiorCrVu3sHfvXqHLLDOcL/RmSUlJ2L59OyIiItCgQQN8/vnnognrrwsNDcWBAweQlpYGW1tb1KxZE5GRkahduzbGjBkjuuX9HI55s5MnT+Lw4cNwcXFB//79RTOH6vnz59DT04OxsTHy8/Oxd+9e7NmzBxkZGWjbti06duwIb29v0Sx64DSQojGUvSYzMxN79+7FnTt30KZNG/j6+kImkyEvL091Qjsx4nwh9qJqQozL+7kqV53ymJCdnY2wsDDk5+ejfv36ou0BWbp0KczMzODq6opatWqhZs2aAF70NstkMtEcRwEupnsbhrIi5Obm4sKFCwgMDERGRgY6dOiAzz77DObm5kKXJiixXw6EvahF4/L+Fzgco04ZzL///ntEREQgJiYGOTk5qFq1KmrXro3Ro0eL5sLkMplM1av++PFjpKenw83NDXXr1kW9evVgbW2NmjVriqKHTInTQIrGUPYKmUwGqVSq1p1+9epVXL9+Henp6fDx8VGtlhEDzhdSx15UdVzer4
7DMUXz9fXFjh07YGlpiby8PJw9exZBQUEYNmwYmjRpIprhOqVTp05h5cqV6NWrFy5duoSEhAQ4ODhg+fLlQpcmCE4DUcdQ9oorV65g5cqVSE9Ph62tLezt7fHs2TNcvXoViYmJsLa2xqlTp4Qus8xxvtBL7EV9icv7X+JwjLrHjx/jwYMHcHNzw86dOzFkyBBRXiv4VVKpFAYGBvjpp59QpUoVjBgxAsCLUPL06VO4ubmJIqByGsjbMZS9IiIiAhERETA1NUVUVBQAoGrVqqhQoQJiY2NhZ2eHzz77TOAqhSfG+ULsRX2zL7/8Ej169ECXLl1UwWPIkCGYMmUKGjVqJJowwuGYl44dO4aJEyfCwMAAEokE3t7emDp1qup0GGIjlUqhr68PHR0dDBs2DDNmzIC7u7soF0hxGsjbMZTRW3G+0AvsRX0zLu9Xx+EYqK5xmZCQgBMnTuDw4cO4evUqKlSoADs7OyxcuBAeHh5Cl1kmYmNjMX36dLRr1w516tTB6tWrceDAAaHLEgyngbwdQ1kxiaFb+VWcL6SOvajquLz/JQ7HqMvPz8f06dPh6+uLzp07q90WERGBwMBA9O3bF05OTqJ4f2RkZGD37t24fv067t69i7S0NLRt2xb169dHw4YNUbNmTZiamgpdZpniNJA3YyijInG+EL0Nl/e/xOEYdZmZmfD390dQUBB0dXUxbNgwdOjQAdWrVxe6NK1w584dHDt2DP/99x+uXbuGiRMnYvTo0UKXVSY4DeTdxDE7mzSmyeVAxPCH923E8G3/VTKZDNWrV0dERAQuXLgg+uX9DRo0wOzZs3Hnzh14eXlhzpw5oh6OMTU1xaRJkzBp0iQEBATg0qVLSElJQe/evUV3WpCi1KtXD/Xq1QPwYnpIdnY2AHEcR65du/bWaSCBgYGinQaixJ4yeivOF6K34fL+Fzgc85JUKkVGRobqHIZnz57F5s2bcevWLbRr1w5ff/01bGxsBK6ShMBpIO/GUEaFcL4QvQuX97/A4Rh1ycnJmDp1Kuzs7BATEwM9PT3VCtTdu3fj4cOHOHbsGOzs7IQulUgrMZRRIZwvRG/D5f0vcVWuunv37mHKlClwd3fHiBEjoFAo8OjRIxgbG8PGxgaJiYlo0aKF0GWSFhLDl7jiYCgjNbwcCL0Nl/er43BMYffv31fNJWvbti26d++O2rVrq27nFzqiN2MoozfifCF6HZf3U3ElJCTg+PHjyMjIgIeHB1drExUDQxkVwvlCVFxiXt7/LmL7jKSmpuLAgQOwtLSEvb09srOzERUVhWvXruG///6DhYUFtm3bBkNDQ6FLJdJaPCUGqVHOFwJeTFieMWMGgBfDmpaWlrC0tAQAUf2xoTcT8/L+dxHb679z5w7CwsKQlJSExMRE1K9fH5UrV0ZcXBwyMzNRs2ZNGBoacviS6C0Yykjl9flCaWlpcHd3BwDRTeAmzRkYGKjOXye2QEKAt7c3GjRooLoEW3x8PBQKheqksampqQA4p4zobTh8SSqcL0REJY0hjKj4GMrojThfiIiIqOwwlFGxKOcLVa5cmfOFiIiISgFDGREREZEWYHcHERERkRZgKCMiIiLSAgxlRERERFqAoYyIiIhICzCUEREREWkBhjIi0npDhw7Fd999V2j7vn370LhxY62pp7T9/PPP+Pzzz8v8eYmobDCUERG9QX5+vtAlEJGIMJQRUblx4cIF9OnTB56enmjcuDEGDBiAJ0+eqG4/duwYevbsCXd3d7Rv3x5r165FQUGB6nZnZ2fs3LkT48aNg6enJ3777bdiPW+7du3w22+/Yc6cOWjYsCHatm2L3bt3q25//PgxnJ2dcejQIQwYMADu7u7o2rUrLl68qNqnqF6/Y8eOwdnZWXX72rVrERERAWdnZzg7O2Pfvn3v1U5EpJ0YyoioXCgoKMCECRPQpEkTHDhwALt370b//v1V1128fPkyZs2ahWHDhiEkJASLFi3Cvn37CgWvtWvXomPHjjh48CB69+5d7Of39/eHm5sbgoODMWjQICxYsAAPHjxQ2+eHH37AyJEjERwcDE9PT4wbN051oe536dy5M0aNGoW6devizJkzOHPmDDp37lzs+ohI+zGUEVG5kJmZiYyMDHzyySdwcHBA7dq10bNnT1SvXh3Ai7A1ZswY9OzZE/b29mjRogUmT56MgIAAtcfp2rUrevfuDXt7e9V9i6N169YYPHgwHB0d8eWXX6JKlSq4cOGC2j6DBw+Gr68vateujQULFsDMzAx79+4t1uMbGhrC2NgYEokEVlZWsLKygqGhYbHrIyLtpyd0AUREJaFy5cro1asXRo8ejRYtWsDHxwefffYZrK2tAQARERH477//1HrGZDIZ8vLykJOTAyMjIwCAm5vbez2/cpgRAHR0dGBpaYnk5GS1fRo2bKj6v56eHtzc3Ar1phGReDGUEZHWMzExQWZmZqHtz58/h5mZmepnPz8/DB06FKdPn8bhw4exatUq+Pv7w9PTE9nZ2Zg0aRI+/fTTQo9ToUIF1f+NjY3fq0Y9PfXDqY6ODjS5tLCurm6h/bnQgEhcOHxJRFqvZs2auHXrVqHtt2/fRo0aNdS2ubq6YuzYsQgICICTkxP++usv1fbo6Gg4OjoW+qerWzaHwmvXrqn+X1BQgFu3bqFWrVoAgCpVqiArKwvZ2dmqfSIiItTur6+vD7lcXia1ElHZY08ZEWm9QYMGYceOHViyZAn69OkDAwMDnDx5EocOHcKvv/4KAIiNjcWePXvQrl07WFtbIzo6Gg8fPlSd12vChAkYN24cqlevDl9fX+jq6iIiIgJ3797FlClTyuR17Ny5EzVq1ECtWrWwdetWpKenqxYTNGjQAEZGRli5ciWGDRuG69evF1pdaWtri8ePH+POnTuwsbGBqakpDAwMyqR2Iip9DGVEpPXs7e3xxx9/YNWqVRg5ciTy8/NRq1YtrF69Gq1btwYAGBkZ4cGDBwgKCkJaWhqsra0xePBgDBgwAADQqlUr/Pbbb1i3bh02btwIPT091KpVC3379i2z1zFt2jRs2LABd+7cgaOjI3799VeYm5sDeDEn7scff8QPP/yAP//8Ez4+Ppg0aRK+/fZb1f19fX1x9OhRDBs2DM+fP4efnx969epVZvUTUenSUWgy6YGIiDT2+PFjtG/fHsHBwahXr57Q5RCRluKcMiIiIiItwFBGREREpAU4fElERESkBdhTRkRERKQFGMqIiIiItABDGREREZEWYCgjIiIi0gIMZURERERagKGMiIiISAswlBERERFpAYYyIiIiIi3AUEZERESkBf4f+fWrwvVkZBsAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_metric('context_precision')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8c96efbc-2d1c-424b-a045-4dff88985669", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmUAAAHMCAYAAACUdN+cAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABli0lEQVR4nO3dd1QUZ/828AsWkKpIVRGwAtIUG2Kvj0ajscaGPbE86mOMiZo3JtGoQRM11l+isZdYIkI0liTYC0o0KDZAUAQFkar0hd19//Dsygoqq8CMzPU5x3PCbJnv3pndvfYuM3oqlUoFIiIiIhKUvtAFEBERERFDGREREZEoMJQRERERiQBDGREREZEIMJQRERERiQBDGREREZEIMJQRERERiQBDGREREZEIMJQRERERiQBDGRG9c0aNGoVRo0Zp/n7w4AFcXV1x4MCB1z42Li4O48ePR4sWLeDq6oqQkJCKLFUQc+fOhY+Pj9BlEJGOGMpI8uLj4/H111+jW7du8PLyQvPmzTFs2DBs27YN+fn5FbbfmJgYrFmzBg8ePKiwfaj9/PPPZQ4f6oCj/ufm5obWrVvjo48+Qnh4eAVXWvHmzp2L6OhozJw5E99//z08PT0rbF8vtuWL/zZs2FBh+34bXbt2xaRJk0q97fr162UOwJXhwIEDcHV1xfXr14UuBQCQl5eHNWvW4NKlS0KXQu8gA6ELIBLSqVOnMGPGDBgZGeGDDz6Ai4sLCgsLceXKFfzwww+IiYnBwoULK2TfMTExWLt2LVq3bo26detWyD7U1q9fj549e6J79+5lfsz777+Pjh07QqlUIi4uDr/++itGjx6N/fv3w9XVtQKrrTj5+fkIDw/H5MmT4e/vX2n7Vbfli9zd3SutBqoceXl5WLt2LaZNmwZfX1+hy6F3DEMZSVZCQgJmzpyJOnXqYNu2bbCzs9PcNnLkSNy/fx+nTp0SrkCBubu744MPPtD83aJFC3z88cfYvXs35s+fL1xhbyE9PR0AUL169XJ7ztzcXJiamr7yPi+2JemmLG1MVBVw+JIka+PGjcjNzcXixYu1Apmas7MzxowZo/m7qKgI69atQ/fu3eHp6YmuXbtixYoVkMvlWo9TD/1cvnwZgwcPhpeXF7p164bg4GDNfQ4cOIAZM2YAAEaPHq0Zzio+5HH69GmMGDECzZo1g4+PDyZOnIg7d+5obg8NDYWbmxtWrVqltf9Dhw7B1dUVv/76KwDA1dUVubm5CAoK0uxn7ty5OrdXy5YtATwLs8U9ffoUixcvRqdOneDp6YkePXpgw4YNUCqVWvdTKpXYtm0b+vbtCy8vL7Rp0wYTJkzQGnYKDAzE6NGj4efnB09PT/Tu3VvzOt7WmjVr0KVLFwDA999/D1dXV3Tt2lVz+61bt/DRRx+hefPm8PHxwZgxY3D16lWt51APlYWFhWH+/Pnw8/NDp06dyqW+kJAQTJw4Ee3bt4enpye6d++OdevWQaFQlLjvtWvX8PHHH6NVq1Zo1qwZ+vbti23btpW4X3JyMv773//Cx8cHbdq0wdKlS0t9vreVkpKCL774Ah07doSnpyfat2+PKVOmlBiaf90xDTyfDxcfH4+PP/4YPj4++Oyzz3SqR/0cr3v96uHlTZs2YevWrejSpQu8vb3h7++P6Ohored8cR5j8X2pj6MHDx7Az88PALB27VrN+23NmjU61U/SxZ4ykqyTJ0/C0dERzZs3L9P9582bh6CgIPTs2RPjxo1DREQE1q9fj9jYWKxbt07rvvfv38eMGTMwePBgDBgwAIGBgZg7dy48PDzQuHFjtGrVCqNGjcKOHTswefJkNGjQAADQsGFDAEBwcDDmzp2L9u3b47PPPkNeXh52796NESNGICgoCHXr1oWfnx9GjBiBDRs2oHv37vDw8MDjx4+xaNEitG3bFsOHDwfwLIDMmzcP3t7e+PDDDwEATk5OOrfXw4cPAWj3MuXl5cHf3x/JyckYNmwYateujfDwcKxYsQIpKSn48ssvNff98ssvceDAAXTs2BGDBw+GQqHA5cuXce3aNXh5eQEAdu/ejcaNG6Nr164wMDDAyZMnsWDBAqhUKowcOVLnmovr0aMHLCwsEBAQoBlONDMzAwDcuXMHI0eOhJmZGT766CMYGBhg7969GDVqFHbu3ImmTZtqPdeCBQtgZWWFqVOnIjc397X7zsvL0/TSFVe9enUYGDz7GA4KCoKpqSnGjRsHU1NTXLx4EatXr0Z2djbmzJmjecz58+cxadIk2NnZYfTo0bCxsUFsbCxOnTql9SNCoVBgwoQJ8Pb2xuzZsxEaGorNmzfD0dERI0aMeKM2fJnp06cjJiYG/v7+cHBwQHp6Os6fP4+kpCTN0HxZjmm1oqIiTJgwAS1atMCcOXNgbGysc026vP7g4GDk5ORgxIgRKCgowI4dOzBmzBgcOnQINjY2Zd6nlZUV5s+fj/nz56NHjx7o0aMHALyzw/0kABWRBGVlZalcXFxUU6ZMKdP9b9++rXJxcVF9+eWXWtuXLFmicnFxUYWGhmq2denSReXi4qL6559/NNvS0tJUnp6eqiVLlmi2HT16VOXi4qK6ePGi1nNmZ2erWrZsqZo3b57W9pSUFFWLFi20tufm5qp69Oih6tOnj6qgoEA1ceJEVfPmzVUPHz7UemyzZs1Uc+bMKdNrTUhIULm4uKjWrFmjSktLU6WkpKj++ecf1aBBg1QuLi6qo0ePau67bt06VbNmzVT37t3Teo5ly5apmjRpokpMTFSpVCpVaGioysXFRbVw4cIS+1MqlZr/zsvLK3H7+PHjVd26ddPa5u/vr/L39y9Rc2BgYJle28aNG7W2//e//1V5eHio4uPjNduSk5NVPj4+qpEjR2q2BQYGqlxcXFTDhw9XFRUVvXJfxff3sn/h4eGvfO1fffWVqmnTpqqCggKVSqVSFRUVqbp27arq0qWL6smTJ1r3Ld6Oc+bMUbm4uKjWrl2rdZ/+/furBgwY8Nq6u3Tpopo4cWKpt0VERGi19ZMnT0pt0+J0OabVtS9btuy1dapUz/+fRERElHiO171+9f8fb29v1aNHjzTbr127pnJxcVF99913mm0vHnPF99WlSxfN32lpaSoXFxfV6tWry1Q/UXEcviRJys7OBgBNT8nrnD59GgAwbtw4re3jx4/Xul2tUaNGmuE+4Nkv6Pr165cY+ivNhQsX8PTpU/Tp0wfp6emaf/r6+mjatKnWEKeJiQkCAgIQGxuLkSNH4tSp
U/jiiy9Qp06dMr2uV1mzZg38/PzQrl07jBw5ErGxsZg7dy569eqluc+xY8fQokULVK9eXavWtm3bQqFQ4J9//gEA/PXXX9DT08O0adNK7EdPT0/z38V7RLKyspCeno7WrVsjISEBWVlZb/2aSqNQKHD+/Hl0794djo6Omu12dnZ4//33ceXKFc3xovbhhx9CJpOVeR9Dhw7Fli1bSvxr1KiR5j7FX3t2djbS09PRsmVL5OXl4e7duwCeDbE+ePAAo0ePLjEvrng7qql7S9VatGhR7qt9jY2NYWhoiLCwMDx58qTU++hyTL+s9jdR1tffvXt32Nvba/729vZG06ZNS7yviSoahy9JkszNzQEAOTk5Zbr/w4cPoa+vX2LYz9bWFtWrV9cM7anVrl27xHPUqFHjpV9axcXFxQGA1lBUcera1Vq0aIHhw4dj165daN++PQYPHvzafZTF0KFD0atXLxQUFODixYvYsWNHiflI9+/fR1RUlGYezYvUQ3bx8fGws7ODpaXlK/d55coVrFmzBlevXkVeXp7WbVlZWbCwsHjzF/QS6enpyMvLQ/369Uvc1rBhQyiVSiQlJaFx48aa7bqulnV2dkbbtm1feZ87d+5g5cqVuHjxYokQqA6k6lDv4uLy2n1Wq1YNVlZWWtvKegyWhToEGhkZ4bPPPsPSpUvRrl07NG3aFJ07d0b//v1ha2sLQPdj2sDAALVq1Xqr+nR5/c7OziW21atXD0ePHn2rGoh0xVBGkmRubg47O7sSk4xfp7TeiNLo0ovyIpVKBeDZXDD1l9qrnlsulyMsLAzAsy/tvLw8mJiYvPH+1YoHiS5dukBfXx/Lly+Hr6+vZg6YUqlEu3bt8NFHH5X6HPXq1Svz/uLj4zF27Fg0aNAAc+fORe3atWFoaIjTp09j69atJRYOCKlatWrl+nxPnz6Fv78/zM3N8b///Q9OTk6oVq0abt68iWXLlr3Ra3+bY9DIyOil5+hTby/eBmPHjkXXrl0REhKCc+fOYdWqVdiwYQO2bdsGd3d3nY9pIyMj6Ou/3UDO27x+XVTEwgmSLoYykqwuXbpg7969CA8Pf+3Zzx0cHKBUKnH//n3NZHwASE1NxdOnT+Hg4KDz/l8W8NRDaNbW1q/tXQGA1atXIzY2FnPmzMGyZcuwfPlyzJs3T+d6XmfKlCn47bffsHLlSmzatAnAswUDubm5r63TyckJ586dQ2Zm5kt7y06cOAG5XI6ffvpJa/i1ok/CaWVlBRMTE9y7d6/EbXfv3oW+vn6pPZ/lKSwsDJmZmVi7di1atWql2f7iUJv62IiOji7TsfGmHBwcEBMTU+pt6nZ6cYjcyckJ48ePx/jx4xEXF4f+/ftj8+bNWLZsmc7HdGW7f/9+iW1xcXFa7+saNWqUOv0gMTFR6++y/nAjKg3nlJFkffTRRzA1NcW8efOQmppa4vb4+HjNaQbUpz148bQDW7Zs0bpdF+rerBfnSnXo0AHm5uZYv349CgsLSzyu+Cq+a9euYfPmzRgzZgzGjx+PCRMmYOfOnZqeMzVTU1M8ffpU5xqLq169OoYOHYpz587h9u3bAID33nsP4eHhOHv2bIn7P336FEVFRQCA//znP1CpVFi7dm2J+6l7UdQ9G+q/gWdtExgY+FZ1v45MJkO7du1w/PhxrRCUmpqKP/74Ay1atCgxvFbe1L1CxV+7XC4vcToQDw8P1K1bF9u3by/x/7P4Y99Wx44d8ejRoxJXgZDL5fjtt99gbW2tOfFtXl4eCgoKtO7n5OQEMzMzzelidDmmhRASEoLk5GTN3xEREbh27ZrWCX8dHR1x9+5drVojIyPx77//aj2X+n39tu83kib2lJFkOTk5YdmyZZg5cyZ69+6tOaO/XC5HeHg4jh07hoEDBwIA3NzcMGDAAOzduxdPnz5Fq1atcP36dQQFBaF79+5o06aNzvtv0qQJZDIZfvnlF2RlZcHIyAht2rSBtbU15s+fj9mzZ2PgwIHo3bs3rKyskJiYiNOnT6N58+b4+uuvUVBQgDlz5sDZ2RkzZ84E8OzUBCdPnsQXX3yBQ4cOaU646eHhgdDQUGzZsgV2dnaoW7duidM8lMXo0aOxbds2bNiwAT/++CMmTJiAEydOYPLkyRgwYAA8PDyQl5eH6Oho/Pnnnzh+/DisrKzQpk0bfPDBB9ixYwfu37+PDh06QKlU4sqVK/D19YW/vz/atWsHQ0NDTJ48GcOGDUNOTo4mAKSkpOhcqy4++eQTXLhwASNGjMCIESMgk8mwd+9eyOVyfP7552/9/Ldu3cLvv/9eYruTkxN8fHzg4+ODGjVqYO7cuRg1ahT09PTw+++/lwha+vr6mD9/PqZMmYL+/ftj4MCBsLW1xd27dxETE6PpwXxbQ4cORWBgIGbMmIFBgwahSZMmyMzMxJEjR3Dnzh0sXboURkZGAJ71KI0dOxa9evVCo0aNIJPJEBISgtTUVPTp0wfAs+kCZTmmheLk5IThw4dj+PDhkMvl2L59OywtLbWG5QcPHoytW7diwoQJGDx4MNLS0rBnzx40atRIa26qsbExGjVqhKNHj6JevXqwtLRE48aNyzQPkIihjCStW7duOHjwIDZt2oTjx49j9+7dMDIy0pxgVX1eLwBYtGgR6tati6CgIISEhMDGxgaTJk0qdUVhWdja2mLBggVYv349vvzySygUCmzfvh3W1tbo27cv7OzssGHDBmzatAlyuRz29vZo2bKlJiiuWLEC8fHx2LNnj2Z+j5GREZYsWYKhQ4fi+++/15x5f+7cufj666+xcuVK5OfnY8CAAW8Uyuzt7dG3b1/8/vvviI+Ph5OTE3bs2IH169fj2LFjCA4Ohrm5OerVq4fp06drTcwPCAiAq6sr9u/fj++//x4WFhbw9PTUDB03aNAAq1evxsqVK7F06VLY2Nhg+PDhsLKywv/7f//vjdq4rBo3boxdu3Zh+fLlWL9+PVQqFby9vfHDDz+8UTu96I8//sAff/xRYvuAAQPg4+ODmjVr4ueff8bSpUuxcuVKVK9eHf369YOfnx8mTJig9ZgOHTpg27ZtWLduHTZv3gyVSgVHR0etY/VtGRsbY+fOnVi3bh1CQkJw4MABVKtWDR4eHtiwYYNWD1KtWrXQp08fhIaG4uDBg5DJZGjQoAFWrlyJnj17au5XlmNaKP3794e+vj62bduGtLQ0eHt746uvvtI6qXTDhg2xdOlSrF69GgEBAWjUqBG+//57/PHHHyV6phctWoSFCxciICAAhYWFmDZtGkMZlYmeqjz7vImIiN4RDx48QLdu3TB79uwS4ZdICJxTRkRERCQCDGVEREREIsBQRkRERCQCnFNGREREJALsKSMiIiISAYYyIiIiIhGQ3HnKlEolioqKoK+vz8thEBERUYVTqVRQKpUwMDB45XVdJRfKioqKcP36daHLICIiIonx8vLSXA2jNJILZeqE6uXlpbnWHhEREVFFUSgUuH79+it7yQAJhjL1kKVMJmMoIyIiokrzumlTnOhPREREJAIMZUREREQiwFBGREREJAIMZUR
EREQiwFBGREREJAIMZUREREQiwFBGREREJAIMZUREREQiwFBGREREJAIMZUREREQiwFBGREREJAIMZUREREQiwFBGREREJAIMZUREREQiwFBGREREJAIMZUREREQiwFBGREREJAIMZUREREQiwFBGREREJAIMZUREREQiwFBGREREJAKChrJ//vkHkydPRvv27eHq6oqQkJDXPubSpUsYMGAAPD090aNHDxw4cKASKiUiIiKqWIKGstzcXLi6uuKbb74p0/0TEhIwadIk+Pr64vfff8eYMWMwb948nD17toIrJSIiIqpYBkLuvFOnTujUqVOZ779nzx7UrVsXc+fOBQA0bNgQV65cwdatW9GhQ4eKKpOIiIiowgkaynR19epV+Pn5aW1r3749vvvuO52fS6FQlFdZRERERC9V1szxToWy1NRU2NjYaG2zsbFBdnY28vPzYWxsXObnun79eolthoaGcHd3h4HBO9UsZVJUVIRbt26hsLBQ6FLeWc+ODw8YGMiELqXcFRUpcOvWTR4fVC4MDQ3h7uEOA1kV/CxVFOHWTX6WUsWoeu+YMvLy8oJMVvLLVSaT4Yd525FwL1mAqiqGY317fL5oNDw8PIQu5Z0nk8nw7aYg3E9KFbqUcuNc2wZfTxjA44PKlUwmw5LQvYh/+ljoUsqNU3U7zPUbyvcK6UyhUJTaGfSidyqU2djYIDVV+8swNTUV5ubmOvWSAc8+MEoLZQCQcC8ZsZEP3rhOsXrZ6yXd3E9KRXTCI6HLKHc8Pqi8xT99jJiMRKHLKHd8r1BFeafOU9asWTNcvHhRa9uFCxfQrFkzYQoiIiIiKieChrKcnBzcvn0bt2/fBgA8ePAAt2/fRmLis19Wy5cvx+zZszX3HzZsGBISEvD9998jNjYWu3btwtGjRzF27FghyiciIiIqN4IOX964cQOjR4/W/B0QEAAAGDBgAJYsWYKUlBQkJSVpbnd0dMT69esREBCA7du3o1atWli0aBFPh0FERETvPEFDma+vL6Kiol56+5IlS0p9THBwcAVWRURERFT53qk5ZURERERVFUMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJAEMZERERkQgwlBERERGJgOChbNeuXejatSu8vLwwZMgQREREvPL+W7duRc+ePeHt7Y1OnTrhu+++Q0FBQSVVS0RERFQxBA1lR44cQUBAAKZOnYqgoCC4ublhwoQJSEtLK/X+hw4dwvLlyzFt2jQcOXIEixcvxpEjR7BixYpKrpyIiIiofAkayrZs2YIPP/wQgwYNQqNGjbBgwQIYGxsjMDCw1PuHh4ejefPm6Nu3L+rWrYv27dvj/ffff23vGhEREZHYGQi1Y7lcjps3b2LSpEmabfr6+mjbti3Cw8NLfYyPjw8OHjyIiIgIeHt7IyEhAadPn8YHH3yg8/4VCkWp22Uymc7P9a542WumsuPxQVQ2fK8QPVfWY0awUJaRkQGFQgFra2ut7dbW1rh7926pj+nbty8yMjIwYsQIqFQqFBUVYdiwYZg8ebLO+79+/XqJbSYmJnB3d9f5ud4VUVFRyMvLE7qMdxaPD6Ky4XuF6M0IFsrexKVLl7B+/Xp888038Pb2Rnx8PBYvXox169Zh6tSpOj2Xl5dXlf4lVxpXV1ehSyAR4/FBVDZ8r5CuFApFqZ1BLxIslNWsWRMymazEpP60tDTY2NiU+phVq1ahX79+GDJkCIBnb4zc3Fx8/fXXmDJlCvT1yz5FTiaTSS6USe31km54fBCVDd8rVFEEm+hvZGQEDw8PhIaGarYplUqEhobCx8en1Mfk5+eXCF7qN4dKpaq4YomIiIgqmKDDl+PGjcOcOXPg6ekJb29vbNu2DXl5eRg4cCAAYPbs2bC3t8esWbMAAF26dMGWLVvg7u6uGb5ctWoVunTpwl8uRERE9E4TNJT17t0b6enpWL16NVJSUtCkSRNs3LhRM3yZlJSk1TM2ZcoU6OnpYeXKlUhOToaVlRW6dOmCmTNnCvUSiIiIiMqF4BP9/f394e/vX+ptO3bs0PrbwMAA06ZNw7Rp0yqjNCIiIqJKI/hlloiIiIiIoYyIiIhIFBjKiIiIiESAoYyIiIhIBBjKiIiIiESAoYyIiIhIBBjKiIiIiESAoYyIqBwolEqhS6gwVfm1EYmJ4CePJSKqCmT6+vji6H7cTU8VupRy1cDKBgHvDRa6DCJJYCgjIiond9NTEfk4SegyiOgdxeFLIiIiIhFgKCMiIiISAYYyIiIiIhFgKCMiIiISAYYyIiIiIhEo0+rL7OzsMj+hubn5GxdDREREJFVlCmUtW7aEnp7eK++jUqmgp6eH27dvl0thRERERFJSplC2ffv2iq6DiIiISNLKFMpat25d0XUQERERSVqZQllkZGSZn9DNze2NiyEiIiKSqjKFsv79+0NPTw8qleqV9+OcMiIiIqI3U6ZQdvz48Yqug4iIiEjSyhTKHBwcKroOIiIiIkkrUygrTUxMDBITE1FYWKi1vVu3bm9dFBEREZHU6BzKEhISMHXqVERHR2vNM1Ofx4xzyoiIiIh0p/NllhYvXoy6deviwoULMDY2xuHDh7Fz5054enpix44dFVEjERERUZWncygLDw/H//73P1hZWUFfXx96enpo2bIlPv30UyxatKgiaiQiIiKq8nQOZUqlEmZmZgCAmjVr4vHjxwCeLQa4d+9e+VZHREREJBE6zylr3LgxoqKi4OjoiKZNm2Ljxo0wNDTEvn374OjoWBE1EhEREVV5OveUTZkyBUqlEgDwv//9Dw8ePMDIkSNx+vRpfPnll+VeIBEREZEU6NxT1qFDB81/Ozs749ixY8jMzESNGjU0KzCJqOpTKJWQ6ev8u+6dUJVfGwlDqVJCX6/qHVNv+rpUKgX09GQVUJGw3vZ16RzKsrKyoFAoYGlpqdlmaWmJzMxMGBgYwNzc/I2LIaJ3h0xfH/P2HsC9x6lCl1Ku6tvZYNHQgUKXQVWMvp4+9tzajMe5SUKXUm7sTGtjmPv4N3qsnp4MF+8sxtO8+HKuSjjVTZzQpvHbjRjqHMpmzpyJLl26YOTIkVrbjx49ihMnTuCXX355q4KI6N1x73EqohIfCV0G0TvhcW4SErMThC5DNJ7mxSMz947QZYiKzn2OERERaNOmTYntrVu3Rk
RERLkURURERCQ1OocyuVyOoqKiEtuLioqQn59fLkURERERSY3OoczLywv79u0rsX3Pnj3w8PAol6KIiIiIpEbnOWWffPIJxo0bh8jISPj5+QEAQkNDcf36dWzevLncCyQiIiKSAp17ylq0aIG9e/eiVq1amsn9Tk5OOHjwIFq2bFkRNRIRERFVeTr3lAFAkyZNsHz58vKuhYiIiEiy3uhMdvHx8fjxxx8xa9YspKWlAQBOnz6NO3e4tJWIiIjoTegcysLCwtC3b19ERETgzz//RG5uLgAgKioKa9asKfcCiYiIiKRA51C2fPlyfPLJJ9iyZQsMDQ0129u0aYOrV6+WZ21EREREkqFzKIuOjkb37t1LbLeyskJGRka5FEVEREQkNTqHMgsLC6SkpJTYfvv2bdjb25dLUURERERSo3Mo69OnD5YtW4aUlBTo6elBqVTiypUrWLp0Kfr3718BJRIRERFVfTqHspkzZ6JBgwbo3LkzcnNz0adPH/j7+8PHxweTJ0/WuYBdu3aha9eu8PLywpAhQ157/cynT59iwYIFaN++PTw9PdGzZ0+cPn1a5/0SERERiYnO5ykzMjLCokWLMHXqVERHRyMnJwfu7u6oV6+ezjs/cuQIAgICsGDBAjRt2hTbtm3DhAkTcOzYMVhbW5e4v1wux7hx42BtbY1Vq1bB3t4eiYmJqF69us77JiIiIhKTNzp5LADUrl0btWvX1vz9119/Yc2aNTh06FCZn2PLli348MMPMWjQIADAggULcOrUKQQGBmLixIkl7h8YGIgnT55gz549mpWfdevWfdOXQERERCQaOoWyPXv24MKFCzA0NMTo0aPRtGlThIaGYunSpYiLi8MHH3xQ5ueSy+W4efMmJk2apNmmr6+Ptm3bIjw8vNTHnDhxAs2aNcO3336L48ePw8rKCu+//z4+/vhjyGQyXV4KFApFqdt1fZ53ycteM5Udj4/nqnJbAGyPF7E9nnuTz1K2hzaptUdZ26jMoWzDhg1YvXo1XFxccO/ePRw/fhyTJ0/Gzp07MXr0aAwdOhQ1atQoc9EZGRlQKBQlhimtra1x9+7dUh+TkJCAixcvom/fvtiwYQPi4+OxYMECFBUVYdq0aWXeNwBcv369xDYTExO4u7vr9DzvkqioKOTl5QldxjuLx8dzVb0tALbHi9gez+n6Wcr20Mb2eLkyh7LAwEAsXLgQAwYMwOXLl+Hv74/w8HD89ddfMDU1faOd60qlUsHa2hoLFy6ETCaDp6cnkpOTsWnTJp1DmZeXV5VO6qVxdXUVugQSMR4f2tge2tgez7EttLE9tJXWHgqFotTOoBeVOZQlJSWhTZs2AICWLVvCwMAA06dPf+NAVrNmTchkMs21M9XS0tJgY2NT6mNsbW1hYGCgFaYaNGiAlJQUyOVyGBkZlXn/MplMcqFMaq+XdMPjQxvbQxvb4zm2hTa2h7a3aY8ynxJDLpejWrVqmr8NDQ11Gq58kZGRETw8PBAaGqrZplQqERoaCh8fn1If07x5c8THx0OpVGq2xcXFwdbWVqdARkRERCQ2Ok30X7lyJUxMTAAAhYWF+Omnn2BhYaF1ny+++KLMzzdu3DjMmTMHnp6e8Pb2xrZt25CXl4eBAwcCAGbPng17e3vMmjULADB8+HDs3LkTixcvhr+/P+7fv4/169dj1KhRurwMIiIiItEpcyhr1aoV7t27p/nbx8cHCQkJWvfR09PTaee9e/dGeno6Vq9ejZSUFDRp0gQbN27UDF8mJSVBX/95Z17t2rWxadMmBAQEoF+/frC3t8fo0aPx8ccf67RfIiIiIrEpcyjbsWNHhRTg7+8Pf3//Mu/Tx8cH+/btq5BaiIiIiISi82WWiIiIiKj8MZQRERERiQBDGREREZEIMJQRERERiYDOoSwxMREqlarEdpVKhcTExHIpioiIiEhqdA5l3bp1Q3p6eontmZmZ6NatW7kURURERCQ1OocylUpV6vnIcnNztc74T0RERERlV+bzlAUEBAB4doLY4mf2B55daDMiIgJubm7lXyERERGRBJQ5lN26dQvAs56y6OhoGBoaam4zMjKCm5sbxo8fX/4VEhEREUmAzmf0/+KLL/Dll1/C3Ny8wooiIiIikhqd55R9/vnnLw1kUVFRb10QERERkRTpHMr69u2LU6dOldi+adMmDBkypDxqIiIiIpIcnUPZ2LFjMX36dHzzzTfIz89HcnIyxowZg40bN2L58uUVUSMRERFRlVfmOWVqH3/8Mdq1a4fZs2ejX79+ePLkCby9vXHw4EHY2tpWRI1EREREVd4bXWbJyckJjRs3xsOHD5GdnY3evXszkBERERG9BZ1D2ZUrV9CvXz/cv38fBw8exPz587Fw4UJ88sknePLkSUXUSERERFTl6RzKxowZg969e2Pv3r1o2LAhhgwZguDgYCQlJaFv374VUSMRERFRlafznLLNmzejdevWWtucnJywe/du/PTTT+VWGBEREZGU6NxTpg5k9+/fx9mzZ5Gfnw/g2eWXpk6dWr7VEREREUmEzj1lGRkZ+OSTT3Dp0iXo6enhr7/+gqOjI/7f//t/sLS0xJw5cyqiTiIiIqIqTeeesoCAABgYGODUqVMwNjbWbO/duzfOnDlTrsURERERSYXOPWXnz5/Hpk2bUKtWLa3t9erVQ2JiYrkVRuKgVCqhr/9GZ04Rtar6uoiI6N2lcyjLzc3V6iFTy8zMhJGRUbkUReKhr6+P7388gPgHKUKXUm6c6tpi9syBQpdBRESkRedQ1rJlSwQHB+OTTz7RbFMqldi4cSN8fX3LszYSifgHKYi9+0joMoiIiKo0nUPZ559/jrFjx+LGjRsoLCzEDz/8gJiYGDx58gS7d++uiBqJiIiIqjydQ5mLiwv+/PNP7Ny5E2ZmZsjNzUWPHj0wcuRI2NnZVUSNRERERFWezqEsMTERtWvXxpQpU0q9rU6dOuVSGBEREZGU6Lz8rFu3bkhPTy+xPSMjA926dSuXooiIiIikRudQplKpoKenV2J7bm4uqlWrVi5FEREREUlNmYcvAwICADy7nNLKlSthYmKiuU2hUCAiIgJubm7lXyERERGRBJQ5lN26dQvAs56y6OhoGBoaam4zMjKCm5sbxo8fX/4VEhEREUlAmUPZjh07AABffPEFvvzyS5ibm1dYUURERERSo/PqS/UwJhERERGVnze6zNKGDRtw8eJFpKWlQalUat1+/PjxciuOiIiISCp0DmXz5s1DWFgYPvjgA9ja2pa6EpOIiIiIdKNzKDtz5gzWr1+PFi1aVEQ9RERERJKk83nKqlevDktLywoohYiIiEi6dA5lM2bMwKpVq5CXl1cR9RARERFJks7Dl1u2bEF8fDzatm2LunXrwsBA+ymCgoLKrTgiIiIiqdA5lHXv3r0i6iAiIiKSNJ1D2bRp0yqiDiIiIiJJ0zmUqd24cQOxsbEAgMaNG8Pd3b3ciiIiIiKSGp1DWVpaGmbOnImwsDBUr14dAPD06VP4+vrixx9/hJWVVbkXSURERFTV6bz6cuHChcjJycHhw
4cRFhaGsLAw/PHHH8jOzsaiRYsqokYiIiKiKk/nUHb27Fl88803aNiwoWZbo0aN8M033+DMmTPlWhwRERGRVOgcypRKJQwNDUtsNzAwKHEdzLLatWsXunbtCi8vLwwZMgQRERFletzhw4fh6uqK//73v2+0XyIiIiKx0DmUtWnTBosXL0ZycrJmW3JyMgICAuDn56dzAUeOHEFAQACmTp2KoKAguLm5YcKECUhLS3vl4x48eIClS5eiZcuWOu+TiIiISGx0DmVff/01srOz0a1bN3Tv3h3du3dHt27dkJ2dja+++krnArZs2YIPP/wQgwYNQqNGjbBgwQIYGxsjMDDwpY9RKBT47LPPMH36dDg6Ouq8TyIiIiKx0Xn1Ze3atREUFIQLFy7g7t27AICGDRuibdu2Ou9cLpfj5s2bmDRpkmabvr4+2rZti/Dw8Jc+bt26dbC2tsaQIUNw5coVnfcLPAt2pZHJZG/0fO+Cl73mV2F7aGN7PFeV2wJge7yI7fEcPzu0sT20ldYeZW2jNzpPmZ6eHtq1a4d27dq9ycM1MjIyoFAoYG1trbXd2tpaE/hedPnyZezfvx/BwcFvte/r16+X2GZiYlKlz7cWFRWl0zVL2R7a2B7PVfW2ANgeL2J7PMfPDm1sD226tkdxZQ5loaGhWLhwIfbt2wdzc3Ot27KysjBs2DAsWLCgQud4ZWdnY/bs2Vi4cOFbnw/Ny8urSif10ri6ugpdgqiwPbSxPbSxPbSxPZ5jW2hje2grrT0UCkWpnUEvKnMo27ZtGz788MMSgQwALCwsMHToUGzZskWnUFazZk3IZLISk/rT0tJgY2NT4v4JCQl4+PAhpkyZotmmXvHp7u6OY8eOwcnJqUz7lslkkgtlUnu9r8P20Mb20Mb20Mb2eI5toY3toe1t2qPME/2joqLQoUOHl97erl073Lx5U6edGxkZwcPDA6GhoZptSqUSoaGh8PHxKXH/Bg0a4NChQwgODtb869q1K3x9fREcHIxatWrptH8iIiIisShzT1lqaioMDF5+dwMDA6Snp+tcwLhx4zBnzhx4enrC29sb27ZtQ15eHgYOHAgAmD17Nuzt7TFr1ixUq1YNLi4uWo9XX+rpxe1ERERE75IyhzJ7e3vcuXMHzs7Opd4eFRUFW1tbnQvo3bs30tPTsXr1aqSkpKBJkybYuHGjZvgyKSkJ+vo6n7mDiIiI6J1S5lDWqVMnrFq1Ch06dEC1atW0bsvPz8eaNWvQpUuXNyrC398f/v7+pd62Y8eOVz52yZIlb7RPIiIiIjEpcyibMmUK/vrrL/Ts2RMjR45E/fr1AQB3797Fr7/+CoVCgcmTJ1dYoURERERVWZlDmY2NDfbs2YP58+djxYoVUKlUAJ6ds6x9+/b4+uuvS10xSURERESvp9PJYx0cHPDLL7/gyZMnuH//PgDA2dkZNWrUqJDiiIiIiKTijc7oX6NGDXh7e5d3LURERESSxWWNRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCLAUEZEREQkAgxlRERERCIgilC2a9cudO3aFV5eXhgyZAgiIiJeet99+/ZhxIgRaNWqFVq1aoWxY8e+8v5ERERE7wLBQ9mRI0cQEBCAqVOnIigoCG5ubpgwYQLS0tJKvf+lS5fQp08fbN++HXv27EHt2rUxfvx4JCcnV3LlREREROVH8FC2ZcsWfPjhhxg0aBAaNWqEBQsWwNjYGIGBgaXef/ny5Rg5ciSaNGmChg0bYtGiRVAqlQgNDa3kyomIiIjKj4GQO5fL5bh58yYmTZqk2aavr4+2bdsiPDy8TM+Rl5eHoqIi1KhRQ6d9KxSKUrfLZDKdnudd8rLX/CpsD21sj+eqclsAbI8XsT2e42eHNraHttLao6xtJGgoy8jIgEKhgLW1tdZ2a2tr3L17t0zPsWzZMtjZ2aFt27Y67fv69esltpmYmMDd3V2n53mXREVFIS8vr8z3Z3toY3s8V9XbAmB7vIjt8Rw/O7SxPbTp2h7FCRrK3taGDRtw5MgRbN++HdWqVdPpsV5eXlU6qZfG1dVV6BJEhe2hje2hje2hje3xHNtCG9tDW2ntoVAoSu0MepGgoaxmzZqQyWQlJvWnpaXBxsbmlY/dtGkTNmzYgC1btsDNzU3nfctkMsmFMqm93tdhe2hje2hje2hjezzHttDG9tD2Nu0h6ER/IyMjeHh4aE3SV0/a9/HxeenjfvnlF/zf//0fNm7cCC8vr8oolYiIiKhCCT58OW7cOMyZMweenp7w9vbGtm3bkJeXh4EDBwIAZs+eDXt7e8yaNQvAsyHL1atXY/ny5XBwcEBKSgoAwNTUFGZmZoK9DiIiIqK3IXgo6927N9LT07F69WqkpKSgSZMm2Lhxo2b4MikpCfr6zzv09uzZg8LCQvzvf//Tep5p06Zh+vTplVo7ERERUXkRPJQBgL+/P/z9/Uu9bceOHVp/nzhxojJKIiIiIqpUgp88loiIiIgYyoiIiIhEgaGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAQYyoiIiIhEgKGMiIiISAREEcp27dqFrl27wsvLC0OGDEFERMQr73/06FH06tULXl5e6Nu3L06fPl1JlRIRERFVDMFD2ZEjRxAQEICpU6ciKCgIbm5umDBhAtLS0kq9/7///otZs2Zh8ODBCA4ORrdu3TB16lRER0dXcuVERERE5UfwULZlyxZ8+OGHGDRoEBo1aoQFCxbA2NgYgYGBpd5/+/bt6NChAz766CM0bNgQn3zyCdzd3bFz585KrpyIiIio/BgIuXO5XI6bN29i0qRJmm36+vpo
27YtwsPDS33M1atXMXbsWK1t7du3R0hISJn2qVKpNPuWyWQlbpfJZKjXuDYMjUre9q5ycLaDQqGAQqHQ+bEymQz1ne1gaCB4fi83dR1s3qo9GjrYVqn2cLK3fqP2kMlkaGxvCyNZ1WkLAHC2efP2cLG2hZF+1WqPejXfvD3qV7eHoV7V+Syta/F2nx21TBwgQ9VpD1uTWm/VHtWN60NPZVgBlQnDwrjuS9tDvU2dQV5G0FCWkZEBhUIBa2trre3W1ta4e/duqY9JTU2FjY1NifunpqaWaZ9KpRIAcOvWrZfep1PfJgCalOn53hVXr15948d2bucEwKncahGDt2mPnt6OABzLrRYxeNP26NvQCWhYtY4N4M3bo79tPcC2XnmWIgpv2h5djRoC1g3LtxiBvc1nR0N4oaGeV/kVI7T8t2sPQ/SAVflVI7ys17eHOoO8jKChTAgGBgbw8vKCvr4+9PT0hC6HiIiIqjiVSgWlUgkDg1fHLkFDWc2aNSGTyUpM6k9LSyvRG6ZmY2NTolfsVfd/kb6+PoyMjN6sYCIiIqIKIujkByMjI3h4eCA0NFSzTalUIjQ0FD4+PqU+plmzZrh48aLWtgsXLqBZs2YVWSoRERFRhRJ8Ruq4ceOwb98+BAUFITY2FvPnz0deXh4GDhwIAJg9ezaWL1+uuf/o0aNx9uxZbN68GbGxsVizZg1u3LgBf39/oV4CERER0VsTfE5Z7969kZ6ejtWrVyMlJQVNmjTBxo0bNcORSUlJ0C+2mql58+ZYtmwZVq5ciRUrVqBevXpYt24dXFxchHoJRERERG9NT/W69ZlEREREVOEEH74kIiIiIoYyIiIiIlFgKCMiIiISAYYyIiIiIhFgKCMiIiISAYaySlRUVCR0CURERFXau3xSCYaySqA+QI4cOYLMzExhixEJhUKB/Px8ocsQjYSEBPz+++9ClyE49XslPT0dUVFRAlcjPHV75OTkCFyJuOTm5gpdAomM+kLfBQUF0NPTe2eDGUNZJdDT00NhYSFu3boFS0tLocsRlPqNcu3aNRw/flzgasTjwYMHMDU1FboMwenp6QEAIiIicPLkSQDPP2ylSN0e48aNQ3p6usDViMe+ffsk/6NO/Vl69uxZ/PLLLwJXIzz1e2XWrFm4cOGC5u93DU8eWwlUKtU7e4BUlHPnzsHCwgJNmzaVfPsolUro6+tLvh2Kk8vlUCqVMDY2FroUUUhNTdVc5UTqsrKysHv3bkycOFHoUkQhNjYWjx8/hp+fHxQKBWQymdAlCSo9PR01atR4Z9uBoayCqb9w7969C0dHRxgaGgpdEolMUVER0tLSYG9vL3QpoqF+3xA0X7RsE/7ApVerCu+Rd7v6d4D6ANm1axdSU1MFrkZYxecLRUZGClyNeFy7dg3/93//J3QZglMfH1FRUdi9e7fA1QhP3R6HDx8ucQ1gKVIHsosXLyIvL0/ockQhJycHoaGhQpchOPUUh7CwMERHRwtczduR9ru8kiiVSgwZMgS1a9cWuhRBFZ8vdOrUKQDSni+k5u7ujhkzZghdhuDUx4dCodAM1Un5+FC3R3x8PKytrQWuRnjq9ggJCZF8QFW7efMm7t69C+DdXnH4ttTHw7///vvOHxscvqRKJ+X5Qhx+obLisVJSUVERkpKS4OjoKHQpopCTk4P8/HxYW1vzeKki3u1IKVLqnKtQKAAAu3fvRlJSkpAlVbpXZX0DAwNJBjIAJT40MzIysH//foGqER+5XI7Tp08LXYbglEol9PT0kJKSAkDavSDFGRgYMJAVY2ZmpulFlWogU7830tLSkJ2dLXA1b4+hrAKo3xzq1R/x8fGoVq2akCVVGvUbpPhQlFKp1AxDSXm+UGhoKBISErS2VZUPkvJy9+5dxMTEAJB2EFEPwSxbtgyRkZGS/cJV/7BVHwvfffcdHj9+LGRJglN/lt69exfz588XthiBlPbZcOTIEZw/f16AasoXQ1k5KH6A3L9/H6dOncKjR4802+bMmQMrKyshSqt0enp6+PPPP3H//n0Az4Kpvr6+5ktGLpdLbr5QVlYWAGDr1q2aHkL1l03NmjUxduxYoUoTnEqlQmZmpmbidoMGDdC/f39hixKR6dOnw83NTegyBKP+YaunpwelUgkHBwfY2dkJXFXl+eeff5Cfn4/s7GzN56X6s9TMzAy9e/cGIJ3PUjU9PT1ER0cjPz9f84OlQ4cOaN++vcCVvT0DoQuoCoqKimBoaIiVK1ciOjoaKpUK2dnZqF69OmrVqoWhQ4fCxcVF6DIrhUKhwL59+xAfHw9TU1P4+vqiRYsW8PX1haWlJby8vODl5QUA7/yEzLLIz8/HyZMnERYWhjt37iAhIQF6enqaYLpw4UIsXLgQFhYWAldauYqKivD9998jNDQUnp6eaNCgARo1aoS6deuicePGAKQ7HFNc3bp1hS6hUqlPaZCeno7ffvsN165dw+TJk+Ht7Q19fX2MGTOmSpz2oKxOnTqFVq1aYfXq1TAzM4OzszPq1q2L2rVrw97eXnMaHSm0h/r/e2xsLI4fP47Tp0+jYcOG+Pbbb5GUlIQ6derAyMhI6DLfGkNZOVCfe+z48eOYM2cOHBwckJaWhnv37uHKlSuSuualTCbDpk2bEBkZiU2bNuH06dOIiIjA0qVL4eDggObNm2PmzJlCl1lplEol7OzskJSUBIVCgeDgYBQVFcHe3h55eXmIi4uTVCBTf7CeOnUKt2/fxldffYXr16/j1q1bOHnyJGrVqoUVK1YIXaYoSHni9hdffIFu3brhypUryMjIAPCsp7lLly5wdnYWuLrKM378eBQWFiIvLw9JSUm4fv06TExMYG1tDQcHBwwbNkwyx4h6RGrv3r2wsbGBk5OTptf0zJkzMDMzw/vvvy9kieWCoewtJScn4/79+7C2toa7u7um+7R+/frw9vZG165dJbWcXX2iy+PHj8PBwQEzZ86ElZUV/vrrL2zdulVrEcS7esZlXZiamqJNmzaoUaMG0tPTYWBggNu3byM5ORkGBgb473//K3SJlUr9iz45ORm9evVC69at0bp1awBAZmamZthfSr0hcrkcISEh6NWrl9ZrlsqXbXH6+vooKChAWloaPvzwQxw4cABNmzYFABw8eBBdu3YVuMLKpf7umDNnDgoLCxEbG4vIyEhcv34djx8/1lzjUQrHivq9ERERgT179mDWrFlo0aIFAOD06dPo2bOnkOWVG4ayt3Tjxg1s2rQJNjY2uHfvHr777jsMGDAAjRs3hpGRkaQCGfD8iyQsLAyzZs1CnTp1AAD9+vXDrVu34OvrC0Aa3e3FNWnSRPPfvr6+yMnJgaGhYZXobn8THTt2xNatW3H+/Hk0bNgQVlZWsLS0hKWlJVQqlSSOj/z8fBgbG+P69es4deoUevfurfmCzcnJQWZmJhwcHIQus9LdvHkTbm5uyMjIgLGxMSwtLZGYmAgAcHJyEri6yqf+IWtoaAg3Nze4ubmhf//+kMvlAldWudTvC0dHR1y5cgWxsbHw8/MDADx8+LBKzCcDGMrempeXFz799FM
kJSXhzp07iI6OxurVq2FoaAhzc3OMGTMGrq6uQpdZadTXcGzevDnWrl2LKVOmwNnZGVZWVjh37hwGDx4sdImCKT5R18zMTOBqhJOZmYmFCxfixo0bePz4MRwcHODk5IRatWqhU6dOkuhBBZ5ds/D333/HhQsX4OLigpSUFFhYWMDY2BhhYWEIDw/Hp59+KnSZlc7FxQUmJiYYOXIkvLy8IJfLcfDgQc28XKn0sqsV/5GiUqk0f6t/0Emhlwx49trNzMwwYMAArF+/HhYWFrh06RJ+//131KtXr8p0gDCUvSU7OzvY2dnh9OnTmDhxItLS0pCYmIiHDx8iKipKkl++enp6GDt2LNasWYOgoCAUFRXh5s2baN68ORo1aqS5j9RIoffnVdRDksePH4exsTH+/vtvhIaG4tatW7hw4QKsra0lNTxlY2MDX19f7Nu3D3Xq1MH8+fMhk8ng6OiI8+fPY8KECUKXKAhzc3MMHz4c+fn5ePr0KXr16oWuXbti8uTJAKT32aH+3FAqlZIZqiyN+nV7enqiU6dOOHz4MDZv3ow2bdrgP//5j8DVlR+e0f8tyOVyGBgYIDQ0FFu2bMHGjRs1txUUFOD27dto1qyZcAUKLDk5GaGhoVAoFGjcuDEaNmwIMzMzSX6wFBQUIDU1Ffb29jAwkOZvIXUo+/nnn1GrVq0Sp75IS0uDtbW1pOaTKZVKREREwNraGvHx8UhISNCc13DChAkwNzcXusRKV1RUBAMDA0RGRiItLQ1ubm4wNzeXzLkegee9gSdPnoRKpYKfnx9MTEy07iPFz9EXPX36FNWrVxe6jHIlzW+HcnL37l2cP38eZ8+ehZ2dHTIzM1FQUAB7e3tcvHgR165dk3Qos7e3L/WcU1L7INmzZw+OHj0KMzMzLFu2TPOlI7Vwpg5aqamp+Ouvv1BUVAR3d3fUqlULVlZWmkvFSCGQqb90L1++jLS0NDRr1kzSZ6pXB/GEhAT8+uuvOHz4MPz8/ODp6YmmTZtKKpABz8/PduHCBezZs0czItOxY0d06NABnp6ekvscVVOfjNzAwKDKBTKAJ499K1ZWVjAyMsK9e/cQHR2NefPmYePGjQgODsbPP/9cJQ8YXajnP0iR+nWHhYXh1KlTGDRoEB49egRTU1PcuXMHy5cvF7hCYeTk5MDU1BSurq44e/YsduzYgc2bN+PXX3+V5C//wMBAzRUd1q1bh6ZNm2LFihUoKCgQuLLKpX6/LFmyBEZGRli1ahVatWqF4OBgfPPNNygsLBS4wsqXn5+PgoICzJo1C0uXLkWfPn2wd+9ezJ49G7Nnz9aclFpq9PX1q/QP2qr7yiqBnZ0dRo0ahdq1a6N58+YIDw/H5cuXERISgp49e2LgwIFCl1hp1F+oqampqF69OoyMjKBUKiU1Ibc49Wu/ePEiunfvjho1asDd3R3Asx7W+Ph4gSsUhpmZGT799FMUFRUhJiZGs7w/JSVFUsv71e+LqKgoLF26FBcuXMD9+/fxyy+/YPPmzUhKSkK9evWELbISFf+cGDFiBOzt7eHj44PBgwdj2LBhiIqKgqenp4AVVp7i5/J7/Pgxvv32WxQVFaF58+YwNTVFXFwcMjMzsW/fPsnNO5TCNJCq+aoqifrN0717dwBAt27d0K1bNwDSWyGkduDAAcjlckybNk2Sr19N/drz8vLQpEkT7Ny5U7Py9MqVK2jXrp2Q5QlG3SNiYGAg6eX9wLM5l4aGhti/fz+OHTuGsWPHonXr1liwYIHmVDJSoA7iiYmJUKlUiIiIQI8ePQA8+xLOzs6WVEBV/yjJzc2FkZERsrKyYG5urtmup6eHnj174rfffhOyzEonlWkgVevVVDJ9fX1NMFMP1alUKshkMskFEvUHRuvWrbFkyRJcvnwZM2bMgI+Pj8CVCWvEiBGYNWsWbty4AT8/P9y/fx937tzBpEmThC5NEFze/4xSqYS9vT38/f1x/PhxNG7cGO3bt8e5c+dgbW0tqfPXqf+fx8XFITQ0FGfOnIGXlxc8PT1RUFCAYcOGSWrBg7o9OnfujJMnT2Lx4sVo0aIFEhIScPnyZcyaNQuhoaGaE6dWZerAXnwayNatW2Fqaorr16/jyJEjmDNnjtBlliuuvixHUhl6eR25XI6tW7ciLS0NAwcO1JynTSrtow7q165dQ0ZGBpycnLB//36EhYWhRo0amDNnjmSuhfoyxZf3S2Fif3Hq163+pa9WWFiIvXv3wtTUVFJTH4pTqVSIjIzEuXPncObMGdy7dw/6+vpo1KgR/ve//0lu4VRGRgb27Nmj6UVs06YNevTogenTp+Orr76q8otD1CNOq1evRp06dWBra4u///4bixYtwu+//46//voL69atE7rMcsVQ9hZyc3Px+PFjSXWtv05OTg7MzMzw4MEDzJgxA3fv3sVXX30lqS8ZdShbunQpateujdGjRyMvL09rSbtUAirA5f0v07NnTzg4OKBNmzZo164dPDw8AEh36kNubi6Sk5Ohr68PW1tbmJqaIjs7Gzdv3sThw4cxaNAgzSWXpODBgwd4+vQpGjdurLm+slpycrLmYuRSsHTpUrz//vuaaSAtWrTA119/DTc3N4wYMULo8soVhy/fgPpL9+jRowgNDcWyZcsQGRmJP/74A61bt0bHjh2FLrFSqdtj165diI2Nxfnz5+Hp6YlmzZrB1tYW27Ztw/379zFlyhQYGxsLXW6FU/f85OTkwNLSEgAkG8gALu8vjVKpxA8//ICIiAj8888/CAwMBAC0aNEC3333ncDVVR71Z0dcXBw2b96Mo0ePas7O3r9/f/Tq1Qu+vr6ay7NVdepA/n//93+IjIxETk4OCgoK4OjoCEdHR/Tv3x916tSBvb29pD5HpDQNhKHsDag7F8+cOYNBgwbhyZMn2LVrF6KjoxEREYGaNWvCy8tL4CorjzqExMXFwcfHByNHjkRGRgY8PT1hbGyMJ0+eYOzYsejdu7dkLjmVnZ2NGzduIDAwEIcPH0aHDh3g5+eHhg0bSuaDtLjiy/s9PT0RGRmJjRs34tChQ/D09MRXX30FCwsLocusFOp5dN7e3vDy8sLw4cMRHh6O7du3a654IRXqz9Jff/0V+vr6+Oeff/Do0SMEBwdj586d8PDwqPJDdMWpf8AcOnQIn332GRwcHJCamoqYmBhcvnwZXbt2RZ06dSRxguUXp4EsWbIE+/fvx/bt21GjRg0sWLCgSl4blqHsDajfOA8ePED9+vURGBgIBwcHLFy4EFOmTEFubq7AFQrjyy+/RGFhYYmu9ho1aqBOnTqoX7++QJVVPkNDQyxfvhwPHjzA1atXcenSJezcuRNNmjTBjz/+KHR5lYbL+7WpezdSUlJga2sLPT09yGQytGzZEiEhIZILZepgYWJigp49ewIAatWqhcmTJyMiIgIXL16Eo6OjJEKI+thISEhAs2bNNCv5AaB58+Z47733NEOWVb0tijt27Bhq166Nzp07Y/r06VV+1IGh7A3J5XJ07twZn3/+OTIyMrB7927k5uYiMTERLVu2FLq8SqPubg8PD8
elS5eQmJgIc3NzuLq6av4VFRVh3rx5kllRplKpUK1aNTg6OqJ+/fro0KEDFAoF4uLiJHcSTC7v16anp4eCggKMGzcONWrUgIeHB9q0aQMPDw+cPHkSQ4cOFbrESqWnp4e8vDycPHlSc2UDmUyG3NxcKBQKyQxbAs8DRlRUFK5cuYINGzagd+/eqFu3LkxNTWFqaip0iZVKqtNAONFfR+qJ7Grh4eGoXr06GjZsiJCQEAQFBVW51SCvov4Fqz6JrqurK7KyspCamorExERMnDgRbdq0EbrMSnf58mWEhYUhIyMDDg4OaNWqFerVqyfJC9QDQHp6Or755huYmZmVury/Ro0aGDVqlNBlVgq5XI7IyEjExsbi6tWruHnzJh49eoTBgwfjk08+Ebq8SpeSkoKFCxfi+vXrSEtLg6OjI0xMTNCsWTPMmzdP6PIq3alTp3D48GE8fvwYxsbGsLCwgK2tLUaMGCGpoVzg2TSQ0aNHIyoqCu3bt9eaBlJVMZTpaOLEifjuu+/wxx9/wNHRER06dND0AKWlpSE7OxvOzs4CV1m58vLyMHHiROzYsQPAsy/g5ORkxMXFoUOHDjA3N6+yv2qKU7/G6OhoLFiwAG5ubjh37hy8vb0RERGBevXq4eeff67y7fAyUl/eX9yL7we5XK7pYZWyyMhInD17FleuXEFMTAySk5MxceJETJ8+XejSKo169CE9PR2RkZG4c+cObt++jenTp1fJOVSvUlBQgMTERM00kOjoaNy5c6dKTwNhKNNRZmYmLC0t8fXXX+PixYvIzMyEg4MDOnfujM6dO0tqybb6i+X27dtYuXIlPvvsMzRu3FjosgSj/jDdsGEDTExM0KhRI/z222/4/PPPsWjRInTs2FFyw1NqXN7/3JkzZxASEoKMjAw0a9ZMM3wpZTdv3oS9vT1sbGw023Jzc3H16lWYm5vD29tbwOoqj1KpRGxsLIKDg1GvXj1069YNVlZWknuPAKWfz6/4NBA3NzeBK6wYDGVvQaVSIT4+HmFhYThx4gTi4uJw9OhRocuqNOo3zZEjR/DTTz/B1NQUbdq0Qd26deHs7AxPT09JzYNQt8fEiRMxbdo0HD58GE2aNEH//v3x448/wsbGRjJDdEDZl/cDVXd+yIuSkpLw4YcfYuLEiSgqKsK1a9cQHx+PvLw8HDhwoMT526q6zMxMbN68GXFxcYiJiYGVlRVat24Nb29vdO7cWejyKo36vbJ9+3aEhobC0tISd+7cwcOHDzF79mwMGDBAMu+R4qQ4DYQT/d+Cnp4enJ2d4ezsjCFDhqCoqEjokiqV+gPCy8sLX331Fe7du4f79+/j6tWr+PvvvzFjxgxJ9QCo26N///5wdHSEra0tTpw4AXNzc5w4cQJLly4VuMLKxeX9z6m/UK9du4YePXpg1KhRyM3NxcCBA5Geno60tDRJBTL1//O///4bqampaN68OfLy8tC6dWtNSOvcubNkgoj6+P/rr78wbdo0zTzcc+fOYe/evWjRogWcnJyELLHSFJ8G8uOPP2pNA9m9e3eVnwbCUFaOqtqFUctK3fPRunVrFBUVISkpCTExMZIdyuzduzcAYOjQoUhMTMShQ4fg6+sLd3d3gSurPFzer039BeLs7Ix79+4BgKYXuUaNGpI6XQzw/PxkFy9exLhx43D27Fn4+vrio48+QkpKCtq1a6e5X1X98i1OT08POTk5yMnJ0VoY1b59e6xYsUJScw2VSiVkMhlOnTqFXr16oVGjRsjIyMCnn36qmQZSlY8JaaYIemvqX7oPHz5ESEgIEhISYG1tjUaNGqFRo0bw9fWVzCkwXsbCwgJTpkxBYWGhZphOKri8v3Q3btzA7t27sXv3bvj5+aFDhw5o06aN1lwqKVD3omZlZcHW1hYxMTEYMmQIACAmJgZdunQBIJ0L1CsUChgYGKBFixbw9/fHpEmT4OzsjLS0NBQVFUlqPpn6R9rly5c100A6duyI2rVro1GjRpDL5QJXWLE4p0xHJ0+exL179zB+/HihSxGUeg7E3LlzUatWLRw+fBi1atVCeno6DAwM8Omnn6JTp05Cl0kC4/L+5wH11q1bmDhxIlauXIns7GxcvnwZ//77L5KTk/H3339LosfwRQ8fPoSVlRWOHTuGtWvXwsPDA/fu3UNwcLAkr/+ZlpaGzZs3Iz09Hbdv34aFhQUmTJiAzp07S+6aqEeOHIGfnx8CAwMRERGBfv36YdWqVVi6dGmVHnVgT1kZ3b9/H3Z2djh58qRmJZB6VUhYWBjMzc2r9IHyIvWHQ1RUFJYsWYKIiAjMnz8fjx8/xsqVK1G3bl0A0hl+oNJ16NABnTt3LrG8X4oBJCUlBePHj0fLli1RWFiIjh07Ii8vD9nZ2ZJsDwCaUzwMGDAAxsbGePjwIYYPHw6ZTCaZz465c+fCy8sLfn5+aNCgAT7//HPExsbCxMQEpqammhOnSimQAdKdBsJQVkbh4eFYsGAB8vPz8fDhQ5iamqJVq1awtbXF6tWrMXz48Cp/sLwoJiYGNWrUQHZ2NtLS0uDk5AQnJydUq1YN9erVAyCd4QeAvagvUiqVuHv3rtby/rZt20pueb96jszff/+Np0+f4tGjR6hVqxYAwMzMrEqvJCuNOmzFxcUhKCgIhoaG+M9//oP33nuvxH2quuzsbNja2uLSpUsIDAyEkZERPD090a5dO3h5eWkCmZRJbRoIhy918OTJE8yaNQv16tXDiRMnkJiYCGdnZ9SpUwdr166V3Idramoq7t27h/r162PJkiXw9vbGkydPNNd5lMKqOuB5L2pAQAC8vb0xePBgSfeicnl/6QICAnDixAk8fvwYDRo0QIcOHdCxY0dJXZZNraioCAMGDICvry9iYmJw9epVyGQyODk5YfPmzahZs6bQJVYauVyOnJwcpKSkICYmBhEREbhz5w4SEhLQunVrLFq0SOgSqRIxlJWR+kLbBQUFmpUwBQUFCAsLg52dHVxdXQWuUFinTp3Chg0b4OLigm7dummu9yiFLvfg4GBNL2rbtm0xaNAgTS+qv78/hg8fjj59+ghdZqVRBy5/f/9Sl/d//vnnklneX5r79+8jLCwMJ0+eRFxcHI4cOSJ0SZXu5s2b+OGHH7B161bNtlu3buHs2bOYNGmScIUJTKVSQS6X4+nTp4iPj4eRkRG8vLwk81lKDGVlov6SKSgowJEjR5CbmwtfX180atRI6NIElZWVhXPnzsHMzAwtW7aEkZGRZE8Lwl5UbTk5OfD390dQUJDW9oEDB+Knn36S1PDlqxQ/W7kUbNu2DaampjAzM0NKSgrGjBkjdEmiIJVRhbKQ+jQQ6XwavAX1MMv06dPh4uKCHTt2wMTEBBYWFnB0dMQPP/wAa2trgausHOoPj2vXruG7776Dg4MDCgsLsW3bNtjY2MDT01NSZ60HnvWi1qhRA+vWrUO1atUwb948rV5UqQUyLu8vOykFMgDIz8/H/v37kZGRgczMTFy+fBmDBw9GmzZtJHUurhcxkHExnZq0PhHegLqX7MaNG1AqlRg6dCj+/fdfrF69G
[... base64-encoded PNG figure data omitted ...]
[... notebook cell output boilerplate omitted; the cell's source is "plot_metric('context_recall')" and its output is a matplotlib figure ...]
[... notebook cell (execution_count 16, id 81778279-2362-4cf8-aa2d-247e267a86d1): source "plot_metric('faithfulness')", output a matplotlib figure whose base64 PNG data is omitted ...]
[... notebook cell (execution_count 23, id cd595fed-e1fe-4b4f-9ef8-b2cb9a625d2b): matplotlib figure output; base64 PNG data continues ...]
kqkCl+50eOHEFqaipatGiBvLw8eHl5KUOar6+vaIKI4vg/fPgwpk2bppyHe+bMGezcuRMtW7aEnZ2dkCVWmZengfzyyy8q00C2b99e7aeBMJRVoOp2YdSyUvR8eHl5obi4GElJSbh3755ohzJ79+4NABg+fDgeP36MAwcOwNvbGy4uLgJXVnW4vF+V4gPE3t4eDx48AABlL3Lt2rVFdboY4MX5yc6fP4/x48fj9OnT8Pb2xn/+8x+kpKSgXbt2yv2q64fvy7S0tJCTk4OcnByVhVHt27fHsmXLRDXXUCaTQSKR4MSJE+jZsyeaNGmCjIwMzJw5UzkNpDofE+JMEfTOFN90Hz16hPDwcCQkJMDMzAxNmjRBkyZN4O3tLZpTYLyOsbExPv74YxQVFSmH6cSCy/tLd/PmTWzfvh3bt2+Hj48POnTogDZt2qjMpRIDRS9qVlYWLCwscO/ePQwdOhQAcO/ePXTu3BmAeC5QL5VKoaOjg5YtW8Lf3x+TJ0+Gvb090tLSUFxcLKr5ZIovaZcvX1ZOA+nYsSPq1auHJk2aoLCwUOAKKxfnlKnp+PHjePDgASZMmCB0KYJSzIGYO3cu6tati5CQENStWxfp6enQ0dHBzJkz0alTJ6HLJIFxef+LgHrr1i1MmjQJy5cvR3Z2Ni5fvox//vkHycnJOHLkiCh6DF/16NEjmJqa4uDBg1i1ahVcXV3x4MEDBAcHi/L6n2lpaVi/fj3S09Nx+/ZtGBsbY+LEifD19RXdNVFDQ0Ph4+ODwMBAREZGon///lixYgUWL15crUcd2FNWRg8fPoSlpSWOHz+uXAmkWBVy8eJF1KxZs1ofKK9SvDnExMRg0aJFiIyMxA8//IAnT55g+fLlqF+/PgDxDD9Q6Tp06ABfX98Sy/vFGEBSUlIwYcIEtGrVCkVFRejYsSPy8vKQnZ0tyvYAoDzFw8CBA6Gvr49Hjx5h5MiRkEgkonnvmDt3Ltzd3eHj44NGjRrhiy++QGxsLAwMDGBoaKg8caqYAhkg3mkgDGVldPXqVcybNw/5+fl49OgRDA0N0bp1a1hYWGDlypUYOXJktT9YXnXv3j3Url0b2dnZSEtLg52dHezs7FCjRg00aNAAgHiGHwD2or5KJpPh/v37Ksv727ZtK7rl/Yo5MkeOHMGzZ8/w77//om7dugAAIyOjar2SrDSKsBUXF4egoCDo6uqie/fu6NWrV4l9qrvs7GxYWFjgwoULCAwMhJ6eHtzc3NCuXTu4u7srA5mYiW0aCIcv1fD06VPMmjULDRo0wLFjx/D48WPY29vD2toaq1atEt2ba2pqKh48eICGDRti0aJF8PDwwNOnT5XXeRTDqjrgRS9qQEAAPDw8MGTIEFH3onJ5f+kCAgJw7NgxPHnyBI0aNUKHDh3QsWNHUV2WTaG4uBgDBw6Et7c37t27h2vXrkEikcDOzg7r169HnTp1hC6xyhQWFiInJwcpKSm4d+8eIiMjcffuXSQkJMDLywsLFiwQukSqQgxlZaS40HZBQYFyJUxBQQEuXrwIS0tLODo6ClyhsE6cOIE1a9bAwcEBXbp0UV7vUQxd7sHBwcpe1LZt22Lw4MHKXlR/f3+MHDkSffr0EbrMKqMIXP7+/qUu7//iiy9Es7y/NA8fPsTFixdx/PhxxMXFITQ0VOiSqlxUVBT++9//YuPGjcptt27dwunTpzF58mThChOYXC5HYWEhnj17hvj4eOjp6cHd3V0076XEUFYmig+ZgoIChIaGIjc3F97e3mjSpInQpQkqKysLZ86cgZGREVq1agU9PT3RnhaEvaiqcnJy4O/vj6CgIJXtgwYNwm+//Saq4cs3efls5WKwadMmGBoawsjICCkpKfjwww+FLkkjiGVUoSzEPg1EPO8G70AxzDJ9+nQ4ODhgy5YtMDAwgLGxMWxtbfHf//4XZmZmAldZNRRvHtevX8fChQthY2ODoqIibNq0Cebm5nBzcxPVWeuB572otWvXxurVq1GjRg188803Kr2oYgtkXN5fdmIKZACQn5+PPXv2ICMjA5mZmbh8+TKGDBmCNm3aiOpcXK9iIONiOgVxvSOUg6KX7ObNm5DJZBg+fDj++ecfrFy5EnPnzkWNGjVEE8iAFyd9DA8Ph5eXF8aNG4ekpCT8+++/KqvqxPLNTy6XK4e1X+1F7dChg9DlCUIikUAikeDjjz/G+vXrERoaqlzeP3PmTADgcIxITZ48GZMnT0ZKSgquXbuGY8eOISAgABkZGTAxMUFgYCBq1qwpdJlVQvEeeefOHRgZGcHGxgYymQyAOEMaF9M9x+HLt1D84SguMm1vb499+/Zh6dKlCAsLQ3x8vCjnQKxYsaLEBdizsrKgra0tup4hAJg0aZLoe1GBksv7AZS6vF9MxD4cUxZxcXG4efMm+vbtK3QpVW7NmjVISUnBuHHjlKcIEStOA2FP2VspvrHo6+vDzc0Njx8/hrGxMYDnlwixsLAQsjxByOVydOrUCd988w369++Ptm3bwtXVVdkuYsFeVFVc3q+KwzFl16BBA+VpdMSmZ8+eCAwMxOjRo9GyZUt8+OGHcHNzE11vGaeBPMeeMjUUFhYiIyMDn376Ke7duwcXFxf89NNPojkzuaLXUHH2bTc3N2hpaeHff/9Ffn4+WrZsic8//1zoMqsMe1FL4vL+F7gql17n5cVjOTk5qFWrFrKysrBixQrcuXMHI0aMQP/+/YUus8pwMd0L7ClTg56eHqysrLBjxw7cvXsXJiYmouwpu379OkaOHInRo0fj8ePHyMjIwL1795TfZMQyX4i9qCXp6elBT08PderUQdOmTdGlSxeV5f2AeI6PAQMGoHPnzsrhmCVLlqgMx/j6+gpdYpVRfOg+ffoUSUlJcHJyErokQSlOKPzbb7/h0KFD0NHRgbGxMezs7JCYmIgTJ06gf//+ovlb4WK6FxjK3kDxB3HhwgWsX78esbGxaNeuHXx9feHh4QEzMzNRnQRTEUIKCwuV52WztraGtbU1nJyclIsAxPAm8rKxY8eisLAQ1tbW2LJlC1q1agUXFxf85z//Ebo0QSh6ELW0tFCjRg1YWFioBFSxHB8cjnnh5asaXLlyBQEBASgsLISenh4KCgqgo6MjmuMCePE3YGNjg7p166J3797w8vKCvr4+fvzxR+VnihiGMDkNRBWHL99AMf+jV69emDx5MgwMDHDq1Clcu3YNsbGx+OOPP0R30e3MzEyMGDECycnJ6NmzJ9q3bw8vLy9R9gq9jph7Uek5DseoOnjwIMLCwhAXF4ehQ4fC399fedvKlSthYmKCsWPHClihcB4/foz169fjyZMn6NatG9q2bSuqEMJpIKrYU/YGinMI2dvbY8CAAQCAHj16AACSk5NFN3EZeD5Ut2zZMjx48AA3b95EUFAQVqxYIbr5QuxFVcXl/ao4HKPK29sb8fHxOHfuHAICAvDrr7+iVatW6NWrF86cOYMZM2YAEMepdBSv8ebNmzh06BCaN28Ob29vBAYGYu7cuRg5ciS++eYbocusMpwGooqh7DW2bt2KP/74A
z179oS5uTkuXLgAb29v5e1iPQGmvr4+XFxc4OLigq5du4p2vpCig/mHH35Q6UVdsmSJKHtRFW+sJ06cEP3yfg7HlFSnTh1MmjQJNjY26NOnDyIiInDw4EGsW7cO3bt3h4+PDwBxBHjFa0xNTcWjR48QFRUFGxsbeHl5oXbt2rC0tAQgvqs9cBrIcxy+fI3s7GwEBQXhxIkTuH79OvLy8tCmTRvldR3FsuLyVWL4JquOKVOm4Pfff1fZpuhFFeMZyuPj4xEYGIh9+/aJdnk/h2NKUnxZy8nJKTGXTkw9ym+iuL4yPSfWaSAMZa+xe/duDB06VPnzrVu3cPDgQZw6dQrR0dHYsGGD8tsdicvLvai5ubno16+fSi+q2HB5f+k2b94Md3d3PH78GJcuXcIPP/yA77//HhYWFpg2bZrQ5VUZRUgNCQnB+fPncf78edStWxe+vr5o3749HB0dRfdlr7CwEKdOncKRI0dgbGwMe3t7DBo0SFSLPwBOAykNQ1kpkpOTMXv2bOjr62PAgAHo1q1biW5kMR0onC+kir2oqhRvrMuXLy+xvP/cuXNo1aoVli1bJpqh7ZeJ/dyGCoWFhRg4cCC+//57fPfdd+jZsyd27NiBzMxMHDp0CPb29kKXWCUUfwO7du3C8ePH0aBBA1hbW+P06dOoXbs2fvzxRxgYGAhdZpXhYrqSGMpe48GDB9i9ezeOHz8ONzc39OvXD+3btxdlCFHg5UCeYy9q6Xbv3o3Q0FCV5f116tSBlpYWdHV1RfVFpjRiHI5RfKE7c+YMDhw4gC+++ALTpk3Djh07sGPHDhQVFWHMmDFCl1llFO3x8ccfY/z48fDy8gLwfFX7t99+i1GjRonyvYPTQF4Qb8J4i4YNG2L27NnYsmULrK2tsXXrVvz+++949OiR0KUJpmfPnjA0NMTo0aMxa9YsREZGKnvMxCI5ORl///03Jk+ejLCwMBQXF8PFxQUzZ85EcHAwoqOj0aZNG6HLFMTQoUPx008/ISYmBkuXLsXFixeRlZWlnCcjhkAmlUoBABcuXMDkyZPRtWtXfP/99zh+/DhMTU1hYWEBMX0PVvzOY2Ji4O3tjX/++QcODg4Ank/+j4yMBADRvI8ovtQXFxcjJSUFwPNRFxMTE2RkZCgXTInhGNm6dSs6duyIhQsXKhfTvczKykp0gQxgT1kJUqkU165dQ3JyMvLy8uDm5ob8/HwEBgZi165dMDY2RnBwsGh6ijhfqCT2or5Q2vJ+qVSKwMBAnD59WnTL+zkcU7o7d+5AX18f+fn5mDt3LhwdHZGYmIgBAwZg8ODBohvaPnXqFNauXYsPPvgArq6uOH/+PEJDQ7Fr1y6hS6synAZSOoayV5w9exYTJ05Ungrj9u3baNSoEZo2bYrY2FhERUVhz549QpdZZThf6PVSU1OxZcsWREdHo1mzZvjggw9EE9ZfdeLECezfvx+ZmZmwsbFBw4YNERMTg8aNG2PSpEmiW97P4ZjXO3nyJMLCwuDk5IThw4eLZg7Vs2fPoKOjA0NDQxQVFWHPnj3YtWsXsrKy4Ovri27dusHb21s0ix44DaR0DGWvyM7Oxp49e3D79m106tQJPXr0gFQqRUFBgfKEdmLE+ULsRVWHGJf3c1WuKsV7Qm5uLiIiIlBUVARXV1fR9oAsXLgQxsbGcHFxQaNGjdCwYUMAz3ubpVKpaN5HAS6mexOGslLk5+fjwoULCAwMRFZWFrp27YpevXrB1NRU6NIEJfbLgbAXtXRc3v8ch2NUKYL54sWLER0djfj4eOTl5aFu3bpo3LgxJk6cKJoLk0ulUmWvemJiIp4+fQo3Nzc0bdoUzs7OsLS0RMOGDUXRQ6bAaSClYyh7iVQqRWFhoUp3+tWrV3H9+nU8ffoUPj4+ytUyYsD5QqrYi6qKy/tVcTimdD169MDWrVthbm6OgoICnD17FkFBQRg7dixat24tmuE6hVOnTmHZsmUYNGgQLl26hOTkZNjZ2WHJkiVClyYITgNRxVD2kitXrmDZsmV4+vQpbGxsYGtriydPnuDq1atISUmBpaUlTp06JXSZVY7zhV5gL+oLXN7/AodjVCUmJuL+/ftwc3PDtm3b4O/vL8prBb+ssLAQenp6+OWXX1CnTh2MGzcOwPNQ8u+//8LNzU0UAZXTQN6Moewl0dHRiI6ORs2aNREbGwsAqFu3LmrUqIGEhATUr18fvXr1ErhK4YlxvhB7UV/vo48+woABA9CnTx9l8PD398eMGTPQsmVL0YQRDse8EB4ejmnTpkFPTw8SiQTe3t6YOXOm8nQYYlNYWAhdXV1oaWlh7Nix+OKLL+Du7i7KBVKcBvJmDGX0Rpwv9Bx7UV+Py/tVcTgGymtcJicn4/jx4wgLC8PVq1dRo0YN1K9fH/PmzYOHh4fQZVaJhIQEfP755/Dz80OTJk2wYsUK7N+/X+iyBMNpIG/GUFZGYuhWfhnnC6liL6oqLu9/gcMxqoqKivD555+jR48e6N27t8pt0dHRCAwMxNChQ+Hg4CCK4yMrKws7d+7E9evXcefOHWRmZsLX1xeurq5o3rw5GjZsiJo1awpdZpXiNJDXYyijUnG+EL0Jl/e/wOEYVdnZ2diwYQOCgoKgra2NsWPHomvXrrC2tha6NI1w+/ZthIeH459//sG1a9cwbdo0TJw4UeiyqgSngbydOGZnk9rUuRyIGD5430QM3/ZfJpVKYW1tjejoaFy4cEH0y/ubNWuGuXPn4vbt22jRogW+/PJLUQ/H1KxZE9OnT8f06dOxY8cOXLp0Cenp6Rg8eLDoTgtSGmdnZzg7OwN4Pj0kNzcXgDjeR65du/bGaSCBgYGinQaiwJ4yeiPOF6I34fL+5zgc80JhYSGysrKU5zA8e/Ys1q1bh6ioKPj5+eGzzz6DlZWVwFWSEDgN5O0YyqgEzheit+Hy/uc4HKMqLS0NM2fORP369REfHw8dHR3lCtSdO3ciLi4O4eHhqF+/vtClEmkkhjIqgfOF6E24vP8FrspVdffuXcyYMQPu7u4YN24c5HI5Hj58CENDQ1hZWSElJQXt2rUTukzSQGL4ElcWDGWkgpcDoTfh8n5VHI4p6d69e8q5ZL6+vujfvz8aN26svJ1f6Ihej6GMXovzhehVXN5PZZWcnIyjR48iKysLHh4eXK1NVAYMZVQC5wtRWYl5ef/biO1vJCMjA/v374e5uTlsbW2Rm5uL2NhYXLt2Df/88w/MzMywefNm6OvrC10qkcbiKTFIhWK+EPB8wvIXX3wB4Pmwprm5OczNzQFAVB829HpiXt7/NmJ7/bdv30ZERARSU1ORkpICV1dXmJiY4PHjx8jOzkbDhg2hr6/P4UuiN2AoI6VX5wtlZmbC3d0dAEQ3gZvUp6enpzx/ndgCCQHe3t5o1qyZ8hJsSUlJkMvlypPGZmRkAOCcMqI34fAlKXG+EBFVNIYworJjKKPX4nwhIiKiqsNQRmWimC9kYmLC+UJERESVgKGMiIiISAOwu4OIiIhIAzCUEREREWkAhjIiIiIiDcBQRkRERKQBGMqIiIiINABD
GRFpvDFjxuCnn34qsX3v3r1o1aqVxtRT2X799Vd88MEHVf68RFQ1GMqIiF6jqKhI6BKISEQYyoio2rhw4QKGDBkCT09PtGrVCiNGjMCjR4+Ut4eHh2PgwIFwd3dHly5dsGrVKhQXFytvd3R0xLZt2zBlyhR4enri999/L9Pz+vn54ffff8eXX36J5s2bw9fXFzt37lTenpiYCEdHR4SEhGDEiBFwd3dH3759cfHiReU+pfX6hYeHw9HRUXn7qlWrEB0dDUdHRzg6OmLv3r3laici0kwMZURULRQXF2Pq1Klo3bo19u/fj507d2L48OHK6y5evnwZc+bMwdixYxEaGooff/wRe/fuLRG8Vq1ahW7duuHAgQMYPHhwmZ9/w4YNcHNzQ3BwMEaNGoUffvgB9+/fV9nn559/xvjx4xEcHAxPT09MmTJFeaHut+nduzcmTJiApk2b4syZMzhz5gx69+5d5vqISPMxlBFRtZCdnY2srCx07twZdnZ2aNy4MQYOHAhra2sAz8PWpEmTMHDgQNja2qJdu3b49NNPsWPHDpXH6du3LwYPHgxbW1vlfcuiY8eOGD16NOzt7fHRRx+hTp06uHDhgso+o0ePRo8ePdC4cWP88MMPMDY2xp49e8r0+Pr6+jA0NIREIoGFhQUsLCygr69f5vqISPPpCF0AEVFFMDExwaBBgzBx4kS0a9cOPj4+6NWrFywtLQEA0dHR+Oeff1R6xqRSKQoKCpCXlwcDAwMAgJubW7meXzHMCABaWlowNzdHWlqayj7NmzdX/l9HRwdubm4letOISLwYyohI4xkZGSE7O7vE9mfPnsHY2Fj5c0BAAMaMGYPTp08jLCwMy5cvx4YNG+Dp6Ync3FxMnz4d3bt3L/E4NWrUUP7f0NCwXDXq6Ki+nWppaUGdSwtra2uX2J8LDYjEhcOXRKTxGjZsiKioqBLbb926hQYNGqhsc3FxweTJk7Fjxw44ODjg77//Vm5/8OAB7O3tS/zT1q6at8Jr164p/19cXIyoqCg0atQIAFCnTh3k5OQgNzdXuU90dLTK/XV1dSGTyaqkViKqeuwpIyKNN2rUKGzduhULFizAkCFDoKenh5MnTyIkJAS//fYbACAhIQG7du2Cn58fLC0t8eDBA8TFxSnP6zV16lRMmTIF1tbW6NGjB7S1tREdHY07d+5gxowZVfI6tm3bhgYNGqBRo0bYtGkTnj59qlxM0KxZMxgYGGDZsmUYO3Ysrl+/XmJ1pY2NDRITE3H79m1YWVmhZs2a0NPTq5LaiajyMZQRkcaztbXFX3/9heXLl2P8+PEoKipCo0aNsGLFCnTs2BEAYGBggPv37yMoKAiZmZmwtLTE6NGjMWLECABAhw4d8Pvvv2P16tX4888/oaOjg0aNGmHo0KFV9jpmzZqFNWvW4Pbt27C3t8dvv/0GU1NTAM/nxP33v//Fzz//jN27d8PHxwfTp0/Ht99+q7x/jx49cOTIEYwdOxbPnj1DQEAABg0aVGX1E1Hl0pKrM+mBiIjUlpiYiC5duiA4OBjOzs5Cl0NEGopzyoiIiIg0AEMZERERkQbg8CURERGRBmBPGREREZEGYCgjIiIi0gAMZUREREQagKGMiIiISAMwlBERERFpAIYyIiIiIg3AUEZERESkARjKiIiIiDQAQxkRERGRBvh/e2roY4HwA6EAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_metric('answer_relevancy')" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "79dc2a03-da9e-4f9d-8109-a94dfa79127e", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAHWCAYAAABACtmGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABRlUlEQVR4nO3deVhU5f//8ReLuKa5oJlLFgaigkiCSbiRlbmmpmapueSWVi591LQyt2izRds0jdQylxTL3RbNLDS3klwLF9wiRNMQFJm5f3/4Y75OoIcxcFCfj+vyqrnPfc55z+HcM/Oas4yHMcYIAAAAAHBJnu4uAAAAAAAKOoITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAABX4MyZMxo9erTuueceBQQEaOLEie4uycmiRYsUEBCg+Ph4d5eSp6ZMmaKAgAB3lwHgBuTt7gIA4Frw2Wefady4cQoODtaCBQvcXU6BkpGRoc8//1yxsbFKTEyUp6enKlSooNDQUPXo0UN+fn7uLjFfTJ06VbGxsXryySdVpUqVyz7PqKgoHTlyJMdpkZGRmjFjRn6VecWmTJmid999Vx4eHlqzZo0qVqzoND01NVURERE6d+6cHnvsMb344osur+PDDz9U9erV1axZs7wqGwDyDcEJAHJhyZIlqlSpkrZv366DBw/qtttuc3dJBcbTTz+tdevWqWXLlurYsaMyMzO1b98+rV27VnXr1r1ug9OGDRtUp04dDRo0KFf9AwMD1bNnz2zt5cuXz+vS8pSPj4+WLl2qPn36OLWvXr36Py976tSpeuCBB1wKTgMGDFDfvn3/87oBwFUEJwCwcOjQIW3btk3vvvuuXnzxRS1ZsiTXH5bzit1u1/nz51W4cOGrul4r27dv15o1azRkyBD179/faZrNZtPp06evWi3nzp1ToUKF5Ol5dc5CT0lJUfXq1XPdv0KFCmrbtm0+VpQ/GjdurGXLlmULTkuXLlWTJk20atWqq1JHWlqaihUrJm9vb3l78/EFwNXHNU4AYGHJkiUqVaqUGjdurAceeEBLlixxTDt//rzCw8P13HPPZZsvNTVVQUFBevXVVx1tGRkZmjx5su677z7Vrl1bjRs31muvvaaMjAyneQMCAjRu3Dh99dVXatmypYKCgvTDDz9IkmbMmKFHHnlE9evXV3BwsNq3b6+VK1dmW//Zs2c1YcIE1a9fX3Xr1lX//v2VlJSkgIAATZkyxalvUlKSnnvuOUVERKh27dpq2bKlvvjiC8ttc+jQIUlSaGhotmleXl4qXbp0tvWMGjVKkZGRql27tqKiojRmzBin53/o0CE9/fTTCg8PV506ddSpUyetXbvWaTkbN25UQECAli1bprfeeksNGzZUnTp1lJqaKkn69ddf1bt3b911112qU6eOunbtqi1btlg+H+lCIBo1apQiIiIUFBSkNm3aKDY2Ntu6Dx8+rLVr1yogIMDx+L/avXu3Ro4cqXvvvVdBQUG655579Nxzz+nkyZPZ+uZmW0oX9rno6GjdfffdCgkJ0cCBA3XixIlc19SqVSvt2rVLCQkJjrbk5GRt2LBBrVq1ynGe3OznAQEBSktLU2xsrGMbjhw5UtL/Xcf0xx9/aNiwYQoLC9Ojjz7qNO3fvvzySz388MOqU6eOwsLC9Nhjj2n9+vW5fp4AYIWvbADAwpIlS3TffffJx8dHrVq10ueff67t27crODhYhQoVUrNmzfT1119r7Nix8vHxccz3zTffKCMjQy1atJB04ajRgAEDtGXLFnXq1El+fn7au3evZs6cqQMHDuj99993Wu+GDRu0YsUKPfbYYypdurQqVaokSZo1a5aioqLUunVrnT9/XsuWLdMzzzyjqVOnqkmTJo75R44cqRUrVqht27aqU6eONm3alOMpTsePH1enTp3k4eGhxx57TGXKlNG6des0evRopaamqkePHpfcNrfeeqtjG4WGhl72SEBSUpIefvhh/fPPP+rUqZPuuOMOJSUladWqVTp79qx8fHx0/PhxPfLII0pPT1e3bt1UunRpxcbGasCAAY4P4hd7//33VahQIfXu3VsZGRkqVKiQ4uLi1KdPH9WuXVuDBg2Sh4eHFi1apMcff1xz5sxRcHDwJWs8e/asunXrpsTERD322GOqXLmyVq5cqZEjR+r06dN6/PHH5efnp9dee03R0dG65ZZbHKfflSlT5pLLlaTMzMwcA0uxYsVUpEgRSdJPP/2kQ4cOqX379vL19dXvv/+u+fPn648//tD8+fPl4eGR622ZZcKECSpZsqQGDRqkI0eOaObMmRo3bpzefvvty9abJSwsTLfccouWLl2qZ555RpK0fPlyFStWzGl/y5Lb/fy1117T888/r+DgYHXq1EmSVLVqVadlPfPMM7rttts0ZMgQGWMuWeO7776rKVOmqG7dunr66adVqFAh/frrr9qwYYMiIyNz9TwBwJIBAFxSfHy88ff3Nz/++KMxxhi73W4aNWpkJkyY4Ojzww8/GH9/f/Pdd985zdunTx9z7733Oh4vXrzY1KhRw2zatMmp3+eff278/f3Nli1bHG3+/v6mRo0a5vfff89WU3p6utPjjIwM06pVK9O9e3dH22+//Wb8/f3NxIkTnfqOHDnS+Pv7m8mTJzvaRo0aZe655x5z4sQJp75Dhgwxd911V7b1Xcxut5uuXbsaf39/ExERYYYOHWo+/fRTc+TIkWx9hw8fbmrUqGG2b9+e43KMMWbixInG39/faRulpqaaqKgo07RpU2Oz2YwxxmzYsMH4+/ube++916k+u91u7r//ftOrVy/HMrO2WVRUlOnZs+cln4sxxnzyySfG39/ffPnll462jIwM07lzZxMSEmL++ecfR3vTpk1N3759L7u8i/v6+/vn+G/q1KlOdf7b0qVLs22T3GzLhQsXGn9/f9OjRw+nbfHyyy+bwMBAc/r06cvWPHnyZOPv729SUlLMK6+8Yu677z7HtA4dOpiRI0caYy7sq2PHjnVMc2U/DwkJMSNGjLjkuocOHXrJaVkOHDhgatSoYQYOHOjYP/69LQAgL3CqHgBcxpIlS1SuXDnVr19fkuTh4aEWLVpo+fLlstlskqS7775bpUuX1vLlyx3znTp1Sj/99JPjaJMkrVy5Un5+frrjjjt04sQJx7+7775b0oVTwC4WFhaW4zU0WUcnstbzzz//6K677tLOnTsd7Vmn9
WWd3pSla9euTo+NMVq9erWioqJkjHGqKzIyUv/884927Nhxye3j4eGhGTNmaPDgwSpZsqSWLl2qcePGqWnTpho8eLDjGie73a5vvvlGTZs2VVBQUI7LkaTvv/9ewcHBqlevnmNa8eLF1blzZx05ckR//PGH03wPPfSQ0/bYtWuXDhw4oNatW+vkyZOO55KWlqYGDRpo06ZNstvtl3w+69atk6+vr9MpaIUKFVK3bt2UlpamTZs2XXJeK3Xq1FFMTEy2fy1btnT0ufi5nDt3TidOnFCdOnUkyfF3yO22zJJ1NDFLvXr1ZLPZLnmXv5y0bt1aBw8edNwcJT4+Xq1bt86xr6v7+eU88sgjln2++eYb2e12DRw4MNv1bf/eFgDwX3CqHgBcgs1m07Jly1S/fn2n61eCg4P18ccfKy4uTpGRkfL29tb999+vpUuXKiMjQz4+Plq9erXOnz/vFJwOHjyohIQENWjQIMf1paSkOD2uXLlyjv3WrFmjDz74QLt27XK6ZuTiD4lHjx6Vp6dntmX8+26AJ06c0OnTpzVv3jzNmzcvx/VZXQ/j4+OjAQMGaMCAAfrrr7+0adMmzZo1SytWrJC3t7feeOMNnThxQqmpqbrzzjsvu6yjR486gsLF7rjjDsd0f39/R/u/n9+BAwckSSNGjLjkOv755x+VKlUqx2lHjhzRbbfdlu0DeNadAY8ePXrZ+i+ndOnSioiIuGyfv//+W++++66WL1+ebX/4559/JCnX2zJL1umUWUqWLClJLt24o2bNmrrjjju0dOlSlSxZUr6+vo4g9G+u7ueXc6kxcLGsW+Bfr3dvBFBwEJwA4BI2bNig5ORkLVu2TMuWLcs2fcmSJY7rJ1q2bKl58+Zp3bp1atasmVauXKk77rhDNWrUcPS32+3y9/fP8UYSknTLLbc4Pb746EOWzZs3a8CAAQoLC9OYMWPk6+urQoUKaeHChVq6dKnLzzHr6EubNm3Url27HPu48mOj5cuXV8uWLXX//ferVatWWrlypV555RWX68qtf28j8/+vgxk+fLgCAwNznKdYsWL5Vs9/NXjwYG3btk29e/dWYGCgihUrJrvdrieeeOKy1/hczqXuMujq8rKu7ytevLgefPDBSy7X1f38cgraXSQB3NgITgBwCUuWLFHZsmVz/GHPr7/+2nFDiCJFiigsLEy+vr5avny5QkNDtWHDhmy3565atap2796tBg0aXPEpRKtWrVLhwoU1Y8YMpxsALFy40KnfrbfeKrvdrsOHD6tatWqO9oMHDzr1K1OmjIoXLy673W55NMQVhQoVUkBAgA4cOKCTJ0+qbNmyKlGihH7//ffLznfrrbdq//792dr37dvnmH45VapUkSSVKFHiip5PpUqVtGfPHtntdqdgkNv1/xenTp1SXFycnnrqKafb3WcdRctSpkyZXG3LvNa6dWtNnjxZycnJev311y/ZLy/2c1dUrVpVdrtdCQkJlwzLAJAXuMYJAHJw9uxZrV69Wk2aNFHz5s2z/Xvsscd05swZfffdd5IufKvfvHlzrVmzRl999ZUyMzOdTtOTpAcffFBJSUmaP39+jutLS0uzrMvLy0seHh6O66sk6fDhw/r222+d+mUdCZszZ45T+6effppteQ888IBWrVqlvXv3Zluf1Wl6Bw4cyPH0tdOnT2vbtm0qVaqUypQpI09PTzVr1kxr1qxRfHx8tv5ZRz8aN26s7du3a9u2bY5paWlpmj9/vipVqmT5u0m1a9dW1apV9fHHH+vMmTMuP59GjRopOTnZ6Xq1zMxMzZ49W8WKFVNYWNhl5/8vvLy8cmyfOXOm0+Pcbsu8VrVqVY0aNUrDhg277J0JXdnPixUr9p9/66tZs2by9PTUe++9l+36tfzaFgBuTBxxAoAcfPfddzpz5oyioqJynB4SEqIyZcroq6++cgSkBx98ULNnz9bkyZPl7++f7ZqLtm3basWKFRozZow2btyo0NBQ2Ww27du3TytXrtT06dNzvNj/Yo0bN1ZMTIyeeOIJtWrVSikpKZozZ46qVq2qPXv2OPrVrl1bDzzwgGbOnKm///7bcTvyrKMXFx8JGDZsmDZu3KhOnTqpY8eOql69uk6dOqUdO3YoLi5OP//88yXr2b17t5599lk1bNhQ9erVU6lSpZSUlKTFixfrr7/+0qhRoxyBYOjQofrxxx/VrVs3x22qk5OTtXLlSs2ZM0clS5ZU3759HT+22q1bN5UqVUqLFy/W4cOHNWXKFMsft/X09NSECRPUp08ftWrVSu3bt1eFChWUlJSkjRs3qkSJEvrwww8vOX/nzp01b948jRw5Ujt27FClSpW0atUqbd26VaNGjVKJEiUuu/7LSUpK0pdffpmtvXjx4mrWrJlKlCihsLAwTZ8+XefPn1eFChX0448/5vj7ULnZlvnh8ccft+zjyn5eq1YtxcXFKSYmRuXLl1flypVzvMbtcm677Tb1799f77//vh599FHdf//98vHxUXx8vMqXL69hw4Zd0XMFgH8jOAFADr766isVLlxY99xzT47TPT091aRJEy1ZskQnT55U6dKlFRoaqooVK+rYsWPZjjZlzfPee+/pk08+0Zdffqmvv/5aRYsWVeXKldWtWzfdfvvtlnU1aNBAEydO1EcffaSXX35ZlStX1rPPPqsjR444BSdJevXVV1WuXDktW7ZMX3/9tSIiIvTWW2+pefPmTqf5lStXTgsWLNB7772nr7/+Wp9//rluvvlmVa9eXc8+++xl6wkLC9PTTz+tH374QTExMTp58qSKFy+uwMBAPfvss3rggQccfStUqKD58+frnXfe0ZIlS5SamqoKFSqoUaNGjmuVypUrp7lz5+r111/Xp59+qnPnzikgIEAffvhhjr8ZlJP69etr3rx5ev/99/Xpp58qLS1Nvr6+Cg4OVufOnS87b5EiRTR79my98cYbio2NVWpqqm6//XZFR0erffv2uVr/pezatUvDhw/P1l6pUiU1a9ZMkjRp0iSNHz9ec+bMkTFG99xzjz766CM1bNjQaZ7cbEt3cWU/HzlypF588UW9/fbbOnv2rNq1a+dycJIu/N5T5cqV9emnn+qtt95S0aJFFRAQoLZt2+blUwNwg/MwHMcGgBvGrl279NBDD+n1119XmzZt3F0OAADXDK5xAoDr1NmzZ7O1zZw5U56envl6rQ4AANcjTtUDgOvU9OnT9dtvv+nuu++Wl5eX1q1bp3Xr1qlz586qWLGiu8sDAOCawql6AHCd+vHHH/Xuu+8qISFBaWlpqlixotq2bav+/fvL25vvzQAAcAXBCQAAAAAscI0TAAAAAFggOAEAAACAhRvuJHe73a7MzEx5eno6/QAkAAAAgBuLMUZ2u13e3t6WP7J+wwWnzMxMxcfHu7sMAAAAAAVEUFCQ04/D5+SGC05ZSTIoKEheXl5urgYAAACAu9hsNsXHx1sebZJuwOCUdXqel5cXwQkAAABAri7h4eYQAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAA
AGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFtwanDZt2qT+/fsrMjJSAQEB+uabbyzn2bhxo9q1a6fatWvrvvvu06JFi65CpQAAAABuZG4NTmlpaQoICNCYMWNy1f/QoUPq16+f6tevry+//FKPP/64nn/+ef3www/5XCkAAACAG5m3O1feuHFjNW7cONf9586dq8qVK2vkyJGSJD8/P23ZskWffPKJGjZsmF9lAgAAALjBuTU4ueqXX35RgwYNnNoiIyP18ssvu7wsm82WV2UBAAAAuAa5kgmuqeB0/PhxlStXzqmtXLlySk1N1dmzZ1WkSJFcLys+Pj6vywMAAABwnbqmglNeCgoKkpeXl7vLAAAAAOAmNpst1wdUrqngVK5cOR0/ftyp7fjx4ypRooRLR5skycvLi+AEAAAAIFeuqd9xCgkJ0YYNG5zafvrpJ4WEhLinIAAAAAA3BLcGpzNnzmjXrl3atWuXJOnw4cPatWuXjh49KkmaNGmShg8f7uj/yCOP6NChQ3rttdeUkJCgzz77TCtWrFCPHj3cUT4AAACAG4RbT9X77bff1L17d8fj6OhoSVK7du30yiuvKDk5WceOHXNMr1KliqZOnaro6GjNmjVLt9xyiyZMmMCtyAEAAADkKw9jjHF3EVeTzWbTL7/8opCQEK5xAgAAAG5grmSDa+oaJwAAAABwB4ITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAB5wGa3u7sE3CDY1wD38HZ3AQAAXA+8PD31/LcLtP9ksrtLwXXs9tK+mnBvR3eXAdyQCE4AAOSR/SeTtef4MXeXAQDIB5yqBwAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAAAAWCE4AAAAAYIHgBAAAgDxhN3Z3l4AbhDv2Ne+rvkYAAABclzw9PLVo9zQlpx11dym4jvkWu1Xta/S96uslOOUBu90uT08O3iH/sa8BAAq65LSj+jM10d1lAHmO4JQHPD099fobX+jQ4ePuLgXXsSqVy+l/zz7s7jIAAABuSASnPHLo8HElJBxzdxkAAAAA8gHn/AAAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFhwe3D67LPPFBUVpaCgIHXs2FHbt2+/bP9PPvlEDzzwgIKDg9W4cWO9/PLLOnfu3FWqFgAAAMCNyK3Bafny5YqOjtbAgQMVGxurGjVqqHfv3kpJScmx/5IlSzRp0iQNGjRIy5cv18SJE7V8+XK9+eabV7lyAAAAADcStwanmJgYderUSR06dFD16tU1duxYFSlSRAsXLsyx/7Zt2xQaGqrWrVurcuXKioyMVKtWrSyPUgHIXza73d0l4AbBvgYAcBdvd604IyNDO3bsUL9+/Rxtnp6eioiI0LZt23Kcp27duvrqq6+0fft2BQcH69ChQ/r+++/Vtm1bl9dvs9muuPZ/8/LyyrNlAVbyct/NK15eXnrx01gdSDru7lJwHatWoZzGdW1XIMeAxHsBri7GAZA348CVZbgtOJ08eVI2m01ly5Z1ai9btqz27duX4zytW7fWyZMn9eijj8oYo8zMTD3yyCPq37+/y+uPj4+/orr/rWjRoqpZs2aeLAvIjT179ig9Pd3dZThkjYEDSce158if7i4HN4CCNgYk3gtw9TEOgKs/DtwWnK7Exo0bNXXqVI0ZM0bBwcFKTEzUxIkT9d5772ngwIEuLSsoKIhvRXBNCggIcHcJgFsxBgDGASDlzTiw2Wy5PqDituBUunRpeXl5ZbsRREpKisqVK5fjPO+8847atGmjjh07SrqwsdLS0vTiiy9qwIAB8vTM/SVbXl5eBCdck9hvcaNjDACMA0C6+uPAbTeH8PHxUa1atRQXF+dos9vtiouLU926dXOc5+zZs9nCUdYGM8bkX7EAAAAAbmhuPVWvZ8+eGjFihGrXrq3g4GDNnDlT6enpat++vSRp+PDhqlChgoYNGyZJatq0qWJiYlSzZk3HqXrvvPOOmjZtyjcvAAAAAPKNW4NTixYtdOLECU2ePFnJyckKDAzU9OnTHafqHTt2zOkI04ABA+Th4aG3335bSUlJKlOmjJo2baohQ4a46ykAAAAAuAG4/eYQXbt2VdeuXXOcNnv2bKfH3t7eGjRokAYNGnQ1SgMAAAAASW7+AVwAAAAAuBYQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAAsEJAAAAACwQnAAAAADAwhUFp82bN+vZZ59V586dlZSUJElavHixNm/enKfFAQAAAEBB4HJwWrVqlXr37q0iRYpo586dysjIkCSlpqZq6tSpeV4gAAAAALiby8Hpgw8+0NixYzVhwgR5e3s72kNDQ7Vz5848LQ4AAAAACgKXg9P+/ftVr169bO033XSTTp8+nSdFAQAAAEBB4nJwKleunBITE7O1b9myRVWqVMmTogAAAACgIHE5OHXq1EkTJ07Ur7/+Kg8PDyUlJemrr77Sq6++qi5duuRHjQAAAADgVt7WXZz17dtXdrtdPXr0UHp6urp27SofHx/16tVL3bp1y48aAQAAAMCtXApONptNW7du1WOPPabevXsrMTFRaWlp8vPzU/HixfOrRgAAAABwK5dO1fPy8lKvXr106tQp+fj4qHr16goODiY0AQAAALiuuXyN05133qnDhw/nRy0AAAAAUCC5HJwGDx6sV199VWvWrNFff/2l1NRUp38AAAAAcL25optDSNKAAQPk4eHhaDfGyMPDQ7t27cq
76gAAAACgAHA5OM2aNSs/6gAAAACAAsvl4BQeHp4fdQAAAABAgeVycJKk06dP64svvlBCQoKkCzeM6NChg2666aY8LQ4AAAAACgKXbw4RHx+v++67T5988olOnTqlU6dOKSYmRs2aNdOOHTvyo0YAAAAAcCuXjzhFR0crKipK48ePl7f3hdkzMzP1/PPP6+WXX9Znn32W50UCAAAAgDu5fMTpt99+0xNPPOEITZLk7e2tJ554Qr/99lueFgcAAAAABYHLwalEiRI6duxYtvZjx46pePHieVIUAAAAABQkLgenFi1aaPTo0Vq+fLmOHTumY8eOadmyZXr++efVsmXL/KgRAAAAANzK5Wuchg8f7vivzWa7sBBvb3Xp0kXPPvts3lYHAAAAAAWAy8HJx8dHzz//vIYNG6bExERJUtWqVVW0aNE8Lw4AAAAACgKXg9M///wjm82mm2++WQEBAY72v//+W97e3ipRokSeFggAAAAA7ubyNU5DhgzRsmXLsrWvWLFCQ4YMyZOiAAAAAKAgcTk4bd++XXfffXe29vDwcG3fvj1PigIAAACAgsTl4JSRkaHMzMxs7ZmZmTp79myeFAUAAAAABYnLwSkoKEjz58/P1j537lzVqlUrT4oCAAAAgILE5ZtDDB48WD179tTu3bvVoEEDSVJcXJzi4+P18ccfu1zAZ599phkzZig5OVk1atTQCy+8oODg4Ev2P336tN566y19/fXX+vvvv1WpUiWNGjVKjRs3dnndAAAAAJAbLgenu+66S/PmzdP06dO1YsUKFS5cWAEBAZo4caKqVavm0rKWL1+u6OhojR07VnXq1NHMmTPVu3dvrVy5UmXLls3WPyMjQz179lTZsmX1zjvvqEKFCjp69KhKlizp6tMAAAAAgFxzOThJUmBgoCZNmvSfVx4TE6NOnTqpQ4cOkqSxY8dq7dq1Wrhwofr27Zut/8KFC3Xq1CnNnTtXhQoVkiRVrlz5P9cBAAAAAJeT6+CUmZkpu90uHx8fR9vx48c1d+5cpaWlKSoqSvXq1cv1ijMyMrRjxw7169fP0ebp6amIiAht27Ytx3m+++47hYSEaNy4cfr2229VpkwZtWrVSn369JGXl1eu1y1JNpvNpf6X4+q6gf8iL/fdvMIYwNVUEMeAxDjA1cU4APJmHLiyjFwHpxdeeEGFChXSuHHjJEmpqal6+OGHde7cOfn6+mrmzJl6//33c32t0cmTJ2Wz2bKdkle2bFnt27cvx3kOHTqkDRs2qHXr1po2bZoSExM1duxYZWZmatCgQbl9KpKk+Ph4l/pfStGiRVWzZs08WRaQG3v27FF6erq7y3BgDOBqK2hjQGIc4OpjHABXfxzkOjht3bpVL7zwguPxl19+KZvNptWrV+umm27S66+/runTp+frTRqMMSpbtqzGjx8vLy8v1a5dW0lJSZoxY4bLwSkoKIhvRXBNCggIcHcJgFsxBgDGASDlzTiw2Wy5PqCS6+CUlJSk2267zfE4Li5ODzzwgG666SZJUrt27bRo0aJcF1m6dGl5eXkpJSXFqT0lJUXlypXLcR5fX195e3s7BZ477rhDycnJysjIcDqN0IqXlxfBCdck9lvc6BgDAOMAkK7+OMj17zgVLlxY586dczz+5ZdfnG4bXrhwYaWlpeV6xT4+PqpVq5bi4uIcbXa7XXFxcapbt26O84SGhioxMVF2u93RduDAAfn6+roUmgAAAADAFbkOTjVq1NCXX34pSdq8ebNSUlJ09913O6YnJiaqfPnyLq28Z8+emj9/vmJjY5WQkKCXXnpJ6enpat++vSRp+PDhTnfv69Kli/7++29NnDhR+/fv19q1azV16lQ99thjLq0XAAAAAFyR61P1Bg4cqD59+mjFihVKTk5Wu3btnILS119/rdDQUJdW3qJFC504cUKTJ09WcnKyAgMDNX36dMepeseOHZOn5/9lu4oVK2rGjBmKjo5WmzZtVKFCBXXv3l19+vRxab0AAAAA4IpcB6fw8HAtWrRI69evl6+vr5o3b+40PTAw0OnUvdzq2rWrunbtmuO02bNnZ2urW7eu5s+f7/J6AAAAAOBKufQDuH5+fvLz88txWufOnfOkIAAAAAAoaHJ9jRMAAAAA3KgITgAAAABggeAEAAAAABYITgAAAABg4YqC0+nTp7VgwQJNmjRJf//9tyRpx44dSkpKysvaAAAAAKBAcOmuepK0e/du9ezZUzfddJOOHDmiTp066eabb9bq1at17Ngxvfbaa/lRJwAAAAC4jctHnF555RW1a9dOq1evlo+Pj6O9cePG2rx5c54WBwAAAAAFgcvBKT4+Xo888ki29goVKig5OTlPigIAAACAgsTl4OTj46PU1NRs7QcOHFCZMmXypCgAAAAAKEhcDk5RUVF67733dP78eUfb0aNH9cYbb+j+++/P0+IAAAAAoCBwOTiNHDlSaWlpioiI0Llz59StWzfdf//9Kl68uIYMGZIfNQIAAACAW7l8V72bbrpJMTEx2rx5s/bs2aO0tDTVqlVLERER+VEfAAAAALidy8EpS7169VSvXr28rAUAAAAACiSXg9OsWbNybPfw8FDhwoVVtWpVhYWFycvL6z8XBwAAAAAFgcvB6ZNPPtHJkyeVnp6uUqVKSZJOnTqlokWLqlixYkpJSVGVKlU0a9YsVaxYMc8LBgAAAICrzeWbQwwdOlS1a9fW6tWrtXHjRm3cuFGrVq1ScHCwRo8erbVr16pcuXKKjo7Oj3oBAAAA4KpzOTi9/fbbGjVqlKpWrepou+222zRixAhNmjRJt9xyi/73v/9p69ateVooAAAAALiLy8EpOTlZmZmZ2dozMzN1/PhxSVL58uV15syZ/14dAAAAABQALgen+vXra8yYMdq5c6ejbefOnXrppZd09913S5L27t2rypUr512VAAAAAOBGLt8cYuLEiRo+fLjat28vb+8Ls9tsNjVo0EATJ06UJBUrVkwjRozI20oBAAAAwE1cDk6+vr6KiYlRQkKCDhw4IEm6/fbbdccddzj6ZB15AgAAAIDrwRX/AK6fn5/8/PzyshYAAAAAKJCuKDj9+eef+vbbb3Xs2DGdP3/eadpzzz2XJ4UBAAAAQEHhcnCKi4vTgAEDVKVKFe3bt0933nmnjhw5ImOMatasmR81AgAAAIBbuXxXvUmTJqlXr15asmSJfHx8NGXKFK1du1ZhYWFq3rx5ftQIAAAAAG7lcnBKSEjQQw89JEny9vbW2bNnVbx4cT3zzDOaPn16XtcHAAAAAG7ncnAqVqyY47omX19fJSYmOqadPHky7yoDAAAAgALC5Wuc6tSpoy1btsjPz0+NGzfWq6++qr179+rrr79WnTp18qNGAAAAAHArl4PTc889pzNnzkiSnnrqKZ05c0bLly9XtWrVNHLkyDwvEAAAAADczaXgZLPZ9OeffyogIEDShdP2xo0bly+FAQAAAEBB4dI1Tl5eXurVq5dOnTqVX/UAAAAAQIHj8s0h7rzzTh0+fDg/agEAAACAAsnl4DR48GC9+uqrWrNmjf766y
+lpqY6/QMAAACA643LN4fo27evJGnAgAHy8PBwtBtj5OHhoV27duVddQAAAABQALgcnGbNmpUfdQAAAABAgeVycAoPD8+POgAAAACgwHL5GidJ2rx5s5599lk98sgjSkpKkiQtXrxYmzdvztPiAAAAAKAgcDk4rVq1Sr1791aRIkW0Y8cOZWRkSJJSU1M1derUPC8QAAAAANzN5eD0wQcfaOzYsZowYYK8vf/vTL/Q0FDt3LkzT4sDAAAAgILA5eC0f/9+1atXL1v7TTfdpNOnT+dJUQAAAABQkLgcnMqVK6fExMRs7Vu2bFGVKlXypCgAAAAAKEhcDk6dOnXSxIkT9euvv8rDw0NJSUn66quv9Oqrr6pLly75USMAAAAAuNUV/QCu3W5Xjx49lJ6erq5du8rHx0e9evVSt27d8qNGAAAAAHArl4OTh4eHBgwYoN69eysxMVFpaWny8/NT8eLF86M+AAAAAHA7l0/V+/LLL5Weni4fHx9Vr15dwcHBhCYAAAAA1zWXg1N0dLQiIiI0bNgwff/997LZbPlRFwAAAAAUGC6fqrd+/Xr98MMPWrp0qQYPHqwiRYqoefPmat26tUJDQ/OjRgAAAABwK5eDk7e3t5o2baqmTZsqPT1dX3/9tZYuXaru3bvrlltu0TfffJMfdQIAAACA27gcnC5WtGhRRUZG6vTp0zp69KgSEhLyqi4AAAAAKDCuKDhlHWlasmSJ4uLiVLFiRbVs2VLvvPNOXtcHAAAAAG7ncnAaMmSI1q5dqyJFiujBBx/Uk08+qbp16+ZHbQAAAABQILgcnDw9PfX2228rMjJSXl5eTtP27t0rf3//PCsOAAAAAAoCl4PTpEmTnB6npqZq2bJlWrBggXbs2KFdu3blWXEAAAAAUBBc8c0hNm3apC+++EKrV69W+fLldd999+nFF1/My9oAAAAAoEBwKTglJycrNjZWX3zxhVJTU/Xggw8qIyND7733nqpXr55fNQIAAACAW+U6OPXv31+bNm1SkyZNNGrUKDVs2FBeXl6aO3duftYHAAAAAG6X6+C0bt06devWTV26dFG1atXysSQAAAAAKFg8c9txzpw5OnPmjNq3b6+OHTvq008/1YkTJ/KzNgAAAAAoEHIdnEJCQjRhwgStX79enTt31rJly9SoUSPZ7Xb9+OOPSk1NveIiPvvsM0VFRSkoKEgdO3bU9u3bczXfsmXLFBAQoCeffPKK1w0AAAAAVnIdnLIUK1ZMDz/8sD7//HN99dVX6tmzpz766CNFRESof//+LhewfPlyRUdHa+DAgYqNjVWNGjXUu3dvpaSkXHa+w4cP69VXX1W9evVcXicAAAAAuMLl4HSxO+64Q8OHD9f333+vN99884qWERMTo06dOqlDhw6qXr26xo4dqyJFimjhwoWXnMdms+nZZ5/VU089pSpVqlxp+QAAAACQK1f8O04X8/LyUrNmzdSsWTOX5svIyNCOHTvUr18/R5unp6ciIiK0bdu2S8733nvvqWzZsurYsaO2bNlyRTXbbLYrmi8nXl5eebYswEpe7rt5hTGAq6kgjgGJcYCri3EA5M04cGUZeRKcrtTJkydls9lUtmxZp/ayZctq3759Oc6zefNmffHFF1q8ePF/Wnd8fPx/mj9L0aJFVbNmzTxZFpAbe/bsUXp6urvLcGAM4GoraGNAYhzg6mMcAFd/HLg1OLkqNTVVw4cP1/jx41WmTJn/tKygoCC+FcE1KSAgwN0lAG7FGAAYB4CUN+PAZrPl+oCKW4NT6dKl5eXlle1GECkpKSpXrly2/ocOHdKRI0c0YMAAR5vdbpck1axZUytXrlTVqlVztW4vLy+CE65J7Le40TEGAMYBIF39ceDW4OTj46NatWopLi7OcX2U3W5XXFycunbtmq3/HXfcoSVLlji1vf322zpz5oxGjx6tW2655arUDQAAAODG4vZT9Xr27KkRI0aodu3aCg4O1syZM5Wenq727dtLkoYPH64KFSpo2LBhKly4sPz9/Z3mL1mypCRlawcAAACAvOL24NSiRQudOHFCkydPVnJysgIDAzV9+nTHqXrHjh2Tp+d/ums6AAAAAPwnbg9OktS1a9ccT82TpNmzZ1923ldeeSU/SgIAAAAABw7lAAAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAIAFghMAAAAAWCA4AQAAAICFAhGcPvvsM0VFRSkoKEgdO3bU9u3bL9l3/vz5evTRRxUWFqawsDD16NHjsv0BAAAA4L9ye3Bavny5oqOjNXDgQMXGxqpGjRrq3bu3UlJScuy/ceNGtWzZUrNmzdLcuXNVsWJF9erVS0lJSVe5cgAAAAA3CrcHp5iYGHXq1EkdOnRQ9erVNXbsWBUpUkQLFy7Msf+kSZP02GOPKTAwUH5+fpowYYLsdrvi4uKucuUAAAAAbhTe7lx5RkaGduzYoX79+jnaPD09FRERoW3btuVqGenp6crMzFSpUqVcWrfNZnOp/+V4eXnl2bIAK3m57+YVxgCupoI4BiTGAa4uxgGQN+PAlWW4NTidPHlSNptNZcuWdWovW7as9u3bl6tlvPHGGypfvrwiIiJcWnd8fLxL/S+laNGiqlmzZp4sC8iNPXv2KD093d1lODAGcLUVtDEgMQ5w9TEOgKs/DtwanP6radOmafny5Zo1a5YKFy7s0rxBQUF8K4JrUkBAgLtLANyKMQAwDgApb8aBzWbL9QEVtwan0qVLy8vLK9uNIFJSUlSuXLnLzjtjxgxNmzZNMTExqlGjhsvr9vLyIjjhmsR+ixsdYwBgHADS1R8Hbr05hI+Pj2rVquV0Y4esGz3UrVv3kvN99NFHev/99zV9+nQFBQVdjVIBAAAA3MDcfqpez549NWLECNWuXVvBwcGaOXOm0tPT1b59e0nS8OHDVaFCBQ0bNkzShdPzJk+erEmTJqlSpUpKTk6WJBUrVkzFixd32/MAAAAAcP1ye3Bq0aKFTpw4ocmTJys5OVmBgYGaPn2641S9Y8eOydPz/
w6MzZ07V+fPn9fTTz/ttJxBgwbpqaeeuqq1AwAAALgxuD04SVLXrl3VtWvXHKfNnj3b6fF33313NUoCAAAAAAe3/wAuAAAAABR0BCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBCcAAAAAsEBwAgAAAAALBSI4ffbZZ4qKilJQUJA6duyo7du3X7b/ihUr1Lx5cwUFBal169b6/vvvr1KlAAAAAG5Ebg9Oy5cvV3R0tAYOHKjY2FjVqFFDvXv3VkpKSo79t27dqmHDhunhhx/W4sWLde+992rgwIHau3fvVa4cAAAAwI3C7cEpJiZGnTp1UocOHVS9enWNHTtWRYoU0cKFC3PsP2vWLDVs2FBPPPGE/Pz8NHjwYNWsWVOffvrpVa4cAAAAwI3C250rz8jI0I4dO9SvXz9Hm6enpyIiIrRt27Yc5/nll1/Uo0cPp7bIyEh98803uVqnMcaxbi8vrysr/F+8vLxU7TZfFSrk9hyK61ilW8vKZrPJZrO5u5RsvLy8VL2ir3y8GAPIP1XLF9wxIF0YB3eWLi8fj7x5bwFyctvNBX8clC9aWZ7GrR8xcZ0rV/SWPBsHWcvIygiX49a9+uTJk7LZbCpbtqxTe9myZbVv374c5zl+/LjKlSuXrf/x48dztU673S5J2rlz5xVUfGmNG1WTVC1Plwn82y+//OLuEi6pZUAVKaCKu8vAda4gjwFJan3z7dLNt7u7DFznCvo4qKq6quruInB9S8v7cZCVES7nhvs6wNvbW0FBQfL09JSHh4e7ywEAAADgJsYY2e12eXtbxyK3BqfSpUvLy8sr240gUlJSsh1VylKuXLlsR5cu1//fPD095ePjc2UFAwAAALghufWCBB8fH9WqVUtxcXGONrvdrri4ONWtWzfHeUJCQrRhwwantp9++kkhISH5WSoAAACAG5jbr+Tu2bOn5s+fr9jYWCUkJOill15Senq62rdvL0kaPny4Jk2a5OjfvXt3/fDDD/r444+VkJCgKVOm6LffflPXrl3d9RQAAAAAXOfcfo1TixYtdOLECU2ePFnJyckKDAzU9OnTHafeHTt2TJ6e/5fvQkND9cYbb+jtt9/Wm2++qWrVqum9996Tv7+/u54CAAAAgOuch8nNvfcAAAAA4Abm9lP1AAAAAKCgIzgBAAAAgAWCEwAAAABYIDjhhhIVFaVPPvkkz/sC16vDhw8rICBAu3btkiRt3LhRAQEBOn36tJsrgyuMMXrhhRcUHh7u9Pe8lH//3S+lW7dumjhx4mX7JCcnq2fPngoJCVG9evVyVS/7GZDdyJEj9eSTT7q7jBsawekGkl9B4FoKGF988YU6d+6c531x/WLc4Hqwbt06xcbG6sMPP9T69et15513XrZ/xYoVnfr9lyDzySefKDk5WYsXL9aqVauuqH4AKAjcfjtyIDcyMjLk4+Pzn5dTpkyZfOkLXG02m00eHh5OP9cAXMqhQ4fk6+ur0NDQXPX38vKSr69vnq27Vq1aqlatWp4sDyjIruS1Oa8+4yD/8Y5bgNjtdn300Ue67777VLt2bTVp0kQffPCBJGnPnj3q3r27goODVb9+fb3wwgs6c+aMY96sw7czZsxQZGSk6tevr7Fjx+r8+fOSLpxOceTIEUVHRysgIEABAQGOeTdv3qxHH31UwcHBaty4sSZMmKC0tDRJ0uLFi1W3bl0dOHDA0f+ll15S8+bNlZ6eftnlXsqiRYtUr149ffPNN7r//vsVFBSk3r1769ixY44+U6ZMUdu2bbVgwQJFRUUpODhYknT69GmNHj1ad999t0JDQ9W9e3ft3r3bafnfffedOnTooKCgINWvX18DBw50TLv4W35jjKZMmaImTZqodu3aioyM1IQJE3LsK0lHjx7VgAEDVLduXYWGhuqZZ57R8ePHs9W8ePFiRUVF6a677tKQIUOUmppquU1w5W60cfPtt9+qRYsWCgoK0tGjR5WRkaFXX31VDRs2VEhIiDp27KiNGzc6zbtlyxZ169ZNderUUVhYmHr37q1Tp05JunAkokuXLqpXr57q16+vfv36KTEx8cr+GCiQRo4cqfHjx+vo0aMKCAhQVFSU5d/94lP1Dh8+rO7du0uSwsLCFBAQoJEjRzr6GmP02muvKTw8XPfcc4+mTJnimBYVFaVVq1Zp8eLFjvlyOg3w9OnTCggIyLbvZsna/3/44Qc9+OCDqlu3rnr37q2//vrLqd+CBQv04IMPKigoSM2bN9dnn33mmJaRkaFx48YpMjJSQUFBatq0qaZOnep4Dpd7P8DVdbn9M2v/Wb16teN1rU2bNtq2bZtj/iNHjqh///4KCwtTSEiIWrZsqe+//16S1L59e82YMcPR98knn1StWrUc7w1//vmnAgICdPDgQUmyfI291Gvz5WS993zwwQeKjIxU8+bNJV347dJnnnlG9erVU3h4uAYMGKDDhw9fcjl2u11Tp051fE5q06aNVq5c6ZjWqFEjzZkzx2menTt3qkaNGjpy5IgkKSYmRq1bt1ZISIgaN26sl156yel9Mrdj74svvlDLli0d42fcuHGSpOeee079+vVz6nv+/Hk1aNBACxYsuOx2KpAMCozXXnvNhIWFmUWLFpmDBw+aTZs2mfnz55szZ86Ye+65xwwaNMjs2bPH/PTTTyYqKsqMGDHCMe+IESNMaGioefHFF80ff/xhvvvuO1OnTh0zb948Y4wxJ0+eNI0aNTLvvvuu+euvv8xff/1ljDHm4MGDJiQkxMTExJj9+/ebLVu2mIceesiMHDnSseynn37adOjQwZw/f96sWbPG1KpVy8THx192uZezcOFCU6tWLdO+fXuzdetWEx8fbx5++GHTuXNnR5/JkyebkJAQ07t3b7Njxw6za9cuY4wxPXr0MP369TPbt283+/fvN6+88ooJDw83J0+eNMYYs2bNGhMYGGjeeecd88cff5hdu3aZDz/80LHcpk2bmpiYGGOMMStWrDChoaFm7dq15siRI+bXX391bK9/97XZbKZt27amS5cuJj4+
3vzyyy+mXbt2pmvXrtlqzvo7bdq0ydxzzz3mzTfftNwmuHI32rjp3Lmz2bJli0lISDBpaWlm9OjRpnPnzmbTpk3m4MGDZvr06aZ27dpm//79xhhjdu7caWrXrm3GjBljdu3aZfbu3Wtmz55tUlJSjDHGrFy50qxatcocOHDA7Ny50/Tr18+0atXK2Gw2Y4wxhw4dMv7+/mbnzp3GGGM2bNhg/P39zalTp/7jXw5Xy+nTp827775rGjVqZP766y+TkpLi0t89MzPTrFq1yvj7+5t9+/aZv/76y5w+fdoYY0zXrl1NaGiomTJlitm/f7+JjY01AQEBZv369cYYY1JSUkzv3r3NM88845jv3/uUMcacOnXK+Pv7mw0bNhhjsu9nWft/jx49zPbt281vv/1mHnzwQTN06FDHMr788ktzzz33mFWrVpnExESzatUqEx4ebhYtWmSMMWb69OmmcePGZtOmTebw4cNm06ZNZsmSJcYY6/cDXF2X2z+z9p/mzZubNWvWmH379pmnnnrKNG3a1Jw/f94YY0zfvn1Nz549ze7du01iYqL57rvvzM8//2yMMSY6Otr07dvXGGOM3W434eHhpn79+ub77783xlzYjxo2bOioxeo19lKvzZczYsQIExISYv73v/+ZvXv3mr1795qMjAzz4IMPmueee87s3r3b/PHHH2bo0KHmgQceMOfOnXPMN2DAAMdy3n//fdO8eXOzbt06k5iYaBYuXGhq165tNm7caIwx5pVXXjFdunRxWve/22JiYkxcXJw5dOiQ+emnn8wDDzxgxowZ45iem7H32WefmaCgIPPJJ5+Yffv2mV9//dXx+WnLli0mMDDQJCUlOfqvXr3ahISEmNTU1Mtup4KI4FRA/PPPP6Z27dpm/vz52abNmzfPhIWFmTNnzjja1q5da2rUqGGSk5ONMRcGU9OmTU1mZqajz9NPP20GDx7seHxxEMgyatQo88ILLzi1bdq0ydSoUcOcPXvWGGPM33//bRo1amTGjBljIiIizAcffODUP6flXs7ChQuNv7+/+eWXXxxtf/zxh/H39ze//vqrMeZCCKlVq5bjw11WXaGhoY4XkCzNmjUzc+fONcYY07lzZzNs2LBLrvviWj/++GNz//33m4yMDMu+69evN4GBgebo0aOO6b///nu2muvUqWP++ecfR59XX33VdOzY0WqT4ArdiOMm60sEY4w5cuSICQwMNH/++adT38cff9xMmjTJGGPM0KFDzSOPPJLr9aSkpBh/f3+zZ88eYwzB6XoRExNjmjZtesnpV/p379q1a7YPZh06dDCvv/664/GAAQOcvrC40uDk7+9vDh486Jjn008/NREREY7HzZo1cwShLO+9957jS7nx48eb7t27G7vdnu35W70fwL0u3j+z9p+LX/ez3o//+OMPY4wxrVq1MlOmTMlxWd9++6256667TGZmptm1a5e55557zIQJExz77OjRox2hIDevsTm9NlsZMWKEiYiIcPo8s3jxYvPAAw847Z/nzp0zwcHB5ocffnDMlxWczp07Z+rUqWO2bt3qtOxRo0Y56t+5c6cJCAgwR44cMcZc+BK4YcOGZs6cOZesbcWKFSY8PNzxODdjLzIy8rJfErdo0cJMmzbN8bhfv35OXzReS7jGqYDYt2+fMjIydPfdd2eblpCQoICAABUrVszRFhoaKrvdrv3796tcuXKSpOrVq8vLy8vRx9fXV3v37r3senfv3q09e/ZoyZIljjZjjOx2uw4fPiw/Pz+VKlVKEydOVO/evVW3bl317dv3vz5deXt7KygoyPHYz89PJUuWVEJCguO0vFtvvdXpOqM9e/YoLS1N9evXd1rW2bNnHYfwd+3apY4dO+aqhubNm2vmzJlq1qyZGjZsqMaNG6tp06by9s4+LBISEnTLLbeoYsWKjrbq1aurZMmS2rdvn6PmSpUqqUSJEo4+5cuXV0pKSq7qgetutHFTqFAhp9P69u7dK5vN5jjNI0tGRoZuvvlmSRfGxL+nX+zAgQOaPHmyfv31V508eVLGGEkXThnx9/f/zzWjYMrLv/u/TzX19fXNl9e9okWLqmrVqo7HF7++pqWlKTExUaNHj9YLL7zg6JOZmambbrpJktSuXTv16tVLzZs3V8OGDdWkSRNFRkZKcu39APnvcvunn5+fJOf9Lut6vBMnTsjPz0/du3fXSy+9pPXr1ysiIkL333+/atSoIUmqV6+ezpw5o507d2rbtm0KCwtT/fr1NW3aNEnSpk2b1Lt3b0m5e42Vsr8254a/v7/TdU27d+9WYmJitusQz507l+Pp0wcPHlR6erp69erl1H7+/HkFBgZKkgIDA+Xn56elS5eqb9+++vnnn3XixAmn5/PTTz9p6tSp2rdvn1JTU2Wz2XTu3Dmlp6eraNGiki4/9lJSUvTXX3+pQYMGl3yuHTt21Lx589SnTx8dP35cP/zwg2bOnJnbTVWg8IpQQBQuXPg/L+PfL/AeHh6OF5tLSUtL0yOPPKJu3bplm3ZxSNi0aZO8vLyUnJystLQ0p3CQX7IGbJYzZ87I19dXs2fPztY3642xSJEiuV5+xYoVtXLlSv3000/66aefNHbsWM2YMUOzZ89WoUKFrqjmnN5krf4GuHI32rgpUqSIPDw8nOrw8vLSwoULncKfJEdgtBoT/fv3V6VKlTRhwgSVL19edrtdrVq1clznhetTXv7dXR1DWRfNX9wnMzPzP60n6/rC8ePHq06dOjmur1atWvr222+1bt06/fTTTxo8eLAiIiI0efLkfHk/wJXLzf558d8l63XRbrdLuvBBPTIyUmvXrtWPP/6oadOmacSIEerWrZtKliypGjVq6Oeff9Yvv/yiiIgI1atXT4MHD9b+/ft14MABhYWFScrda6yU/bU5N/79GSctLU21atXSG2+8ka1vTjerytrnp06dqgoVKjhNuziQtW7dWkuWLFHfvn21dOlSRUZGqnTp0pIuXC/Wr18/denSRUOGDFGpUqW0ZcsWjR49WufPn3fUeLmxl5v34bZt2+qNN97Qtm3btG3bNlWuXDnXP01Q0HBziAKiWrVqKlKkiDZs2JBtmp+fn+NoS5atW7fK09NTt99+e67XUahQIceLSpaaNWvqjz/+0G233ZbtX9bA27p1q6ZPn64PPvhAxYoV0/jx4y2XayUzM1O//fab4/G+fft0+vRpxzdJOalVq5aOHz8uLy+vbLVmvaj4+/srLi4u13UUKVJEUVFRev755zVr1ixt27Ytx6MNfn5++vPPP51uYPHHH39Y1oz8daONm38LDAyUzWbTiRMnstWR9Q1sQEDAJcfEyZMntX//fg0YMEANGjSQn5+f46YRuH5dyd8960OqzWb7z+vPer1OTk52tFn9XpSVcuXKqXz58jp06FC2sVClShVHvxIlSqhFixaaMGGC3nrrLa1atUp///23pNy/HyB/5dXrUsWKFdWlSxe9++676tmzp+bPn++YFhYWpo0bN2rTpk0KDw/XzTffLD8/P3344Yfy9fV1vEfk5jU2r9SqVUsHDx5U2bJ
ls60r68vhi/n5+cnHx0dHjx7N1v/iL/BatWql33//Xb/99ptWrVqlNm3aOKbt2LFDxhiNHDlSISEhuv3227Pd9MFKiRIlVKlSpct+9ipdurSaNWumRYsWKTY2Vu3bt3dpHQUJwamAKFy4sPr06aPXX39dixcvVmJion755RctWLBArVu3lo+Pj0aOHKm9e/dqw4YNGj9+vNq2bes43Sg3KlWqpE2bNikpKUknTpyQJPXp00fbtm3TuHHjtGvXLh04cEDffPON424oqampGj58uLp166bGjRvrjTfe0PLlyx13bbnUcq0UKlRI48eP16+//qrffvtNzz33nEJCQhynvOUkIiJCISEhGjhwoNavX6/Dhw9r69ateuuttxQfHy9JGjRokJYtW6bJkycrISFBe/bscRx+/7dFixZpwYIF2rt3rw4dOqSvvvpKRYoU0a233prjuv39/fXss89qx44d2r59u4YPH67w8HCnUw5xdd1o4+bfbr/9drVu3VrDhw/X6tWrdejQIW3fvl1Tp07V2rVrJUl9+/ZVfHy8XnrpJe3evVsJCQmaM2eOTpw4oVKlSunmm2/WvHnzdPDgQcXFxemVV165olpw7biSv3ulSpXk4eGhtWvX6sSJE0533XJVkSJFFBISomnTpikhIUE///yz3n777SteXpann35a06ZN06xZs7R//37t2bNHCxcuVExMjKQLdw9bunSpEhIStH//fq1cuVK+vr4qWbKkS+8HyF958bo0ceJE/fDDDzp06JB27NihjRs3On3JGR4ervXr18vb29vRHh4eriVLlig8PNzRLzevsXmldevWKl26tAYMGKDNmzfr0KFD2rhxoyZMmKA///wzW/8SJUqoV69eio6OVmxsrBITE7Vjxw7Nnj1bsbGxjn6VK1dW3bp1NXr0aNlsNkVFRTmm3XbbbTp//rxmz56tQ4cOafHixZo7d67LtT/11FOKiYnRrFmzdODAAUcdF+vYsaNiY2OVkJCghx56yOV1FBQEpwLkySefVM+ePTV58mS1aNFCQ4YM0YkTJ1S0aFHNmDFDf//9tx5++GE988wzatCggdN53Lnx9NNP68iRI2rWrJnjXNQaNWpo9uzZOnDggB599FG1a9dOkydPVvny5SVdePEpWrSohg4dKunCt9dDhw7VmDFjlJSUdMnlWilSpIj69OmjYcOGqUuXLipWrJjeeuuty87j4eGhadOmKSwsTM8995yaN2+uoUOH6siRI44PwvXr19c777yj7777Tm3bttXjjz/uCFX/VrJkSS1YsEBdunRRmzZtFBcXpw8//NBxCPvf637//fdVsmRJde3aVT169FCVKlUsa0b+u5HGTU6io6P10EMP6ZVXXtGDDz6oJ598UvHx8Y5vHG+//XZ9/PHH2r17tzp27KhHHnlE3377rby9veXp6am33npLO3bsUKtWrRQdHa3hw4dfcS24NlzJ371ChQp66qmnNGnSJEVERGQ7guqql19+WTabTe3bt9fLL7+swYMH/6flSRc+mE2YMEGLFi1S69at1a1bN8XGxqpy5cqSpOLFi2v69Ol6+OGH9fDDD+vIkSOaNm2aPD09XXo/QP7Ki9clu92ucePGqUWLFnriiSdUrVo1jRkzxjG9Xr16stvtjlPypAufH2w2m1NwkqxfY/NK0aJF9emnn+rWW2/VoEGD1KJFC40ePVrnzp275GnegwcP1pNPPqmpU6c6nuvatWsd+3yW1q1ba/fu3brvvvucTt+uUaOGnnvuOX300Udq1aqVlixZ4njfckW7du00atQozZkzR61atVK/fv0ct3PPEhERofLlyysyMjLbqYXXEg/DBRi4yhYtWqSXX35ZmzdvdncpAAAAyGdnzpxRo0aNFB0drfvvv9/d5Vwxbg4BAAAAIM/Z7XadPHlSH3/8sUqWLOl0quC1iOCEPPfEE09oy5YtOU7r16+f43QmAP/Hatz079//KlcEALgSdevWveS0jz766Jq9o9yVOHr0qO69917dcssteuWVV675W/xzqh7yXFJSks6ePZvjtKyLPgE4Y9wAwPXh39f3XKxChQou/XQKChaCEwAAAABY4K56AAAAAGCB4AQAAAAAFghOAAAAAGCB4AQAAAAAFghOAADkQkBAgL755ht3lwEAcBOCEwDgmjFy5EgFBAToxRdfzDZt7NixCggI0MiRI3O1rI0bNyogIECnT5/OVf/169erUaNGLtULALh+EJwAANeUihUravny5U6/e3Xu3DktXbpUt956a56vLyMjQ5Lk6+srHx+fPF8+AODaQHACAFxTatasqYoVK2r16tWOttWrV6tixYoKDAx0tNntdk2dOlVRUVEKDg5WmzZttHLlSknS4cOH1b17d0lSWFiY05Gqbt26ady4cZo4caLq16+v3r17S8p+qt6ff/6poUOHKjw8XCEhIWrfvr1+/fXXfH/+AAD38HZ3AQAAuKpDhw5atGiR2rRpI0lauHCh2rdvr59//tnRZ+rUqfrqq680duxYVatWTZs2bdL//vc/lSlTRnfddZemTJmip556SitXrlSJEiVUpEgRx7yxsbHq0qWLPv/88xzXf+bMGXXt2lUVKlTQ+++/L19fX+3YsUN2uz1/nzgAwG0ITgCAa06bNm00adIkHTlyRJK0detWvfnmm47glJGRoalTpyomJkZ169aVJFWpUkVbtmzRvHnzFB4erlKlSkmSypYtq5IlSzotv1q1aho+fPgl17906VKdOHFCX3zxhW6++WZJ0m233ZbXTxMAUIAQnAAA15wyZcqoSZMmio2NlTFGTZo0UZkyZRzTDx48qPT0dPXq1ctpvvPnzzudzncptWrVuuz0Xbt2qWbNmo7QBAC4/hGcAADXpA4dOmjcuHGSpDFjxjhNS0tLk3ThdL0KFSo4TcvNDR6KFi162ekXn9YHALgxEJwAANekhg0b6vz58/Lw8FBkZKTTND8/P/n4+Ojo0aMKDw/Pcf5ChQpJkmw2m8vrDggI0IIFC/T3339z1AkAbhAEJwDANcnLy0srVqxw/P/FSpQooV69eik6OlrGGN111136559/tHXrVpUoUULt2rVTpUqV5OHhobVr16px48YqXLiwihcvnqt1t2zZUh9++KEGDhyooUOHqnz58tq5c6fKly/vuKYKAHB94XbkAIBrVokSJVSiRIkcpw0ePFhPPvmkpk6dqhYtWuiJJ57Q2rVrVblyZUlShQoV9NRTT2nSpEmKiIjQ+PHjc71eHx8fffzxxypbtqz69u2r1q1ba9q0adkCHADg+uFhjDHuLgIAAAAACjKOOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACABYITAAAAAFggOAEAAACAhf8HRJ6WlUx3T2sAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def plot_average_metrics(metrics):\n", + " avg_metrics = {metric: data[metric].mean() for metric in metrics}\n", + " \n", + " avg_metrics_df = pd.DataFrame({\n", + " 'Metric': list(avg_metrics.keys()),\n", + " 'Average Score': list(avg_metrics.values())\n", + " })\n", + "\n", + " plt.figure(figsize=(10, 5))\n", + " sns.barplot(x='Metric', y='Average Score', data=avg_metrics_df, palette='viridis', hue='Metric', dodge=False)\n", + " plt.ylim(0, 1.1)\n", + " plt.title('Average Score of Each Metric')\n", + " plt.ylabel('Average Score')\n", + " plt.xlabel('Metric')\n", + " plt.legend([],[], frameon=False) \n", + " plt.show()\n", + "\n", + "metrics = ['context_precision', 'context_recall', 'faithfulness', 'answer_relevancy']\n", + "plot_average_metrics(metrics)\n" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "\n", + "- **Queries with Low Context Precision (<0.7):**\n", + " - Improve retrieval mechanisms to filter out irrelevant contexts.\n", + "\n", + "- **Queries with Low Context Recall (<0.7):**\n", + " - Enhance the retrieval system to capture more relevant contexts.\n", + "\n", + "- **Queries with Low Faithfulness (<0.9):**\n", + " - Review the generated answers for unsupported claims.\n", + "\n", + "- **Queries with Low Answer Relevancy (<0.7):**\n", + " - Ensure the response generation effectively utilizes the user query and retrieved contexts." + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "18", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_inputcontext_precisioncontext_recallfaithfulnessanswer_relevancy
1How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?1.0000000.5000001.0000000.986973
2What are the three categories of employees eligible for health coverage?0.7000001.0000000.8000000.981254
3How long must an employee wait before using vacation time after starting employment?0.4166670.6666671.0000000.979251
6How long is the normal probationary period for permanent civil service positions?1.0000000.6666671.0000000.000000
7What are employees required to do in case of a catastrophic event while off duty?0.8666670.6666671.0000000.987241
8What is the city's policy on accepting gifts from subordinates?0.8875001.0000000.6666670.956919
\n", + "
" + ], + "text/plain": [ + " user_input \\\n", + "1 How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules? \n", + "2 What are the three categories of employees eligible for health coverage? \n", + "3 How long must an employee wait before using vacation time after starting employment? \n", + "6 How long is the normal probationary period for permanent civil service positions? \n", + "7 What are employees required to do in case of a catastrophic event while off duty? \n", + "8 What is the city's policy on accepting gifts from subordinates? \n", + "\n", + " context_precision context_recall faithfulness answer_relevancy \n", + "1 1.000000 0.500000 1.000000 0.986973 \n", + "2 0.700000 1.000000 0.800000 0.981254 \n", + "3 0.416667 0.666667 1.000000 0.979251 \n", + "6 1.000000 0.666667 1.000000 0.000000 \n", + "7 0.866667 0.666667 1.000000 0.987241 \n", + "8 0.887500 1.000000 0.666667 0.956919 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "low_metrics = data[\n", + " (data['context_precision'] < 0.7) |\n", + " (data['context_recall'] < 0.7) |\n", + " (data['faithfulness'] < 0.9) |\n", + " (data['answer_relevancy'] < 0.7)\n", + "]\n", + "\n", + "low_metrics[['user_input', 'context_precision', 'context_recall', 'faithfulness', 'answer_relevancy']]" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py index 1f156cb4d..f79a3c96e 100644 --- a/examples/E2E-RAG-App/rag_main.py +++ b/examples/E2E-RAG-App/rag_main.py @@ -8,10 +8,20 @@ import fire import requests from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction +from datasets import Dataset from dotenv import load_dotenv from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.types.agent_create_params import AgentConfig +from ragas import evaluate, EvaluationDataset +from ragas.metrics import ( + AnswerRelevancy, + ContextPrecision, + ContextRecall, + FactualCorrectness, + Faithfulness, + SemanticSimilarity, +) from termcolor import cprint from tqdm import tqdm @@ -85,11 +95,9 @@ def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 350) -> None: def query_chromadb(query: str) -> Optional[dict]: """Queries ChromaDB for relevant context based on input query.""" - cprint(f"Querying ChromaDB with: {query}", "cyan") collection = chroma_client.get_collection( name="documents", embedding_function=embedding_function ) - print(collection.count()) results = collection.query( query_texts=[query], @@ -97,33 +105,12 @@ def query_chromadb(query: str) -> Optional[dict]: include=["documents", "metadatas", "distances"], ) - if ( - results - and results.get("distances") - and results.get("documents") - and results.get("metadatas") - ): - - for i, (doc, distance, metadata) in enumerate( - zip( - results["documents"][0], - results["distances"][0], - results["metadatas"][0], - ) - ): - 
print(f"\nResult {i + 1}") - print(f"Distance Score: {distance:.4f}") - print(f"Filename: {metadata['filename']}") - print(f"Chunk index: {metadata['chunk_index']}") - print(f"Context: {doc}") - print("-" * 50) - return results if results else None async def get_response_with_context( agent: Agent, input_query: str, session_id: str -) -> str: +) -> (str, List[str]): """Fetches response from the agent with context from ChromaDB.""" results = query_chromadb(input_query) if results and results["metadatas"]: @@ -132,23 +119,27 @@ async def get_response_with_context( for metadata_list in results["metadatas"] for metadata in metadata_list ) + # Collect the contexts into a list + contexts = [ + metadata["content"] + for metadata_list in results["metadatas"] + for metadata in metadata_list + ] else: context = "No relevant context found." + contexts = [] messages = [ {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"} ] - # cprint("Embedding retrieval completed. Sending these context to agent:", "cyan") - # cprint(context, "cyan") response = agent.create_turn(messages=messages, session_id=session_id) async for chunk in response: if chunk.event.payload.event_type == "turn_complete": - cprint(f"----input_query------- {input_query}", "magenta") - return chunk.event.payload.turn.output_message.content + return chunk.event.payload.turn.output_message.content, contexts - return "No response generated." + return "No response generated.", contexts async def run_main(host: str, port: int, docs_dir: str) -> None: @@ -158,7 +149,7 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: insert_documents_to_chromadb(docs_dir) model_name = "Llama3.2-3B-Instruct" - url = "http://localhost:5000/models/register" + url = f"http://{host}:{port}/models/register" headers = {"Content-Type": "application/json"} data = { "model_id": model_name, @@ -177,20 +168,98 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: ) agent = Agent(client, agent_config) - user_prompts = [ - "What is the policy regarding smoking in City offices?", - "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", - "What are the three categories of employees eligible for health coverage?", + # QA data + qa_data = [ + { + "Question": "What is the policy regarding smoking in City offices?", + "Answer": "Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)", + }, + { + "Question": "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", + "Answer": "Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. 
(Source: Page 32, 'Accrual of Paid Sick Leave' section)", + }, + { + "Question": "What are the three categories of employees eligible for health coverage?", + "Answer": "The following employees are eligible:\n\nAll permanent employees working at least 20 hours per week\n\nAll regularly scheduled provisional employees working at least 20 hours per week\n\nAll other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)", + }, + { + "Question": "How long must an employee wait before using vacation time after starting employment?", + "Answer": "Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)", + }, + { + "Question": "What must an employee do if they're summoned for jury duty?", + "Answer": "An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)", + }, + { + "Question": "What happens if an employee is absent without authorization for more than five consecutive working days?", + "Answer": "If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)", + }, + { + "Question": "How long is the normal probationary period for permanent civil service positions?", + "Answer": "The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)", + }, + { + "Question": "What are employees required to do in case of a catastrophic event while off duty?", + "Answer": "Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)", + }, + { + "Question": "What is the city's policy on accepting gifts from subordinates?", + "Answer": "Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. 
(Source: Page 49, 'Gifts' section)", + }, ] + # Prepare lists to collect data + questions = [] + generated_answers = [] + retrieved_contexts = [] + ground_truths = [] + session_id = agent.create_session(f"session-{uuid.uuid4()}") - for prompt in tqdm(user_prompts, desc="Generating responses"): - cprint(f"Generating response for: {prompt}", "green") + for qa in tqdm(qa_data, desc="Generating responses"): + question = qa["Question"] + ground_truth_answer = qa["Answer"] + + cprint(f"Generating response for: {question}", "green") try: - response = await get_response_with_context(agent, prompt, session_id) - cprint(f"Response: {response}", "green") + generated_answer, contexts = await get_response_with_context( + agent, question, session_id + ) + cprint(f"Response: {generated_answer}", "green") + + # Append data to lists + questions.append(question) + generated_answers.append(generated_answer) + retrieved_contexts.append(contexts) + ground_truths.append(ground_truth_answer) except Exception as e: - cprint(f"Error generating response for {prompt}: {e}", "red") + cprint(f"Error generating response for {question}: {e}", "red") + + # Create a Dataset for RAGAS + eval_data = Dataset.from_dict( + { + "user_input": questions, + "response": generated_answers, + "retrieved_contexts": retrieved_contexts, + "reference": ground_truths, + } + ) + + # Run evaluation + result = evaluate( + eval_data, + metrics=[ + ContextPrecision(), + ContextRecall(), + Faithfulness(), + AnswerRelevancy(), + FactualCorrectness(), + SemanticSimilarity(), + ], + ) + + df = result.to_pandas() + df.to_csv("evaluation_results2.csv", index=False) + print(df.head()) def main(host: str, port: int, docs_dir: str) -> None: From 139a8ad9c769b74647e22870da78bd47123e1a1c Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Fri, 15 Nov 2024 15:00:34 -0800 Subject: [PATCH 20/99] small fixes --- examples/E2E-RAG-App/rag_main.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py index f79a3c96e..1c59b7a27 100644 --- a/examples/E2E-RAG-App/rag_main.py +++ b/examples/E2E-RAG-App/rag_main.py @@ -119,7 +119,6 @@ async def get_response_with_context( for metadata_list in results["metadatas"] for metadata in metadata_list ) - # Collect the contexts into a list contexts = [ metadata["content"] for metadata_list in results["metadatas"] @@ -208,7 +207,6 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: }, ] - # Prepare lists to collect data questions = [] generated_answers = [] retrieved_contexts = [] @@ -226,7 +224,6 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: ) cprint(f"Response: {generated_answer}", "green") - # Append data to lists questions.append(question) generated_answers.append(generated_answer) retrieved_contexts.append(contexts) @@ -244,7 +241,6 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: } ) - # Run evaluation result = evaluate( eval_data, metrics=[ From 9391044e6a2ed7a70a7f1805850209e05e8fea3b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 16 Nov 2024 01:29:55 +0000 Subject: [PATCH 21/99] ollama working with docker, but still need to merge --- examples/E2E-RAG-App/docker/RAG_service.json | 4 + examples/E2E-RAG-App/docker/compose.yaml | 38 +++ .../E2E-RAG-App/docker/llama_stack_start.sh | 18 ++ examples/E2E-RAG-App/docker/ollama_start.sh | 9 + examples/E2E-RAG-App/docker/run.yaml | 52 +++++ examples/E2E-RAG-App/docker/run_RAG.sh | 3 + examples/E2E-RAG-App/ollama_main.py | 219 ++++++++++++++++++ 7 files 
changed, 343 insertions(+) create mode 100644 examples/E2E-RAG-App/docker/RAG_service.json create mode 100644 examples/E2E-RAG-App/docker/compose.yaml create mode 100644 examples/E2E-RAG-App/docker/llama_stack_start.sh create mode 100644 examples/E2E-RAG-App/docker/ollama_start.sh create mode 100644 examples/E2E-RAG-App/docker/run.yaml create mode 100644 examples/E2E-RAG-App/docker/run_RAG.sh create mode 100644 examples/E2E-RAG-App/ollama_main.py diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json new file mode 100644 index 000000000..e6cd6c54a --- /dev/null +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -0,0 +1,4 @@ +{ + "model_name": "llama3.2:3b-instruct-fp16", + "document_path": "/home/ubuntu/llama-stack-apps/examples/E2E-RAG-App/example_data" +} diff --git a/examples/E2E-RAG-App/docker/compose.yaml b/examples/E2E-RAG-App/docker/compose.yaml new file mode 100644 index 000000000..aa3b4ac26 --- /dev/null +++ b/examples/E2E-RAG-App/docker/compose.yaml @@ -0,0 +1,38 @@ +services: + ollama: + image: ollama/ollama:latest + network_mode: "host" + environment: + - MODEL_NAME=${MODEL_NAME} + volumes: + - ollama:/root/.ollama # this solution synchronizes with the docker volume and loads the model rocket fast + - ./ollama_start.sh:/root/ollama_start.sh + - ./RAG_service.json:/root/RAG_service.json + ports: + - "11434:11434" + entrypoint: bash /root/ollama_start.sh + llamastack: + depends_on: + - ollama + image: llamastack/distribution-ollama + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + # Link to ollama run.yaml file + - ./run.yaml:/root/my-run.yaml + - ~/llama-stack-apps:/root/llama-stack-apps + - ./RAG_service.json:/root/RAG_service.json + - ${DOC_PATH}:/root/rag_data/ + - ~/local/llama-stack:/root/llama-stack + - ./llama_stack_start.sh:/root/llama_stack_start.sh + ports: + - "5000:5000" + - "1234:1234" + # Hack: wait for ollama server to start before starting docker + entrypoint: bash /root/llama_stack_start.sh + #entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml" + deploy: + restart_policy: + condition: "no" +volumes: + ollama: diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh new file mode 100644 index 000000000..c6e1d1b9a --- /dev/null +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -0,0 +1,18 @@ + +sleep 60 +echo "starting to install llama-stack" + +pip install -e /root/llama-stack +#pip install -U llama-stack +pip uninstall -y chromadb-client +pip uninstall -y chromadb +pip install -U chromadb +echo "Installing llama-stack-client" +pip install llama-stack-client==0.0.50 +echo "starting the llama-stack server" +python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml& +sleep 30 + +echo "running the RAG app" +python /root/llama-stack-apps/examples/E2E-RAG-App/rag_main.py localhost 5000 /root/RAG_service.json + diff --git a/examples/E2E-RAG-App/docker/ollama_start.sh b/examples/E2E-RAG-App/docker/ollama_start.sh new file mode 100644 index 000000000..ce7c90ab1 --- /dev/null +++ b/examples/E2E-RAG-App/docker/ollama_start.sh @@ -0,0 +1,9 @@ +#!/bin/bash +echo "-------------start to serve------------" +/usr/bin/ollama serve > /dev/null 2>&1 +sleep 10 +echo "pulling ollama model: $MODEL_NAME" +/usr/bin/ollama pull $MODEL_NAME +echo "Running ollama model: $MODEL_NAME" +/usr/bin/ollama run $MODEL_NAME +while :; do sleep 2073600; done diff --git 
a/examples/E2E-RAG-App/docker/run.yaml b/examples/E2E-RAG-App/docker/run.yaml new file mode 100644 index 000000000..9b0dcc45f --- /dev/null +++ b/examples/E2E-RAG-App/docker/run.yaml @@ -0,0 +1,52 @@ +version: '2' +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local +apis: +- shields +- agents +- models +- memory +- memory_banks +- inference +- safety +providers: + inference: + - provider_id: ollama0 + provider_type: remote::ollama + config: + url: http://127.0.0.1:14343 + memory: + - provider_id: inline::faiss-0 + provider_type: inline::faiss + config: + kvstore: + namespace: null + type: sqlite + db_path: /home/kaiwu/.llama/runtime/faiss_store.db + safety: + - provider_id: inline::llama-guard-0 + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: inline::meta-reference-0 + provider_type: inline::meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: /home/kaiwu/.llama/runtime/kvstore.db + telemetry: + - provider_id: inline::meta-reference-0 + provider_type: inline::meta-reference + config: {} +metadata_store: null +models: [] +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] + diff --git a/examples/E2E-RAG-App/docker/run_RAG.sh b/examples/E2E-RAG-App/docker/run_RAG.sh new file mode 100644 index 000000000..aeecb526f --- /dev/null +++ b/examples/E2E-RAG-App/docker/run_RAG.sh @@ -0,0 +1,3 @@ +echo "DOC_PATH=$(jq '.document_path' ./RAG_service.json)" > .env +echo "MODEL_NAME=$(jq '.model_name' ./RAG_service.json)" >> .env +docker compose up diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py new file mode 100644 index 000000000..9e3c7e53d --- /dev/null +++ b/examples/E2E-RAG-App/ollama_main.py @@ -0,0 +1,219 @@ +import asyncio +import json +import os +import uuid +from typing import List, Optional + +import chromadb +import fire +import requests +from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.types.agent_create_params import AgentConfig +from termcolor import cprint +from tqdm import tqdm + +# Initialization +load_dotenv() +embedding_function = SentenceTransformerEmbeddingFunction( + model_name="all-mpnet-base-v2" +) +chroma_client = None + + +def chunk_text(content: str, chunk_size: int = 500) -> List[str]: + """Splits content into chunks with overlap.""" + chunks = [] + current_chunk = [] + overlap = 100 + + for paragraph in content.split("\n\n"): + if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size: + current_chunk.append(paragraph) + else: + chunks.append("\n\n".join(current_chunk).strip()) + current_chunk = ( + [current_chunk[-1], paragraph] if current_chunk else [paragraph] + ) + + if current_chunk: + chunks.append("\n\n".join(current_chunk).strip()) + + return chunks + + +def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 350) -> None: + """Inserts text documents from a directory into ChromaDB.""" + collection_name = "documents" + existing_collections = chroma_client.list_collections() + collection_names = [col.name for col in existing_collections] + + if collection_name in collection_names: + cprint( + f"Collection '{collection_name}' already exists. 
Skipping document insertion.", + "yellow", + ) + return + + collection = chroma_client.create_collection( + name=collection_name, embedding_function=embedding_function + ) + + cprint(f"Collection '{collection_name}' created.", "green") + + for filename in os.listdir(file_dir): + if filename.endswith((".txt", ".md")): + file_path = os.path.join(file_dir, filename) + with open(file_path, "r", encoding="utf-8") as file: + content = file.read() + chunks = chunk_text(content, chunk_size=chunk_size) + + for i, chunk in enumerate(chunks): + chunk_id = f"{filename}_chunk_{i}" + collection.add( + documents=[chunk], + ids=[chunk_id], + metadatas=[ + {"filename": filename, "chunk_index": i, "content": chunk} + ], + ) + + cprint(f"Inserted documents from {file_dir} into ChromaDB.", "green") + + +def query_chromadb(query: str) -> Optional[dict]: + """Queries ChromaDB for relevant context based on input query.""" + cprint(f"Querying ChromaDB with: {query}", "cyan") + collection = chroma_client.get_collection( + name="documents", embedding_function=embedding_function + ) + print(collection.count()) + + results = collection.query( + query_texts=[query], + n_results=5, + include=["documents", "metadatas", "distances"], + ) + + if ( + results + and results.get("distances") + and results.get("documents") + and results.get("metadatas") + ): + + for i, (doc, distance, metadata) in enumerate( + zip( + results["documents"][0], + results["distances"][0], + results["metadatas"][0], + ) + ): + print(f"\nResult {i + 1}") + print(f"Distance Score: {distance:.4f}") + print(f"Filename: {metadata['filename']}") + print(f"Chunk index: {metadata['chunk_index']}") + print(f"Context: {doc}") + print("-" * 50) + + return results if results else None + + +async def get_response_with_context( + agent: Agent, input_query: str, session_id: str +) -> str: + """Fetches response from the agent with context from ChromaDB.""" + results = query_chromadb(input_query) + if results and results["metadatas"]: + context = "\n".join( + f"Filename: {metadata['filename']}, Chunk index: {metadata['chunk_index']}\n{metadata['content']}" + for metadata_list in results["metadatas"] + for metadata in metadata_list + ) + else: + context = "No relevant context found." + + messages = [ + {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"} + ] + # cprint("Embedding retrieval completed. Sending these context to agent:", "cyan") + # cprint(context, "cyan") + + response = agent.create_turn(messages=messages, session_id=session_id) + + async for chunk in response: + if chunk.event.payload.event_type == "turn_complete": + cprint(f"----input_query------- {input_query}", "magenta") + return chunk.event.payload.turn.output_message.content + + return "No response generated." 
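# --- Editor's sketch (not part of the committed patch) ----------------------
# query_chromadb() and get_response_with_context() above rely on Chroma's
# query result shape: parallel "documents", "metadatas" and "distances"
# lists, with each chunk's text duplicated into metadata["content"].  This is
# a minimal, self-contained sketch that reproduces the same shape against a
# throwaway in-memory collection; the collection name, sample text and query
# are illustrative only, and Chroma's default embedder stands in for the
# app's all-mpnet-base-v2 embedding function.

import chromadb

_demo_client = chromadb.Client()  # ephemeral, in-memory store
_demo = _demo_client.create_collection(name="demo_docs")
_demo.add(
    documents=["Llama 3.2 was announced in September 2024."],
    ids=["release_notes_chunk_0"],
    metadatas=[
        {
            "filename": "release_notes.txt",
            "chunk_index": 0,
            "content": "Llama 3.2 was announced in September 2024.",
        }
    ],
)
_hits = _demo.query(
    query_texts=["When was Llama 3.2 released?"],
    n_results=1,
    include=["documents", "metadatas", "distances"],
)
# These are the same fields the prompt builder above reads before handing the
# assembled context to agent.create_turn():
print(_hits["metadatas"][0][0]["content"], _hits["distances"][0][0])
# --- end editor's sketch -----------------------------------------------------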
+ + +async def run_main(host: str, port: int, config_path: str) -> None: + """Main async function to register model, insert documents, and generate responses.""" + base_url=f"http://{host}:{port}" + client = LlamaStackClient(base_url=base_url) + f = open(config_path) + config = json.load(f) + global chroma_client + if config["document_path"]: + print("Load DB from:", config["document_path"]) + chroma_client = chromadb.PersistentClient(path=config["document_path"]) + else: + print("Create new DB") + chroma_client = chromadb.PersistentClient(path="chroma") + insert_documents_to_chromadb(config["document_path"]) + if '3b' in config["model_name"]: + model_name = "Llama3.2-3B-Instruct" + elif '1b' in config["model_name"]: + model_name = "Llama3.2-1B-Instruct" + else: + print("Must use 1b or 3b model, but got: ", config["model_name"]) + exit() + url = base_url+"/models/register" + print("url:", url) + print("model_name",model_name) + headers = {"Content-Type": "application/json"} + data = { + "model_id": model_name, + "provider_model_id": None, + "provider_id": "ollama0", + #"provider_id": "inline::meta-reference-0", + "metadata": None, + } + response = requests.post(url, headers=headers, data=json.dumps(data)) + cprint(f"Model registration status: {response.status_code}", "blue") + + agent_config = AgentConfig( + model=model_name, + instructions="You are a helpful assistant that can answer questions based on provided documents.", + sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + enable_session_persistence=True, + ) + agent = Agent(client, agent_config) + + user_prompts = [ + "On the day of Oct 24, 2024, which llama model was released?", + "What about Llama 3.1 model, what is the release date for it?", + "When was llama 3.3 released?", + ] + + session_id = agent.create_session(f"session-{uuid.uuid4()}") + for prompt in tqdm(user_prompts, desc="Generating responses"): + cprint(f"Generating response for: {prompt}", "green") + try: + response = await get_response_with_context(agent, prompt, session_id) + cprint(f"Response: {response}", "green") + except Exception as e: + cprint(f"Error generating response for {prompt}: {e}", "red") + + +def main(host: str, port: int, docs_dir: str) -> None: + """Entry point for the script.""" + asyncio.run(run_main(host, port, docs_dir)) + + +if __name__ == "__main__": + fire.Fire(main) From 611278e2033594d22462f319fc75fb232b82abbf Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Wed, 4 Dec 2024 09:52:59 -0800 Subject: [PATCH 22/99] rebase to main --- examples/E2E-RAG-App/rag_stack_memory.py | 253 +++++++++++++++++++++++ examples/agents/rag_with_memory_bank.py | 25 ++- requirements.txt | 12 +- 3 files changed, 275 insertions(+), 15 deletions(-) create mode 100644 examples/E2E-RAG-App/rag_stack_memory.py diff --git a/examples/E2E-RAG-App/rag_stack_memory.py b/examples/E2E-RAG-App/rag_stack_memory.py new file mode 100644 index 000000000..2f71177c4 --- /dev/null +++ b/examples/E2E-RAG-App/rag_stack_memory.py @@ -0,0 +1,253 @@ +import asyncio +import json +import os +import uuid +from typing import List, Optional + +import fire +import requests +from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction +from datasets import Dataset +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types.agent_create_params import AgentConfig +from 
llama_stack_client.types.memory_insert_params import Document +from ragas import evaluate, EvaluationDataset +from ragas.metrics import ( + AnswerRelevancy, + ContextPrecision, + ContextRecall, + FactualCorrectness, + Faithfulness, + SemanticSimilarity, +) +from termcolor import cprint +from tqdm import tqdm + +# Initialization +load_dotenv() +embedding_function = SentenceTransformerEmbeddingFunction( + model_name="all-mpnet-base-v2" +) + + +def chunk_text(content: str, chunk_size: int = 500) -> List[str]: + """Splits content into chunks with overlap.""" + chunks = [] + current_chunk = [] + overlap = 100 + + for paragraph in content.split("\n\n"): + if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size: + current_chunk.append(paragraph) + else: + chunks.append("\n\n".join(current_chunk).strip()) + current_chunk = ( + [current_chunk[-1], paragraph] if current_chunk else [paragraph] + ) + + if current_chunk: + chunks.append("\n\n".join(current_chunk).strip()) + + return chunks + + +async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: str): + """Inserts text documents from a directory into a memory bank.""" + memory_bank_id = "test_bank" + providers = client.providers.list() + provider_id = providers["memory"][0].provider_id + + # Register a memory bank + memory_bank = client.memory_banks.register( + memory_bank_id=memory_bank_id, + params={ + "embedding_model": "all-MiniLM-L6-v2", + "chunk_size_in_tokens": 512, + "overlap_size_in_tokens": 64, + }, + provider_id=provider_id, + ) + cprint(f"Memory bank registered: {memory_bank}", "green") + + # Prepare documents for insertion + documents = [] + for filename in os.listdir(docs_dir): + if filename.endswith((".txt", ".md")): + file_path = os.path.join(docs_dir, filename) + with open(file_path, "r", encoding="utf-8") as file: + content = file.read() + chunks = chunk_text(content, chunk_size=350) + + for i, chunk in enumerate(chunks): + document = Document( + document_id=f"{filename}_chunk_{i}", + content=chunk, + mime_type="text/plain", + metadata={"filename": filename, "chunk_index": i}, + ) + documents.append(document) + + # Insert documents into the memory bank + client.memory.insert( + bank_id=memory_bank_id, + documents=documents, + ) + cprint( + f"Inserted documents from {docs_dir} into memory bank '{memory_bank_id}'.", + "green", + ) + + +async def get_response_with_memory_bank( + agent: Agent, input_query: str, session_id: str +) -> (str, List[str]): + """Fetches response from the agent with context from the memory bank.""" + response = agent.create_turn( + messages=[{"role": "user", "content": input_query}], + session_id=session_id, + ) + + context_responses = [] + async for log in EventLogger().log(response): + # Log the structure for debugging + print(f"Log structure: {vars(log)}") + + # Ensure attribute existence before accessing + if hasattr(log, "event") and hasattr(log.event, "payload"): + if log.event.payload.event_type == "turn_complete": + return log.event.payload.turn.output_message.content, context_responses + else: + print("Warning: The 'event' attribute or 'payload' is not present.") + + return "No response generated.", context_responses + + +async def run_main(host: str, port: int, docs_dir: str) -> None: + """Main async function to register model, insert documents, and generate responses.""" + client = LlamaStackClient(base_url=f"http://{host}:{port}") + + # Insert documents to the memory bank + await insert_documents_to_memory_bank(client, docs_dir) + + # Model registration + 
model_name = "Llama3.2-3B-Instruct" + response = requests.post( + f"http://{host}:{port}/models/register", + headers={"Content-Type": "application/json"}, + data=json.dumps( + { + "model_id": model_name, + "provider_model_id": None, + "provider_id": "inline::meta-reference-0", + "metadata": None, + } + ), + ) + cprint(f"Model registration status: {response.status_code}", "blue") + + # Agent configuration + agent_config = AgentConfig( + model=model_name, + instructions="You are a helpful assistant that can answer questions based on provided documents.", + sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + tools=[ + { + "type": "memory", + "memory_bank_configs": [{"bank_id": "test_bank", "type": "vector"}], + "query_generator_config": {"type": "default", "sep": " "}, + "max_tokens_in_context": 4096, + "max_chunks": 10, + } + ], + tool_choice="auto", + tool_prompt_format="json", + enable_session_persistence=True, + ) + agent = Agent(client, agent_config) + + # QA data for testing + qa_data = [ + { + "Question": "What is the policy regarding smoking in City offices?", + "Answer": "Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)", + }, + { + "Question": "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", + "Answer": "Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)", + }, + { + "Question": "What are the three categories of employees eligible for health coverage?", + "Answer": "The following employees are eligible:\n\nAll permanent employees working at least 20 hours per week\n\nAll regularly scheduled provisional employees working at least 20 hours per week\n\nAll other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)", + }, + { + "Question": "How long must an employee wait before using vacation time after starting employment?", + "Answer": "Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)", + }, + { + "Question": "What must an employee do if they're summoned for jury duty?", + "Answer": "An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. 
(Source: Page 37, 'Jury Duty Leave' section)", + }, + # Add more questions as needed + ] + + questions = [] + generated_answers = [] + retrieved_contexts = [] + ground_truths = [] + + session_id = agent.create_session(f"session-{uuid.uuid4()}") + for qa in tqdm(qa_data, desc="Generating responses"): + question = qa["Question"] + ground_truth_answer = qa["Answer"] + + cprint(f"Generating response for: {question}", "green") + try: + generated_answer, contexts = await get_response_with_memory_bank( + agent, question, session_id + ) + cprint(f"Response: {generated_answer}", "green") + + questions.append(question) + generated_answers.append(generated_answer) + retrieved_contexts.append(contexts) + ground_truths.append(ground_truth_answer) + except Exception as e: + cprint(f"Error generating response for {question}: {e}", "red") + + # Create a Dataset for RAGAS evaluation + eval_data = Dataset.from_dict( + { + "user_input": questions, + "response": generated_answers, + "retrieved_contexts": retrieved_contexts, + "reference": ground_truths, + } + ) + + result = evaluate( + eval_data, + metrics=[ + ContextPrecision(), + ContextRecall(), + Faithfulness(), + AnswerRelevancy(), + FactualCorrectness(), + SemanticSimilarity(), + ], + ) + + df = result.to_pandas() + df.to_csv("evaluation_results_with_memory.csv", index=False) + print(df.head()) + + +def main(host: str, port: int, docs_dir: str) -> None: + """Entry point for the script.""" + asyncio.run(run_main(host, port, docs_dir)) + + +if __name__ == "__main__": + fire.Fire(main) diff --git a/examples/agents/rag_with_memory_bank.py b/examples/agents/rag_with_memory_bank.py index 76f82884e..295ffa55d 100644 --- a/examples/agents/rag_with_memory_bank.py +++ b/examples/agents/rag_with_memory_bank.py @@ -37,23 +37,30 @@ async def run_main(host: str, port: int, disable_safety: bool = False): client = LlamaStackClient(base_url=f"http://{host}:{port}") providers = client.providers.list() - available_shields = [shield.identifier for shield in client.shields.list()] - if not available_shields: - print("No available shields. Disable safety.") - else: - print(f"Available shields found: {available_shields}") - + model_name = "Llama3.2-3B-Instruct" + url = f"http://{host}:{port}/models/register" + headers = {"Content-Type": "application/json"} + data = { + "model_id": model_name, + "provider_model_id": None, + "provider_id": "inline::meta-reference-0", + "metadata": None, + } + provider_id = providers["memory"][0].provider_id + print(provider_id) # create a memory bank - client.memory_banks.register( + test = client.memory_banks.register( memory_bank_id="test_bank", params={ "embedding_model": "all-MiniLM-L6-v2", "chunk_size_in_tokens": 512, "overlap_size_in_tokens": 64, }, - provider_id=providers["memory"][0].provider_id, + provider_id=provider_id, ) + print(f"Is memory bank registered? 
{test}") + # insert some documents client.memory.insert( bank_id="test_bank", @@ -67,7 +74,7 @@ async def run_main(host: str, port: int, disable_safety: bool = False): print(f"Using model: {selected_model}") agent_config = AgentConfig( - model=selected_model, + model=model_name, instructions="You are a helpful assistant", sampling_params={ "strategy": "greedy", diff --git a/requirements.txt b/requirements.txt index 867cf559c..bde1472e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -fastapi fire -httpx mesop python-dotenv setuptools -uvicorn yfinance -gradio -llama-stack>=0.0.55 -llama-stack-client>=0.0.55 +llama-stack==0.0.53rc1 +llama-stack-client==0.0.53rc2 +lxml>=4.9.0,<5.0.0 +xmltodict>=0.12.0,<1.0.0 +pycryptodomex>=3.8.2,<4.0.0 +google-cloud-storage>=1.16,<2.0 From 18dcfc2024d88415cf1e83c7bf994fd6b6efa67f Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 11:04:48 -0800 Subject: [PATCH 23/99] Create ingest_with_image_extraction.py --- .../ingest_with_image_extraction.py | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 examples/E2E-RAG-App/ingest_with_image_extraction.py diff --git a/examples/E2E-RAG-App/ingest_with_image_extraction.py b/examples/E2E-RAG-App/ingest_with_image_extraction.py new file mode 100644 index 000000000..d3c84472b --- /dev/null +++ b/examples/E2E-RAG-App/ingest_with_image_extraction.py @@ -0,0 +1,153 @@ +import json +import logging +from pathlib import Path +from typing import Tuple, List + +import yaml +from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend +from docling.datamodel.base_models import InputFormat +from docling.document_converter import ( + DocumentConverter, + PdfFormatOption, + WordFormatOption, +) +from docling.pipeline.simple_pipeline import SimplePipeline +from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline +from docling.pipeline.pipeline_options import PdfPipelineOptions +from docling.types.doc import ImageRefMode, PictureItem, TableItem + +def get_document_files(input_dir: Path) -> list[Path]: + """ + Recursively scan directory for document files. + Returns a list of Path objects for supported document types. + """ + supported_extensions = {".pdf", ".docx", ".pptx"} + document_files = [] + + for path in input_dir.rglob("*"): + if path.is_file() and path.suffix.lower() in supported_extensions: + document_files.append(path) + + return document_files + +def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, Path]]: + """ + Extracts and saves images from the document. + Returns a list of (image_type, image_path) tuples for future processing. 
+ """ + images_dir = output_subdir / "images" + images_dir.mkdir(parents=True, exist_ok=True) + saved_images = [] + + # Save page images + for page_no, page in res.document.pages.items(): + if hasattr(page, 'image') and page.image: + image_path = images_dir / f"{doc_filename}-page-{page_no}.png" + with image_path.open("wb") as fp: + page.image.pil_image.save(fp, format="PNG") + saved_images.append(('page', image_path)) + + # Save images of figures and tables + table_counter = 0 + picture_counter = 0 + + for element, _level in res.document.iterate_items(): + if isinstance(element, TableItem) and hasattr(element, 'image') and element.image: + table_counter += 1 + image_path = images_dir / f"{doc_filename}-table-{table_counter}.png" + with image_path.open("wb") as fp: + element.image.pil_image.save(fp, "PNG") + saved_images.append(('table', image_path)) + + if isinstance(element, PictureItem) and hasattr(element, 'image') and element.image: + picture_counter += 1 + image_path = images_dir / f"{doc_filename}-figure-{picture_counter}.png" + with image_path.open("wb") as fp: + element.image.pil_image.save(fp, "PNG") + saved_images.append(('figure', image_path)) + + return saved_images + +def main(): + # Define input and output directories relative to current directory + input_dir = Path("data/input") + output_dir = Path("data/output") + output_dir.mkdir(parents=True, exist_ok=True) + + # Get all document files recursively + input_paths = get_document_files(input_dir) + + if not input_paths: + print("No documents found in DATA directory!") + return + + print(f"Found {len(input_paths)} documents to process:") + for path in input_paths: + print(f"- {path}") + + # Configure pipeline options + pipeline_options = PdfPipelineOptions() + pipeline_options.do_ocr = False # Disable OCR + pipeline_options.images_scale = 2.0 # Set image resolution + pipeline_options.generate_page_images = True + pipeline_options.generate_table_images = True + pipeline_options.generate_picture_images = True + + # Configure document converter + doc_converter = DocumentConverter( + allowed_formats=[ + InputFormat.PDF, + InputFormat.DOCX, + InputFormat.PPTX, + ], + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_cls=StandardPdfPipeline, + backend=PyPdfiumDocumentBackend, + pipeline_options=pipeline_options + ), + InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), + }, + ) + + # Process all documents + conv_results = doc_converter.convert_all(input_paths) + + # Store all extracted images for later processing + all_extracted_images = [] + + # Save results + for res in conv_results: + # Preserve directory structure in output + relative_path = res.input.file.relative_to(input_dir) + output_subdir = output_dir / relative_path.parent + output_subdir.mkdir(parents=True, exist_ok=True) + + # Create output filenames + md_path = output_subdir / f"{res.input.file.stem}.md" + json_path = output_subdir / f"{res.input.file.stem}.json" + + print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") + + # Extract and save images + extracted_images = save_images(res, output_subdir, res.input.file.stem) + all_extracted_images.extend(extracted_images) + + # Save markdown version with embedded images + with md_path.open("w", encoding="utf-8") as fp: + fp.write(res.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)) + + # Save JSON version + with json_path.open("w", encoding="utf-8") as fp: + json.dump(res.document.export_to_dict(), fp, indent=2) + + print(f"\nExtracted {len(all_extracted_images)} images 
in total") + print("Ready for image captioning processing") + + # Here you can add your vision language model processing + # for image_type, image_path in all_extracted_images: + # caption = vision_model.generate_caption(image_path) + # # Store or process caption as needed + +if __name__ == "__main__": + main() From b665cc8320eefd9ded23da9e232b73c9e00fa9e7 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 11:32:38 -0800 Subject: [PATCH 24/99] add script --- examples/E2E-RAG-App/Untitled.ipynb | 305 ++++++++++++++++++ .../ingest_with_image_extraction.py | 128 +------- 2 files changed, 309 insertions(+), 124 deletions(-) create mode 100644 examples/E2E-RAG-App/Untitled.ipynb diff --git a/examples/E2E-RAG-App/Untitled.ipynb b/examples/E2E-RAG-App/Untitled.ipynb new file mode 100644 index 000000000..3b36deeb4 --- /dev/null +++ b/examples/E2E-RAG-App/Untitled.ipynb @@ -0,0 +1,305 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 19, + "id": "126f01d1-b5ee-4105-893c-eb5511a83448", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import logging\n", + "from pathlib import Path\n", + "from typing import Tuple, List\n", + "\n", + "import yaml\n", + "from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend\n", + "from docling.datamodel.base_models import InputFormat\n", + "from docling.document_converter import (\n", + " DocumentConverter,\n", + " PdfFormatOption,\n", + " WordFormatOption,\n", + ")\n", + "from docling.pipeline.simple_pipeline import SimplePipeline\n", + "from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline\n", + "from docling.datamodel.pipeline_options import PdfPipelineOptions\n", + "from docling.datamodel.document import TableItem, PictureItem" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "d5f51deb-d7d2-4c98-996b-7e93334f6578", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import logging\n", + "from pathlib import Path\n", + "from typing import Tuple, List\n", + "\n", + "import yaml\n", + "from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend\n", + "from docling.datamodel.base_models import InputFormat\n", + "from docling.document_converter import (\n", + " DocumentConverter,\n", + " PdfFormatOption,\n", + " WordFormatOption,\n", + ")\n", + "from docling.pipeline.simple_pipeline import SimplePipeline\n", + "from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline\n", + "#from docling.pipeline.pipeline_options import PdfPipelineOptions\n", + "#from docling.types.doc import ImageRefMode, PictureItem, TableItem" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "b9df2fe5-620b-4c29-ac35-af85047edfcb", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def get_document_files(input_dir: Path) -> list[Path]:\n", + " \"\"\"\n", + " Recursively scan directory for document files.\n", + " Returns a list of Path objects for supported document types.\n", + " \"\"\"\n", + " supported_extensions = {\".pdf\", \".docx\", \".pptx\"}\n", + " document_files = []\n", + "\n", + " for path in input_dir.rglob(\"*\"):\n", + " if path.is_file() and path.suffix.lower() in supported_extensions:\n", + " document_files.append(path)\n", + "\n", + " return document_files" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "8d5cef61-b075-4be4-b0e6-9fa613758bd4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 3 documents to process:\n", + 
"- data/input/Employee-Handbook (1).pdf\n", + "- data/input/2023021721-1 (2).pdf\n", + "- data/input/Blue_Book_Rev_2024-0926 (2).pdf\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7b368f5e6f4e4246afd1452ae356f3eb", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 9 files: 0%| | 0/9 [00:00 List[Tuple[str, Path]]:\n", + " \"\"\"\n", + " Extracts and saves images from the document.\n", + " Returns a list of (image_type, image_path) tuples for future processing.\n", + " \"\"\"\n", + " images_dir = output_subdir / \"images\"\n", + " images_dir.mkdir(parents=True, exist_ok=True)\n", + " saved_images = []\n", + "\n", + " # Save page images\n", + " for page_no, page in res.document.pages.items():\n", + " if hasattr(page, 'image') and page.image:\n", + " image_path = images_dir / f\"{doc_filename}-page-{page_no}.png\"\n", + " with image_path.open(\"wb\") as fp:\n", + " page.image.pil_image.save(fp, format=\"PNG\")\n", + " saved_images.append(('page', image_path))\n", + "\n", + " # Save images of figures and tables\n", + " table_counter = 0\n", + " picture_counter = 0\n", + " \n", + " for element, _level in res.document.iterate_items():\n", + " if isinstance(element, TableItem) and hasattr(element, 'image') and element.image:\n", + " table_counter += 1\n", + " image_path = images_dir / f\"{doc_filename}-table-{table_counter}.png\"\n", + " with image_path.open(\"wb\") as fp:\n", + " element.image.pil_image.save(fp, \"PNG\")\n", + " saved_images.append(('table', image_path))\n", + "\n", + " if isinstance(element, PictureItem) and hasattr(element, 'image') and element.image:\n", + " picture_counter += 1\n", + " image_path = images_dir / f\"{doc_filename}-figure-{picture_counter}.png\"\n", + " with image_path.open(\"wb\") as fp:\n", + " element.image.pil_image.save(fp, \"PNG\")\n", + " saved_images.append(('figure', image_path))\n", + "\n", + " return saved_images\n", + "\n", + "def main():\n", + " # Define input and output directories relative to current directory\n", + " input_dir = Path(\"data/input\")\n", + " output_dir = Path(\"data/output\")\n", + " output_dir.mkdir(parents=True, exist_ok=True)\n", + "\n", + " # Get all document files recursively\n", + " input_paths = get_document_files(input_dir)\n", + "\n", + " if not input_paths:\n", + " print(\"No documents found in DATA directory!\")\n", + " return\n", + "\n", + " print(f\"Found {len(input_paths)} documents to process:\")\n", + " for path in input_paths:\n", + " print(f\"- {path}\")\n", + "\n", + " # Configure pipeline options\n", + " pipeline_options = PdfPipelineOptions()\n", + " pipeline_options.do_ocr = False # Disable OCR\n", + " pipeline_options.images_scale = 2.0 # Set image resolution\n", + " pipeline_options.generate_page_images = False\n", + " pipeline_options.generate_table_images = False\n", + " pipeline_options.generate_picture_images = True\n", + "\n", + " # Configure document converter\n", + " doc_converter = DocumentConverter(\n", + " allowed_formats=[\n", + " InputFormat.PDF,\n", + " InputFormat.DOCX,\n", + " InputFormat.PPTX,\n", + " ],\n", + " format_options={\n", + " InputFormat.PDF: PdfFormatOption(\n", + " pipeline_cls=StandardPdfPipeline,\n", + " backend=PyPdfiumDocumentBackend,\n", + " pipeline_options=pipeline_options\n", + " ),\n", + " InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline),\n", + " },\n", + " )\n", + "\n", + " # Process all documents\n", + " conv_results = doc_converter.convert_all(input_paths)\n", + "\n", + " # Store 
all extracted images for later processing\n", + " all_extracted_images = []\n", + "\n", + " # Save results\n", + " for res in conv_results:\n", + " # Preserve directory structure in output\n", + " relative_path = res.input.file.relative_to(input_dir)\n", + " output_subdir = output_dir / relative_path.parent\n", + " output_subdir.mkdir(parents=True, exist_ok=True)\n", + "\n", + " # Create output filenames\n", + " md_path = output_subdir / f\"{res.input.file.stem}.md\"\n", + " json_path = output_subdir / f\"{res.input.file.stem}.json\"\n", + "\n", + " print(f\"Converting: {res.input.file}\" f\"\\nSaving to: {md_path}\")\n", + "\n", + " # Extract and save images\n", + " extracted_images = save_images(res, output_subdir, res.input.file.stem)\n", + " all_extracted_images.extend(extracted_images)\n", + "\n", + " # Save markdown version with embedded images\n", + " with md_path.open(\"w\", encoding=\"utf-8\") as fp:\n", + " fp.write(res.document.export_to_markdown())\n", + "\n", + " print(f\"\\nExtracted {len(all_extracted_images)} images in total\")\n", + " print(\"Ready for image captioning processing\")\n", + "\n", + " # Here you can add your vision language model processing\n", + " # for image_type, image_path in all_extracted_images:\n", + " # caption = vision_model.generate_caption(image_path)\n", + " # # Store or process caption as needed\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8aa23303-afc8-4ebd-bb8d-9dec24c70d2e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/E2E-RAG-App/ingest_with_image_extraction.py b/examples/E2E-RAG-App/ingest_with_image_extraction.py index d3c84472b..e025365ab 100644 --- a/examples/E2E-RAG-App/ingest_with_image_extraction.py +++ b/examples/E2E-RAG-App/ingest_with_image_extraction.py @@ -13,8 +13,10 @@ ) from docling.pipeline.simple_pipeline import SimplePipeline from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline -from docling.pipeline.pipeline_options import PdfPipelineOptions -from docling.types.doc import ImageRefMode, PictureItem, TableItem +from docling.datamodel.pipeline_options import PdfPipelineOptions +from docling.datamodel.document import TableItem, PictureItem + + def get_document_files(input_dir: Path) -> list[Path]: """ @@ -29,125 +31,3 @@ def get_document_files(input_dir: Path) -> list[Path]: document_files.append(path) return document_files - -def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, Path]]: - """ - Extracts and saves images from the document. - Returns a list of (image_type, image_path) tuples for future processing. 
- """ - images_dir = output_subdir / "images" - images_dir.mkdir(parents=True, exist_ok=True) - saved_images = [] - - # Save page images - for page_no, page in res.document.pages.items(): - if hasattr(page, 'image') and page.image: - image_path = images_dir / f"{doc_filename}-page-{page_no}.png" - with image_path.open("wb") as fp: - page.image.pil_image.save(fp, format="PNG") - saved_images.append(('page', image_path)) - - # Save images of figures and tables - table_counter = 0 - picture_counter = 0 - - for element, _level in res.document.iterate_items(): - if isinstance(element, TableItem) and hasattr(element, 'image') and element.image: - table_counter += 1 - image_path = images_dir / f"{doc_filename}-table-{table_counter}.png" - with image_path.open("wb") as fp: - element.image.pil_image.save(fp, "PNG") - saved_images.append(('table', image_path)) - - if isinstance(element, PictureItem) and hasattr(element, 'image') and element.image: - picture_counter += 1 - image_path = images_dir / f"{doc_filename}-figure-{picture_counter}.png" - with image_path.open("wb") as fp: - element.image.pil_image.save(fp, "PNG") - saved_images.append(('figure', image_path)) - - return saved_images - -def main(): - # Define input and output directories relative to current directory - input_dir = Path("data/input") - output_dir = Path("data/output") - output_dir.mkdir(parents=True, exist_ok=True) - - # Get all document files recursively - input_paths = get_document_files(input_dir) - - if not input_paths: - print("No documents found in DATA directory!") - return - - print(f"Found {len(input_paths)} documents to process:") - for path in input_paths: - print(f"- {path}") - - # Configure pipeline options - pipeline_options = PdfPipelineOptions() - pipeline_options.do_ocr = False # Disable OCR - pipeline_options.images_scale = 2.0 # Set image resolution - pipeline_options.generate_page_images = True - pipeline_options.generate_table_images = True - pipeline_options.generate_picture_images = True - - # Configure document converter - doc_converter = DocumentConverter( - allowed_formats=[ - InputFormat.PDF, - InputFormat.DOCX, - InputFormat.PPTX, - ], - format_options={ - InputFormat.PDF: PdfFormatOption( - pipeline_cls=StandardPdfPipeline, - backend=PyPdfiumDocumentBackend, - pipeline_options=pipeline_options - ), - InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), - }, - ) - - # Process all documents - conv_results = doc_converter.convert_all(input_paths) - - # Store all extracted images for later processing - all_extracted_images = [] - - # Save results - for res in conv_results: - # Preserve directory structure in output - relative_path = res.input.file.relative_to(input_dir) - output_subdir = output_dir / relative_path.parent - output_subdir.mkdir(parents=True, exist_ok=True) - - # Create output filenames - md_path = output_subdir / f"{res.input.file.stem}.md" - json_path = output_subdir / f"{res.input.file.stem}.json" - - print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") - - # Extract and save images - extracted_images = save_images(res, output_subdir, res.input.file.stem) - all_extracted_images.extend(extracted_images) - - # Save markdown version with embedded images - with md_path.open("w", encoding="utf-8") as fp: - fp.write(res.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)) - - # Save JSON version - with json_path.open("w", encoding="utf-8") as fp: - json.dump(res.document.export_to_dict(), fp, indent=2) - - print(f"\nExtracted {len(all_extracted_images)} images 
in total") - print("Ready for image captioning processing") - - # Here you can add your vision language model processing - # for image_type, image_path in all_extracted_images: - # caption = vision_model.generate_caption(image_path) - # # Store or process caption as needed - -if __name__ == "__main__": - main() From 47e8baef2fd3c0e1ea24087e2ae6feecd030adfd Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 18 Nov 2024 11:17:47 -0800 Subject: [PATCH 25/99] changed ollama_main to use memory_bank, simplified rag_stack_memory --- .../E2E-RAG-App/data/eval/explain-eval.ipynb | 12 +- .../evaluation_results_with_memory.csv | 12 + examples/E2E-RAG-App/ollama_main.py | 253 +++++++----------- examples/E2E-RAG-App/rag_stack_memory.py | 143 ++++------ 4 files changed, 163 insertions(+), 257 deletions(-) create mode 100644 examples/E2E-RAG-App/evaluation_results_with_memory.csv diff --git a/examples/E2E-RAG-App/data/eval/explain-eval.ipynb b/examples/E2E-RAG-App/data/eval/explain-eval.ipynb index d0c8c4d36..8d90fac04 100644 --- a/examples/E2E-RAG-App/data/eval/explain-eval.ipynb +++ b/examples/E2E-RAG-App/data/eval/explain-eval.ipynb @@ -318,7 +318,7 @@ ], "source": [ "pd.set_option('display.max_colwidth', None)\n", - "pd.set_option('display.max_rows', None) \n", + "pd.set_option('display.max_rows', None)\n", "\n", "data[['user_input', 'response']].head()" ] @@ -423,9 +423,9 @@ "source": [ "def plot_metric(metric):\n", " data['short_user_input'] = data['user_input'].apply(lambda x: x[:10] + '...' if len(x) > 10 else x)\n", - " plt.figure(figsize=(7, 4)) \n", - " sns.barplot(x='short_user_input', y=metric, data=data, hue='short_user_input', palette='viridis', legend=False) \n", - " plt.xticks(rotation=75, ha='right', fontsize=8) \n", + " plt.figure(figsize=(7, 4))\n", + " sns.barplot(x='short_user_input', y=metric, data=data, hue='short_user_input', palette='viridis', legend=False)\n", + " plt.xticks(rotation=75, ha='right', fontsize=8)\n", " plt.ylim(0, 1.1)\n", " plt.title(f'{metric.replace(\"_\", \" \").title()} for Each User Input')\n", " plt.ylabel(metric.replace(\"_\", \" \").title())\n", @@ -537,7 +537,7 @@ "source": [ "def plot_average_metrics(metrics):\n", " avg_metrics = {metric: data[metric].mean() for metric in metrics}\n", - " \n", + "\n", " avg_metrics_df = pd.DataFrame({\n", " 'Metric': list(avg_metrics.keys()),\n", " 'Average Score': list(avg_metrics.values())\n", @@ -549,7 +549,7 @@ " plt.title('Average Score of Each Metric')\n", " plt.ylabel('Average Score')\n", " plt.xlabel('Metric')\n", - " plt.legend([],[], frameon=False) \n", + " plt.legend([],[], frameon=False)\n", " plt.show()\n", "\n", "metrics = ['context_precision', 'context_recall', 'faithfulness', 'answer_relevancy']\n", diff --git a/examples/E2E-RAG-App/evaluation_results_with_memory.csv b/examples/E2E-RAG-App/evaluation_results_with_memory.csv new file mode 100644 index 000000000..d8fc68b43 --- /dev/null +++ b/examples/E2E-RAG-App/evaluation_results_with_memory.csv @@ -0,0 +1,12 @@ +user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy,factual_correctness,semantic_similarity +What is the policy regarding smoking in City offices?,[],No response generated.,"Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. 
(Source: Page 46, 'Smoke-Free Workplace' section)",0.0,0.0,0.0,0.0,0.0,0.7311136274312703 +How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?,[],No response generated.,"Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)",0.0,0.0,0.0,0.0,0.0,0.7087668374354706 +What are the three categories of employees eligible for health coverage?,[],No response generated.,"The following employees are eligible: + +All permanent employees working at least 20 hours per week + +All regularly scheduled provisional employees working at least 20 hours per week + +All other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)",0.0,0.0,0.0,0.0,0.0,0.6871347926183213 +How long must an employee wait before using vacation time after starting employment?,[],No response generated.,"Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)",0.0,0.0,0.0,0.0,0.0,0.7100602328142038 +What must an employee do if they're summoned for jury duty?,[],No response generated.,"An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. 
(Source: Page 37, 'Jury Duty Leave' section)",0.0,0.0,0.0,0.0,0.0,0.7249350381113908 diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index 9e3c7e53d..78a7b8427 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -4,144 +4,79 @@ import uuid from typing import List, Optional -import chromadb import fire import requests -from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction from dotenv import load_dotenv from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger from llama_stack_client.types.agent_create_params import AgentConfig +from llama_stack_client.types.memory_insert_params import Document from termcolor import cprint from tqdm import tqdm # Initialization load_dotenv() -embedding_function = SentenceTransformerEmbeddingFunction( - model_name="all-mpnet-base-v2" -) -chroma_client = None - - -def chunk_text(content: str, chunk_size: int = 500) -> List[str]: - """Splits content into chunks with overlap.""" - chunks = [] - current_chunk = [] - overlap = 100 - - for paragraph in content.split("\n\n"): - if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size: - current_chunk.append(paragraph) - else: - chunks.append("\n\n".join(current_chunk).strip()) - current_chunk = ( - [current_chunk[-1], paragraph] if current_chunk else [paragraph] - ) - - if current_chunk: - chunks.append("\n\n".join(current_chunk).strip()) - - return chunks - - -def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 350) -> None: - """Inserts text documents from a directory into ChromaDB.""" - collection_name = "documents" - existing_collections = chroma_client.list_collections() - collection_names = [col.name for col in existing_collections] - - if collection_name in collection_names: - cprint( - f"Collection '{collection_name}' already exists. 
Skipping document insertion.", - "yellow", - ) - return - collection = chroma_client.create_collection( - name=collection_name, embedding_function=embedding_function - ) - cprint(f"Collection '{collection_name}' created.", "green") +async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: str): + """Inserts entire text documents from a directory into a memory bank.""" + memory_bank_id = "test_bank_3" + providers = client.providers.list() + provider_id = providers["memory"][0].provider_id + + # Register a memory bank + memory_bank = client.memory_banks.register( + memory_bank_id=memory_bank_id, + params={ + "embedding_model": "all-MiniLM-L6-v2", + "chunk_size_in_tokens": 512, + "overlap_size_in_tokens": 64, + }, + provider_id=provider_id, + ) + cprint(f"Memory bank registered: {memory_bank}", "green") - for filename in os.listdir(file_dir): + # Prepare entire documents for insertion + documents = [] + for filename in os.listdir(docs_dir): if filename.endswith((".txt", ".md")): - file_path = os.path.join(file_dir, filename) + file_path = os.path.join(docs_dir, filename) with open(file_path, "r", encoding="utf-8") as file: content = file.read() - chunks = chunk_text(content, chunk_size=chunk_size) - - for i, chunk in enumerate(chunks): - chunk_id = f"{filename}_chunk_{i}" - collection.add( - documents=[chunk], - ids=[chunk_id], - metadatas=[ - {"filename": filename, "chunk_index": i, "content": chunk} - ], - ) - - cprint(f"Inserted documents from {file_dir} into ChromaDB.", "green") - - -def query_chromadb(query: str) -> Optional[dict]: - """Queries ChromaDB for relevant context based on input query.""" - cprint(f"Querying ChromaDB with: {query}", "cyan") - collection = chroma_client.get_collection( - name="documents", embedding_function=embedding_function - ) - print(collection.count()) - results = collection.query( - query_texts=[query], - n_results=5, - include=["documents", "metadatas", "distances"], + document = Document( + document_id=f"{filename}", + content=content, + mime_type="text/plain", + metadata={"filename": filename}, + ) + documents.append(document) + + # Insert documents into the memory bank + client.memory.insert( + bank_id=memory_bank_id, + documents=documents, + ) + cprint( + f"Inserted documents from {docs_dir} into memory bank '{memory_bank_id}'.", + "green", ) - - if ( - results - and results.get("distances") - and results.get("documents") - and results.get("metadatas") - ): - - for i, (doc, distance, metadata) in enumerate( - zip( - results["documents"][0], - results["distances"][0], - results["metadatas"][0], - ) - ): - print(f"\nResult {i + 1}") - print(f"Distance Score: {distance:.4f}") - print(f"Filename: {metadata['filename']}") - print(f"Chunk index: {metadata['chunk_index']}") - print(f"Context: {doc}") - print("-" * 50) - - return results if results else None async def get_response_with_context( agent: Agent, input_query: str, session_id: str ) -> str: - """Fetches response from the agent with context from ChromaDB.""" - results = query_chromadb(input_query) - if results and results["metadatas"]: - context = "\n".join( - f"Filename: {metadata['filename']}, Chunk index: {metadata['chunk_index']}\n{metadata['content']}" - for metadata_list in results["metadatas"] - for metadata in metadata_list - ) - else: - context = "No relevant context found." - - messages = [ - {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"} - ] - # cprint("Embedding retrieval completed. 
Sending these context to agent:", "cyan") - # cprint(context, "cyan") - - response = agent.create_turn(messages=messages, session_id=session_id) + """Fetches response from the agent with context from the memory bank.""" + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": input_query, + } + ], + session_id=session_id, + ) async for chunk in response: if chunk.event.payload.event_type == "turn_complete": @@ -151,63 +86,73 @@ async def get_response_with_context( return "No response generated." -async def run_main(host: str, port: int, config_path: str) -> None: +async def run_main(host: str, port: int, docs_dir: str) -> None: """Main async function to register model, insert documents, and generate responses.""" - base_url=f"http://{host}:{port}" - client = LlamaStackClient(base_url=base_url) - f = open(config_path) - config = json.load(f) - global chroma_client - if config["document_path"]: - print("Load DB from:", config["document_path"]) - chroma_client = chromadb.PersistentClient(path=config["document_path"]) - else: - print("Create new DB") - chroma_client = chromadb.PersistentClient(path="chroma") - insert_documents_to_chromadb(config["document_path"]) - if '3b' in config["model_name"]: - model_name = "Llama3.2-3B-Instruct" - elif '1b' in config["model_name"]: - model_name = "Llama3.2-1B-Instruct" - else: - print("Must use 1b or 3b model, but got: ", config["model_name"]) - exit() - url = base_url+"/models/register" - print("url:", url) - print("model_name",model_name) - headers = {"Content-Type": "application/json"} - data = { - "model_id": model_name, - "provider_model_id": None, - "provider_id": "ollama0", - #"provider_id": "inline::meta-reference-0", - "metadata": None, - } - response = requests.post(url, headers=headers, data=json.dumps(data)) + client = LlamaStackClient(base_url=f"http://{host}:{port}") + + # Insert documents to the memory bank + await insert_documents_to_memory_bank(client, docs_dir) + + # Model registration + model_name = "Llama3.2-3B-Instruct" + response = requests.post( + f"http://{host}:{port}/models/register", + headers={"Content-Type": "application/json"}, + data=json.dumps( + { + "model_id": model_name, + "provider_model_id": None, + # "provider_id": "ollama", + "provider_id": "inline::meta-reference-0", + "metadata": None, + } + ), + ) cprint(f"Model registration status: {response.status_code}", "blue") + # Agent configuration agent_config = AgentConfig( model=model_name, instructions="You are a helpful assistant that can answer questions based on provided documents.", sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + tools=[ + { + "type": "memory", + "memory_bank_configs": [{"bank_id": "test_bank_3", "type": "vector"}], + "query_generator_config": {"type": "default", "sep": " "}, + "max_tokens_in_context": 4096, + "max_chunks": 10, + } + ], + tool_choice="auto", + tool_prompt_format="json", enable_session_persistence=True, ) agent = Agent(client, agent_config) user_prompts = [ - "On the day of Oct 24, 2024, which llama model was released?", - "What about Llama 3.1 model, what is the release date for it?", - "When was llama 3.3 released?", + "What is the policy regarding smoking in City offices?", + "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", + "What are the three categories of employees eligible for health coverage?", + "How long must an employee wait before using vacation time after starting employment?", + "What must an employee do if 
they're summoned for jury duty?", ] session_id = agent.create_session(f"session-{uuid.uuid4()}") for prompt in tqdm(user_prompts, desc="Generating responses"): cprint(f"Generating response for: {prompt}", "green") - try: - response = await get_response_with_context(agent, prompt, session_id) - cprint(f"Response: {response}", "green") - except Exception as e: - cprint(f"Error generating response for {prompt}: {e}", "red") + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + session_id=session_id, + ) + + async for log in EventLogger().log(response): + log.print() def main(host: str, port: int, docs_dir: str) -> None: diff --git a/examples/E2E-RAG-App/rag_stack_memory.py b/examples/E2E-RAG-App/rag_stack_memory.py index 2f71177c4..6ffe158a2 100644 --- a/examples/E2E-RAG-App/rag_stack_memory.py +++ b/examples/E2E-RAG-App/rag_stack_memory.py @@ -6,7 +6,6 @@ import fire import requests -from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction from datasets import Dataset from dotenv import load_dotenv from llama_stack_client import LlamaStackClient @@ -28,35 +27,11 @@ # Initialization load_dotenv() -embedding_function = SentenceTransformerEmbeddingFunction( - model_name="all-mpnet-base-v2" -) - - -def chunk_text(content: str, chunk_size: int = 500) -> List[str]: - """Splits content into chunks with overlap.""" - chunks = [] - current_chunk = [] - overlap = 100 - - for paragraph in content.split("\n\n"): - if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size: - current_chunk.append(paragraph) - else: - chunks.append("\n\n".join(current_chunk).strip()) - current_chunk = ( - [current_chunk[-1], paragraph] if current_chunk else [paragraph] - ) - - if current_chunk: - chunks.append("\n\n".join(current_chunk).strip()) - - return chunks async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: str): - """Inserts text documents from a directory into a memory bank.""" - memory_bank_id = "test_bank" + """Inserts entire text documents from a directory into a memory bank.""" + memory_bank_id = "test_bank_2" providers = client.providers.list() provider_id = providers["memory"][0].provider_id @@ -72,23 +47,21 @@ async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: st ) cprint(f"Memory bank registered: {memory_bank}", "green") - # Prepare documents for insertion + # Prepare entire documents for insertion documents = [] for filename in os.listdir(docs_dir): if filename.endswith((".txt", ".md")): file_path = os.path.join(docs_dir, filename) with open(file_path, "r", encoding="utf-8") as file: content = file.read() - chunks = chunk_text(content, chunk_size=350) - for i, chunk in enumerate(chunks): - document = Document( - document_id=f"{filename}_chunk_{i}", - content=chunk, - mime_type="text/plain", - metadata={"filename": filename, "chunk_index": i}, - ) - documents.append(document) + document = Document( + document_id=f"{filename}", + content=content, + mime_type="text/plain", + metadata={"filename": filename}, + ) + documents.append(document) # Insert documents into the memory bank client.memory.insert( @@ -101,30 +74,6 @@ async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: st ) -async def get_response_with_memory_bank( - agent: Agent, input_query: str, session_id: str -) -> (str, List[str]): - """Fetches response from the agent with context from the memory bank.""" - response = agent.create_turn( - messages=[{"role": "user", "content": input_query}], 
- session_id=session_id, - ) - - context_responses = [] - async for log in EventLogger().log(response): - # Log the structure for debugging - print(f"Log structure: {vars(log)}") - - # Ensure attribute existence before accessing - if hasattr(log, "event") and hasattr(log.event, "payload"): - if log.event.payload.event_type == "turn_complete": - return log.event.payload.turn.output_message.content, context_responses - else: - print("Warning: The 'event' attribute or 'payload' is not present.") - - return "No response generated.", context_responses - - async def run_main(host: str, port: int, docs_dir: str) -> None: """Main async function to register model, insert documents, and generate responses.""" client = LlamaStackClient(base_url=f"http://{host}:{port}") @@ -204,44 +153,44 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: ground_truth_answer = qa["Answer"] cprint(f"Generating response for: {question}", "green") - try: - generated_answer, contexts = await get_response_with_memory_bank( - agent, question, session_id - ) - cprint(f"Response: {generated_answer}", "green") - - questions.append(question) - generated_answers.append(generated_answer) - retrieved_contexts.append(contexts) - ground_truths.append(ground_truth_answer) - except Exception as e: - cprint(f"Error generating response for {question}: {e}", "red") + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": question, + } + ], + session_id=session_id, + ) + + async for log in EventLogger().log(response): + log.print() # Create a Dataset for RAGAS evaluation - eval_data = Dataset.from_dict( - { - "user_input": questions, - "response": generated_answers, - "retrieved_contexts": retrieved_contexts, - "reference": ground_truths, - } - ) - - result = evaluate( - eval_data, - metrics=[ - ContextPrecision(), - ContextRecall(), - Faithfulness(), - AnswerRelevancy(), - FactualCorrectness(), - SemanticSimilarity(), - ], - ) - - df = result.to_pandas() - df.to_csv("evaluation_results_with_memory.csv", index=False) - print(df.head()) + # eval_data = Dataset.from_dict( + # { + # "user_input": questions, + # "response": generated_answers, + # "retrieved_contexts": retrieved_contexts, + # "reference": ground_truths, + # } + # ) + + # result = evaluate( + # eval_data, + # metrics=[ + # ContextPrecision(), + # ContextRecall(), + # Faithfulness(), + # AnswerRelevancy(), + # FactualCorrectness(), + # SemanticSimilarity(), + # ], + # ) + + # df = result.to_pandas() + # df.to_csv("evaluation_results_with_memory.csv", index=False) + # print(df.head()) def main(host: str, port: int, docs_dir: str) -> None: From 537eb8425ca44b15bf8548e8ee41d759fe27069c Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 18 Nov 2024 11:28:02 -0800 Subject: [PATCH 26/99] updated ollama-main --- examples/E2E-RAG-App/ollama_main.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index 78a7b8427..c4703e23f 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -64,28 +64,6 @@ async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: st ) -async def get_response_with_context( - agent: Agent, input_query: str, session_id: str -) -> str: - """Fetches response from the agent with context from the memory bank.""" - response = agent.create_turn( - messages=[ - { - "role": "user", - "content": input_query, - } - ], - session_id=session_id, - ) - - async for chunk in response: - if 
chunk.event.payload.event_type == "turn_complete": - cprint(f"----input_query------- {input_query}", "magenta") - return chunk.event.payload.turn.output_message.content - - return "No response generated." - - async def run_main(host: str, port: int, docs_dir: str) -> None: """Main async function to register model, insert documents, and generate responses.""" client = LlamaStackClient(base_url=f"http://{host}:{port}") From 38bf2a999699fbaf5949e555b90da51902238e1a Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 11:34:08 -0800 Subject: [PATCH 27/99] add script --- examples/E2E-RAG-App/Untitled.ipynb | 305 ------------------ .../ingest_with_image_extraction.py | 118 +++++++ examples/E2E-RAG-App/ingestion_script.py | 94 ------ 3 files changed, 118 insertions(+), 399 deletions(-) delete mode 100644 examples/E2E-RAG-App/Untitled.ipynb delete mode 100644 examples/E2E-RAG-App/ingestion_script.py diff --git a/examples/E2E-RAG-App/Untitled.ipynb b/examples/E2E-RAG-App/Untitled.ipynb deleted file mode 100644 index 3b36deeb4..000000000 --- a/examples/E2E-RAG-App/Untitled.ipynb +++ /dev/null @@ -1,305 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 19, - "id": "126f01d1-b5ee-4105-893c-eb5511a83448", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import logging\n", - "from pathlib import Path\n", - "from typing import Tuple, List\n", - "\n", - "import yaml\n", - "from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend\n", - "from docling.datamodel.base_models import InputFormat\n", - "from docling.document_converter import (\n", - " DocumentConverter,\n", - " PdfFormatOption,\n", - " WordFormatOption,\n", - ")\n", - "from docling.pipeline.simple_pipeline import SimplePipeline\n", - "from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline\n", - "from docling.datamodel.pipeline_options import PdfPipelineOptions\n", - "from docling.datamodel.document import TableItem, PictureItem" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "d5f51deb-d7d2-4c98-996b-7e93334f6578", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import logging\n", - "from pathlib import Path\n", - "from typing import Tuple, List\n", - "\n", - "import yaml\n", - "from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend\n", - "from docling.datamodel.base_models import InputFormat\n", - "from docling.document_converter import (\n", - " DocumentConverter,\n", - " PdfFormatOption,\n", - " WordFormatOption,\n", - ")\n", - "from docling.pipeline.simple_pipeline import SimplePipeline\n", - "from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline\n", - "#from docling.pipeline.pipeline_options import PdfPipelineOptions\n", - "#from docling.types.doc import ImageRefMode, PictureItem, TableItem" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "b9df2fe5-620b-4c29-ac35-af85047edfcb", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "def get_document_files(input_dir: Path) -> list[Path]:\n", - " \"\"\"\n", - " Recursively scan directory for document files.\n", - " Returns a list of Path objects for supported document types.\n", - " \"\"\"\n", - " supported_extensions = {\".pdf\", \".docx\", \".pptx\"}\n", - " document_files = []\n", - "\n", - " for path in input_dir.rglob(\"*\"):\n", - " if path.is_file() and path.suffix.lower() in supported_extensions:\n", - " document_files.append(path)\n", - "\n", - " return document_files" - ] - }, - { - 
"cell_type": "code", - "execution_count": 22, - "id": "8d5cef61-b075-4be4-b0e6-9fa613758bd4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 3 documents to process:\n", - "- data/input/Employee-Handbook (1).pdf\n", - "- data/input/2023021721-1 (2).pdf\n", - "- data/input/Blue_Book_Rev_2024-0926 (2).pdf\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "7b368f5e6f4e4246afd1452ae356f3eb", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Fetching 9 files: 0%| | 0/9 [00:00 List[Tuple[str, Path]]:\n", - " \"\"\"\n", - " Extracts and saves images from the document.\n", - " Returns a list of (image_type, image_path) tuples for future processing.\n", - " \"\"\"\n", - " images_dir = output_subdir / \"images\"\n", - " images_dir.mkdir(parents=True, exist_ok=True)\n", - " saved_images = []\n", - "\n", - " # Save page images\n", - " for page_no, page in res.document.pages.items():\n", - " if hasattr(page, 'image') and page.image:\n", - " image_path = images_dir / f\"{doc_filename}-page-{page_no}.png\"\n", - " with image_path.open(\"wb\") as fp:\n", - " page.image.pil_image.save(fp, format=\"PNG\")\n", - " saved_images.append(('page', image_path))\n", - "\n", - " # Save images of figures and tables\n", - " table_counter = 0\n", - " picture_counter = 0\n", - " \n", - " for element, _level in res.document.iterate_items():\n", - " if isinstance(element, TableItem) and hasattr(element, 'image') and element.image:\n", - " table_counter += 1\n", - " image_path = images_dir / f\"{doc_filename}-table-{table_counter}.png\"\n", - " with image_path.open(\"wb\") as fp:\n", - " element.image.pil_image.save(fp, \"PNG\")\n", - " saved_images.append(('table', image_path))\n", - "\n", - " if isinstance(element, PictureItem) and hasattr(element, 'image') and element.image:\n", - " picture_counter += 1\n", - " image_path = images_dir / f\"{doc_filename}-figure-{picture_counter}.png\"\n", - " with image_path.open(\"wb\") as fp:\n", - " element.image.pil_image.save(fp, \"PNG\")\n", - " saved_images.append(('figure', image_path))\n", - "\n", - " return saved_images\n", - "\n", - "def main():\n", - " # Define input and output directories relative to current directory\n", - " input_dir = Path(\"data/input\")\n", - " output_dir = Path(\"data/output\")\n", - " output_dir.mkdir(parents=True, exist_ok=True)\n", - "\n", - " # Get all document files recursively\n", - " input_paths = get_document_files(input_dir)\n", - "\n", - " if not input_paths:\n", - " print(\"No documents found in DATA directory!\")\n", - " return\n", - "\n", - " print(f\"Found {len(input_paths)} documents to process:\")\n", - " for path in input_paths:\n", - " print(f\"- {path}\")\n", - "\n", - " # Configure pipeline options\n", - " pipeline_options = PdfPipelineOptions()\n", - " pipeline_options.do_ocr = False # Disable OCR\n", - " pipeline_options.images_scale = 2.0 # Set image resolution\n", - " pipeline_options.generate_page_images = False\n", - " pipeline_options.generate_table_images = False\n", - " pipeline_options.generate_picture_images = True\n", - "\n", - " # Configure document converter\n", - " doc_converter = DocumentConverter(\n", - " allowed_formats=[\n", - " InputFormat.PDF,\n", - " InputFormat.DOCX,\n", - " InputFormat.PPTX,\n", - " ],\n", - " format_options={\n", - " InputFormat.PDF: PdfFormatOption(\n", - " pipeline_cls=StandardPdfPipeline,\n", - " backend=PyPdfiumDocumentBackend,\n", - " pipeline_options=pipeline_options\n", 
- " ),\n", - " InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline),\n", - " },\n", - " )\n", - "\n", - " # Process all documents\n", - " conv_results = doc_converter.convert_all(input_paths)\n", - "\n", - " # Store all extracted images for later processing\n", - " all_extracted_images = []\n", - "\n", - " # Save results\n", - " for res in conv_results:\n", - " # Preserve directory structure in output\n", - " relative_path = res.input.file.relative_to(input_dir)\n", - " output_subdir = output_dir / relative_path.parent\n", - " output_subdir.mkdir(parents=True, exist_ok=True)\n", - "\n", - " # Create output filenames\n", - " md_path = output_subdir / f\"{res.input.file.stem}.md\"\n", - " json_path = output_subdir / f\"{res.input.file.stem}.json\"\n", - "\n", - " print(f\"Converting: {res.input.file}\" f\"\\nSaving to: {md_path}\")\n", - "\n", - " # Extract and save images\n", - " extracted_images = save_images(res, output_subdir, res.input.file.stem)\n", - " all_extracted_images.extend(extracted_images)\n", - "\n", - " # Save markdown version with embedded images\n", - " with md_path.open(\"w\", encoding=\"utf-8\") as fp:\n", - " fp.write(res.document.export_to_markdown())\n", - "\n", - " print(f\"\\nExtracted {len(all_extracted_images)} images in total\")\n", - " print(\"Ready for image captioning processing\")\n", - "\n", - " # Here you can add your vision language model processing\n", - " # for image_type, image_path in all_extracted_images:\n", - " # caption = vision_model.generate_caption(image_path)\n", - " # # Store or process caption as needed\n", - "\n", - "if __name__ == \"__main__\":\n", - " main()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8aa23303-afc8-4ebd-bb8d-9dec24c70d2e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/E2E-RAG-App/ingest_with_image_extraction.py b/examples/E2E-RAG-App/ingest_with_image_extraction.py index e025365ab..fe2d22bb0 100644 --- a/examples/E2E-RAG-App/ingest_with_image_extraction.py +++ b/examples/E2E-RAG-App/ingest_with_image_extraction.py @@ -31,3 +31,121 @@ def get_document_files(input_dir: Path) -> list[Path]: document_files.append(path) return document_files + + + +def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, Path]]: + """ + Extracts and saves images from the document. + Returns a list of (image_type, image_path) tuples for future processing. 
+ """ + images_dir = output_subdir / "images" + images_dir.mkdir(parents=True, exist_ok=True) + saved_images = [] + + # Save page images + for page_no, page in res.document.pages.items(): + if hasattr(page, 'image') and page.image: + image_path = images_dir / f"{doc_filename}-page-{page_no}.png" + with image_path.open("wb") as fp: + page.image.pil_image.save(fp, format="PNG") + saved_images.append(('page', image_path)) + + # Save images of figures and tables + table_counter = 0 + picture_counter = 0 + + for element, _level in res.document.iterate_items(): + if isinstance(element, TableItem) and hasattr(element, 'image') and element.image: + table_counter += 1 + image_path = images_dir / f"{doc_filename}-table-{table_counter}.png" + with image_path.open("wb") as fp: + element.image.pil_image.save(fp, "PNG") + saved_images.append(('table', image_path)) + + if isinstance(element, PictureItem) and hasattr(element, 'image') and element.image: + picture_counter += 1 + image_path = images_dir / f"{doc_filename}-figure-{picture_counter}.png" + with image_path.open("wb") as fp: + element.image.pil_image.save(fp, "PNG") + saved_images.append(('figure', image_path)) + + return saved_images + +def main(): + # Define input and output directories relative to current directory + input_dir = Path("data/input") + output_dir = Path("data/output") + output_dir.mkdir(parents=True, exist_ok=True) + + # Get all document files recursively + input_paths = get_document_files(input_dir) + + if not input_paths: + print("No documents found in DATA directory!") + return + + print(f"Found {len(input_paths)} documents to process:") + for path in input_paths: + print(f"- {path}") + + # Configure pipeline options + pipeline_options = PdfPipelineOptions() + pipeline_options.do_ocr = False # Disable OCR + pipeline_options.images_scale = 2.0 # Set image resolution + pipeline_options.generate_page_images = False + pipeline_options.generate_table_images = False + pipeline_options.generate_picture_images = True + + # Configure document converter + doc_converter = DocumentConverter( + allowed_formats=[ + InputFormat.PDF, + InputFormat.DOCX, + InputFormat.PPTX, + ], + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_cls=StandardPdfPipeline, + backend=PyPdfiumDocumentBackend, + pipeline_options=pipeline_options + ), + InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), + }, + ) + + # Process all documents + conv_results = doc_converter.convert_all(input_paths) + + # Store all extracted images for later processing + all_extracted_images = [] + + # Save results + for res in conv_results: + # Preserve directory structure in output + relative_path = res.input.file.relative_to(input_dir) + output_subdir = output_dir / relative_path.parent + output_subdir.mkdir(parents=True, exist_ok=True) + + # Create output filenames + md_path = output_subdir / f"{res.input.file.stem}.md" + json_path = output_subdir / f"{res.input.file.stem}.json" + + print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") + + # Extract and save images + extracted_images = save_images(res, output_subdir, res.input.file.stem) + all_extracted_images.extend(extracted_images) + + # Save markdown version with embedded images + with md_path.open("w", encoding="utf-8") as fp: + fp.write(res.document.export_to_markdown()) + + print(f"\nExtracted {len(all_extracted_images)} images in total") + print("Ready for image captioning processing") + + # TODO: Add captioning logic here + +if __name__ == "__main__": + main() + diff --git 
a/examples/E2E-RAG-App/ingestion_script.py b/examples/E2E-RAG-App/ingestion_script.py deleted file mode 100644 index 8e1cb5cf4..000000000 --- a/examples/E2E-RAG-App/ingestion_script.py +++ /dev/null @@ -1,94 +0,0 @@ -import json -import logging -from pathlib import Path - -import yaml - -from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend -from docling.datamodel.base_models import InputFormat -from docling.document_converter import ( - DocumentConverter, - PdfFormatOption, - WordFormatOption, -) -from docling.pipeline.simple_pipeline import SimplePipeline -from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline - - -def get_document_files(input_dir: Path) -> list[Path]: - """ - Recursively scan directory for document files. - Returns a list of Path objects for supported document types. - """ - supported_extensions = {".pdf", ".docx", ".pptx"} - document_files = [] - - # Recursively walk through all directories - for path in input_dir.rglob("*"): - if path.is_file() and path.suffix.lower() in supported_extensions: - document_files.append(path) - - return document_files - - -def main(): - # Define input and output directories relative to current directory - input_dir = Path("data/input") - output_dir = Path("data/output") - - # Create output directory if it doesn't exist - output_dir.mkdir(parents=True, exist_ok=True) - - # Get all document files recursively - input_paths = get_document_files(input_dir) - - if not input_paths: - print("No documents found in DATA directory!") - return - - print(f"Found {len(input_paths)} documents to process:") - for path in input_paths: - print(f"- {path}") - - # Configure document converter - doc_converter = DocumentConverter( - allowed_formats=[ - InputFormat.PDF, - InputFormat.DOCX, - InputFormat.PPTX, - ], - format_options={ - InputFormat.PDF: PdfFormatOption( - pipeline_cls=StandardPdfPipeline, backend=PyPdfiumDocumentBackend - ), - InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), - }, - ) - - # Process all documents - conv_results = doc_converter.convert_all(input_paths) - - # Save results - for res in conv_results: - # Preserve directory structure in output - relative_path = res.input.file.relative_to(input_dir) - output_subdir = output_dir / relative_path.parent - output_subdir.mkdir(parents=True, exist_ok=True) - - # Create output filenames - md_path = output_subdir / f"{res.input.file.stem}.md" - json_path = output_subdir / f"{res.input.file.stem}.json" - - print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") - - # Save markdown version - with md_path.open("w", encoding="utf-8") as fp: - fp.write(res.document.export_to_markdown()) - - # Save JSON version - with json_path.open("w", encoding="utf-8") as fp: - json.dump(res.document.export_to_dict(), fp, indent=2) - - -if __name__ == "__main__": - main() From f6f7f48b91cb966115b93e1529d639da11de274c Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 18 Nov 2024 11:44:57 -0800 Subject: [PATCH 28/99] add how to run --- examples/E2E-RAG-App/RAG_workflow.jpg | Bin 0 -> 52391 bytes examples/E2E-RAG-App/README.md | 44 +++++++----------- examples/E2E-RAG-App/docker/RAG_service.json | 4 +- examples/E2E-RAG-App/docker/compose.yaml | 11 ++--- .../docker/{run.yaml => llama_stack_run.yaml} | 0 .../E2E-RAG-App/docker/llama_stack_start.sh | 14 +++--- examples/E2E-RAG-App/docker/ollama_start.sh | 5 +- 7 files changed, 32 insertions(+), 46 deletions(-) create mode 100644 examples/E2E-RAG-App/RAG_workflow.jpg rename examples/E2E-RAG-App/docker/{run.yaml => 
llama_stack_run.yaml} (100%)

diff --git a/examples/E2E-RAG-App/RAG_workflow.jpg b/examples/E2E-RAG-App/RAG_workflow.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8db859a0918acffede5788701d5652a7d29f7b72
GIT binary patch
literal 52391
[base85-encoded binary image data omitted]

literal 0
HcmV?d00001
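The rag_main.py patch earlier in this series drops the `get_response_with_memory_bank` helper and simply streams each turn through `EventLogger`, so the answer text is no longer collected and the RAGAS evaluation block is left commented out. If that evaluation is brought back, the final message can still be captured from the same event stream. A minimal sketch, assuming the `turn_complete` payload shape used by the removed helper (the `ask` function name and the import path are illustrative, not part of the patches):

```python
# Assumed import path for EventLogger, which rag_main.py already uses.
from llama_stack_client.lib.agents.event_logger import EventLogger


async def ask(agent, question: str, session_id: str) -> str:
    """Stream one turn, keep the console logging, and return the final answer."""
    response = agent.create_turn(
        messages=[{"role": "user", "content": question}],
        session_id=session_id,
    )

    answer = "No response generated."
    async for log in EventLogger().log(response):
        log.print()  # same streaming output as the patched run_main()
        # Not every log entry carries an event/payload, so guard the access.
        payload = getattr(getattr(log, "event", None), "payload", None)
        if payload is not None and payload.event_type == "turn_complete":
            answer = payload.turn.output_message.content
    return answer
```

The returned string could then be appended to `generated_answers` before building the RAGAS `Dataset`, as the commented-out block expects.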
zs^yOPA$&wGqE+;!C4ViY`)H9I8CJAOe@oz=BW+|oLOEtIlRDcko?A5-B0az+8vy@n zVfKIafb`v())@K4X0`a^U3Lj|LcBcQwxwnOdcsKTcMi%poSi6{^j%hDUVOPUp&@Jc|pXNhGM8}aQc z)kVjwa~joO2(j+KN}teY6XTR_`Jvup?{#&>HqqxTvU@-w$^Dc02H}tmh&fWNEGcox zctC$hvlOeZLw#X?uvWUEeV~7Eq5V_;jP^2R<6EmwIxHG5b%tP07%tO4{PWBIOHNHH z11@>>9aFNm0`KdR{OHAB5{WMe9V#@i-X=~k1ZbyfPc#hh^WptUCGtvKFAXVjs?`!C zhy42RXl$^r`L)ST;>^-d=w~fTK9c2IVl1bNGDl5IwpzCtx2$_@gHoxD&ocjDdFclS zY^E0tez5$(!CS=r|IMLV6;ie?lKPF)TBdDwSD>hY z1Po(AMW7J0Z99pfeDc2{B2VE;kEK{U6pVtvEl-UDE&olD%^S8$={joL zx+B)zW!BM@n83R>$I#TmUjJ>oSa zB`<#C&~04DMREy^|HPk{|BrV+P9gHYm&W|3x4(rj{KF`$Bw7&|QzBP#N#f(ZrZ)}Z zd-E8HjF%lIbu0{Hu@%Hwc{gCA8&tbP2Upv&w|C`$d;S|0OX8&Qsrt%St>~4}q+#9q zT72uM*Up4#2?4!AD}{ZsRtprZn4)20nN%=Pe31F2Ho> z^kD9yQ(m985_MZ~j4ZtYE?#P~~Ji+q< z0!ni(>K;U15A9d;d~u`>CD`qW(u{{j{e0~mT@)c%Xi2iMWvY=4 z)AnK7g8^|#98D1%{=rNizEK%_beq4dPbN^O&5JbuX3fSh*QU|YcThxn&i9H;=^zL< z`%$6k5AjL%El8`ks6pDy{llIrJPQ%+kCG7>W^}w03{zAkg$I;vz$Il^KV%!D@yVf#P3qgG($b~ko!_WXLulL2 zCh+xu<%cuTQ-Y3>Rn!9rPm<1yGhHw1Mm6?q4)YxF*oHKd0(ZmnxB6k@a4 zO7DA2r{+Ni{2JOiijPB~w)U=kqjG|nH$;EB#E$kC4;w&`tq`&Am){qBOj6U)GGkG> zTx(tQ9JhZfFhk4bHI}J!Iaw1^1!>+*JB|X3(qEH}9X*w?^cLeDyjsb2FL0vBh@R~_ zPxxAb$!M?RGX3qFnD8&WA?mobxL11i~vqM3#CgYq-d#EJhEGCwbLm3MrH3!*(?9CZRjuC>OY;F`wMpOFMzi{aQIX| zQD`w<&6=Bpk1dg3#5HBT%D-cYl{b%0aiIXev}|~#fKV7L0T(@Nr>bffKwI%r;_!Y{ z4&6!?A;$>>K=iY-7kR)EplEN7hc+ni7bZIv{hlKy zYBeKw-7t?6g)q(}hV59)g180e!C+(mWQ2^ol3||a)PZE&D!;OTk?8v8X%A!huu8GZ zU7!;7Oe9V&+TZ>|Z&)~Cay!mo=r)K}LtKe;Dl|rpCkH#1RFBR&v$y0*Sfs&o?686n zJl?7}$rC(?a78l!4I()KGNC*McB(|MF$EsvatS7T+$f%QC%>Uoh`dI)b;{Kx??1b$ z@j%sy_wrOcC#}A^Zw8*vw`(c_w_&Gcve8SME$?GWZyUOpq-AEfeAjkSLM$l)cl6xM zu64OJA#9f0$jU`J0mzSMXPM#ZQ0$J%X|5nnyr4lMdqmPallonPwGv3XT@un?jnY7R z;4QYHxP5VeD%5yt{weja)qNTERpe97hfY)QJrx6BS?!Y;&L|ZBPrxIHlR^(uNT3k= zr0r$|$9$+xp2lyB-(9R@5_Wef_Sm z()uz=>1mZtKw|A49K>k(TY1qUNUKI;&6E7ai~l5h?&He+{Z<`*r&!=c_-K80`=Er|0=U>UL87n|kg)KKK_TYWDfjQ*f95uC>xo zr@gt;Q~juk!4{9XaT>cdoW7iOy4v`vS z7~X}@D3RVf2H+v!&zSH1H}n6T-yhSY`s?!*YD)zerO4U7OTLySs+=d~_o;H0MRpnO=7ULazA5@>gYVnRud$ zt9T-8;dXmSCdWqg1g6-K8wl#l1M-MwagY2UF#oqwQs0yF3ZXIlX+pcwwNpQ;+5cbA zt@?jBSak6D0Xo(p3lsN8OPUW0jXMrk!q+;XG_lqmp+F%6s0U^+Tlu>RSf6Wu{0WWS zf4D?v&vor)f%ZDfFIo%MHiLs*LHV6|+Xi4%Ojg9hX@EM)!uR#6JK>Hm|Fm;ptI~U& zW?hy6)sEf80{yJa^2PFIyK*zB7EH5h{wgb~(^q0~5!g1;Yl(A&$XR?>=}r?W-js;& z^%IUbw#N)~<-bII1&A+p3blt1I!WHj(vXKd$yd@({+Jy4Y$=+#@S1GfHop$hFEy{+ z&RmB&fU(LE3aVO}c?UUsL12|jSAh|ndkfYG$O@n)GKIo&CGSjeWdmGz>{u(Q8F!!(o%hRd_UE|uSy3W4rMF@trp}}|u>^&ae)@EymDr;*J~GG79M}>`&7b#d;BT?tAfs> zl!y`HH>!pDcqfgaae>1cnefw9Jn{_o`Vf0t3LnAc2v}2dC(6@A#F!NJ{>yTv-O04- zlODGKeiGHqK>0xqdtEE6OY=!|Fb@&J-^Np)=(&c##dvBcb*O+C;@ECBUW3PY@i8PN NE&Xvrx&3YYe*yTLVt)Vt literal 0 HcmV?d00001 diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index e261e41fa..3c4f9d1f5 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -11,34 +11,22 @@ TODO: To run the `ingestion_script.py` script, please make sure there is a /DATA and /OUTPUT folder at its relative root. It will ingest ALL documents in /DATA and output BOTH markdown and JSON dump in /OUTPUT folder +![RAG_workflow](./RAG_workflow.jpg) +How to run: +Install docker according to this [web page](https://docs.docker.com/engine/install/). +1. We have main config `RAG_service.json` , please change `model_name` and `document_path` accordingly. +2. `run_RAG.sh` is the main script that can create `.env` file for compose.yaml and then actually start the `docker compose`. 
`compose.yaml` is the main docker yaml that specifies all the mount option and docker configs, change the mounts if needed. + +```bash +cd docker +bash run_RAG.sh ``` -~/work/llama-stack-apps/examples/E2E-RAG-App (rag-app)]$ python rag_main.py localhost 5000 ./example_data/ -Inserted 1 documents into bank: rag_agent_docs -Created bank: rag_agent_docs -Found 2 models [ModelDefWithProvider(identifier='Llama3.2-11B-Vision-Instruct', llama_model='Llama3.2-11B-Vision-Instruct', metadata={}, provider_id='meta-reference', type='model'), ModelDefWithProvider(identifier='Llama-Guard-3-1B', llama_model='Llama-Guard-3-1B', metadata={}, provider_id='meta1', type='model')] -Use model: Llama3.2-11B-Vision-Instruct - 0%| | 0/1 [00:00 /dev/null 2>&1 -sleep 10 +/usr/bin/ollama serve& echo "pulling ollama model: $MODEL_NAME" /usr/bin/ollama pull $MODEL_NAME echo "Running ollama model: $MODEL_NAME" -/usr/bin/ollama run $MODEL_NAME +/usr/bin/ollama run $MODEL_NAME while :; do sleep 2073600; done From b2655e6f96ca894b4825191dab714ced927df116 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 12:41:22 -0800 Subject: [PATCH 29/99] fix path --- examples/E2E-RAG-App/docker/llama_stack_run.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/E2E-RAG-App/docker/llama_stack_run.yaml b/examples/E2E-RAG-App/docker/llama_stack_run.yaml index 9b0dcc45f..4d78e8a52 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_run.yaml +++ b/examples/E2E-RAG-App/docker/llama_stack_run.yaml @@ -24,7 +24,7 @@ providers: kvstore: namespace: null type: sqlite - db_path: /home/kaiwu/.llama/runtime/faiss_store.db + db_path: ${HOME}/.llama/runtime/faiss_store.db safety: - provider_id: inline::llama-guard-0 provider_type: inline::llama-guard @@ -37,7 +37,7 @@ providers: persistence_store: namespace: null type: sqlite - db_path: /home/kaiwu/.llama/runtime/kvstore.db + db_path: ${HOME}/.llama/runtime/kvstore.db telemetry: - provider_id: inline::meta-reference-0 provider_type: inline::meta-reference From 9b848f2889aed9bf2290a8159e04c49ed73f01c5 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 12:42:43 -0800 Subject: [PATCH 30/99] fix path --- examples/E2E-RAG-App/docker/RAG_service.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index e6ae76458..dc601adf8 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,4 +1,4 @@ { "model_name": "llama3.2:3b", - "document_path": "/Users/kaiwu/work/llama-stack-apps/examples/E2E-RAG-App/example_data" + "document_path": "${HOME}/llama-stack-apps/examples/E2E-RAG-App/example_data" } From ea14b1b1cb4cba80eac2bbd640de65702ba5187f Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 13:32:02 -0800 Subject: [PATCH 31/99] Create caption_outputs.py --- examples/E2E-RAG-App/caption_outputs.py | 151 ++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 examples/E2E-RAG-App/caption_outputs.py diff --git a/examples/E2E-RAG-App/caption_outputs.py b/examples/E2E-RAG-App/caption_outputs.py new file mode 100644 index 000000000..fe2d22bb0 --- /dev/null +++ b/examples/E2E-RAG-App/caption_outputs.py @@ -0,0 +1,151 @@ +import json +import logging +from pathlib import Path +from typing import Tuple, List + +import yaml +from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend +from docling.datamodel.base_models import InputFormat +from 
docling.document_converter import ( + DocumentConverter, + PdfFormatOption, + WordFormatOption, +) +from docling.pipeline.simple_pipeline import SimplePipeline +from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline +from docling.datamodel.pipeline_options import PdfPipelineOptions +from docling.datamodel.document import TableItem, PictureItem + + + +def get_document_files(input_dir: Path) -> list[Path]: + """ + Recursively scan directory for document files. + Returns a list of Path objects for supported document types. + """ + supported_extensions = {".pdf", ".docx", ".pptx"} + document_files = [] + + for path in input_dir.rglob("*"): + if path.is_file() and path.suffix.lower() in supported_extensions: + document_files.append(path) + + return document_files + + + +def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, Path]]: + """ + Extracts and saves images from the document. + Returns a list of (image_type, image_path) tuples for future processing. + """ + images_dir = output_subdir / "images" + images_dir.mkdir(parents=True, exist_ok=True) + saved_images = [] + + # Save page images + for page_no, page in res.document.pages.items(): + if hasattr(page, 'image') and page.image: + image_path = images_dir / f"{doc_filename}-page-{page_no}.png" + with image_path.open("wb") as fp: + page.image.pil_image.save(fp, format="PNG") + saved_images.append(('page', image_path)) + + # Save images of figures and tables + table_counter = 0 + picture_counter = 0 + + for element, _level in res.document.iterate_items(): + if isinstance(element, TableItem) and hasattr(element, 'image') and element.image: + table_counter += 1 + image_path = images_dir / f"{doc_filename}-table-{table_counter}.png" + with image_path.open("wb") as fp: + element.image.pil_image.save(fp, "PNG") + saved_images.append(('table', image_path)) + + if isinstance(element, PictureItem) and hasattr(element, 'image') and element.image: + picture_counter += 1 + image_path = images_dir / f"{doc_filename}-figure-{picture_counter}.png" + with image_path.open("wb") as fp: + element.image.pil_image.save(fp, "PNG") + saved_images.append(('figure', image_path)) + + return saved_images + +def main(): + # Define input and output directories relative to current directory + input_dir = Path("data/input") + output_dir = Path("data/output") + output_dir.mkdir(parents=True, exist_ok=True) + + # Get all document files recursively + input_paths = get_document_files(input_dir) + + if not input_paths: + print("No documents found in DATA directory!") + return + + print(f"Found {len(input_paths)} documents to process:") + for path in input_paths: + print(f"- {path}") + + # Configure pipeline options + pipeline_options = PdfPipelineOptions() + pipeline_options.do_ocr = False # Disable OCR + pipeline_options.images_scale = 2.0 # Set image resolution + pipeline_options.generate_page_images = False + pipeline_options.generate_table_images = False + pipeline_options.generate_picture_images = True + + # Configure document converter + doc_converter = DocumentConverter( + allowed_formats=[ + InputFormat.PDF, + InputFormat.DOCX, + InputFormat.PPTX, + ], + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_cls=StandardPdfPipeline, + backend=PyPdfiumDocumentBackend, + pipeline_options=pipeline_options + ), + InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), + }, + ) + + # Process all documents + conv_results = doc_converter.convert_all(input_paths) + + # Store all extracted images for later processing + 
all_extracted_images = [] + + # Save results + for res in conv_results: + # Preserve directory structure in output + relative_path = res.input.file.relative_to(input_dir) + output_subdir = output_dir / relative_path.parent + output_subdir.mkdir(parents=True, exist_ok=True) + + # Create output filenames + md_path = output_subdir / f"{res.input.file.stem}.md" + json_path = output_subdir / f"{res.input.file.stem}.json" + + print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") + + # Extract and save images + extracted_images = save_images(res, output_subdir, res.input.file.stem) + all_extracted_images.extend(extracted_images) + + # Save markdown version with embedded images + with md_path.open("w", encoding="utf-8") as fp: + fp.write(res.document.export_to_markdown()) + + print(f"\nExtracted {len(all_extracted_images)} images in total") + print("Ready for image captioning processing") + + # TODO: Add captioning logic here + +if __name__ == "__main__": + main() + From 697a45bcab40f9c59e1ec518cc54dcca72fd4322 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 13:32:13 -0800 Subject: [PATCH 32/99] Update caption_outputs.py --- examples/E2E-RAG-App/caption_outputs.py | 159 ++---------------------- 1 file changed, 8 insertions(+), 151 deletions(-) diff --git a/examples/E2E-RAG-App/caption_outputs.py b/examples/E2E-RAG-App/caption_outputs.py index fe2d22bb0..30fb74dab 100644 --- a/examples/E2E-RAG-App/caption_outputs.py +++ b/examples/E2E-RAG-App/caption_outputs.py @@ -1,151 +1,8 @@ -import json -import logging -from pathlib import Path -from typing import Tuple, List - -import yaml -from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend -from docling.datamodel.base_models import InputFormat -from docling.document_converter import ( - DocumentConverter, - PdfFormatOption, - WordFormatOption, -) -from docling.pipeline.simple_pipeline import SimplePipeline -from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline -from docling.datamodel.pipeline_options import PdfPipelineOptions -from docling.datamodel.document import TableItem, PictureItem - - - -def get_document_files(input_dir: Path) -> list[Path]: - """ - Recursively scan directory for document files. - Returns a list of Path objects for supported document types. - """ - supported_extensions = {".pdf", ".docx", ".pptx"} - document_files = [] - - for path in input_dir.rglob("*"): - if path.is_file() and path.suffix.lower() in supported_extensions: - document_files.append(path) - - return document_files - - - -def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, Path]]: - """ - Extracts and saves images from the document. - Returns a list of (image_type, image_path) tuples for future processing. 
- """ - images_dir = output_subdir / "images" - images_dir.mkdir(parents=True, exist_ok=True) - saved_images = [] - - # Save page images - for page_no, page in res.document.pages.items(): - if hasattr(page, 'image') and page.image: - image_path = images_dir / f"{doc_filename}-page-{page_no}.png" - with image_path.open("wb") as fp: - page.image.pil_image.save(fp, format="PNG") - saved_images.append(('page', image_path)) - - # Save images of figures and tables - table_counter = 0 - picture_counter = 0 - - for element, _level in res.document.iterate_items(): - if isinstance(element, TableItem) and hasattr(element, 'image') and element.image: - table_counter += 1 - image_path = images_dir / f"{doc_filename}-table-{table_counter}.png" - with image_path.open("wb") as fp: - element.image.pil_image.save(fp, "PNG") - saved_images.append(('table', image_path)) - - if isinstance(element, PictureItem) and hasattr(element, 'image') and element.image: - picture_counter += 1 - image_path = images_dir / f"{doc_filename}-figure-{picture_counter}.png" - with image_path.open("wb") as fp: - element.image.pil_image.save(fp, "PNG") - saved_images.append(('figure', image_path)) - - return saved_images - -def main(): - # Define input and output directories relative to current directory - input_dir = Path("data/input") - output_dir = Path("data/output") - output_dir.mkdir(parents=True, exist_ok=True) - - # Get all document files recursively - input_paths = get_document_files(input_dir) - - if not input_paths: - print("No documents found in DATA directory!") - return - - print(f"Found {len(input_paths)} documents to process:") - for path in input_paths: - print(f"- {path}") - - # Configure pipeline options - pipeline_options = PdfPipelineOptions() - pipeline_options.do_ocr = False # Disable OCR - pipeline_options.images_scale = 2.0 # Set image resolution - pipeline_options.generate_page_images = False - pipeline_options.generate_table_images = False - pipeline_options.generate_picture_images = True - - # Configure document converter - doc_converter = DocumentConverter( - allowed_formats=[ - InputFormat.PDF, - InputFormat.DOCX, - InputFormat.PPTX, - ], - format_options={ - InputFormat.PDF: PdfFormatOption( - pipeline_cls=StandardPdfPipeline, - backend=PyPdfiumDocumentBackend, - pipeline_options=pipeline_options - ), - InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), - }, - ) - - # Process all documents - conv_results = doc_converter.convert_all(input_paths) - - # Store all extracted images for later processing - all_extracted_images = [] - - # Save results - for res in conv_results: - # Preserve directory structure in output - relative_path = res.input.file.relative_to(input_dir) - output_subdir = output_dir / relative_path.parent - output_subdir.mkdir(parents=True, exist_ok=True) - - # Create output filenames - md_path = output_subdir / f"{res.input.file.stem}.md" - json_path = output_subdir / f"{res.input.file.stem}.json" - - print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") - - # Extract and save images - extracted_images = save_images(res, output_subdir, res.input.file.stem) - all_extracted_images.extend(extracted_images) - - # Save markdown version with embedded images - with md_path.open("w", encoding="utf-8") as fp: - fp.write(res.document.export_to_markdown()) - - print(f"\nExtracted {len(all_extracted_images)} images in total") - print("Ready for image captioning processing") - - # TODO: Add captioning logic here - -if __name__ == "__main__": - main() - +import os +import 
asyncio +import base64 +import mimetypes +from llama_stack_client import LlamaStackClient + +HOST = "localhost" +PORT = 5000 From 067cbfbd62b2ac96eefcfccebfe8d04d24742659 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 13:32:23 -0800 Subject: [PATCH 33/99] Update caption_outputs.py --- examples/E2E-RAG-App/caption_outputs.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/examples/E2E-RAG-App/caption_outputs.py b/examples/E2E-RAG-App/caption_outputs.py index 30fb74dab..64ae6a70a 100644 --- a/examples/E2E-RAG-App/caption_outputs.py +++ b/examples/E2E-RAG-App/caption_outputs.py @@ -6,3 +6,14 @@ HOST = "localhost" PORT = 5000 + +def encode_image_to_data_url(file_path: str) -> str: + """Encode an image file to a data URL.""" + mime_type, _ = mimetypes.guess_type(file_path) + if mime_type is None: + raise ValueError(f"Could not determine MIME type of file: {file_path}") + + with open(file_path, "rb") as image_file: + encoded_string = base64.b64encode(image_file.read()).decode("utf-8") + + return f"data:{mime_type};base64,{encoded_string}" From 2fa2db9a5bf8c47853cb3f7e8c4a9c538b70e987 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 13:32:46 -0800 Subject: [PATCH 34/99] Update caption_outputs.py --- examples/E2E-RAG-App/caption_outputs.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/examples/E2E-RAG-App/caption_outputs.py b/examples/E2E-RAG-App/caption_outputs.py index 64ae6a70a..cad536785 100644 --- a/examples/E2E-RAG-App/caption_outputs.py +++ b/examples/E2E-RAG-App/caption_outputs.py @@ -17,3 +17,23 @@ def encode_image_to_data_url(file_path: str) -> str: encoded_string = base64.b64encode(image_file.read()).decode("utf-8") return f"data:{mime_type};base64,{encoded_string}" + +async def get_image_caption(client: LlamaStackClient, image_path: str) -> str: + """Get caption for an image using LlamaStack Vision API.""" + data_url = encode_image_to_data_url(image_path) + + message = { + "role": "user", + "content": [ + {"image": {"uri": data_url}}, + "This image comes from a scan inside a document, please provide a high level caption of what you see inside the image. 
Your caption will be used inside a RAG app so make sure its descriptive of the image and can be used in the relavant context" + ] + } + + response = await client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + stream=False, + ) + + return response.choices[0].message.content From 808ff26af1f7561c47655281b99d9b4012d0f504 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 13:33:24 -0800 Subject: [PATCH 35/99] Update caption_outputs.py --- examples/E2E-RAG-App/caption_outputs.py | 65 +++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/examples/E2E-RAG-App/caption_outputs.py b/examples/E2E-RAG-App/caption_outputs.py index cad536785..62084fdc2 100644 --- a/examples/E2E-RAG-App/caption_outputs.py +++ b/examples/E2E-RAG-App/caption_outputs.py @@ -37,3 +37,68 @@ async def get_image_caption(client: LlamaStackClient, image_path: str) -> str: ) return response.choices[0].message.content + +async def process_markdown_file(client: LlamaStackClient, output_dir: str, md_filename: str): + """Process a single markdown file and replace image placeholders with captions.""" + md_path = os.path.join(output_dir, md_filename) + images_dir = os.path.join(output_dir, 'images') + + print(f"Processing: {md_filename}") + + # Read the markdown content + with open(md_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Get the base filename without extension + base_name = os.path.splitext(md_filename)[0] + + # Find all image placeholders + image_count = 1 + + while '' in content: + # Construct the expected image filename + image_filename = f"{base_name}-figure-{image_count}.png" + image_path = os.path.join(images_dir, image_filename) + + if not os.path.exists(image_path): + print(f"Warning: Image not found: {image_filename}") + break + + try: + # Get caption from the Vision API + caption = await get_image_caption(client, image_path) + + # Create markdown image with caption + image_markdown = f"![{caption}](images/{image_filename})\n\n_{caption}_" + + # Replace the first occurrence of the placeholder + content = content.replace('', image_markdown, 1) + + print(f"Processed image {image_count} for {base_name}") + + except Exception as e: + print(f"Error processing image {image_filename}: {str(e)}") + break + + image_count += 1 + + # Write the updated content back to the file + with open(md_path, 'w', encoding='utf-8') as f: + f.write(content) + +async def main(): + # Define the output directory + output_dir = os.path.join('DATA', 'output') + + # Initialize LlamaStack client + client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}") + + # Get all markdown files in the output directory + md_files = [f for f in os.listdir(output_dir) if f.endswith('.md')] + + # Process each markdown file + for md_file in md_files: + await process_markdown_file(client, output_dir, md_file) + +if __name__ == "__main__": + asyncio.run(main()) From 484f9d02de343eb5da6c5e383d8ac0042262da75 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 14:06:44 -0800 Subject: [PATCH 36/99] update --- ...image_extraction.py => 01_ingest_files.py} | 0 ...ption_outputs.py => 02_caption_outputs.py} | 0 examples/E2E-RAG-App/gradio_interface.py | 104 ++++++++++++++++++ 3 files changed, 104 insertions(+) rename examples/E2E-RAG-App/{ingest_with_image_extraction.py => 01_ingest_files.py} (100%) rename examples/E2E-RAG-App/{caption_outputs.py => 02_caption_outputs.py} (100%) create mode 100644 examples/E2E-RAG-App/gradio_interface.py diff --git 
a/examples/E2E-RAG-App/ingest_with_image_extraction.py b/examples/E2E-RAG-App/01_ingest_files.py similarity index 100% rename from examples/E2E-RAG-App/ingest_with_image_extraction.py rename to examples/E2E-RAG-App/01_ingest_files.py diff --git a/examples/E2E-RAG-App/caption_outputs.py b/examples/E2E-RAG-App/02_caption_outputs.py similarity index 100% rename from examples/E2E-RAG-App/caption_outputs.py rename to examples/E2E-RAG-App/02_caption_outputs.py diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py new file mode 100644 index 000000000..62084fdc2 --- /dev/null +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -0,0 +1,104 @@ +import os +import asyncio +import base64 +import mimetypes +from llama_stack_client import LlamaStackClient + +HOST = "localhost" +PORT = 5000 + +def encode_image_to_data_url(file_path: str) -> str: + """Encode an image file to a data URL.""" + mime_type, _ = mimetypes.guess_type(file_path) + if mime_type is None: + raise ValueError(f"Could not determine MIME type of file: {file_path}") + + with open(file_path, "rb") as image_file: + encoded_string = base64.b64encode(image_file.read()).decode("utf-8") + + return f"data:{mime_type};base64,{encoded_string}" + +async def get_image_caption(client: LlamaStackClient, image_path: str) -> str: + """Get caption for an image using LlamaStack Vision API.""" + data_url = encode_image_to_data_url(image_path) + + message = { + "role": "user", + "content": [ + {"image": {"uri": data_url}}, + "This image comes from a scan inside a document, please provide a high level caption of what you see inside the image. Your caption will be used inside a RAG app so make sure its descriptive of the image and can be used in the relavant context" + ] + } + + response = await client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + stream=False, + ) + + return response.choices[0].message.content + +async def process_markdown_file(client: LlamaStackClient, output_dir: str, md_filename: str): + """Process a single markdown file and replace image placeholders with captions.""" + md_path = os.path.join(output_dir, md_filename) + images_dir = os.path.join(output_dir, 'images') + + print(f"Processing: {md_filename}") + + # Read the markdown content + with open(md_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Get the base filename without extension + base_name = os.path.splitext(md_filename)[0] + + # Find all image placeholders + image_count = 1 + + while '' in content: + # Construct the expected image filename + image_filename = f"{base_name}-figure-{image_count}.png" + image_path = os.path.join(images_dir, image_filename) + + if not os.path.exists(image_path): + print(f"Warning: Image not found: {image_filename}") + break + + try: + # Get caption from the Vision API + caption = await get_image_caption(client, image_path) + + # Create markdown image with caption + image_markdown = f"![{caption}](images/{image_filename})\n\n_{caption}_" + + # Replace the first occurrence of the placeholder + content = content.replace('', image_markdown, 1) + + print(f"Processed image {image_count} for {base_name}") + + except Exception as e: + print(f"Error processing image {image_filename}: {str(e)}") + break + + image_count += 1 + + # Write the updated content back to the file + with open(md_path, 'w', encoding='utf-8') as f: + f.write(content) + +async def main(): + # Define the output directory + output_dir = os.path.join('DATA', 'output') + + # Initialize 
LlamaStack client + client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}") + + # Get all markdown files in the output directory + md_files = [f for f in os.listdir(output_dir) if f.endswith('.md')] + + # Process each markdown file + for md_file in md_files: + await process_markdown_file(client, output_dir, md_file) + +if __name__ == "__main__": + asyncio.run(main()) From 250db1f73470eafa85fb47407ac053a13db066c1 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 14:06:53 -0800 Subject: [PATCH 37/99] Update gradio_interface.py --- examples/E2E-RAG-App/gradio_interface.py | 104 ----------------------- 1 file changed, 104 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 62084fdc2..e69de29bb 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -1,104 +0,0 @@ -import os -import asyncio -import base64 -import mimetypes -from llama_stack_client import LlamaStackClient - -HOST = "localhost" -PORT = 5000 - -def encode_image_to_data_url(file_path: str) -> str: - """Encode an image file to a data URL.""" - mime_type, _ = mimetypes.guess_type(file_path) - if mime_type is None: - raise ValueError(f"Could not determine MIME type of file: {file_path}") - - with open(file_path, "rb") as image_file: - encoded_string = base64.b64encode(image_file.read()).decode("utf-8") - - return f"data:{mime_type};base64,{encoded_string}" - -async def get_image_caption(client: LlamaStackClient, image_path: str) -> str: - """Get caption for an image using LlamaStack Vision API.""" - data_url = encode_image_to_data_url(image_path) - - message = { - "role": "user", - "content": [ - {"image": {"uri": data_url}}, - "This image comes from a scan inside a document, please provide a high level caption of what you see inside the image. 
Your caption will be used inside a RAG app so make sure its descriptive of the image and can be used in the relavant context" - ] - } - - response = await client.inference.chat_completion( - messages=[message], - model="Llama3.2-11B-Vision-Instruct", - stream=False, - ) - - return response.choices[0].message.content - -async def process_markdown_file(client: LlamaStackClient, output_dir: str, md_filename: str): - """Process a single markdown file and replace image placeholders with captions.""" - md_path = os.path.join(output_dir, md_filename) - images_dir = os.path.join(output_dir, 'images') - - print(f"Processing: {md_filename}") - - # Read the markdown content - with open(md_path, 'r', encoding='utf-8') as f: - content = f.read() - - # Get the base filename without extension - base_name = os.path.splitext(md_filename)[0] - - # Find all image placeholders - image_count = 1 - - while '' in content: - # Construct the expected image filename - image_filename = f"{base_name}-figure-{image_count}.png" - image_path = os.path.join(images_dir, image_filename) - - if not os.path.exists(image_path): - print(f"Warning: Image not found: {image_filename}") - break - - try: - # Get caption from the Vision API - caption = await get_image_caption(client, image_path) - - # Create markdown image with caption - image_markdown = f"![{caption}](images/{image_filename})\n\n_{caption}_" - - # Replace the first occurrence of the placeholder - content = content.replace('', image_markdown, 1) - - print(f"Processed image {image_count} for {base_name}") - - except Exception as e: - print(f"Error processing image {image_filename}: {str(e)}") - break - - image_count += 1 - - # Write the updated content back to the file - with open(md_path, 'w', encoding='utf-8') as f: - f.write(content) - -async def main(): - # Define the output directory - output_dir = os.path.join('DATA', 'output') - - # Initialize LlamaStack client - client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}") - - # Get all markdown files in the output directory - md_files = [f for f in os.listdir(output_dir) if f.endswith('.md')] - - # Process each markdown file - for md_file in md_files: - await process_markdown_file(client, output_dir, md_file) - -if __name__ == "__main__": - asyncio.run(main()) From f5382f458f0be0dc5492a7c2bcc9b51fe110da20 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 14:07:54 -0800 Subject: [PATCH 38/99] Update gradio_interface.py --- examples/E2E-RAG-App/gradio_interface.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index e69de29bb..4ff6489b7 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -0,0 +1,14 @@ +import asyncio +import json +import os +from typing import List, Optional +import gradio as gr +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.types.agent_create_params import AgentConfig + + +load_dotenv() + +class LlamaChatInterface: From d61de72d4bc18376161387a3a8f9f24c47e8b152 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 14:08:44 -0800 Subject: [PATCH 39/99] Update gradio_interface.py --- examples/E2E-RAG-App/gradio_interface.py | 74 +++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py 
index 4ff6489b7..cadd73f6f 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -8,7 +8,79 @@ from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.types.agent_create_params import AgentConfig - +# Load environment variables load_dotenv() class LlamaChatInterface: + def __init__(self, host: str, port: int): + self.client = LlamaStackClient(base_url=f"http://{host}:{port}") + self.agent = None + self.session_id = None + self.memory_bank_id = "test_bank_3" + + async def initialize_agent(self): + # Model registration + model_name = "Llama3.2-3B-Instruct" + + # Agent configuration + agent_config = AgentConfig( + model=model_name, + instructions="You are a helpful assistant that can answer questions based on provided documents.", + sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + tools=[ + { + "type": "memory", + "memory_bank_configs": [{"bank_id": self.memory_bank_id, "type": "vector"}], + "query_generator_config": {"type": "default", "sep": " "}, + "max_tokens_in_context": 4096, + "max_chunks": 10, + } + ], + tool_choice="auto", + tool_prompt_format="json", + enable_session_persistence=True, + ) + self.agent = Agent(self.client, agent_config) + self.session_id = str(uuid.uuid4()) + + async def chat(self, message: str, history: List[List[str]]) -> str: + if self.agent is None: + await self.initialize_agent() + + response = await self.agent.create_turn( + messages=[{"role": "user", "content": message}], + session_id=self.session_id + ) + + # Extract the assistant's response from the response object + # Note: You might need to adjust this based on the actual response structure + assistant_message = "" + async for chunk in response: + if hasattr(chunk, 'delta') and chunk.delta: + assistant_message += chunk.delta + + return assistant_message + +def create_gradio_interface(host: str = "localhost", port: int = 8000): + # Initialize the chat interface + chat_interface = LlamaChatInterface(host, port) + + # Create the Gradio interface + iface = gr.ChatInterface( + fn=chat_interface.chat, + title="LlamaStack Chat", + description="Chat with your documents using LlamaStack", + examples=[ + ["What topics are covered in the documents?"], + ["Can you summarize the main points?"], + ["Tell me more about specific details in the text."], + ], + theme=gr.themes.Soft() + ) + + return iface + +if __name__ == "__main__": + # Create and launch the Gradio interface + iface = create_gradio_interface() + iface.launch(server_name="0.0.0.0", server_port=7860) From b0c32b8a1a78105e0b1fda548b3716145881ca15 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 18 Nov 2024 13:35:17 -0800 Subject: [PATCH 40/99] add input loop logic --- examples/E2E-RAG-App/ollama_main.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index c4703e23f..78ead8a29 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -80,8 +80,8 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: { "model_id": model_name, "provider_model_id": None, - # "provider_id": "ollama", - "provider_id": "inline::meta-reference-0", + "provider_id": "ollama", + # "provider_id": "inline::meta-reference-0", "metadata": None, } ), @@ -108,22 +108,18 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: ) agent = Agent(client, agent_config) - user_prompts = [ - "What is the policy regarding 
smoking in City offices?",
-        "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?",
-        "What are the three categories of employees eligible for health coverage?",
-        "How long must an employee wait before using vacation time after starting employment?",
-        "What must an employee do if they're summoned for jury duty?",
-    ]
-
-    session_id = agent.create_session(f"session-{uuid.uuid4()}")
-    for prompt in tqdm(user_prompts, desc="Generating responses"):
-        cprint(f"Generating response for: {prompt}", "green")
+    while True:
+        user_input = input("User> ")
+        if user_input.lower() in ["exit", "quit", "bye"]:
+            cprint("Ending conversation. Goodbye!", "yellow")
+            break
+
+        message = {"role": "user", "content": user_input}
         response = agent.create_turn(
             messages=[
                 {
                     "role": "user",
-                    "content": prompt,
+                    "content": message,
                 }
             ],
             session_id=session_id,
@@ -133,6 +129,9 @@ async def run_main(host: str, port: int, docs_dir: str) -> None:
         log.print()


+# Run the chat loop in a Jupyter Notebook cell using await
+
+
 def main(host: str, port: int, docs_dir: str) -> None:
     """Entry point for the script."""
     asyncio.run(run_main(host, port, docs_dir))

From e2212e8e7ab323d1f4267ea6b15d4748489ab64e Mon Sep 17 00:00:00 2001
From: Justin Lee
Date: Mon, 18 Nov 2024 14:24:51 -0800
Subject: [PATCH 41/99] added bash script

---
 examples/E2E-RAG-App/run_app.sh | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100755 examples/E2E-RAG-App/run_app.sh

diff --git a/examples/E2E-RAG-App/run_app.sh b/examples/E2E-RAG-App/run_app.sh
new file mode 100755
index 000000000..3259b506a
--- /dev/null
+++ b/examples/E2E-RAG-App/run_app.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+echo "Running 01_ingest_files.py..."
+python 01_ingest_files.py
+if [ $? -ne 0 ]; then
+    echo "Error running 01_ingest_files.py"
+    exit 1
+fi
+
+echo "Running 02_caption_outputs.py..."
+python 02_caption_outputs.py
+if [ $? -ne 0 ]; then
+    echo "Error running 02_caption_outputs.py"
+    exit 1
+fi
+
+echo "Running ollama_main.py..."
+python ollama_main.py localhost 5000 ./data/output/
+if [ $? -ne 0 ]; then
+    echo "Error running ollama_main.py"
+    exit 1
+fi
+
+echo "All scripts ran successfully!"
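
The run_app.sh script added in this patch chains the three stages (ingestion, captioning, chat) and assumes a llama-stack server is already listening on localhost:5000, typically brought up via the docker compose setup that appears later in this series. The sketch below is a hypothetical pre-flight check, not part of any patch: it reuses only client calls already seen in these commits (LlamaStackClient, providers.list, memory_banks.list) to confirm the server is reachable and to report whether the test_bank_6 memory bank used by ollama_main.py has been registered yet. The preflight function name and the constants are illustrative assumptions.

import sys

from llama_stack_client import LlamaStackClient

HOST = "localhost"  # assumption: same host/port that run_app.sh passes to ollama_main.py
PORT = 5000
MEMORY_BANK_ID = "test_bank_6"  # assumption: the bank id used by ollama_main.py at this point in the series


def preflight() -> int:
    """Return 0 if the llama-stack server answers, and report memory-bank state."""
    client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")
    try:
        # providers.list() is the same call ollama_main.py uses to pick a memory provider
        providers = client.providers.list()
    except Exception as exc:  # a connection error usually means the server is not up yet
        print(f"llama-stack server not reachable at {HOST}:{PORT}: {exc}")
        return 1

    print("memory providers:", [p.provider_id for p in providers["memory"]])

    # memory_banks.list() returns bank objects with an .identifier, as used in the patches above
    banks = [bank.identifier for bank in client.memory_banks.list()]
    print("registered memory banks:", banks)
    if MEMORY_BANK_ID not in banks:
        print(f"'{MEMORY_BANK_ID}' not registered yet; ollama_main.py registers it before inserting documents.")
    return 0


if __name__ == "__main__":
    sys.exit(preflight())

Running a check like this before ./run_app.sh catches the most common failure mode (the chat stage starting before the server and its memory provider are ready) without modifying any of the scripts introduced by the patches themselves.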
From 31a595c8a5b740720b7fc392f0feaa353d7aa680 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 14:54:50 -0800 Subject: [PATCH 42/99] Update 02_caption_outputs.py --- examples/E2E-RAG-App/02_caption_outputs.py | 155 ++++++++++++--------- 1 file changed, 86 insertions(+), 69 deletions(-) diff --git a/examples/E2E-RAG-App/02_caption_outputs.py b/examples/E2E-RAG-App/02_caption_outputs.py index 62084fdc2..a48ab1fd8 100644 --- a/examples/E2E-RAG-App/02_caption_outputs.py +++ b/examples/E2E-RAG-App/02_caption_outputs.py @@ -2,7 +2,9 @@ import asyncio import base64 import mimetypes +from pathlib import Path from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.inference.event_logger import EventLogger HOST = "localhost" PORT = 5000 @@ -11,94 +13,109 @@ def encode_image_to_data_url(file_path: str) -> str: """Encode an image file to a data URL.""" mime_type, _ = mimetypes.guess_type(file_path) if mime_type is None: - raise ValueError(f"Could not determine MIME type of file: {file_path}") + raise ValueError("Could not determine MIME type of the file") with open(file_path, "rb") as image_file: encoded_string = base64.b64encode(image_file.read()).decode("utf-8") return f"data:{mime_type};base64,{encoded_string}" -async def get_image_caption(client: LlamaStackClient, image_path: str) -> str: - """Get caption for an image using LlamaStack Vision API.""" - data_url = encode_image_to_data_url(image_path) - - message = { - "role": "user", - "content": [ - {"image": {"uri": data_url}}, - "This image comes from a scan inside a document, please provide a high level caption of what you see inside the image. Your caption will be used inside a RAG app so make sure its descriptive of the image and can be used in the relavant context" - ] - } - - response = await client.inference.chat_completion( - messages=[message], - model="Llama3.2-11B-Vision-Instruct", - stream=False, - ) - - return response.choices[0].message.content +class DocumentProcessor: + def __init__(self): + self.client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}") + self.processed_images = {} -async def process_markdown_file(client: LlamaStackClient, output_dir: str, md_filename: str): - """Process a single markdown file and replace image placeholders with captions.""" - md_path = os.path.join(output_dir, md_filename) - images_dir = os.path.join(output_dir, 'images') - - print(f"Processing: {md_filename}") - - # Read the markdown content - with open(md_path, 'r', encoding='utf-8') as f: - content = f.read() + async def get_image_caption(self, image_path: str) -> str: + """Get caption for an image using LlamaStack Vision API.""" + if image_path in self.processed_images: + return self.processed_images[image_path] - # Get the base filename without extension - base_name = os.path.splitext(md_filename)[0] - - # Find all image placeholders - image_count = 1 - - while '' in content: - # Construct the expected image filename - image_filename = f"{base_name}-figure-{image_count}.png" - image_path = os.path.join(images_dir, image_filename) - - if not os.path.exists(image_path): - print(f"Warning: Image not found: {image_filename}") - break - try: - # Get caption from the Vision API - caption = await get_image_caption(client, image_path) + data_url = encode_image_to_data_url(image_path) - # Create markdown image with caption - image_markdown = f"![{caption}](images/{image_filename})\n\n_{caption}_" + message = { + "role": "user", + "content": [ + {"image": {"uri": data_url}}, + "This image comes from a scan 
inside a document, please provide a high level caption of what you see inside the image." + ] + } + + response = await self.client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + stream=False + ) - # Replace the first occurrence of the placeholder - content = content.replace('', image_markdown, 1) + caption = response.choices[0].message.content + self.processed_images[image_path] = caption + return caption + + except Exception as e: + print(f"Error processing image {image_path}: {str(e)}") + return None + + async def process_markdown_file(self, output_dir: str, md_filename: str) -> None: + """Process a single markdown file and replace image placeholders with captions.""" + print(f"Processing: {md_filename}") + + md_path = Path(output_dir) / md_filename + images_dir = Path(output_dir) / 'images' + + try: + content = md_path.read_text(encoding='utf-8') + except Exception as e: + print(f"Failed to read {md_filename}: {str(e)}") + return + + base_name = md_filename.rsplit('.', 1)[0] + image_count = 1 + updated = False + + while '' in content: + image_filename = f"{base_name}-figure-{image_count}.png" + image_path = images_dir / image_filename - print(f"Processed image {image_count} for {base_name}") + if not image_path.exists(): + print(f"Image not found: {image_filename}") + break - except Exception as e: - print(f"Error processing image {image_filename}: {str(e)}") - break + caption = await self.get_image_caption(str(image_path)) + if caption: + image_markdown = f"![{caption}](images/{image_filename})\n\n_{caption}_" + content = content.replace('', image_markdown, 1) + print(f"Processed image {image_count} for {base_name}") + updated = True + else: + print(f"Failed to process image {image_filename}") + break - image_count += 1 - - # Write the updated content back to the file - with open(md_path, 'w', encoding='utf-8') as f: - f.write(content) + image_count += 1 + + if updated: + try: + md_path.write_text(content, encoding='utf-8') + except Exception as e: + print(f"Failed to write updated content to {md_filename}: {str(e)}") async def main(): - # Define the output directory - output_dir = os.path.join('DATA', 'output') - - # Initialize LlamaStack client - client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}") + output_dir = Path('DATA') / 'output' - # Get all markdown files in the output directory - md_files = [f for f in os.listdir(output_dir) if f.endswith('.md')] + if not output_dir.exists(): + print(f"Output directory not found: {output_dir}") + return + + processor = DocumentProcessor() + md_files = list(output_dir.glob('*.md')) - # Process each markdown file + if not md_files: + print(f"No markdown files found in {output_dir}") + return + for md_file in md_files: - await process_markdown_file(client, output_dir, md_file) + await processor.process_markdown_file(output_dir, md_file.name) + + print("Processing completed") if __name__ == "__main__": asyncio.run(main()) From 36d62db65932c1d97ae82ed91f30dc0c7873f7af Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 15:37:28 -0800 Subject: [PATCH 43/99] push-chat --- ...ace.py => _deprecated_gradio_interface.py} | 0 examples/E2E-RAG-App/config.yaml | 69 +++++++ examples/E2E-RAG-App/gradio_interface copy.py | 188 ++++++++++++++++++ 3 files changed, 257 insertions(+) rename examples/E2E-RAG-App/{gradio_interface.py => _deprecated_gradio_interface.py} (100%) create mode 100644 examples/E2E-RAG-App/config.yaml create mode 100644 examples/E2E-RAG-App/gradio_interface copy.py diff 
--git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/_deprecated_gradio_interface.py similarity index 100% rename from examples/E2E-RAG-App/gradio_interface.py rename to examples/E2E-RAG-App/_deprecated_gradio_interface.py diff --git a/examples/E2E-RAG-App/config.yaml b/examples/E2E-RAG-App/config.yaml new file mode 100644 index 000000000..75ed40b1a --- /dev/null +++ b/examples/E2E-RAG-App/config.yaml @@ -0,0 +1,69 @@ +version: '2' +built_at: '2024-11-14T11:26:34.931183' +image_name: meta-reference-gpu +docker_image: null +conda_env: meta-reference-gpu +apis: +- inference +- memory +- safety +- agents +- telemetry +providers: + inference: + - provider_id: inline::meta-reference-0 + provider_type: inline::meta-reference + config: + model: Llama3.2-3B-Instruct + torch_seed: null + max_seq_len: 16384 + max_batch_size: 1 + create_distributed_process_group: true + checkpoint_dir: null + - provider_id: inline::meta-reference-0 + provider_type: inline::meta-reference + config: + model: Llama3.2-11B-Vision-Instruct + torch_seed: null + max_seq_len: 16384 + max_batch_size: 1 + create_distributed_process_group: true + checkpoint_dir: null + memory: + # - provider_id: inline::faiss-0 + # provider_type: inline::faiss + # config: + # kvstore: + # namespace: null + # type: sqlite + # db_path: /home/justinai/.llama/runtime/faiss_store.db + - provider_id: remote::chromadb-1 + provider_type: remote::chromadb + config: + host: localhost + port: 6000 + protocol: http + safety: + - provider_id: inline::llama-guard-0 + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: inline::meta-reference-0 + provider_type: inline::meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: /home/justinai/.llama/runtime/kvstore.db + telemetry: + - provider_id: inline::meta-reference-0 + provider_type: inline::meta-reference + config: {} +metadata_store: null +models: [] +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] diff --git a/examples/E2E-RAG-App/gradio_interface copy.py b/examples/E2E-RAG-App/gradio_interface copy.py new file mode 100644 index 000000000..f99a1f011 --- /dev/null +++ b/examples/E2E-RAG-App/gradio_interface copy.py @@ -0,0 +1,188 @@ +import asyncio +import json +import os +import uuid +from typing import List, Optional + +import gradio as gr +import requests +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types.agent_create_params import AgentConfig +from llama_stack_client.types.memory_insert_params import Document + +# Load environment variables +load_dotenv() + +class LlamaChatInterface: + def __init__(self, host: str, port: int): + self.host = host + self.port = port + self.client = LlamaStackClient(base_url=f"http://{host}:{port}") + self.agent = None + self.session_id = None + self.memory_bank_id = "test_bank_6" + + async def initialize_agent(self): + """Initialize the agent with model registration and configuration.""" + model_name = "Llama3.2-3B-Instruct" + + # Register model + response = requests.post( + f"http://{self.host}:{self.port}/models/register", + headers={"Content-Type": "application/json"}, + data=json.dumps({ + "model_id": model_name, + "provider_id": "inline::meta-reference-0", + "provider_model_id": None, + "metadata": None, + }) + ) + + # Agent configuration + agent_config = 
AgentConfig( + model=model_name, + instructions="You are a helpful assistant that can answer questions based on provided documents.", + sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + tools=[{ + "type": "memory", + "memory_bank_configs": [{"bank_id": self.memory_bank_id, "type": "vector"}], + "query_generator_config": {"type": "default", "sep": " "}, + "max_tokens_in_context": 4096, + "max_chunks": 10, + }], + tool_choice="auto", + tool_prompt_format="json", + enable_session_persistence=True, + ) + + self.agent = Agent(self.client, agent_config) + self.session_id = str(uuid.uuid4()) + + def is_memory_bank_present(self, target_identifier): + """Checks if a memory bank exists.""" + return any( + bank.identifier == target_identifier for bank in self.client.memory_banks.list() + ) + + async def setup_memory_bank(self): + """Set up the memory bank if it doesn't exist.""" + providers = self.client.providers.list() + provider_id = providers["memory"][0].provider_id + + if not self.is_memory_bank_present(self.memory_bank_id): + memory_bank = self.client.memory_banks.register( + memory_bank_id=self.memory_bank_id, + params={ + "embedding_model": "all-MiniLM-L6-v2", + "chunk_size_in_tokens": 512, + "overlap_size_in_tokens": 64, + }, + provider_id=provider_id, + ) + print(f"Memory bank registered: {memory_bank}") + + async def process_documents(self, files) -> str: + """Process and insert documents into the memory bank.""" + await self.setup_memory_bank() + + documents = [] + for file in files: + if file.name.endswith(('.txt', '.md')): + with open(file.name, 'r', encoding='utf-8') as f: + content = f.read() + document = Document( + document_id=file.name, + content=content, + mime_type="text/plain", + metadata={"filename": file.name} + ) + documents.append(document) + + if documents: + self.client.memory.insert( + bank_id=self.memory_bank_id, + documents=documents, + ) + return "Documents processed successfully!" + return "No valid documents found to process." 
+ + async def chat(self, message: str, history: List[List[str]]) -> str: + """Process a chat message and return the response.""" + if self.agent is None: + await self.initialize_agent() + + response = self.agent.create_turn( + messages=[{"role": "user", "content": message}], + session_id=self.session_id + ) + + # Collect the response using EventLogger + full_response = "" + async for log in EventLogger().log(response): + if hasattr(log, 'content'): + full_response += log.content + + return full_response + +def create_gradio_interface(host: str = "localhost", port: int = 8000): + # Initialize the chat interface + chat_interface = LlamaChatInterface(host, port) + + with gr.Blocks(theme=gr.themes.Soft()) as interface: + gr.Markdown("# LlamaStack Chat") + + with gr.Row(): + with gr.Column(scale=3): + chatbot = gr.Chatbot() + msg = gr.Textbox( + label="Message", + placeholder="Type your message here...", + show_label=False + ) + with gr.Row(): + submit = gr.Button("Send") + clear = gr.Button("Clear") + + with gr.Column(scale=1): + file_upload = gr.File( + label="Upload Documents", + file_types=[".txt", ".md"], + file_count="multiple" + ) + upload_button = gr.Button("Process Documents") + + gr.Examples( + examples=[ + "What topics are covered in the documents?", + "Can you summarize the main points?", + "Tell me more about specific details in the text.", + ], + inputs=msg + ) + + async def respond(message, chat_history): + bot_message = await chat_interface.chat(message, chat_history) + chat_history.append((message, bot_message)) + return "", chat_history + + async def process_files(files): + return await chat_interface.process_documents(files) + + def clear_chat(): + return None + + # Set up event handlers + msg.submit(respond, [msg, chatbot], [msg, chatbot]) + submit.click(respond, [msg, chatbot], [msg, chatbot]) + clear.click(clear_chat, None, chatbot) + upload_button.click(process_files, [file_upload], None) + + return interface + +if __name__ == "__main__": + # Create and launch the Gradio interface + interface = create_gradio_interface() + interface.launch(server_name="0.0.0.0", server_port=7860) From 92bd5079d4844aa9a426eb723430ffeb04792f5f Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 15:38:14 -0800 Subject: [PATCH 44/99] Update gradio_interface copy.py --- examples/E2E-RAG-App/gradio_interface copy.py | 141 ++++++++---------- 1 file changed, 66 insertions(+), 75 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface copy.py b/examples/E2E-RAG-App/gradio_interface copy.py index f99a1f011..a92ffabfe 100644 --- a/examples/E2E-RAG-App/gradio_interface copy.py +++ b/examples/E2E-RAG-App/gradio_interface copy.py @@ -17,49 +17,20 @@ load_dotenv() class LlamaChatInterface: - def __init__(self, host: str, port: int): + def __init__(self, host: str, port: int, docs_dir: str): self.host = host self.port = port + self.docs_dir = docs_dir self.client = LlamaStackClient(base_url=f"http://{host}:{port}") self.agent = None self.session_id = None self.memory_bank_id = "test_bank_6" - async def initialize_agent(self): - """Initialize the agent with model registration and configuration.""" - model_name = "Llama3.2-3B-Instruct" - - # Register model - response = requests.post( - f"http://{self.host}:{self.port}/models/register", - headers={"Content-Type": "application/json"}, - data=json.dumps({ - "model_id": model_name, - "provider_id": "inline::meta-reference-0", - "provider_model_id": None, - "metadata": None, - }) - ) - - # Agent configuration - agent_config = 
AgentConfig( - model=model_name, - instructions="You are a helpful assistant that can answer questions based on provided documents.", - sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, - tools=[{ - "type": "memory", - "memory_bank_configs": [{"bank_id": self.memory_bank_id, "type": "vector"}], - "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 4096, - "max_chunks": 10, - }], - tool_choice="auto", - tool_prompt_format="json", - enable_session_persistence=True, - ) - - self.agent = Agent(self.client, agent_config) - self.session_id = str(uuid.uuid4()) + async def initialize_system(self): + """Initialize the entire system including memory bank and agent.""" + await self.setup_memory_bank() + await self.load_documents() + await self.initialize_agent() def is_memory_bank_present(self, target_identifier): """Checks if a memory bank exists.""" @@ -84,35 +55,69 @@ async def setup_memory_bank(self): ) print(f"Memory bank registered: {memory_bank}") - async def process_documents(self, files) -> str: - """Process and insert documents into the memory bank.""" - await self.setup_memory_bank() - + async def load_documents(self): + """Load documents from the specified directory into memory bank.""" documents = [] - for file in files: - if file.name.endswith(('.txt', '.md')): - with open(file.name, 'r', encoding='utf-8') as f: - content = f.read() + for filename in os.listdir(self.docs_dir): + if filename.endswith(('.txt', '.md')): + file_path = os.path.join(self.docs_dir, filename) + with open(file_path, 'r', encoding='utf-8') as file: + content = file.read() document = Document( - document_id=file.name, + document_id=filename, content=content, mime_type="text/plain", - metadata={"filename": file.name} + metadata={"filename": filename} ) documents.append(document) - + if documents: self.client.memory.insert( bank_id=self.memory_bank_id, documents=documents, ) - return "Documents processed successfully!" - return "No valid documents found to process." 
+ print(f"Loaded {len(documents)} documents from {self.docs_dir}") + + async def initialize_agent(self): + """Initialize the agent with model registration and configuration.""" + model_name = "Llama3.2-3B-Instruct" + + # Register model + response = requests.post( + f"http://{self.host}:{self.port}/models/register", + headers={"Content-Type": "application/json"}, + data=json.dumps({ + "model_id": model_name, + "provider_id": "inline::meta-reference-0", + "provider_model_id": None, + "metadata": None, + }) + ) + + # Agent configuration + agent_config = AgentConfig( + model=model_name, + instructions="You are a helpful assistant that can answer questions based on provided documents.", + sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + tools=[{ + "type": "memory", + "memory_bank_configs": [{"bank_id": self.memory_bank_id, "type": "vector"}], + "query_generator_config": {"type": "default", "sep": " "}, + "max_tokens_in_context": 4096, + "max_chunks": 10, + }], + tool_choice="auto", + tool_prompt_format="json", + enable_session_persistence=True, + ) + + self.agent = Agent(self.client, agent_config) + self.session_id = str(uuid.uuid4()) async def chat(self, message: str, history: List[List[str]]) -> str: """Process a chat message and return the response.""" if self.agent is None: - await self.initialize_agent() + await self.initialize_system() response = self.agent.create_turn( messages=[{"role": "user", "content": message}], @@ -127,32 +132,22 @@ async def chat(self, message: str, history: List[List[str]]) -> str: return full_response -def create_gradio_interface(host: str = "localhost", port: int = 8000): +def create_gradio_interface(host: str = "localhost", port: int = 8000, docs_dir: str = "./docs"): # Initialize the chat interface - chat_interface = LlamaChatInterface(host, port) + chat_interface = LlamaChatInterface(host, port, docs_dir) with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown("# LlamaStack Chat") + chatbot = gr.Chatbot() + msg = gr.Textbox( + label="Message", + placeholder="Type your message here...", + show_label=False + ) with gr.Row(): - with gr.Column(scale=3): - chatbot = gr.Chatbot() - msg = gr.Textbox( - label="Message", - placeholder="Type your message here...", - show_label=False - ) - with gr.Row(): - submit = gr.Button("Send") - clear = gr.Button("Clear") - - with gr.Column(scale=1): - file_upload = gr.File( - label="Upload Documents", - file_types=[".txt", ".md"], - file_count="multiple" - ) - upload_button = gr.Button("Process Documents") + submit = gr.Button("Send") + clear = gr.Button("Clear") gr.Examples( examples=[ @@ -168,9 +163,6 @@ async def respond(message, chat_history): chat_history.append((message, bot_message)) return "", chat_history - async def process_files(files): - return await chat_interface.process_documents(files) - def clear_chat(): return None @@ -178,11 +170,10 @@ def clear_chat(): msg.submit(respond, [msg, chatbot], [msg, chatbot]) submit.click(respond, [msg, chatbot], [msg, chatbot]) clear.click(clear_chat, None, chatbot) - upload_button.click(process_files, [file_upload], None) return interface if __name__ == "__main__": # Create and launch the Gradio interface - interface = create_gradio_interface() + interface = create_gradio_interface(docs_dir="./your_docs_directory") # Specify your docs directory here interface.launch(server_name="0.0.0.0", server_port=7860) From 2f080f00a11dedda334467a29f61f0d238703b7d Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 18 Nov 2024 15:38:30 -0800 Subject: 
[PATCH 45/99] push connected interface --- .../E2E-RAG-App/{gradio_interface copy.py => gradio_interface.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/E2E-RAG-App/{gradio_interface copy.py => gradio_interface.py} (100%) diff --git a/examples/E2E-RAG-App/gradio_interface copy.py b/examples/E2E-RAG-App/gradio_interface.py similarity index 100% rename from examples/E2E-RAG-App/gradio_interface copy.py rename to examples/E2E-RAG-App/gradio_interface.py From 887be1453ffb9d5024470c4301a19a6e49447e48 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 18 Nov 2024 15:31:45 -0800 Subject: [PATCH 46/99] fixed conversation loop --- examples/E2E-RAG-App/ollama_main.py | 122 ++++++++++++++++------------ 1 file changed, 68 insertions(+), 54 deletions(-) diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index 78ead8a29..93b28fc77 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -6,7 +6,6 @@ import fire import requests -from dotenv import load_dotenv from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.event_logger import EventLogger @@ -16,52 +15,63 @@ from tqdm import tqdm # Initialization -load_dotenv() + + +def is_memory_bank_present(client, target_identifier): + """Checks if a memory bank with the given identifier is present in the list.""" + return any( + bank.identifier == target_identifier for bank in client.memory_banks.list() + ) async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: str): """Inserts entire text documents from a directory into a memory bank.""" - memory_bank_id = "test_bank_3" + memory_bank_id = "test_bank_6" providers = client.providers.list() provider_id = providers["memory"][0].provider_id - # Register a memory bank - memory_bank = client.memory_banks.register( - memory_bank_id=memory_bank_id, - params={ - "embedding_model": "all-MiniLM-L6-v2", - "chunk_size_in_tokens": 512, - "overlap_size_in_tokens": 64, - }, - provider_id=provider_id, - ) - cprint(f"Memory bank registered: {memory_bank}", "green") - - # Prepare entire documents for insertion - documents = [] - for filename in os.listdir(docs_dir): - if filename.endswith((".txt", ".md")): - file_path = os.path.join(docs_dir, filename) - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - - document = Document( - document_id=f"{filename}", - content=content, - mime_type="text/plain", - metadata={"filename": filename}, - ) - documents.append(document) - - # Insert documents into the memory bank - client.memory.insert( - bank_id=memory_bank_id, - documents=documents, - ) - cprint( - f"Inserted documents from {docs_dir} into memory bank '{memory_bank_id}'.", - "green", - ) + memorybank_boolean = is_memory_bank_present(client, memory_bank_id) + print(client.memory_banks.list()) + print(memorybank_boolean) + + if True: + # Register a memory bank + memory_bank = client.memory_banks.register( + memory_bank_id=memory_bank_id, + params={ + "embedding_model": "all-MiniLM-L6-v2", + "chunk_size_in_tokens": 512, + "overlap_size_in_tokens": 64, + }, + provider_id=provider_id, + ) + cprint(f"Memory bank registered: {memory_bank}", "green") + + # Prepare entire documents for insertion + documents = [] + for filename in os.listdir(docs_dir): + if filename.endswith((".txt", ".md")): + file_path = os.path.join(docs_dir, filename) + with open(file_path, "r", encoding="utf-8") as file: + content = 
file.read() + + document = Document( + document_id=f"{filename}", + content=content, + mime_type="text/plain", + metadata={"filename": filename}, + ) + documents.append(document) + + # Insert documents into the memory bank + client.memory.insert( + bank_id=memory_bank_id, + documents=documents, + ) + cprint( + f"Inserted documents from {docs_dir} into memory bank '{memory_bank_id}'.", + "green", + ) async def run_main(host: str, port: int, docs_dir: str) -> None: @@ -80,8 +90,8 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: { "model_id": model_name, "provider_model_id": None, - "provider_id": "ollama", - # "provider_id": "inline::meta-reference-0", + # "provider_id": "ollama", + "provider_id": "inline::meta-reference-0", "metadata": None, } ), @@ -96,7 +106,7 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: tools=[ { "type": "memory", - "memory_bank_configs": [{"bank_id": "test_bank_3", "type": "vector"}], + "memory_bank_configs": [{"bank_id": "test_bank_6", "type": "vector"}], "query_generator_config": {"type": "default", "sep": " "}, "max_tokens_in_context": 4096, "max_chunks": 10, @@ -108,30 +118,34 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: ) agent = Agent(client, agent_config) + user_prompts = [ + "What is the policy regarding smoking in City offices?", + "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", + "What are the three categories of employees eligible for health coverage?", + "How long must an employee wait before using vacation time after starting employment?", + "What must an employee do if they're summoned for jury duty?", + ] + + session_id = agent.create_session(f"session-{uuid.uuid4()}") + while True: user_input = input("User> ") if user_input.lower() in ["exit", "quit", "bye"]: cprint("Ending conversation. 
Goodbye!", "yellow") break - message = {"role": "user", "content": user_input} + cprint(f"Generating response for: {user_input}", "green") + + # Create a turn and generate the response asynchronously response = agent.create_turn( - messages=[ - { - "role": "user", - "content": message, - } - ], - session_id=session_id, + messages=[{"role": "user", "content": user_input}], session_id=session_id ) + # Log and display each response asynchronously async for log in EventLogger().log(response): log.print() -# Run the chat loop in a Jupyter Notebook cell using await - - def main(host: str, port: int, docs_dir: str) -> None: """Entry point for the script.""" asyncio.run(run_main(host, port, docs_dir)) From f392940c566e0617b911445adbe62aa8c345960e Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 18 Nov 2024 17:08:43 -0800 Subject: [PATCH 47/99] add chromaDB docker --- examples/E2E-RAG-App/docker/chroma_start.sh | 3 + examples/E2E-RAG-App/docker/compose.yaml | 14 ++- .../E2E-RAG-App/docker/llama_stack_run.yaml | 14 ++- .../E2E-RAG-App/docker/llama_stack_start.sh | 15 +-- examples/E2E-RAG-App/gradio_interface.py | 94 +++++++++++-------- examples/E2E-RAG-App/ollama_main.py | 4 +- 6 files changed, 84 insertions(+), 60 deletions(-) create mode 100644 examples/E2E-RAG-App/docker/chroma_start.sh diff --git a/examples/E2E-RAG-App/docker/chroma_start.sh b/examples/E2E-RAG-App/docker/chroma_start.sh new file mode 100644 index 000000000..69279ad17 --- /dev/null +++ b/examples/E2E-RAG-App/docker/chroma_start.sh @@ -0,0 +1,3 @@ +pip install chromadb +chroma run --host localhost --port 6000 --path /root/rag_data/& +while :; do sleep 2073600; done diff --git a/examples/E2E-RAG-App/docker/compose.yaml b/examples/E2E-RAG-App/docker/compose.yaml index c560c467a..6a751d760 100644 --- a/examples/E2E-RAG-App/docker/compose.yaml +++ b/examples/E2E-RAG-App/docker/compose.yaml @@ -1,4 +1,13 @@ services: + chromadb: + image: chromadb/chroma:latest + network_mode: "host" + volumes: + - ${DOC_PATH}:/root/rag_data/ + - ./chroma_start.sh:/root/chroma_start.sh + ports: + - "6000:6000" + entrypoint: bash /root/chroma_start.sh ollama: image: ollama/ollama:latest network_mode: "host" @@ -25,8 +34,9 @@ services: - ${DOC_PATH}:/root/rag_data/ - ./llama_stack_start.sh:/root/llama_stack_start.sh ports: - - "5000:5000" - - "1234:1234" + - "5000:5000" # for llama-stack + - "1234:1234" # for UI + - "6000:6000" # for DB # Hack: wait for ollama server to start before starting docker entrypoint: bash /root/llama_stack_start.sh #entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml" diff --git a/examples/E2E-RAG-App/docker/llama_stack_run.yaml b/examples/E2E-RAG-App/docker/llama_stack_run.yaml index 4d78e8a52..1fa0f5b6f 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_run.yaml +++ b/examples/E2E-RAG-App/docker/llama_stack_run.yaml @@ -13,18 +13,17 @@ apis: - safety providers: inference: - - provider_id: ollama0 + - provider_id: remote::ollama provider_type: remote::ollama config: url: http://127.0.0.1:14343 memory: - - provider_id: inline::faiss-0 - provider_type: inline::faiss + - provider_id: remote::chromadb + provider_type: remote::chromadb config: - kvstore: - namespace: null - type: sqlite - db_path: ${HOME}/.llama/runtime/faiss_store.db + host: localhost + port: 6000 + protocol: http safety: - provider_id: inline::llama-guard-0 provider_type: inline::llama-guard @@ -49,4 +48,3 @@ memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] - diff --git 
a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh index 9deb56bdf..c900f8e6e 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -1,18 +1,19 @@ -sleep 90 +sleep 60 echo "starting to install llama-stack" apt-get update apt-get install -y git #pip install /root/llama-stack pip install git+https://github.com/meta-llama/llama-stack.git@2edfda97e9659155074269fc3b7e66d9bb2c57d4 -pip uninstall -y chromadb-client -pip uninstall -y chromadb -pip install -U chromadb +#pip install tiktoken +pip install gradio echo "Installing llama-stack-client" -pip install llama-stack-client==0.0.50 +pip install --index-url https://test.pypi.org/simple/ llama_stack_client==0.0.53rc2 +#pip install git+https://github.com/meta-llama/llama-stack-client-python.git@f5a2391241eac03eea356b206469081688277d23 echo "starting the llama-stack server" python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml& -sleep 30 +sleep 3600000000 echo "running the RAG app" -python /root/E2E-RAG-App/ollama_main.py localhost 5000 /root/RAG_service.json +#python /root/E2E-RAG-App/gradio_interface.py +#python /root/E2E-RAG-App/ollama_main.py localhost 5000 diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index a92ffabfe..cf3411e6c 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -16,6 +16,7 @@ # Load environment variables load_dotenv() + class LlamaChatInterface: def __init__(self, host: str, port: int, docs_dir: str): self.host = host @@ -25,7 +26,7 @@ def __init__(self, host: str, port: int, docs_dir: str): self.agent = None self.session_id = None self.memory_bank_id = "test_bank_6" - + async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" await self.setup_memory_bank() @@ -35,7 +36,8 @@ async def initialize_system(self): def is_memory_bank_present(self, target_identifier): """Checks if a memory bank exists.""" return any( - bank.identifier == target_identifier for bank in self.client.memory_banks.list() + bank.identifier == target_identifier + for bank in self.client.memory_banks.list() ) async def setup_memory_bank(self): @@ -59,15 +61,15 @@ async def load_documents(self): """Load documents from the specified directory into memory bank.""" documents = [] for filename in os.listdir(self.docs_dir): - if filename.endswith(('.txt', '.md')): + if filename.endswith((".txt", ".md")): file_path = os.path.join(self.docs_dir, filename) - with open(file_path, 'r', encoding='utf-8') as file: + with open(file_path, "r", encoding="utf-8") as file: content = file.read() document = Document( document_id=filename, content=content, mime_type="text/plain", - metadata={"filename": filename} + metadata={"filename": filename}, ) documents.append(document) @@ -81,36 +83,43 @@ async def load_documents(self): async def initialize_agent(self): """Initialize the agent with model registration and configuration.""" model_name = "Llama3.2-3B-Instruct" - + # Register model response = requests.post( f"http://{self.host}:{self.port}/models/register", headers={"Content-Type": "application/json"}, - data=json.dumps({ - "model_id": model_name, - "provider_id": "inline::meta-reference-0", - "provider_model_id": None, - "metadata": None, - }) + data=json.dumps( + { + "model_id": model_name, + "provider_id": "remote::ollama", + # "provider_id": "inline::meta-reference-0", + 
"provider_model_id": None, + "metadata": None, + } + ), ) - + # Agent configuration agent_config = AgentConfig( model=model_name, instructions="You are a helpful assistant that can answer questions based on provided documents.", sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, - tools=[{ - "type": "memory", - "memory_bank_configs": [{"bank_id": self.memory_bank_id, "type": "vector"}], - "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 4096, - "max_chunks": 10, - }], + tools=[ + { + "type": "memory", + "memory_bank_configs": [ + {"bank_id": self.memory_bank_id, "type": "vector"} + ], + "query_generator_config": {"type": "default", "sep": " "}, + "max_tokens_in_context": 4096, + "max_chunks": 10, + } + ], tool_choice="auto", tool_prompt_format="json", enable_session_persistence=True, ) - + self.agent = Agent(self.client, agent_config) self.session_id = str(uuid.uuid4()) @@ -118,62 +127,65 @@ async def chat(self, message: str, history: List[List[str]]) -> str: """Process a chat message and return the response.""" if self.agent is None: await self.initialize_system() - + response = self.agent.create_turn( - messages=[{"role": "user", "content": message}], - session_id=self.session_id + messages=[{"role": "user", "content": message}], session_id=self.session_id ) - + # Collect the response using EventLogger full_response = "" async for log in EventLogger().log(response): - if hasattr(log, 'content'): + if hasattr(log, "content"): full_response += log.content - + return full_response -def create_gradio_interface(host: str = "localhost", port: int = 8000, docs_dir: str = "./docs"): + +def create_gradio_interface( + host: str = "localhost", port: int = 5000, docs_dir: str = "./docs" +): # Initialize the chat interface chat_interface = LlamaChatInterface(host, port, docs_dir) - + with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown("# LlamaStack Chat") - + chatbot = gr.Chatbot() msg = gr.Textbox( - label="Message", - placeholder="Type your message here...", - show_label=False + label="Message", placeholder="Type your message here...", show_label=False ) with gr.Row(): submit = gr.Button("Send") clear = gr.Button("Clear") - + gr.Examples( examples=[ "What topics are covered in the documents?", "Can you summarize the main points?", "Tell me more about specific details in the text.", ], - inputs=msg + inputs=msg, ) - + async def respond(message, chat_history): bot_message = await chat_interface.chat(message, chat_history) chat_history.append((message, bot_message)) return "", chat_history - + def clear_chat(): return None - + # Set up event handlers msg.submit(respond, [msg, chatbot], [msg, chatbot]) submit.click(respond, [msg, chatbot], [msg, chatbot]) clear.click(clear_chat, None, chatbot) - + return interface + if __name__ == "__main__": # Create and launch the Gradio interface - interface = create_gradio_interface(docs_dir="./your_docs_directory") # Specify your docs directory here - interface.launch(server_name="0.0.0.0", server_port=7860) + interface = create_gradio_interface( + docs_dir="/root/rag_data" + ) # Specify your docs directory here + interface.launch(server_name="0.0.0.0", server_port=1234, share=True) diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index 93b28fc77..610785a6e 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -90,8 +90,8 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: { "model_id": model_name, 
"provider_model_id": None, - # "provider_id": "ollama", - "provider_id": "inline::meta-reference-0", + "provider_id": "remote::ollama", + # "provider_id": "inline::meta-reference-0", "metadata": None, } ), From 0e7bc691f1d9afc874645f11dbe6252833fb08b5 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 18 Nov 2024 17:39:16 -0800 Subject: [PATCH 48/99] fix llama-stack version --- examples/E2E-RAG-App/docker/RAG_service.json | 2 +- examples/E2E-RAG-App/docker/llama_stack_start.sh | 8 +++++--- examples/E2E-RAG-App/ollama_main.py | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index dc601adf8..4084446cb 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,4 +1,4 @@ { "model_name": "llama3.2:3b", - "document_path": "${HOME}/llama-stack-apps/examples/E2E-RAG-App/example_data" + "document_path": "${HOME}/work/llama-stack-apps/examples/E2E-RAG-App/example_data" } diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh index c900f8e6e..484a035ef 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -1,13 +1,15 @@ sleep 60 echo "starting to install llama-stack" -apt-get update -apt-get install -y git +#apt-get update +#apt-get install -y git #pip install /root/llama-stack -pip install git+https://github.com/meta-llama/llama-stack.git@2edfda97e9659155074269fc3b7e66d9bb2c57d4 +#pip install git+https://github.com/meta-llama/llama-stack.git@2edfda97e9659155074269fc3b7e66d9bb2c57d4 #pip install tiktoken +pip install --upgrade --no-deps --force-reinstall --index-url https://test.pypi.org/simple/ llama_stack==0.0.53rc1 pip install gradio echo "Installing llama-stack-client" +pip install distro pip install --index-url https://test.pypi.org/simple/ llama_stack_client==0.0.53rc2 #pip install git+https://github.com/meta-llama/llama-stack-client-python.git@f5a2391241eac03eea356b206469081688277d23 echo "starting the llama-stack server" diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index 610785a6e..f11135c35 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -26,7 +26,7 @@ def is_memory_bank_present(client, target_identifier): async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: str): """Inserts entire text documents from a directory into a memory bank.""" - memory_bank_id = "test_bank_6" + memory_bank_id = "test_bank_8" providers = client.providers.list() provider_id = providers["memory"][0].provider_id @@ -106,7 +106,7 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: tools=[ { "type": "memory", - "memory_bank_configs": [{"bank_id": "test_bank_6", "type": "vector"}], + "memory_bank_configs": [{"bank_id": "test_bank_8", "type": "vector"}], "query_generator_config": {"type": "default", "sep": " "}, "max_tokens_in_context": 4096, "max_chunks": 10, From 2a5e9dbf7f9189f65de0d20b3d206d70c0bf30fa Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 19 Nov 2024 10:52:04 -0800 Subject: [PATCH 49/99] changed docker image --- examples/E2E-RAG-App/docker/compose.yaml | 18 ++++++++++++------ .../E2E-RAG-App/docker/llama_stack_start.sh | 8 ++++---- examples/E2E-RAG-App/docker/ollama_start.sh | 7 +++---- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/examples/E2E-RAG-App/docker/compose.yaml 
b/examples/E2E-RAG-App/docker/compose.yaml index 6a751d760..2c7762398 100644 --- a/examples/E2E-RAG-App/docker/compose.yaml +++ b/examples/E2E-RAG-App/docker/compose.yaml @@ -13,18 +13,23 @@ services: network_mode: "host" environment: - MODEL_NAME=${MODEL_NAME} + - OLLAMA_KEEP_ALIVE=24h + - OLLAMA_HOST=127.0.0.1:14343 volumes: - ollama:/root/.ollama # this solution synchronizes with the docker volume and loads the model rocket fast - ./ollama_start.sh:/root/ollama_start.sh - ./RAG_service.json:/root/RAG_service.json ports: - - "11434:11434" - entrypoint: bash /root/ollama_start.sh + - "14343:14343" + tty: true + restart: always + entrypoint: ["bash", "/root/ollama_start.sh"] llamastack: - depends_on: - - ollama - image: llamastack/distribution-ollama + image: llamastack/distribution-ollama:test-0.0.53rc5 network_mode: "host" + tty: true + environment: + - OLLAMA_URL=http://localhost:11434 volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file @@ -38,10 +43,11 @@ services: - "1234:1234" # for UI - "6000:6000" # for DB # Hack: wait for ollama server to start before starting docker - entrypoint: bash /root/llama_stack_start.sh + entrypoint: ["bash", "/root/llama_stack_start.sh"] #entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml" deploy: restart_policy: condition: "no" + volumes: ollama: diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh index 484a035ef..645ac4c9c 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -1,19 +1,19 @@ -sleep 60 -echo "starting to install llama-stack" +sleep 45 +#echo "starting to install llama-stack" #apt-get update #apt-get install -y git #pip install /root/llama-stack #pip install git+https://github.com/meta-llama/llama-stack.git@2edfda97e9659155074269fc3b7e66d9bb2c57d4 #pip install tiktoken -pip install --upgrade --no-deps --force-reinstall --index-url https://test.pypi.org/simple/ llama_stack==0.0.53rc1 +#pip install --upgrade --no-deps --force-reinstall --index-url https://test.pypi.org/simple/ llama_stack==0.0.53rc1 pip install gradio echo "Installing llama-stack-client" pip install distro pip install --index-url https://test.pypi.org/simple/ llama_stack_client==0.0.53rc2 #pip install git+https://github.com/meta-llama/llama-stack-client-python.git@f5a2391241eac03eea356b206469081688277d23 echo "starting the llama-stack server" -python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml& +python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6& sleep 3600000000 echo "running the RAG app" diff --git a/examples/E2E-RAG-App/docker/ollama_start.sh b/examples/E2E-RAG-App/docker/ollama_start.sh index 2a1a2c336..d16f30959 100644 --- a/examples/E2E-RAG-App/docker/ollama_start.sh +++ b/examples/E2E-RAG-App/docker/ollama_start.sh @@ -1,8 +1,7 @@ #!/bin/bash echo "-------------start to serve------------" -/usr/bin/ollama serve& -echo "pulling ollama model: $MODEL_NAME" -/usr/bin/ollama pull $MODEL_NAME +OLLAMA_HOST=127.0.0.1:14343 /usr/bin/ollama serve & echo "Running ollama model: $MODEL_NAME" -/usr/bin/ollama run $MODEL_NAME +sleep 5 +OLLAMA_HOST=127.0.0.1:14343 /usr/bin/ollama run $MODEL_NAME while :; do sleep 2073600; done From 208c76286c55d0703b6392a6a5a4f0cf88ce45f5 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 19 Nov 2024 15:32:44 -0800 Subject: [PATCH 50/99] gradio UI working but slow --- 
examples/E2E-RAG-App/docker/compose.yaml | 6 +- .../E2E-RAG-App/docker/llama_stack_start.sh | 9 ++- examples/E2E-RAG-App/gradio_interface.py | 34 +++------- examples/E2E-RAG-App/ollama_main.py | 62 +++++++++---------- 4 files changed, 47 insertions(+), 64 deletions(-) diff --git a/examples/E2E-RAG-App/docker/compose.yaml b/examples/E2E-RAG-App/docker/compose.yaml index 2c7762398..2d2228e7f 100644 --- a/examples/E2E-RAG-App/docker/compose.yaml +++ b/examples/E2E-RAG-App/docker/compose.yaml @@ -25,11 +25,9 @@ services: restart: always entrypoint: ["bash", "/root/ollama_start.sh"] llamastack: - image: llamastack/distribution-ollama:test-0.0.53rc5 + image: llamastack/distribution-ollama:test-0.0.53rc7 network_mode: "host" tty: true - environment: - - OLLAMA_URL=http://localhost:11434 volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file @@ -40,7 +38,7 @@ services: - ./llama_stack_start.sh:/root/llama_stack_start.sh ports: - "5000:5000" # for llama-stack - - "1234:1234" # for UI + - "7860:7860" # for UI - "6000:6000" # for DB # Hack: wait for ollama server to start before starting docker entrypoint: ["bash", "/root/llama_stack_start.sh"] diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh index 645ac4c9c..1122da512 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -8,14 +8,13 @@ sleep 45 #pip install tiktoken #pip install --upgrade --no-deps --force-reinstall --index-url https://test.pypi.org/simple/ llama_stack==0.0.53rc1 pip install gradio -echo "Installing llama-stack-client" +# echo "Installing llama-stack-client" pip install distro -pip install --index-url https://test.pypi.org/simple/ llama_stack_client==0.0.53rc2 +pip install --index-url https://test.pypi.org/simple/ llama_stack_client==0.0.53rc5 #pip install git+https://github.com/meta-llama/llama-stack-client-python.git@f5a2391241eac03eea356b206469081688277d23 echo "starting the llama-stack server" python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6& -sleep 3600000000 - +sleep 36000000 echo "running the RAG app" -#python /root/E2E-RAG-App/gradio_interface.py +python /root/E2E-RAG-App/gradio_interface.py #python /root/E2E-RAG-App/ollama_main.py localhost 5000 diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index cf3411e6c..b895e3eba 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -25,7 +25,7 @@ def __init__(self, host: str, port: int, docs_dir: str): self.client = LlamaStackClient(base_url=f"http://{host}:{port}") self.agent = None self.session_id = None - self.memory_bank_id = "test_bank_6" + self.memory_bank_id = "test_bank_666" async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" @@ -44,14 +44,16 @@ async def setup_memory_bank(self): """Set up the memory bank if it doesn't exist.""" providers = self.client.providers.list() provider_id = providers["memory"][0].provider_id - + memorybank_list = self.client.memory_banks.list() + # for bank in memorybank_list: + # self.client.memory_banks.unregister(memory_bank_id=bank.identifier) if not self.is_memory_bank_present(self.memory_bank_id): memory_bank = self.client.memory_banks.register( memory_bank_id=self.memory_bank_id, params={ "embedding_model": "all-MiniLM-L6-v2", - "chunk_size_in_tokens": 512, - "overlap_size_in_tokens": 64, + "chunk_size_in_tokens": 
100, + "overlap_size_in_tokens": 10, }, provider_id=provider_id, ) @@ -84,21 +86,6 @@ async def initialize_agent(self): """Initialize the agent with model registration and configuration.""" model_name = "Llama3.2-3B-Instruct" - # Register model - response = requests.post( - f"http://{self.host}:{self.port}/models/register", - headers={"Content-Type": "application/json"}, - data=json.dumps( - { - "model_id": model_name, - "provider_id": "remote::ollama", - # "provider_id": "inline::meta-reference-0", - "provider_model_id": None, - "metadata": None, - } - ), - ) - # Agent configuration agent_config = AgentConfig( model=model_name, @@ -111,17 +98,16 @@ async def initialize_agent(self): {"bank_id": self.memory_bank_id, "type": "vector"} ], "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 4096, - "max_chunks": 10, + "max_tokens_in_context": 300, + "max_chunks": 5, } ], tool_choice="auto", tool_prompt_format="json", enable_session_persistence=True, ) - self.agent = Agent(self.client, agent_config) - self.session_id = str(uuid.uuid4()) + self.session_id = self.agent.create_session(f"session-{uuid.uuid4()}") async def chat(self, message: str, history: List[List[str]]) -> str: """Process a chat message and return the response.""" @@ -188,4 +174,4 @@ def clear_chat(): interface = create_gradio_interface( docs_dir="/root/rag_data" ) # Specify your docs directory here - interface.launch(server_name="0.0.0.0", server_port=1234, share=True) + interface.launch(server_name="0.0.0.0", server_port=7860, share=True) diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index f11135c35..f5a812aff 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -26,22 +26,29 @@ def is_memory_bank_present(client, target_identifier): async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: str): """Inserts entire text documents from a directory into a memory bank.""" - memory_bank_id = "test_bank_8" + memory_bank_id = "test_bank_0" providers = client.providers.list() provider_id = providers["memory"][0].provider_id memorybank_boolean = is_memory_bank_present(client, memory_bank_id) - print(client.memory_banks.list()) - print(memorybank_boolean) + memorybank_list = client.memory_banks.list() + print(memorybank_list) + for bank in memorybank_list: + try: + client.memory_banks.unregister(memory_bank_id=bank.provider_resource_id) + except Exception as e: + print(e) - if True: + print("after unregistration: ", client.memory_banks.list()) + + if not memorybank_boolean: # Register a memory bank memory_bank = client.memory_banks.register( memory_bank_id=memory_bank_id, params={ "embedding_model": "all-MiniLM-L6-v2", - "chunk_size_in_tokens": 512, - "overlap_size_in_tokens": 64, + "chunk_size_in_tokens": 100, + "overlap_size_in_tokens": 10, }, provider_id=provider_id, ) @@ -81,22 +88,22 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: # Insert documents to the memory bank await insert_documents_to_memory_bank(client, docs_dir) - # Model registration + # # Model registration model_name = "Llama3.2-3B-Instruct" - response = requests.post( - f"http://{host}:{port}/models/register", - headers={"Content-Type": "application/json"}, - data=json.dumps( - { - "model_id": model_name, - "provider_model_id": None, - "provider_id": "remote::ollama", - # "provider_id": "inline::meta-reference-0", - "metadata": None, - } - ), - ) - cprint(f"Model registration status: {response.status_code}", "blue") 
+ # response = requests.post( + # f"http://{host}:{port}//alpha/models/register", + # headers={"Content-Type": "application/json"}, + # data=json.dumps( + # { + # "model_id": model_name, + # "provider_model_id": None, + # "provider_id": "remote::ollama", + # # "provider_id": "inline::meta-reference-0", + # "metadata": None, + # } + # ), + # ) + # cprint(f"Model registration status: {response.status_code}", "blue") # Agent configuration agent_config = AgentConfig( @@ -106,10 +113,10 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: tools=[ { "type": "memory", - "memory_bank_configs": [{"bank_id": "test_bank_8", "type": "vector"}], + "memory_bank_configs": [{"bank_id": "test_bank_0", "type": "vector"}], "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 4096, - "max_chunks": 10, + "max_tokens_in_context": 512, + "max_chunks": 5, } ], tool_choice="auto", @@ -118,13 +125,6 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: ) agent = Agent(client, agent_config) - user_prompts = [ - "What is the policy regarding smoking in City offices?", - "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", - "What are the three categories of employees eligible for health coverage?", - "How long must an employee wait before using vacation time after starting employment?", - "What must an employee do if they're summoned for jury duty?", - ] session_id = agent.create_session(f"session-{uuid.uuid4()}") From 36b7aaa87677b6ad26122fe05fc7c18baa0a4770 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 16:20:09 -0800 Subject: [PATCH 51/99] add streaming --- .../_deprecated_gradio_interface.py | 86 ------------------ examples/E2E-RAG-App/gradio_interface.py | 88 +++++++++++++------ 2 files changed, 60 insertions(+), 114 deletions(-) delete mode 100644 examples/E2E-RAG-App/_deprecated_gradio_interface.py diff --git a/examples/E2E-RAG-App/_deprecated_gradio_interface.py b/examples/E2E-RAG-App/_deprecated_gradio_interface.py deleted file mode 100644 index cadd73f6f..000000000 --- a/examples/E2E-RAG-App/_deprecated_gradio_interface.py +++ /dev/null @@ -1,86 +0,0 @@ -import asyncio -import json -import os -from typing import List, Optional -import gradio as gr -from dotenv import load_dotenv -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.types.agent_create_params import AgentConfig - -# Load environment variables -load_dotenv() - -class LlamaChatInterface: - def __init__(self, host: str, port: int): - self.client = LlamaStackClient(base_url=f"http://{host}:{port}") - self.agent = None - self.session_id = None - self.memory_bank_id = "test_bank_3" - - async def initialize_agent(self): - # Model registration - model_name = "Llama3.2-3B-Instruct" - - # Agent configuration - agent_config = AgentConfig( - model=model_name, - instructions="You are a helpful assistant that can answer questions based on provided documents.", - sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, - tools=[ - { - "type": "memory", - "memory_bank_configs": [{"bank_id": self.memory_bank_id, "type": "vector"}], - "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 4096, - "max_chunks": 10, - } - ], - tool_choice="auto", - tool_prompt_format="json", - enable_session_persistence=True, - ) - self.agent = Agent(self.client, agent_config) - self.session_id = str(uuid.uuid4()) - - async 
def chat(self, message: str, history: List[List[str]]) -> str: - if self.agent is None: - await self.initialize_agent() - - response = await self.agent.create_turn( - messages=[{"role": "user", "content": message}], - session_id=self.session_id - ) - - # Extract the assistant's response from the response object - # Note: You might need to adjust this based on the actual response structure - assistant_message = "" - async for chunk in response: - if hasattr(chunk, 'delta') and chunk.delta: - assistant_message += chunk.delta - - return assistant_message - -def create_gradio_interface(host: str = "localhost", port: int = 8000): - # Initialize the chat interface - chat_interface = LlamaChatInterface(host, port) - - # Create the Gradio interface - iface = gr.ChatInterface( - fn=chat_interface.chat, - title="LlamaStack Chat", - description="Chat with your documents using LlamaStack", - examples=[ - ["What topics are covered in the documents?"], - ["Can you summarize the main points?"], - ["Tell me more about specific details in the text."], - ], - theme=gr.themes.Soft() - ) - - return iface - -if __name__ == "__main__": - # Create and launch the Gradio interface - iface = create_gradio_interface() - iface.launch(server_name="0.0.0.0", server_port=7860) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index b895e3eba..f8039e2ff 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -2,7 +2,7 @@ import json import os import uuid -from typing import List, Optional +from typing import List, Optional, Generator, AsyncGenerator import gradio as gr import requests @@ -45,8 +45,6 @@ async def setup_memory_bank(self): providers = self.client.providers.list() provider_id = providers["memory"][0].provider_id memorybank_list = self.client.memory_banks.list() - # for bank in memorybank_list: - # self.client.memory_banks.unregister(memory_bank_id=bank.identifier) if not self.is_memory_bank_present(self.memory_bank_id): memory_bank = self.client.memory_banks.register( memory_bank_id=self.memory_bank_id, @@ -86,7 +84,6 @@ async def initialize_agent(self): """Initialize the agent with model registration and configuration.""" model_name = "Llama3.2-3B-Instruct" - # Agent configuration agent_config = AgentConfig( model=model_name, instructions="You are a helpful assistant that can answer questions based on provided documents.", @@ -109,22 +106,31 @@ async def initialize_agent(self): self.agent = Agent(self.client, agent_config) self.session_id = self.agent.create_session(f"session-{uuid.uuid4()}") - async def chat(self, message: str, history: List[List[str]]) -> str: - """Process a chat message and return the response.""" + async def chat_stream( + self, message: str, history: List[List[str]] + ) -> AsyncGenerator[List[List[str]], None]: + """Stream chat responses token by token with proper history handling.""" if self.agent is None: await self.initialize_system() + # Initialize history if None + history = history or [] + + # Add user message to history + history.append([message, ""]) + + # Get streaming response from agent response = self.agent.create_turn( messages=[{"role": "user", "content": message}], session_id=self.session_id ) - # Collect the response using EventLogger - full_response = "" + # Stream the response using EventLogger + current_response = "" async for log in EventLogger().log(response): if hasattr(log, "content"): - full_response += log.content - - return full_response + current_response += 
log.content + history[-1][1] = current_response + yield history def create_gradio_interface( @@ -136,12 +142,19 @@ def create_gradio_interface( with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown("# LlamaStack Chat") - chatbot = gr.Chatbot() + chatbot = gr.Chatbot( + bubble_full_width=False, + show_label=False, + height=400 + ) msg = gr.Textbox( - label="Message", placeholder="Type your message here...", show_label=False + label="Message", + placeholder="Type your message here...", + show_label=False, + container=False, ) with gr.Row(): - submit = gr.Button("Send") + submit = gr.Button("Send", variant="primary") clear = gr.Button("Clear") gr.Examples( @@ -153,25 +166,44 @@ def create_gradio_interface( inputs=msg, ) - async def respond(message, chat_history): - bot_message = await chat_interface.chat(message, chat_history) - chat_history.append((message, bot_message)) - return "", chat_history - def clear_chat(): - return None + return [], "" + + # Set up event handlers with streaming + submit_event = msg.submit( + fn=chat_interface.chat_stream, + inputs=[msg, chatbot], + outputs=chatbot, + queue=True, + ).then( + fn=lambda: "", # Clear textbox after sending + outputs=msg, + ) + + submit_click = submit.click( + fn=chat_interface.chat_stream, + inputs=[msg, chatbot], + outputs=chatbot, + queue=True, + ).then( + fn=lambda: "", # Clear textbox after sending + outputs=msg, + ) + + clear.click(clear_chat, outputs=[chatbot, msg], queue=False) - # Set up event handlers - msg.submit(respond, [msg, chatbot], [msg, chatbot]) - submit.click(respond, [msg, chatbot], [msg, chatbot]) - clear.click(clear_chat, None, chatbot) + # Add keyboard shortcut for submit + msg.submit(lambda: None, None, None, api_name=False) return interface if __name__ == "__main__": # Create and launch the Gradio interface - interface = create_gradio_interface( - docs_dir="/root/rag_data" - ) # Specify your docs directory here - interface.launch(server_name="0.0.0.0", server_port=7860, share=True) + interface = create_gradio_interface(docs_dir="/root/rag_data") + interface.launch( + server_name="0.0.0.0", + server_port=7860, + share=True, + debug=True + ) From f80a1503f525b6d379c5b0276fd0773e42e81819 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 16:24:54 -0800 Subject: [PATCH 52/99] Update README.md --- examples/E2E-RAG-App/README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index 3c4f9d1f5..3e7aaad73 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -1,13 +1,14 @@ ## E2E-RAG-App -This is an E2E RAG App that can be pointed to any folder to do RAG over a collection of mixed file formats and do retrieval using the `Llama-3.2-3B-Instruct` Model +This is an End to End RAG App leveraging llama-stack that handles the logic for ingesting documents, storing them in a vector db and providing an inference interface. -Details: -TODO: -1. Save memory_bank to local, and load it to local -2. Make the chat inference multi-turn -3. Front-end + docker +All of the steps are run using a single-step via docker script. +Details: +1. We use [docling](https://github.com/DS4SD/docling) framework for handling multiple file input formats (PDF, PPTX, DOCX) +2. If you are using a GPU inference machine, we have an option to use `Llama-3.2-11B-Vision` to caption images in the documents, on CPU machine this step is skipped +3. 
Once ingested, we use a llama-stack distribution running chroma-db and `Llama-3.2-3B-Instruct` to ingest chunks into a memory_bank +4. Once the vectordb is created, we then use llama-stack with the `Llama-3.2-3B-Instruct` to chat with the model. To run the `ingestion_script.py` script, please make sure there is a /DATA and /OUTPUT folder at its relative root. It will ingest ALL documents in /DATA and output BOTH markdown and JSON dump in /OUTPUT folder From 23b4760865936c7f13a2d2e7650505bb7b67336a Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 16:53:36 -0800 Subject: [PATCH 53/99] update readme --- examples/E2E-RAG-App/README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index 3e7aaad73..d85b8bf4a 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -2,20 +2,21 @@ This is an End to End RAG App leveraging llama-stack that handles the logic for ingesting documents, storing them in a vector db and providing an inference interface. +We share the details of how it works first and then detailed steps on how to run below: + All of the steps are run using a single-step via docker script. -Details: +Overview of how it works: 1. We use [docling](https://github.com/DS4SD/docling) framework for handling multiple file input formats (PDF, PPTX, DOCX) 2. If you are using a GPU inference machine, we have an option to use `Llama-3.2-11B-Vision` to caption images in the documents, on CPU machine this step is skipped 3. Once ingested, we use a llama-stack distribution running chroma-db and `Llama-3.2-3B-Instruct` to ingest chunks into a memory_bank 4. Once the vectordb is created, we then use llama-stack with the `Llama-3.2-3B-Instruct` to chat with the model. -To run the `ingestion_script.py` script, please make sure there is a /DATA and /OUTPUT folder at its relative root. It will ingest ALL documents in /DATA and output BOTH markdown and JSON dump in /OUTPUT folder - ![RAG_workflow](./RAG_workflow.jpg) How to run: Install docker according to this [web page](https://docs.docker.com/engine/install/). + 1. We have main config `RAG_service.json` , please change `model_name` and `document_path` accordingly. 2. `run_RAG.sh` is the main script that can create `.env` file for compose.yaml and then actually start the `docker compose`. `compose.yaml` is the main docker yaml that specifies all the mount option and docker configs, change the mounts if needed. @@ -31,3 +32,4 @@ docker exec -it docker-ollama-1 bash ``` 4. Llama-stack docker will start as well. The `llama_stack_start.sh` control the docker startup behavior, change it if needed. (As we have not yet have a working llama-stack docker image, this `llama_stack_start.sh` currently on-the-fly install llama-stack from source) It should be able to run llama-stack server based on the `llama_stack_run.yaml` config. Once the server is ready, then it will run the `rag_main.py`. + From c323c8eb07cf3682000d65271029f0ffa66a6f57 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 16:53:43 -0800 Subject: [PATCH 54/99] Update README.md --- examples/E2E-RAG-App/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index d85b8bf4a..b335cdc0c 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -20,7 +20,7 @@ Install docker according to this [web page](https://docs.docker.com/engine/insta 1. 
We have main config `RAG_service.json` , please change `model_name` and `document_path` accordingly. 2. `run_RAG.sh` is the main script that can create `.env` file for compose.yaml and then actually start the `docker compose`. `compose.yaml` is the main docker yaml that specifies all the mount option and docker configs, change the mounts if needed. -```bash +```bas cd docker bash run_RAG.sh ``` From 4f3f1745afd93c30ccb3a66ad13a0f5c1d587f9a Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 16:53:56 -0800 Subject: [PATCH 55/99] Update README.md --- examples/E2E-RAG-App/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index b335cdc0c..6c41ad093 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -20,10 +20,10 @@ Install docker according to this [web page](https://docs.docker.com/engine/insta 1. We have main config `RAG_service.json` , please change `model_name` and `document_path` accordingly. 2. `run_RAG.sh` is the main script that can create `.env` file for compose.yaml and then actually start the `docker compose`. `compose.yaml` is the main docker yaml that specifies all the mount option and docker configs, change the mounts if needed. -```bas +```bash cd docker bash run_RAG.sh -``` +``1 3. Ollama docker will start and this docker will pull and run the llama model specified. The `ollama_start.sh` control the Ollama docker startup behavior, change it if needed. You can log into the docker by From 3791fe1becba30885af968437ddef4af6e83e92b Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 16:54:02 -0800 Subject: [PATCH 56/99] Update README.md --- examples/E2E-RAG-App/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index 6c41ad093..d85b8bf4a 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -23,7 +23,7 @@ Install docker according to this [web page](https://docs.docker.com/engine/insta ```bash cd docker bash run_RAG.sh -``1 +``` 3. Ollama docker will start and this docker will pull and run the llama model specified. The `ollama_start.sh` control the Ollama docker startup behavior, change it if needed. 
You can log into the docker by From 9a55c8ab1b6ddeecc9832fe44fb4864508908f6e Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Tue, 19 Nov 2024 17:29:31 -0800 Subject: [PATCH 57/99] added extended chroma support --- examples/E2E-RAG-App/gradio_interface.py | 49 +++++++++++------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index f8039e2ff..e75243ab9 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -2,10 +2,13 @@ import json import os import uuid -from typing import List, Optional, Generator, AsyncGenerator +from typing import AsyncGenerator, Generator, List, Optional + +import chromadb import gradio as gr import requests +from chromadb.utils import embedding_functions from dotenv import load_dotenv from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent @@ -18,11 +21,12 @@ class LlamaChatInterface: - def __init__(self, host: str, port: int, docs_dir: str): + def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.host = host self.port = port self.docs_dir = docs_dir self.client = LlamaStackClient(base_url=f"http://{host}:{port}") + self.chroma_client = chromadb.HttpClient(host=host, port={chroma_port}) self.agent = None self.session_id = None self.memory_bank_id = "test_bank_666" @@ -33,19 +37,18 @@ async def initialize_system(self): await self.load_documents() await self.initialize_agent() - def is_memory_bank_present(self, target_identifier): - """Checks if a memory bank exists.""" - return any( - bank.identifier == target_identifier - for bank in self.client.memory_banks.list() - ) - async def setup_memory_bank(self): """Set up the memory bank if it doesn't exist.""" providers = self.client.providers.list() provider_id = providers["memory"][0].provider_id - memorybank_list = self.client.memory_banks.list() - if not self.is_memory_bank_present(self.memory_bank_id): + collections = chroma_client.list_collections() + + if any(col.name == memory_bank_id for col in collections): + print(f"The collection '{memory_bank_id}' exists.") + else: + print( + f"The collection '{memory_bank_id}' does not exist. Creating the collection..." 
+ ) memory_bank = self.client.memory_banks.register( memory_bank_id=self.memory_bank_id, params={ @@ -115,10 +118,10 @@ async def chat_stream( # Initialize history if None history = history or [] - + # Add user message to history history.append([message, ""]) - + # Get streaming response from agent response = self.agent.create_turn( messages=[{"role": "user", "content": message}], session_id=self.session_id @@ -134,19 +137,18 @@ async def chat_stream( def create_gradio_interface( - host: str = "localhost", port: int = 5000, docs_dir: str = "./docs" + host: str = "localhost", + port: int = 5000, + chroma_port: int = 6000, + docs_dir: str = "./docs", ): # Initialize the chat interface - chat_interface = LlamaChatInterface(host, port, docs_dir) + chat_interface = LlamaChatInterface(host, port, docs_dir, chroma_port) with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown("# LlamaStack Chat") - chatbot = gr.Chatbot( - bubble_full_width=False, - show_label=False, - height=400 - ) + chatbot = gr.Chatbot(bubble_full_width=False, show_label=False, height=400) msg = gr.Textbox( label="Message", placeholder="Type your message here...", @@ -201,9 +203,4 @@ def clear_chat(): if __name__ == "__main__": # Create and launch the Gradio interface interface = create_gradio_interface(docs_dir="/root/rag_data") - interface.launch( - server_name="0.0.0.0", - server_port=7860, - share=True, - debug=True - ) + interface.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True) From 184c8078da25941db15219ee93677eb84528b630 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 19 Nov 2024 18:14:38 -0800 Subject: [PATCH 58/99] gradio UI streaming not working --- examples/E2E-RAG-App/docker/compose.yaml | 4 +-- .../E2E-RAG-App/docker/llama_stack_start.sh | 12 ++----- examples/E2E-RAG-App/gradio_interface.py | 36 ++++++++++--------- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/examples/E2E-RAG-App/docker/compose.yaml b/examples/E2E-RAG-App/docker/compose.yaml index 2d2228e7f..5a6d38d49 100644 --- a/examples/E2E-RAG-App/docker/compose.yaml +++ b/examples/E2E-RAG-App/docker/compose.yaml @@ -25,7 +25,7 @@ services: restart: always entrypoint: ["bash", "/root/ollama_start.sh"] llamastack: - image: llamastack/distribution-ollama:test-0.0.53rc7 + image: llamastack/distribution-ollama:test-0.0.53rc8 network_mode: "host" tty: true volumes: @@ -33,13 +33,11 @@ services: # Link to ollama run.yaml file - ./llama_stack_run.yaml:/root/my-run.yaml - ../../E2E-RAG-App:/root/E2E-RAG-App - - ./RAG_service.json:/root/RAG_service.json - ${DOC_PATH}:/root/rag_data/ - ./llama_stack_start.sh:/root/llama_stack_start.sh ports: - "5000:5000" # for llama-stack - "7860:7860" # for UI - - "6000:6000" # for DB # Hack: wait for ollama server to start before starting docker entrypoint: ["bash", "/root/llama_stack_start.sh"] #entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml" diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh index 1122da512..9ea363efd 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -1,17 +1,9 @@ sleep 45 #echo "starting to install llama-stack" -#apt-get update -#apt-get install -y git -#pip install /root/llama-stack -#pip install git+https://github.com/meta-llama/llama-stack.git@2edfda97e9659155074269fc3b7e66d9bb2c57d4 -#pip install tiktoken -#pip install --upgrade --no-deps --force-reinstall 
--index-url https://test.pypi.org/simple/ llama_stack==0.0.53rc1 + pip install gradio -# echo "Installing llama-stack-client" -pip install distro -pip install --index-url https://test.pypi.org/simple/ llama_stack_client==0.0.53rc5 -#pip install git+https://github.com/meta-llama/llama-stack-client-python.git@f5a2391241eac03eea356b206469081688277d23 + echo "starting the llama-stack server" python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6& sleep 36000000 diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index e75243ab9..01389623d 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -26,10 +26,10 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.port = port self.docs_dir = docs_dir self.client = LlamaStackClient(base_url=f"http://{host}:{port}") - self.chroma_client = chromadb.HttpClient(host=host, port={chroma_port}) + self.chroma_client = chromadb.HttpClient(host=host, port=chroma_port) self.agent = None - self.session_id = None - self.memory_bank_id = "test_bank_666" + self.memory_bank_id = "test_bank_999" + self.chat_history = [] async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" @@ -41,13 +41,13 @@ async def setup_memory_bank(self): """Set up the memory bank if it doesn't exist.""" providers = self.client.providers.list() provider_id = providers["memory"][0].provider_id - collections = chroma_client.list_collections() + collections = self.chroma_client.list_collections() - if any(col.name == memory_bank_id for col in collections): - print(f"The collection '{memory_bank_id}' exists.") + if any(col.name == self.memory_bank_id for col in collections): + print(f"The collection '{self.memory_bank_id}' exists.") else: print( - f"The collection '{memory_bank_id}' does not exist. Creating the collection..." + f"The collection '{self.memory_bank_id}' does not exist. Creating the collection..." 
) memory_bank = self.client.memory_banks.register( memory_bank_id=self.memory_bank_id, @@ -104,10 +104,9 @@ async def initialize_agent(self): ], tool_choice="auto", tool_prompt_format="json", - enable_session_persistence=True, + enable_session_persistence=False, ) self.agent = Agent(self.client, agent_config) - self.session_id = self.agent.create_session(f"session-{uuid.uuid4()}") async def chat_stream( self, message: str, history: List[List[str]] @@ -117,16 +116,19 @@ async def chat_stream( await self.initialize_system() # Initialize history if None - history = history or [] + if history: + # Add assistant message to history + self.chat_history.append({"role": "assistant", "content": history[-1][1]}) + # Add user message to history history.append([message, ""]) - + self.chat_history.append({"role": "user", "content": message}) + session_id = self.agent.create_session(f"session-{uuid.uuid4()}") # Get streaming response from agent response = self.agent.create_turn( - messages=[{"role": "user", "content": message}], session_id=self.session_id - ) - + messages=self.chat_history, session_id=session_id + ) # Stream the response using EventLogger current_response = "" async for log in EventLogger().log(response): @@ -143,7 +145,7 @@ def create_gradio_interface( docs_dir: str = "./docs", ): # Initialize the chat interface - chat_interface = LlamaChatInterface(host, port, docs_dir, chroma_port) + chat_interface = LlamaChatInterface(host, port, chroma_port,docs_dir) with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown("# LlamaStack Chat") @@ -176,7 +178,7 @@ def clear_chat(): fn=chat_interface.chat_stream, inputs=[msg, chatbot], outputs=chatbot, - queue=True, + queue=False, ).then( fn=lambda: "", # Clear textbox after sending outputs=msg, @@ -203,4 +205,4 @@ def clear_chat(): if __name__ == "__main__": # Create and launch the Gradio interface interface = create_gradio_interface(docs_dir="/root/rag_data") - interface.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True) + interface.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True,inline=False) From fa0cf90bbc7de5fe572fdc8b07e857e968a5db33 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 19 Nov 2024 20:31:32 -0800 Subject: [PATCH 59/99] add how to run readme --- examples/E2E-RAG-App/RAG_workflow.jpg | Bin 52391 -> 55117 bytes examples/E2E-RAG-App/README.md | 39 +++++++++++++++--- examples/E2E-RAG-App/docker/RAG_service.json | 2 +- .../E2E-RAG-App/docker/llama_stack_start.sh | 9 ++-- examples/E2E-RAG-App/gradio_interface.py | 21 ++++++++-- 5 files changed, 56 insertions(+), 15 deletions(-) diff --git a/examples/E2E-RAG-App/RAG_workflow.jpg b/examples/E2E-RAG-App/RAG_workflow.jpg index 8db859a0918acffede5788701d5652a7d29f7b72..2b17f301f1486d818a2dda1884499fe92e9818b6 100644 GIT binary patch literal 55117 zcmdpe2Ut_vwr*@FRY5>Q6-ely^p0D45{d}~q=`T%0YWdLpl<0Mk^s_`-fQRxO7AEo zbd(mPDpi{J@Z5XOVV`!(d*8e7doy3w8kv1%&bh{zWBgog0HFE1TU%=%_(7RsSua6L1yf$ZOi!-#Pv+J+?;iq}zF%BNT+WHlK^_a@a5vhB6zjFFz z0k{DW0Cj-UujfDgeHxt70RZV;0N_IOU*jwi0D$TM0O01tU*kZp0f4Li06^8iU*rDz zpE#Sln*XV~i>KG~*46;PRt^9_Z3qBd{{R3`82>5l^zu*ocIz~W<+NW;rym=D1HcM! 
z3jhH)0xSW-r<5q*4nPDT`EwGW47hOqSNL_ka2hU>UHlcUkda*?BfmmHL4JjtoPz2q z6$K>?B{?}YEj7*6Yu9M6QBcucr@MCjl)m<>l5@XGUbuMWwBR*La>~=#|6=(04nTY5 ze8mNc3+Ffi=V{Mfpgs4q6~OjuuARSd?lk=QaW9iyxp;~E!ufL)r|D+b006S{r%!O1 zitHlAB?__&fOF?BT)aef`3f!Fbq@NQoFZQFBMgjE()aWY9>i5v4UymA5{1VqAE< zstc!mrM*B4xC_|T{B!(&<$m#bXm?`44;WqOUO6}d6Y1iwl0ak3=N%#_gVzVQCu3C5 zM)WxGM9uK<@Xodu(3-e?+1#DqUq0h60BmZ{TCNP`H@1&H(|DZYT(*lIF|)L90wI69 zSkNXH5LKX^f5dEhz%iH7+%b}a^o=M!jzI$GO6WHj6^mpG)-YZRL7`rh?D{FlJ<0kl zRXqT-LAU0?Az`JYtnw3qm|5 z{}>kf-EUta;-MG?C|h>x$?)wr#7XGTa6leU=h0vW*M=A~KiCB3!>ae>xA%7$kT3FI z{8z^7zn?eOMd+IceEGt~W28QRI#Tr0T&-KWb)w&*)LZAgp! z|FF}`qP?Fe->qsd$@DCXj9nT?F0dy{7d2@(G0ulR|9WovMeO7L6`qFt1aIWL%}l&) zX;Qs@vd->@1&v6s?Sbr|b*i=MKtfP>Roy{trLXSd!~S!~(@~EbHItGo$@eK3e?Ko& z+~|lg(>pMzc!B&4+F(^V+SgaGlE%O1{J6)yBq_;YL=n06nc+TN2@vD4?PhnF)BQG? zO>|k%EiG3=4N$+AoyRoUy)I?+6R*B8!|IxH# zwUS~Qw!sY<5(%`R2(vmhi!jyb4^|9CQ}a@>d*9f80(kf@;x(+EPZ5Bb$BSDp*8cI> z{byp&E@lQe|D5sXm5f0$^8Nh6OM3sf{KbM0C%^nCjhj4k`DIADVsIl(zQV0XX`%V= zkiY#+5i(%ab{Erk${399ImR19Gj=)bFUk#6dJOs3<_L3GHCuZEoH^1vdcwr;JBUm! zam}ZATQoL@aI6-22=YvQ+nFsk@mu@kbHT&UIov=wScHn- z3!Cn$a_p0Y30dXjT#}?4%pL6Q3{gm*TpxV7Fj;iab_<|gIWt)PwrgD?)i?6!Q}1#{ zHoAN=g&_BUthi5Xwth*n&u1_@zKe)QgK4qRl^<2MJ?&S0CK(+E$e+oxtloS&?O^n@ z9Sw84Z@M`Rr%m(gbSjo>>w2i1oi0qc)pKjquurY$y=}z`r_rOC_jdM}!cOAH+=lFt zp87nB70A{G33o2$#i_`k|8`Oy;!t>jX>8a?$EeIWR=2r6aQJPE!tJWoxcl2^7rqPx z1Qi91$`+Z`k{|QijQb06~D*d(8-fnK{>`3LmwHiCRqHu=(wVuxA{l8x@ zNH+$fb>t_3bV^*Ie%2>A^)fCLHsXCe?)vJxTDLsCY@kVw6^*+vLLV&!@8eTcABd)W za5}!J>(hB+rg2j05%X*Zq7B_2R&4V}#omn>2b*_Yl|POQNYbq85$1!!?BQupL@ZOF zeEV_IOFfhRIh~t+C2%-D$Qr}>K+Z-m^z-AM>H6t_@>{f3v-nnke zqJ|C=Z0EjoUD9@gR1{}q-ia`F$drArIE8Ge>dIco%K+m&a#I$6oQpu2m@iW#bKU9V z6D5~gZdpb>tY>8P!RPx}<29r>i!fM_#6;c|sU!(;KRDAcmP?e|o=*or?@G z-D1+D`jTcx%z0gv9vmGsv^9u+oKtKAi~G>itsE)=3gNa^O1(foHa>Sp_Hl`7t|w`O zWMTIuALt<@-0qs>(riyKaI=MJHmrt;Dh4LFnBdW%RQ4MWoqQtv`}?Gh8|>^365ciQ z3E*g7E-hqK5~74g!-Eg_d=kMxP*>Ga=gGL){^)h1i3@Mn)myeE#czD`tTxl%yu9LM@x#HhWNXS`tT`X>|5jT5+j`>@F-Yqk zQWh$oDS@@z!OrHXlEBpzAStcOKR98X@b9jOrw0p6-_39Hq`3v6M2t#$Ma(V>ywXg> zcPIG@NN>&OnXZ`6VP;aFA@nAL4e)+qeJc6$$FNWz_4f@?o|Px6v&r@KX6MUtzCH74 zdYaR+=(;ozZ(E#d3SIekru-jrZJKJ%E|*@VR5j(e1qMPa{7R(CDiG5i*#+F{q^MFqC_?_tMo0Y_o zk^RYVu^5MNqoOX&3b4OcP2~H!p>A=)9K>EYvj`#z4XagCQyqvtsU}q7KH3b}Sj?3(=Yt@gpM#l>h zV&YvJl}{^dFOqbu&TvgADDsOiDB)cG&MI2;%66?TI6{#IYUnxD*%PmRSB8ez!SOmL z$;=qzZAM{Lk#Er`pTLXhas(@=1!6O}TvSX+5Ry;*J5PX!fI@xVvIVzq|3K?&HIpR) zx}BulKFPd@gzGU~P(#`}oG6$2*Bgpk?Dh&7h06{TSv6y?S}89NH#3EB&&cUW=LGj* zq4eeOB!Bfmm4^mgi@jvp=S|~WI0{7KT=?!gF?G$8fOr!|f&HCVLq=iNhpdM50oOA% zppaM(*A+uxPN$;p$m!++aF(yO>7C`$=a7HHndpjwega@$wR9rx9%+1-Z0wf1eRn;* z!Bg4LC|vet%m6|bz#-c06i$-Q?u~i&hJ;h6nwgU%h7h9N2RM7(a|;Qc?XU~{TXnAKdt1T#xV|2bp>E2=L1^ObY!M<^=#A^k&=_& zE#Q-ho#hvsPB(*HQXBkut^>{i9y}ykjmLOMW(cbXk9>1!;(lKh5ofSeLJk>XIMpL7SxV4*YRw6o47#XME)EkmlSss0)mR{E0`@tHg7#lANzIDLIGlfzU{Wh6Q3 zn#b>B`m%3;_g_g=fg`6%ZoQ3O-RLDj5c9HGYA@d3>M5!+a__SmPW??X8yBH?!xdU@ zoUia6DN4=Kd0&tn6T&JeC*(^Teo#F#F|vRQ=wmsGP~np%wzRlWuHk^~CCq77!99+b zdV~nG^bksiWSDgY1PuKG2Vyl3+&C50rzGNqKPPTWro2+FPK%O&#h@p8$@rO!vlpH| z*3KhlCdzT@SOk&Bg1StfsS9H_7#y$fROx0`y6qY>Zd@X|VNJ(S30ldQgfwBL*yS|I zzScLUd}zVfwV6$BHNmylD5d{2?_*KVnS8y!`~-*|b+DYSjK5!rXIDgcnDd&|ih`3@ z`DoY}>e^=K$(IMs#l}_w@1jwztY~4T`<0TPj8n)mcj>5t54N}h7m_uXX}CP7RU0z~ z3N}pa<=;)ko7hR(wA}VZsvg<)EX+~n9zuK~C`1)wdF$U{eu=pFlPWQK7rv{STJI""" def chat_stream( self, message: str, history: List[List[str]] ) -> Generator[List[List[str]], None, None]: - """Stream chat responses token by token with proper history handling.""" - history = history or [] history.append([message, ""]) - + output_queue = Queue() def 
run_async(): @@ -148,11 +104,25 @@ async def async_process(): ) current_response = "" + context_shown = False + async for log in EventLogger().log(response): log.print() - if hasattr(log, "content"): - current_response += log.content - history[-1][1] = current_response + + # Handle RAG context differently + if hasattr(log, 'retrieved_context') and not context_shown: + context = self.format_rag_context(log.retrieved_context) + history[-1][1] = context + "\n" + context_shown = True + output_queue.put(history.copy()) + + elif hasattr(log, 'content'): + current_response = log.content + # If we showed context before, append to it + if context_shown: + history[-1][1] = history[-1][1] + f'
{current_response}
' + else: + history[-1][1] = current_response output_queue.put(history.copy()) output_queue.put(None) @@ -166,8 +136,7 @@ async def async_process(): item = output_queue.get() if item is None: break - else: - yield item + yield item thread.join() @@ -180,19 +149,33 @@ def create_gradio_interface( ): chat_interface = LlamaChatInterface(host, port, chroma_port, docs_dir) - with gr.Blocks(theme=gr.themes.Soft()) as interface: + with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as interface: gr.Markdown("# LlamaStack Chat") - chatbot = gr.Chatbot(bubble_full_width=False, show_label=False, height=400) - msg = gr.Textbox( - label="Message", - placeholder="Type your message here...", + chatbot = gr.Chatbot( + bubble_full_width=False, show_label=False, - container=False, + height=600, + container=True, + elem_classes={ + "user": "user-message", + "bot": "bot-message" + } ) + + with gr.Row(): + with gr.Column(scale=20): + msg = gr.Textbox( + label="Message", + placeholder="Type your message here...", + show_label=False, + container=False + ) + with gr.Column(scale=1, min_width=100): + submit = gr.Button("Send", variant="primary") + with gr.Row(): - submit = gr.Button("Send", variant="primary") - clear = gr.Button("Clear") + clear = gr.Button("Clear Chat") gr.Examples( examples=[ @@ -235,8 +218,10 @@ def clear_chat(): if __name__ == "__main__": - # Create and launch the Gradio interface interface = create_gradio_interface() interface.launch( - server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True - ) + server_name=HOST, + server_port=GRADIO_SERVER_PORT, + share=True, + debug=True + ) \ No newline at end of file From 7322d7c1370f06458105de10a3021811a55d5223 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Wed, 20 Nov 2024 13:49:55 -0800 Subject: [PATCH 75/99] Revert "Update gradio_interface.py" This reverts commit 7b00a1ce4f6acf0c0e95bfb58ef513d39100e9ab. 
--- examples/E2E-RAG-App/gradio_interface.py | 199 ++++++++++++----------- 1 file changed, 107 insertions(+), 92 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index eec553e8c..234013df6 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -7,7 +7,9 @@ from typing import AsyncGenerator, Generator, List, Optional import chromadb + import gradio as gr +import requests from chromadb.utils import embedding_functions from dotenv import load_dotenv from llama_stack_client import LlamaStackClient @@ -16,6 +18,7 @@ from llama_stack_client.types.agent_create_params import AgentConfig from llama_stack_client.types.memory_insert_params import Document + # Load environment variables load_dotenv() @@ -26,44 +29,6 @@ GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", 7861)) MODEL_NAME = os.getenv("MODEL_NAME", "Llama3.2-1B-Instruct") -# Custom CSS for enhanced styling -CUSTOM_CSS = """ -.message-rag { - font-size: 0.875rem !important; - background-color: rgba(30, 41, 59, 0.5) !important; - border-radius: 0.5rem !important; - padding: 0.75rem !important; - margin-bottom: 1rem !important; - font-family: ui-monospace, monospace !important; -} - -.message-rag-title { - font-size: 0.75rem !important; - color: #94a3b8 !important; - margin-bottom: 0.25rem !important; - display: flex !important; - align-items: center !important; - gap: 0.5rem !important; -} - -.message-rag-title::before { - content: "📄" !important; - font-size: 1rem !important; -} - -.message-rag-content { - color: #cbd5e1 !important; -} - -.bot-message { - font-size: 1rem !important; - line-height: 1.5 !important; -} - -.user-message { - background-color: rgb(79, 70, 229) !important; -} -""" class LlamaChatInterface: def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): @@ -76,21 +41,100 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.session_id = None self.memory_bank_id = "test_bank_212" - # ... [previous methods remain the same until chat_stream] ... - - def format_rag_context(self, context: str) -> str: - """Format RAG context with custom styling.""" - return f"""
-<div class="message-rag">
-    <div class="message-rag-title">Retrieved context from memory:</div>
-    <div class="message-rag-content">{context}</div>
-</div>
-
""" + async def initialize_system(self): + """Initialize the entire system including memory bank and agent.""" + await self.setup_memory_bank() + await self.initialize_agent() + + async def setup_memory_bank(self): + """Set up the memory bank if it doesn't exist.""" + providers = self.client.providers.list() + provider_id = providers["memory"][0].provider_id + collections = self.chroma_client.list_collections() + + if any(col.name == self.memory_bank_id for col in collections): + print(f"The collection '{self.memory_bank_id}' exists.") + else: + print( + f"The collection '{self.memory_bank_id}' does not exist. Creating the collection..." + ) + self.client.memory_banks.register( + memory_bank_id=self.memory_bank_id, + params={ + "embedding_model": "all-MiniLM-L6-v2", + "chunk_size_in_tokens": 100, + "overlap_size_in_tokens": 10, + }, + provider_id=provider_id, + ) + await self.load_documents() + print(f"Memory bank registered.") + + async def load_documents(self): + """Load documents from the specified directory into memory bank.""" + documents = [] + for filename in os.listdir(self.docs_dir): + if filename.endswith((".txt", ".md")): + file_path = os.path.join(self.docs_dir, filename) + with open(file_path, "r", encoding="utf-8") as file: + content = file.read() + document = Document( + document_id=filename, + content=content, + mime_type="text/plain", + metadata={"filename": filename}, + ) + documents.append(document) + + if documents: + self.client.memory.insert( + bank_id=self.memory_bank_id, + documents=documents, + ) + print(f"Loaded {len(documents)} documents from {self.docs_dir}") + + async def initialize_agent(self): + """Initialize the agent with model registration and configuration.""" + + if "1b" in MODEL_NAME: + model_name = "Llama3.2-1B-Instruct" + elif "3b" in MODEL_NAME: + model_name = "Llama3.2-3B-Instruct" + elif "8b" in MODEL_NAME: + model_name = "Llama3.1-8B-Instruct" + else: + model_name = MODEL_NAME + + agent_config = AgentConfig( + model=model_name, + instructions="You are a helpful assistant that can answer questions based on provided documents. Return your answer short and concise, less than 50 words.", + sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + tools=[ + { + "type": "memory", + "memory_bank_configs": [ + {"bank_id": self.memory_bank_id, "type": "vector"} + ], + "query_generator_config": {"type": "default", "sep": " "}, + "max_tokens_in_context": 300, + "max_chunks": 5, + } + ], + tool_choice="auto", + tool_prompt_format="json", + enable_session_persistence=True, + ) + self.agent = Agent(self.client, agent_config) + self.session_id = self.agent.create_session(f"session-{uuid.uuid4()}") def chat_stream( self, message: str, history: List[List[str]] ) -> Generator[List[List[str]], None, None]: + """Stream chat responses token by token with proper history handling.""" + history = history or [] history.append([message, ""]) - + output_queue = Queue() def run_async(): @@ -104,25 +148,11 @@ async def async_process(): ) current_response = "" - context_shown = False - async for log in EventLogger().log(response): log.print() - - # Handle RAG context differently - if hasattr(log, 'retrieved_context') and not context_shown: - context = self.format_rag_context(log.retrieved_context) - history[-1][1] = context + "\n" - context_shown = True - output_queue.put(history.copy()) - - elif hasattr(log, 'content'): - current_response = log.content - # If we showed context before, append to it - if context_shown: - history[-1][1] = history[-1][1] + f'
<div class="bot-message">{current_response}</div>
' - else: - history[-1][1] = current_response + if hasattr(log, "content"): + current_response += log.content + history[-1][1] = current_response output_queue.put(history.copy()) output_queue.put(None) @@ -136,7 +166,8 @@ async def async_process(): item = output_queue.get() if item is None: break - yield item + else: + yield item thread.join() @@ -149,33 +180,19 @@ def create_gradio_interface( ): chat_interface = LlamaChatInterface(host, port, chroma_port, docs_dir) - with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as interface: + with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown("# LlamaStack Chat") - chatbot = gr.Chatbot( - bubble_full_width=False, + chatbot = gr.Chatbot(bubble_full_width=False, show_label=False, height=400) + msg = gr.Textbox( + label="Message", + placeholder="Type your message here...", show_label=False, - height=600, - container=True, - elem_classes={ - "user": "user-message", - "bot": "bot-message" - } + container=False, ) - - with gr.Row(): - with gr.Column(scale=20): - msg = gr.Textbox( - label="Message", - placeholder="Type your message here...", - show_label=False, - container=False - ) - with gr.Column(scale=1, min_width=100): - submit = gr.Button("Send", variant="primary") - with gr.Row(): - clear = gr.Button("Clear Chat") + submit = gr.Button("Send", variant="primary") + clear = gr.Button("Clear") gr.Examples( examples=[ @@ -218,10 +235,8 @@ def clear_chat(): if __name__ == "__main__": + # Create and launch the Gradio interface interface = create_gradio_interface() interface.launch( - server_name=HOST, - server_port=GRADIO_SERVER_PORT, - share=True, - debug=True - ) \ No newline at end of file + server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True + ) From f28d921ecd8cc206296f3be056a8f5b00e68b586 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Wed, 20 Nov 2024 13:48:35 -0800 Subject: [PATCH 76/99] wip-eval: trying to get it work with current stack --- examples/E2E-RAG-App/data/eval/eval.py | 271 +++++++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 examples/E2E-RAG-App/data/eval/eval.py diff --git a/examples/E2E-RAG-App/data/eval/eval.py b/examples/E2E-RAG-App/data/eval/eval.py new file mode 100644 index 000000000..91c8bd3f0 --- /dev/null +++ b/examples/E2E-RAG-App/data/eval/eval.py @@ -0,0 +1,271 @@ +import asyncio +import json +import os +import uuid +from typing import List, Optional + +import chromadb +import fire +import requests +from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction +from datasets import Dataset +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.types.agent_create_params import AgentConfig +from ragas import evaluate, EvaluationDataset +from ragas.metrics import ( + AnswerRelevancy, + ContextPrecision, + ContextRecall, + FactualCorrectness, + Faithfulness, + SemanticSimilarity, +) +from termcolor import cprint +from tqdm import tqdm + +# Initialization +load_dotenv() +embedding_function = SentenceTransformerEmbeddingFunction( + model_name="all-mpnet-base-v2" +) +chroma_client = chromadb.PersistentClient(path="chroma") + + +def chunk_text(content: str, chunk_size: int = 500) -> List[str]: + """Splits content into chunks with overlap.""" + chunks = [] + current_chunk = [] + overlap = 100 + + for paragraph in content.split("\n\n"): + if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size: + current_chunk.append(paragraph) + 
else: + chunks.append("\n\n".join(current_chunk).strip()) + current_chunk = ( + [current_chunk[-1], paragraph] if current_chunk else [paragraph] + ) + + if current_chunk: + chunks.append("\n\n".join(current_chunk).strip()) + + return chunks + + +def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 350) -> None: + """Inserts text documents from a directory into ChromaDB.""" + collection_name = "documents" + existing_collections = chroma_client.list_collections() + collection_names = [col.name for col in existing_collections] + + if collection_name in collection_names: + cprint( + f"Collection '{collection_name}' already exists. Skipping document insertion.", + "yellow", + ) + return + + collection = chroma_client.create_collection( + name=collection_name, embedding_function=embedding_function + ) + + cprint(f"Collection '{collection_name}' created.", "green") + + for filename in os.listdir(file_dir): + if filename.endswith((".txt", ".md")): + file_path = os.path.join(file_dir, filename) + with open(file_path, "r", encoding="utf-8") as file: + content = file.read() + chunks = chunk_text(content, chunk_size=chunk_size) + + for i, chunk in enumerate(chunks): + chunk_id = f"{filename}_chunk_{i}" + collection.add( + documents=[chunk], + ids=[chunk_id], + metadatas=[ + {"filename": filename, "chunk_index": i, "content": chunk} + ], + ) + + cprint(f"Inserted documents from {file_dir} into ChromaDB.", "green") + + +def query_chromadb(query: str) -> Optional[dict]: + """Queries ChromaDB for relevant context based on input query.""" + collection = chroma_client.get_collection( + name="documents", embedding_function=embedding_function + ) + + results = collection.query( + query_texts=[query], + n_results=5, + include=["documents", "metadatas", "distances"], + ) + + return results if results else None + + +async def get_response_with_context( + agent: Agent, input_query: str, session_id: str +) -> (str, List[str]): + """Fetches response from the agent with context from ChromaDB.""" + results = query_chromadb(input_query) + if results and results["metadatas"]: + context = "\n".join( + f"Filename: {metadata['filename']}, Chunk index: {metadata['chunk_index']}\n{metadata['content']}" + for metadata_list in results["metadatas"] + for metadata in metadata_list + ) + # Collect the contexts into a list + contexts = [ + metadata["content"] + for metadata_list in results["metadatas"] + for metadata in metadata_list + ] + else: + context = "No relevant context found." 
+ contexts = [] + + messages = [ + {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"} + ] + + response = agent.create_turn(messages=messages, session_id=session_id) + + async for chunk in response: + if chunk.event.payload.event_type == "turn_complete": + return chunk.event.payload.turn.output_message.content, contexts + + return "No response generated.", contexts + + +async def run_main(host: str, port: int, docs_dir: str) -> None: + """Main async function to register model, insert documents, and generate responses.""" + client = LlamaStackClient(base_url=f"http://{host}:{port}") + + insert_documents_to_chromadb(docs_dir) + + model_name = "Llama3.2-3B-Instruct" + url = f"http://{host}:{port}/models/register" + headers = {"Content-Type": "application/json"} + data = { + "model_id": model_name, + "provider_model_id": None, + "provider_id": "inline::meta-reference-0", + "metadata": None, + } + response = requests.post(url, headers=headers, data=json.dumps(data)) + cprint(f"Model registration status: {response.status_code}", "blue") + + agent_config = AgentConfig( + model=model_name, + instructions="You are a helpful assistant that can answer questions based on provided documents.", + sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + enable_session_persistence=True, + ) + agent = Agent(client, agent_config) + + # QA data + qa_data = [ + { + "Question": "What is the policy regarding smoking in City offices?", + "Answer": "Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)", + }, + { + "Question": "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", + "Answer": "Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)", + }, + { + "Question": "What are the three categories of employees eligible for health coverage?", + "Answer": "The following employees are eligible:\n\nAll permanent employees working at least 20 hours per week\n\nAll regularly scheduled provisional employees working at least 20 hours per week\n\nAll other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)", + }, + { + "Question": "How long must an employee wait before using vacation time after starting employment?", + "Answer": "Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)", + }, + { + "Question": "What must an employee do if they're summoned for jury duty?", + "Answer": "An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. 
(Source: Page 37, 'Jury Duty Leave' section)", + }, + { + "Question": "What happens if an employee is absent without authorization for more than five consecutive working days?", + "Answer": "If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)", + }, + { + "Question": "How long is the normal probationary period for permanent civil service positions?", + "Answer": "The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)", + }, + { + "Question": "What are employees required to do in case of a catastrophic event while off duty?", + "Answer": "Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)", + }, + { + "Question": "What is the city's policy on accepting gifts from subordinates?", + "Answer": "Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. (Source: Page 49, 'Gifts' section)", + }, + ] + + # Prepare lists to collect data + questions = [] + generated_answers = [] + retrieved_contexts = [] + ground_truths = [] + + session_id = agent.create_session(f"session-{uuid.uuid4()}") + for qa in tqdm(qa_data, desc="Generating responses"): + question = qa["Question"] + ground_truth_answer = qa["Answer"] + + cprint(f"Generating response for: {question}", "green") + try: + generated_answer, contexts = await get_response_with_context( + agent, question, session_id + ) + cprint(f"Response: {generated_answer}", "green") + + # Append data to lists + questions.append(question) + generated_answers.append(generated_answer) + retrieved_contexts.append(contexts) + ground_truths.append(ground_truth_answer) + except Exception as e: + cprint(f"Error generating response for {question}: {e}", "red") + + # Create a Dataset for RAGAS + eval_data = Dataset.from_dict( + { + "user_input": questions, + "response": generated_answers, + "retrieved_contexts": retrieved_contexts, + "reference": ground_truths, + } + ) + + # Run evaluation + result = evaluate( + eval_data, + metrics=[ + ContextPrecision(), + ContextRecall(), + Faithfulness(), + AnswerRelevancy(), + FactualCorrectness(), + SemanticSimilarity(), + ], + ) + + df = result.to_pandas() + df.to_csv("evaluation_results2.csv", index=False) + print(df.head()) + + +def main(host: str, port: int, docs_dir: str) -> None: + """Entry point for the script.""" + asyncio.run(run_main(host, port, docs_dir)) + + +if __name__ == "__main__": + fire.Fire(main) \ No newline at end of file From 545f9424ef97148f86caeb77d5bcb04b74be52f4 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Wed, 20 Nov 2024 14:24:06 -0800 Subject: [PATCH 77/99] stop ingest when there is output folder --- examples/E2E-RAG-App/01_ingest_files.py | 62 +++++++++++++++++------- examples/E2E-RAG-App/README.md | 18 ++++--- examples/E2E-RAG-App/docker/run_RAG.sh | 1 - examples/E2E-RAG-App/gradio_interface.py | 2 +- 4 files changed, 56 insertions(+), 27 deletions(-) diff --git 
a/examples/E2E-RAG-App/01_ingest_files.py b/examples/E2E-RAG-App/01_ingest_files.py index cea7aa459..876ff2f58 100644 --- a/examples/E2E-RAG-App/01_ingest_files.py +++ b/examples/E2E-RAG-App/01_ingest_files.py @@ -3,11 +3,13 @@ import logging import shutil from pathlib import Path -from typing import Tuple, List +from typing import List, Tuple import yaml from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import InputFormat +from docling.datamodel.document import PictureItem, TableItem +from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import ( DocumentConverter, PdfFormatOption, @@ -15,15 +17,26 @@ ) from docling.pipeline.simple_pipeline import SimplePipeline from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline -from docling.datamodel.pipeline_options import PdfPipelineOptions -from docling.datamodel.document import TableItem, PictureItem + def parse_args(): - parser = argparse.ArgumentParser(description='Process documents from input directory') - parser.add_argument('--input_dir', type=str, required=True, help='Input directory containing documents') - parser.add_argument('--output_dir', type=str, help='Output directory for processed files (default: input_dir/output)') + parser = argparse.ArgumentParser( + description="Process documents from input directory" + ) + parser.add_argument( + "--input_dir", + type=str, + required=True, + help="Input directory containing documents", + ) + parser.add_argument( + "--output_dir", + type=str, + help="Output directory for processed files (default: input_dir/output)", + ) return parser.parse_args() + def get_document_files(input_dir: Path) -> Tuple[List[Path], List[Path]]: """ Recursively scan directory for document files. @@ -43,6 +56,7 @@ def get_document_files(input_dir: Path) -> Tuple[List[Path], List[Path]]: return documents_to_process, markdown_files + def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, Path]]: """ Extracts and saves images from the document. 
@@ -54,40 +68,53 @@ def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, # Save page images for page_no, page in res.document.pages.items(): - if hasattr(page, 'image') and page.image: + if hasattr(page, "image") and page.image: image_path = images_dir / f"{doc_filename}-page-{page_no}.png" with image_path.open("wb") as fp: page.image.pil_image.save(fp, format="PNG") - saved_images.append(('page', image_path)) + saved_images.append(("page", image_path)) # Save images of figures and tables table_counter = 0 picture_counter = 0 - + for element, _level in res.document.iterate_items(): - if isinstance(element, TableItem) and hasattr(element, 'image') and element.image: + if ( + isinstance(element, TableItem) + and hasattr(element, "image") + and element.image + ): table_counter += 1 image_path = images_dir / f"{doc_filename}-table-{table_counter}.png" with image_path.open("wb") as fp: element.image.pil_image.save(fp, "PNG") - saved_images.append(('table', image_path)) + saved_images.append(("table", image_path)) - if isinstance(element, PictureItem) and hasattr(element, 'image') and element.image: + if ( + isinstance(element, PictureItem) + and hasattr(element, "image") + and element.image + ): picture_counter += 1 image_path = images_dir / f"{doc_filename}-figure-{picture_counter}.png" with image_path.open("wb") as fp: element.image.pil_image.save(fp, "PNG") - saved_images.append(('figure', image_path)) + saved_images.append(("figure", image_path)) return saved_images + def main(): args = parse_args() - + # Set up input and output directories input_dir = Path(args.input_dir) output_dir = Path(args.output_dir) if args.output_dir else input_dir / "output" - output_dir.mkdir(parents=True, exist_ok=True) + if os.path.exists(output_dir): + print(f"Output directory already exists: {output_dir}, will stop ingestion") + raise Exception(f"Output directory already exists: {output_dir}") + else: + output_dir.mkdir(parents=True, exist_ok=True) # Get all document files recursively documents_to_process, markdown_files = get_document_files(input_dir) @@ -120,7 +147,7 @@ def main(): InputFormat.PDF: PdfFormatOption( pipeline_cls=StandardPdfPipeline, backend=PyPdfiumDocumentBackend, - pipeline_options=pipeline_options + pipeline_options=pipeline_options, ), InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), }, @@ -149,5 +176,6 @@ def main(): print(f"\nExtracted {len(all_extracted_images)} images in total") print("Ready for image captioning processing") + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index 29af3e654..8523d0ad3 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -4,13 +4,17 @@ This is an End to End RAG App leveraging llama-stack that handles the logic for We share the details of how to run first and then an outline of how it works: -## Prerequisite: +### Prerequisite: Install docker: Check [this doc for Mac](https://docs.docker.com/desktop/setup/install/mac-install/), [this doc for Windows](https://docs.docker.com/desktop/setup/install/windows-install/) and this [instruction for Linux](https://docs.docker.com/engine/install/). For Mac and Windows users, you need to start the Docker app manually after installation. -## How to run: +### How to run the pipeline: + +![RAG_workflow](./RAG_workflow.jpg) + +The above is the workflow diagram for this RAG app. To run the app, please read the following instructions: 1. 
We have main config `RAG_service.json` inside of the docker folder, please change `model_name` and `document_path` accordingly, for example: @@ -41,21 +45,19 @@ docker exec -it docker-ollama-1 bash ollama ps ``` -> Check more about Ollama instruction [here](https://github.com/ollama/ollama) +Check more about Ollama instruction [here](https://github.com/ollama/ollama) 4. ChromaDB docker will also start. This docker will host the chroma database that can interact with llama-stack. -5. Lastly, Llama-stack docker will start. The `llama_stack_start.sh` control the docker startup behavior, change it if needed. It should be able to run llama-stack server based on the `llama_stack_run.yaml` config. Once the server is ready, then it will run the `gradio_interface.py`. +5. Lastly, Llama-stack docker will start. The `llama_stack_start.sh` control the docker startup behavior, change it if needed. It will first run the ingestion pipeline to convert all the documents into MarkDown files. Then, it will run llama-stack server based on the `llama_stack_run.yaml` config. Once the server is ready, then it will run the `gradio_interface.py` which will insert document chunks into memory_bank and start the UI for user interaction. 6. `gradio_interface.py` will show a public link. You can access the gradio UI by putting this link to the browser. Then you can start your chat in the gradio web page. -All of the steps are run using a single-step via docker script. -Overview of how it works: +### Overview of how the RAG app works: + 1. We use [docling](https://github.com/DS4SD/docling) framework for handling multiple file input formats (PDF, PPTX, DOCX) 2. If you are using a GPU inference machine, we have an option to use `Llama-3.2-11B-Vision` to caption images in the documents, on CPU machine this step is skipped 3. Once ingested, we use a llama-stack distribution running chroma-db and `Llama-3.2-3B-Instruct` to ingest chunks into a memory_bank 4. Once the vectordb is created, we then use llama-stack with the `Llama-3.2-3B-Instruct` to chat with the model. 
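Steps 3 and 4 of this overview map onto a small set of llama-stack client calls that appear in full in `gradio_interface.py` elsewhere in this series. A condensed sketch of that flow (the server address, bank id, document, and model id are illustrative and depend on the stack version in use) is:

```python
# Condensed sketch of the memory-bank + agent flow used by the RAG app.
# Assumes a llama-stack server on localhost:5000 with a Chroma-backed memory
# provider; bank id, document, question, and model id are placeholders.
import uuid

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig
from llama_stack_client.types.memory_insert_params import Document

client = LlamaStackClient(base_url="http://localhost:5000")
provider_id = client.providers.list()["memory"][0].provider_id

bank_id = "example_bank"  # illustrative
client.memory_banks.register(
    memory_bank_id=bank_id,
    params={
        "embedding_model": "all-MiniLM-L6-v2",
        "chunk_size_in_tokens": 100,
        "overlap_size_in_tokens": 10,
    },
    provider_id=provider_id,
)

client.memory.insert(
    bank_id=bank_id,
    documents=[
        Document(
            document_id="example.md",
            content="Llama Stack memory banks store document chunks for RAG.",
            mime_type="text/plain",
            metadata={"filename": "example.md"},
        )
    ],
)

agent = Agent(
    client,
    AgentConfig(
        model="Llama3.2-3B-Instruct",  # model id varies by stack version
        instructions="Answer questions using the provided documents.",
        sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9},
        tools=[
            {
                "type": "memory",
                "memory_bank_configs": [{"bank_id": bank_id, "type": "vector"}],
                "max_tokens_in_context": 300,
                "max_chunks": 5,
            }
        ],
        tool_choice="auto",
        tool_prompt_format="json",
        enable_session_persistence=True,
    ),
)

session_id = agent.create_session(f"session-{uuid.uuid4()}")
response = agent.create_turn(
    messages=[{"role": "user", "content": "What does the memory bank store?"}],
    session_id=session_id,
)
for log in EventLogger().log(response):
    log.print()
```

Because the memory bank is attached to the agent as a `memory` tool with `tool_choice="auto"`, retrieval happens inside `create_turn`, so the chat path needs no manual query step.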
- -![RAG_workflow](./RAG_workflow.jpg) diff --git a/examples/E2E-RAG-App/docker/run_RAG.sh b/examples/E2E-RAG-App/docker/run_RAG.sh index fbf24f658..581a1b0cc 100755 --- a/examples/E2E-RAG-App/docker/run_RAG.sh +++ b/examples/E2E-RAG-App/docker/run_RAG.sh @@ -5,6 +5,5 @@ echo "MODEL_NAME=$(jq -r '.model_name' ./RAG_service.json)" >> .env echo "HOST=$(jq -r '.host' ./RAG_service.json)" >> .env echo "PORT=$(jq -r '.port' ./RAG_service.json)" >> .env echo "CHROMA_PORT=$(jq -r '.chroma_port' ./RAG_service.json)" >> .env -echo "DOCS_DIR=$(jq -r '.docs_dir' ./RAG_service.json)" >> .env echo "GRADIO_SERVER_PORT=$(jq -r '.gradio_server_port' ./RAG_service.json)" >> .env docker compose up diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 234013df6..5962ade9a 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -39,7 +39,7 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.chroma_client = chromadb.HttpClient(host=host, port=chroma_port) self.agent = None self.session_id = None - self.memory_bank_id = "test_bank_212" + self.memory_bank_id = "test_bank_232" async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" From dda9b78052170aa6cb2883ac8b31bfe742bb26db Mon Sep 17 00:00:00 2001 From: Dalton Flanagan <6599399+dltn@users.noreply.github.com> Date: Thu, 21 Nov 2024 10:10:28 -0500 Subject: [PATCH 78/99] Update README.md --- examples/E2E-RAG-App/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index 8523d0ad3..e9219072b 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -1,6 +1,6 @@ ## E2E-RAG-App -This is an End to End RAG App leveraging llama-stack that handles the logic for ingesting documents, storing them in a vector db and providing an inference interface. +This is an end-to-end Retrieval Augmented Geneartion (RAG) App leveraging llama-stack that handles the logic for ingesting documents, storing them in a vector database and providing an inference interface. We share the details of how to run first and then an outline of how it works: @@ -49,7 +49,7 @@ Check more about Ollama instruction [here](https://github.com/ollama/ollama) 4. ChromaDB docker will also start. This docker will host the chroma database that can interact with llama-stack. -5. Lastly, Llama-stack docker will start. The `llama_stack_start.sh` control the docker startup behavior, change it if needed. It will first run the ingestion pipeline to convert all the documents into MarkDown files. Then, it will run llama-stack server based on the `llama_stack_run.yaml` config. Once the server is ready, then it will run the `gradio_interface.py` which will insert document chunks into memory_bank and start the UI for user interaction. +5. Lastly, Llama-stack docker will start. The `llama_stack_start.sh` controls the docker startup behavior. Change it if needed. It will first run the ingestion pipeline to convert all the documents into Markdown files. Then, it will run Llama Stack server based on the `llama_stack_run.yaml` config. Once the server is ready, then it will run the `gradio_interface.py` which will insert document chunks into memory_bank and start the UI for user interaction. 6. `gradio_interface.py` will show a public link. You can access the gradio UI by putting this link to the browser. 
Then you can start your chat in the gradio web page. @@ -58,6 +58,6 @@ Check more about Ollama instruction [here](https://github.com/ollama/ollama) ### Overview of how the RAG app works: 1. We use [docling](https://github.com/DS4SD/docling) framework for handling multiple file input formats (PDF, PPTX, DOCX) -2. If you are using a GPU inference machine, we have an option to use `Llama-3.2-11B-Vision` to caption images in the documents, on CPU machine this step is skipped +2. If you are using a GPU, we have an option to use `Llama-3.2-11B-Vision` to caption images in the documents. On a CPU-only machine this step is skipped. 3. Once ingested, we use a llama-stack distribution running chroma-db and `Llama-3.2-3B-Instruct` to ingest chunks into a memory_bank 4. Once the vectordb is created, we then use llama-stack with the `Llama-3.2-3B-Instruct` to chat with the model. From 7ae664530af207150c9e870d912bcf5436934ff0 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Thu, 21 Nov 2024 15:08:44 -0800 Subject: [PATCH 79/99] modified eval for 0.0.53 --- examples/E2E-RAG-App/data/eval/eval.py | 253 +++++++++---------------- 1 file changed, 91 insertions(+), 162 deletions(-) diff --git a/examples/E2E-RAG-App/data/eval/eval.py b/examples/E2E-RAG-App/data/eval/eval.py index 91c8bd3f0..0bdb68116 100644 --- a/examples/E2E-RAG-App/data/eval/eval.py +++ b/examples/E2E-RAG-App/data/eval/eval.py @@ -4,15 +4,13 @@ import uuid from typing import List, Optional -import chromadb import fire import requests -from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction -from datasets import Dataset -from dotenv import load_dotenv from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.types.agent_create_params import AgentConfig +from llama_stack_client.types.memory_insert_params import Document +from datasets import Dataset from ragas import evaluate, EvaluationDataset from ragas.metrics import ( AnswerRelevancy, @@ -24,194 +22,131 @@ ) from termcolor import cprint from tqdm import tqdm - -# Initialization -load_dotenv() -embedding_function = SentenceTransformerEmbeddingFunction( - model_name="all-mpnet-base-v2" -) -chroma_client = chromadb.PersistentClient(path="chroma") - - -def chunk_text(content: str, chunk_size: int = 500) -> List[str]: - """Splits content into chunks with overlap.""" - chunks = [] - current_chunk = [] - overlap = 100 - - for paragraph in content.split("\n\n"): - if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size: - current_chunk.append(paragraph) - else: - chunks.append("\n\n".join(current_chunk).strip()) - current_chunk = ( - [current_chunk[-1], paragraph] if current_chunk else [paragraph] - ) - - if current_chunk: - chunks.append("\n\n".join(current_chunk).strip()) - - return chunks +import chromadb +from dotenv import load_dotenv -def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 350) -> None: - """Inserts text documents from a directory into ChromaDB.""" - collection_name = "documents" - existing_collections = chroma_client.list_collections() - collection_names = [col.name for col in existing_collections] +load_dotenv() +HOST = os.getenv("HOST", "localhost") +PORT = int(os.getenv("PORT", 5000)) - if collection_name in collection_names: - cprint( - f"Collection '{collection_name}' already exists. 
Skipping document insertion.", - "yellow", - ) - return +CHROMA_PORT = int(os.getenv("CHROMA_PORT", 6000)) - collection = chroma_client.create_collection( - name=collection_name, embedding_function=embedding_function - ) - cprint(f"Collection '{collection_name}' created.", "green") +chroma_client = chromadb.HttpClient(host=HOST, port=CHROMA_PORT ) - for filename in os.listdir(file_dir): +async def load_documents(client, docs_dir, memory_bank_id): + """Load documents from the specified directory into memory bank.""" + documents = [] + for filename in os.listdir(docs_dir): if filename.endswith((".txt", ".md")): - file_path = os.path.join(file_dir, filename) + file_path = os.path.join(docs_dir, filename) with open(file_path, "r", encoding="utf-8") as file: content = file.read() - chunks = chunk_text(content, chunk_size=chunk_size) - - for i, chunk in enumerate(chunks): - chunk_id = f"{filename}_chunk_{i}" - collection.add( - documents=[chunk], - ids=[chunk_id], - metadatas=[ - {"filename": filename, "chunk_index": i, "content": chunk} - ], - ) + document = Document( + document_id=filename, + content=content, + mime_type="text/plain", + metadata={"filename": filename}, + ) + documents.append(document) + if documents: + client.memory.insert( + bank_id=memory_bank_id, + documents=documents, + ) + print(f"Loaded {len(documents)} documents from {docs_dir}") - cprint(f"Inserted documents from {file_dir} into ChromaDB.", "green") +async def setup_memory_bank(client, docs_dir, memory_bank_id): + """Set up the memory bank if it doesn't exist.""" + providers = client.providers.list() + provider_id = providers["memory"][0].provider_id + collections = chroma_client.list_collections() -def query_chromadb(query: str) -> Optional[dict]: - """Queries ChromaDB for relevant context based on input query.""" - collection = chroma_client.get_collection( - name="documents", embedding_function=embedding_function - ) + if any(col.name == memory_bank_id for col in collections): + print(f"The collection '{memory_bank_id}' exists.") + else: + print( + f"The collection '{memory_bank_id}' does not exist. Creating the collection..." + ) + client.memory_banks.register( + memory_bank_id=memory_bank_id, + params={ + "embedding_model": "all-MiniLM-L6-v2", + "chunk_size_in_tokens": 100, + "overlap_size_in_tokens": 10, + }, + provider_id=provider_id, + ) + await load_documents(client, docs_dir, memory_bank_id) + print(f"Memory bank registered.") - results = collection.query( - query_texts=[query], - n_results=5, - include=["documents", "metadatas", "distances"], +async def get_response_with_context(agent, input_query, session_id): + response = agent.create_turn( + messages=[{"role": "user", "content": input_query}], session_id=session_id ) - return results if results else None - - -async def get_response_with_context( - agent: Agent, input_query: str, session_id: str -) -> (str, List[str]): - """Fetches response from the agent with context from ChromaDB.""" - results = query_chromadb(input_query) - if results and results["metadatas"]: - context = "\n".join( - f"Filename: {metadata['filename']}, Chunk index: {metadata['chunk_index']}\n{metadata['content']}" - for metadata_list in results["metadatas"] - for metadata in metadata_list - ) - # Collect the contexts into a list - contexts = [ - metadata["content"] - for metadata_list in results["metadatas"] - for metadata in metadata_list - ] - else: - context = "No relevant context found." 
- contexts = [] - - messages = [ - {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"} - ] + generated_answer = "" + retrieved_contexts = [] - response = agent.create_turn(messages=messages, session_id=session_id) + async for event in response: + if event.event.payload.event_type == "token": + generated_answer += event.event.payload.token - async for chunk in response: - if chunk.event.payload.event_type == "turn_complete": - return chunk.event.payload.turn.output_message.content, contexts + elif event.event.payload.event_type == "tool_use": + if event.event.payload.tool == "memory": + result = event.event.payload.result + if result and isinstance(result, dict) and 'documents' in result: + retrieved_docs = result['documents'] + for doc in retrieved_docs: + retrieved_contexts.append(doc.get("content", "")) + else: + print(f"Tool use result: {event.event.payload.result}") - return "No response generated.", contexts + elif event.event.payload.event_type == "turn_complete": + break + return generated_answer, retrieved_contexts async def run_main(host: str, port: int, docs_dir: str) -> None: """Main async function to register model, insert documents, and generate responses.""" client = LlamaStackClient(base_url=f"http://{host}:{port}") + memory_bank_id = "test_bank_113" - insert_documents_to_chromadb(docs_dir) + await setup_memory_bank(client, docs_dir, memory_bank_id) - model_name = "Llama3.2-3B-Instruct" - url = f"http://{host}:{port}/models/register" - headers = {"Content-Type": "application/json"} - data = { - "model_id": model_name, - "provider_model_id": None, - "provider_id": "inline::meta-reference-0", - "metadata": None, - } - response = requests.post(url, headers=headers, data=json.dumps(data)) - cprint(f"Model registration status: {response.status_code}", "blue") + model_name = "Llama3.2-1B-Instruct" agent_config = AgentConfig( model=model_name, instructions="You are a helpful assistant that can answer questions based on provided documents.", sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + tools=[ + { + "type": "memory", + "memory_bank_configs": [{"bank_id": memory_bank_id, "type": "vector"}], + "query_generator_config": {"type": "default", "sep": " "}, + "max_tokens_in_context": 512, + "max_chunks": 5, + } + ], + tool_choice="auto", + tool_prompt_format="json", enable_session_persistence=True, ) agent = Agent(client, agent_config) - # QA data qa_data = [ { "Question": "What is the policy regarding smoking in City offices?", "Answer": "Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)", }, - { - "Question": "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", - "Answer": "Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. 
(Source: Page 32, 'Accrual of Paid Sick Leave' section)", - }, - { - "Question": "What are the three categories of employees eligible for health coverage?", - "Answer": "The following employees are eligible:\n\nAll permanent employees working at least 20 hours per week\n\nAll regularly scheduled provisional employees working at least 20 hours per week\n\nAll other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)", - }, - { - "Question": "How long must an employee wait before using vacation time after starting employment?", - "Answer": "Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)", - }, - { - "Question": "What must an employee do if they're summoned for jury duty?", - "Answer": "An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)", - }, - { - "Question": "What happens if an employee is absent without authorization for more than five consecutive working days?", - "Answer": "If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)", - }, - { - "Question": "How long is the normal probationary period for permanent civil service positions?", - "Answer": "The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)", - }, - { - "Question": "What are employees required to do in case of a catastrophic event while off duty?", - "Answer": "Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)", - }, - { - "Question": "What is the city's policy on accepting gifts from subordinates?", - "Answer": "Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. 
(Source: Page 49, 'Gifts' section)", - }, ] - # Prepare lists to collect data questions = [] generated_answers = [] - retrieved_contexts = [] + retrieved_contexts_list = [] ground_truths = [] session_id = agent.create_session(f"session-{uuid.uuid4()}") @@ -221,30 +156,26 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: cprint(f"Generating response for: {question}", "green") try: - generated_answer, contexts = await get_response_with_context( + generated_answer, retrieved_contexts = await get_response_with_context( agent, question, session_id ) cprint(f"Response: {generated_answer}", "green") - - # Append data to lists questions.append(question) generated_answers.append(generated_answer) - retrieved_contexts.append(contexts) + retrieved_contexts_list.append(retrieved_contexts) ground_truths.append(ground_truth_answer) except Exception as e: cprint(f"Error generating response for {question}: {e}", "red") - # Create a Dataset for RAGAS eval_data = Dataset.from_dict( { "user_input": questions, "response": generated_answers, - "retrieved_contexts": retrieved_contexts, + "retrieved_contexts": retrieved_contexts_list, "reference": ground_truths, } ) - # Run evaluation result = evaluate( eval_data, metrics=[ @@ -258,14 +189,12 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: ) df = result.to_pandas() - df.to_csv("evaluation_results2.csv", index=False) + df.to_csv("evaluation_results.csv", index=False) print(df.head()) - -def main(host: str, port: int, docs_dir: str) -> None: +def main(docs_dir: str) -> None: """Entry point for the script.""" - asyncio.run(run_main(host, port, docs_dir)) - + asyncio.run(run_main(HOST, PORT, docs_dir)) if __name__ == "__main__": - fire.Fire(main) \ No newline at end of file + fire.Fire(main) From 2e269b55b2a47429d03ab7a097f78e4aa550e9af Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Thu, 21 Nov 2024 15:12:19 -0800 Subject: [PATCH 80/99] undo changes on example.agent.rag_with_memory_bank --- examples/agents/rag_with_memory_bank.py | 28 +++++++------------------ 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/examples/agents/rag_with_memory_bank.py b/examples/agents/rag_with_memory_bank.py index 295ffa55d..1dde56d19 100644 --- a/examples/agents/rag_with_memory_bank.py +++ b/examples/agents/rag_with_memory_bank.py @@ -36,31 +36,17 @@ async def run_main(host: str, port: int, disable_safety: bool = False): client = LlamaStackClient(base_url=f"http://{host}:{port}") providers = client.providers.list() - - model_name = "Llama3.2-3B-Instruct" - url = f"http://{host}:{port}/models/register" - headers = {"Content-Type": "application/json"} - data = { - "model_id": model_name, - "provider_model_id": None, - "provider_id": "inline::meta-reference-0", - "metadata": None, - } - provider_id = providers["memory"][0].provider_id - print(provider_id) # create a memory bank - test = client.memory_banks.register( - memory_bank_id="test_bank", - params={ + client.memory_banks.register( + memory_bank={ + "identifier": "test_bank", "embedding_model": "all-MiniLM-L6-v2", "chunk_size_in_tokens": 512, "overlap_size_in_tokens": 64, - }, - provider_id=provider_id, + "provider_id": providers["memory"][0].provider_id, + } ) - print(f"Is memory bank registered? 
{test}") - # insert some documents client.memory.insert( bank_id="test_bank", @@ -74,7 +60,7 @@ async def run_main(host: str, port: int, disable_safety: bool = False): print(f"Using model: {selected_model}") agent_config = AgentConfig( - model=model_name, + model="Llama3.1-8B-Instruct", instructions="You are a helpful assistant", sampling_params={ "strategy": "greedy", @@ -128,4 +114,4 @@ def main(host: str, port: int): if __name__ == "__main__": - fire.Fire(main) + fire.Fire(main) \ No newline at end of file From c6380f82cbef5d9d596bbb6fbda72ae9b562d1ac Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Fri, 22 Nov 2024 16:20:15 -0800 Subject: [PATCH 81/99] changes made before PR review, stable branch --- examples/E2E-RAG-App/.env.template | 6 ++++++ examples/E2E-RAG-App/01_ingest_files.py | 3 ++- examples/E2E-RAG-App/docker/.env.template | 6 ++++++ examples/E2E-RAG-App/docker/RAG_service.json | 5 +++-- examples/E2E-RAG-App/gradio_interface.py | 4 +--- 5 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 examples/E2E-RAG-App/.env.template create mode 100644 examples/E2E-RAG-App/docker/.env.template diff --git a/examples/E2E-RAG-App/.env.template b/examples/E2E-RAG-App/.env.template new file mode 100644 index 000000000..fb7718b92 --- /dev/null +++ b/examples/E2E-RAG-App/.env.template @@ -0,0 +1,6 @@ +DOC_PATH=/path/to/your/llama-stack-apps/examples/E2E-RAG-App/example_data +MODEL_NAME=llama3.2:1b-instruct-fp16 +HOST=localhost +PORT=5000 +CHROMA_PORT=6000 +GRADIO_SERVER_PORT=7860 \ No newline at end of file diff --git a/examples/E2E-RAG-App/01_ingest_files.py b/examples/E2E-RAG-App/01_ingest_files.py index 876ff2f58..89252d987 100644 --- a/examples/E2E-RAG-App/01_ingest_files.py +++ b/examples/E2E-RAG-App/01_ingest_files.py @@ -6,6 +6,8 @@ from typing import List, Tuple import yaml +import os + from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.document import PictureItem, TableItem @@ -18,7 +20,6 @@ from docling.pipeline.simple_pipeline import SimplePipeline from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline - def parse_args(): parser = argparse.ArgumentParser( description="Process documents from input directory" diff --git a/examples/E2E-RAG-App/docker/.env.template b/examples/E2E-RAG-App/docker/.env.template new file mode 100644 index 000000000..fb7718b92 --- /dev/null +++ b/examples/E2E-RAG-App/docker/.env.template @@ -0,0 +1,6 @@ +DOC_PATH=/path/to/your/llama-stack-apps/examples/E2E-RAG-App/example_data +MODEL_NAME=llama3.2:1b-instruct-fp16 +HOST=localhost +PORT=5000 +CHROMA_PORT=6000 +GRADIO_SERVER_PORT=7860 \ No newline at end of file diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index 47c9fe934..453c00119 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,6 +1,7 @@ + { - "model_name": "llama3.2:3b", - "document_path": "/Users/kaiwu/work/llama-stack-apps/examples/E2E-RAG-App/example_data", + "model_name": "llama3.2:1b-instruct-fp16", + "document_path": "/Users/justinai/Documents/Code/llama-stack-apps/examples/E2E-RAG-App/example_data", "host": "localhost", "port": 5000, "chroma_port": 6000, diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 5962ade9a..8261b6744 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -7,7 +7,6 @@ from 
typing import AsyncGenerator, Generator, List, Optional import chromadb - import gradio as gr import requests from chromadb.utils import embedding_functions @@ -25,11 +24,10 @@ HOST = os.getenv("HOST", "localhost") PORT = int(os.getenv("PORT", 5000)) CHROMA_PORT = int(os.getenv("CHROMA_PORT", 6000)) -DOCS_DIR = "/root/rag_data/output" +DOCS_DIR = "/root/rag_data" GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", 7861)) MODEL_NAME = os.getenv("MODEL_NAME", "Llama3.2-1B-Instruct") - class LlamaChatInterface: def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.host = host From 6bc21649897770b94c3a7326a3e5a1bf9a533ae7 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 25 Nov 2024 08:23:24 -0800 Subject: [PATCH 82/99] removed default value for memory tool, modified gitignore and created a ragservice template --- .gitignore | 3 ++- examples/E2E-RAG-App/docker/RAG_service-template.json | 9 +++++++++ examples/E2E-RAG-App/docker/RAG_service.json | 2 +- examples/E2E-RAG-App/gradio_interface.py | 1 - 4 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 examples/E2E-RAG-App/docker/RAG_service-template.json diff --git a/.gitignore b/.gitignore index d190335c7..3d80216d9 100644 --- a/.gitignore +++ b/.gitignore @@ -174,4 +174,5 @@ Package.resolved **/chroma /examples/E2E-RAG-App/data/input /examples/E2E-RAG-App/data/output -**/.gradio \ No newline at end of file +**/.gradio +**/RAG_service.json \ No newline at end of file diff --git a/examples/E2E-RAG-App/docker/RAG_service-template.json b/examples/E2E-RAG-App/docker/RAG_service-template.json new file mode 100644 index 000000000..0ffa5b6fe --- /dev/null +++ b/examples/E2E-RAG-App/docker/RAG_service-template.json @@ -0,0 +1,9 @@ + +{ + "model_name": "llama3.2:1b-instruct-fp16", + "document_path": "/your/document_path/to/llama-stack-apps/examples/E2E-RAG-App/example_data", + "host": "localhost", + "port": 5000, + "chroma_port": 6000, + "gradio_server_port": 7860 +} diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index 453c00119..b29ce0c3b 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,7 +1,7 @@ { "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "/Users/justinai/Documents/Code/llama-stack-apps/examples/E2E-RAG-App/example_data", + "document_path": "/Users/justinlee/Documents/Code/llama-stack-apps/examples/E2E-RAG-App/example_data", "host": "localhost", "port": 5000, "chroma_port": 6000, diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 8261b6744..d72d9045f 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -113,7 +113,6 @@ async def initialize_agent(self): "memory_bank_configs": [ {"bank_id": self.memory_bank_id, "type": "vector"} ], - "query_generator_config": {"type": "default", "sep": " "}, "max_tokens_in_context": 300, "max_chunks": 5, } From 7f9c8c2f6bb207f5c27501ddeef895d41a256dfd Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 25 Nov 2024 19:34:36 -0800 Subject: [PATCH 83/99] changes to run locally v0.55 --- examples/E2E-RAG-App/gradio_interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index d72d9045f..96e64c3f7 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -23,10 +23,10 @@ HOST = 
os.getenv("HOST", "localhost") PORT = int(os.getenv("PORT", 5000)) -CHROMA_PORT = int(os.getenv("CHROMA_PORT", 6000)) -DOCS_DIR = "/root/rag_data" +CHROMA_PORT = 8000 +DOCS_DIR = "./example_data" GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", 7861)) -MODEL_NAME = os.getenv("MODEL_NAME", "Llama3.2-1B-Instruct") +MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct" class LlamaChatInterface: def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): From 51fdd3d04678a989bbbbf4210e1c6204afdfa59d Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 25 Nov 2024 19:36:43 -0800 Subject: [PATCH 84/99] change handling of eventlog and streaming --- examples/E2E-RAG-App/gradio_interface.py | 50 +++++++----------------- 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 96e64c3f7..0e924a335 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -37,7 +37,7 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.chroma_client = chromadb.HttpClient(host=host, port=chroma_port) self.agent = None self.session_id = None - self.memory_bank_id = "test_bank_232" + self.memory_bank_id = "test_bank_235" async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" @@ -132,41 +132,21 @@ def chat_stream( history = history or [] history.append([message, ""]) - output_queue = Queue() + if self.agent is None: + asyncio.run(self.initialize_system()) - def run_async(): - async def async_process(): - if self.agent is None: - await self.initialize_system() - - response = self.agent.create_turn( - messages=[{"role": "user", "content": message}], - session_id=self.session_id, - ) - - current_response = "" - async for log in EventLogger().log(response): - log.print() - if hasattr(log, "content"): - current_response += log.content - history[-1][1] = current_response - output_queue.put(history.copy()) - - output_queue.put(None) - - asyncio.run(async_process()) - - thread = Thread(target=run_async) - thread.start() - - while True: - item = output_queue.get() - if item is None: - break - else: - yield item + response = self.agent.create_turn( + messages=[{"role": "user", "content": message}], + session_id=self.session_id, + ) - thread.join() + current_response = "" + for log in EventLogger().log(response): + log.print() + if hasattr(log, "content"): + current_response += log.content + history[-1][1] = current_response + yield history.copy() def create_gradio_interface( @@ -235,5 +215,5 @@ def clear_chat(): # Create and launch the Gradio interface interface = create_gradio_interface() interface.launch( - server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True + server_name=HOST, server_port=8888, share=True, debug=True ) From 92bc7038761a7e7da71fc1620321f5ff6be0ef79 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 25 Nov 2024 19:40:27 -0800 Subject: [PATCH 85/99] move requirements to proj dir --- examples/E2E-RAG-App/requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 examples/E2E-RAG-App/requirements.txt diff --git a/examples/E2E-RAG-App/requirements.txt b/examples/E2E-RAG-App/requirements.txt new file mode 100644 index 000000000..dc847762f --- /dev/null +++ b/examples/E2E-RAG-App/requirements.txt @@ -0,0 +1,6 @@ +llama-stack==0.0.55 +llama-stack-client==0.0.55 +lxml>=4.9.0,<5.0.0 +xmltodict>=0.12.0,<1.0.0 +pycryptodomex>=3.8.2,<4.0.0 
+google-cloud-storage>=1.16,<2.0 \ No newline at end of file From 1939b7293cee87b3581f819f85183a3b4982ecd3 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 25 Nov 2024 22:57:35 -0800 Subject: [PATCH 86/99] removed unnecesary requirement.txt from root --- requirements.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index bde1472e0..f84ea4bee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,9 +3,3 @@ mesop python-dotenv setuptools yfinance -llama-stack==0.0.53rc1 -llama-stack-client==0.0.53rc2 -lxml>=4.9.0,<5.0.0 -xmltodict>=0.12.0,<1.0.0 -pycryptodomex>=3.8.2,<4.0.0 -google-cloud-storage>=1.16,<2.0 From ed20291b68707cfe1dada075c81b6858cb0bdcec Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 25 Nov 2024 23:13:43 -0800 Subject: [PATCH 87/99] updated eval --- examples/E2E-RAG-App/data/eval/.env.template | 14 + examples/E2E-RAG-App/data/eval/eval.py | 429 ++++++++++++------- 2 files changed, 289 insertions(+), 154 deletions(-) create mode 100644 examples/E2E-RAG-App/data/eval/.env.template diff --git a/examples/E2E-RAG-App/data/eval/.env.template b/examples/E2E-RAG-App/data/eval/.env.template new file mode 100644 index 000000000..304e30e2d --- /dev/null +++ b/examples/E2E-RAG-App/data/eval/.env.template @@ -0,0 +1,14 @@ +# Server Configuration +HOST=localhost +PORT=5000 +CHROMA_PORT=8000 + +# Model and Memory Configuration +MODEL_NAME=meta-llama/Llama-3.2-3B-Instruct +MEMORY_BANK_ID=eval_bank + +# File Paths +DOCS_DIR=../output + +# Optional: Add your API keys here if needed +# OPENAI_API_KEY=your_api_key_here diff --git a/examples/E2E-RAG-App/data/eval/eval.py b/examples/E2E-RAG-App/data/eval/eval.py index 0bdb68116..4a193ca07 100644 --- a/examples/E2E-RAG-App/data/eval/eval.py +++ b/examples/E2E-RAG-App/data/eval/eval.py @@ -1,17 +1,22 @@ import asyncio import json +import logging import os import uuid -from typing import List, Optional +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional, Tuple, Dict, Any +import chromadb import fire import requests +from datasets import Dataset +from dotenv import load_dotenv from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.types.agent_create_params import AgentConfig from llama_stack_client.types.memory_insert_params import Document -from datasets import Dataset -from ragas import evaluate, EvaluationDataset +from ragas import evaluate from ragas.metrics import ( AnswerRelevancy, ContextPrecision, @@ -20,181 +25,297 @@ Faithfulness, SemanticSimilarity, ) -from termcolor import cprint from tqdm import tqdm -import chromadb -from dotenv import load_dotenv + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) -load_dotenv() -HOST = os.getenv("HOST", "localhost") -PORT = int(os.getenv("PORT", 5000)) +@dataclass +class Config: + """Configuration for the evaluation script.""" + host: str + port: int + chroma_port: int + model_name: str + memory_bank_id: str + docs_dir: Path -CHROMA_PORT = int(os.getenv("CHROMA_PORT", 6000)) + @classmethod + def from_env(cls) -> 'Config': + """Create configuration from environment variables.""" + load_dotenv() + return cls( + host=os.getenv("HOST", "localhost"), + port=int(os.getenv("PORT", 5000)), + chroma_port=int(os.getenv("CHROMA_PORT", 8000)), + model_name=os.getenv( + "MODEL_NAME", "meta-llama/Llama-3.2-3B-Instruct"), + 
memory_bank_id=os.getenv("MEMORY_BANK_ID", "test_bank_236"), + docs_dir=Path(os.getenv("DOCS_DIR", "../output")).resolve(), + ) -chroma_client = chromadb.HttpClient(host=HOST, port=CHROMA_PORT ) +class MemoryBankManager: + """Manages memory bank operations.""" + + def __init__(self, client: LlamaStackClient, config: Config): + self.client = client + self.config = config + + async def setup(self) -> None: + """Set up the memory bank if it doesn't exist.""" + try: + providers = self.client.providers.list() + provider_id = providers["memory"][0].provider_id + memory_banks = self.client.memory_banks.list() + + if any(bank.identifier == self.config.memory_bank_id for bank in memory_banks): + logger.info( + f"Memory bank '{self.config.memory_bank_id}' exists.") + return + + logger.info( + f"Creating memory bank '{self.config.memory_bank_id}'...") + self.client.memory_banks.register( + memory_bank_id=self.config.memory_bank_id, + provider_id=provider_id, + ) + await self._load_documents() + logger.info("Memory bank registered successfully.") + + except Exception as e: + logger.error(f"Failed to setup memory bank: {str(e)}") + raise -async def load_documents(client, docs_dir, memory_bank_id): - """Load documents from the specified directory into memory bank.""" - documents = [] - for filename in os.listdir(docs_dir): - if filename.endswith((".txt", ".md")): - file_path = os.path.join(docs_dir, filename) - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - document = Document( - document_id=filename, - content=content, - mime_type="text/plain", - metadata={"filename": filename}, + async def _load_documents(self) -> None: + """Load documents from the specified directory into memory bank.""" + try: + documents = [] + for file_path in self.config.docs_dir.glob("*.{txt,md}"): + document = self._create_document(file_path) + if document: + documents.append(document) + + if documents: + self.client.memory.insert( + bank_id=self.config.memory_bank_id, + documents=documents, ) - documents.append(document) - if documents: - client.memory.insert( - bank_id=memory_bank_id, - documents=documents, - ) - print(f"Loaded {len(documents)} documents from {docs_dir}") + logger.info( + f"Loaded {len(documents)} documents from {self.config.docs_dir}") -async def setup_memory_bank(client, docs_dir, memory_bank_id): - """Set up the memory bank if it doesn't exist.""" - providers = client.providers.list() - provider_id = providers["memory"][0].provider_id - collections = chroma_client.list_collections() + except Exception as e: + logger.error(f"Failed to load documents: {str(e)}") + raise + def _create_document(self, file_path: Path) -> Optional[Document]: + """Create a Document object from a file.""" + try: + content = file_path.read_text(encoding="utf-8") + return Document( + document_id=file_path.name, + content=content, + mime_type="text/plain", + metadata={"filename": file_path.name}, + ) + except Exception as e: + logger.error( + f"Failed to create document from {file_path}: {str(e)}") + return None - if any(col.name == memory_bank_id for col in collections): - print(f"The collection '{memory_bank_id}' exists.") - else: - print( - f"The collection '{memory_bank_id}' does not exist. Creating the collection..." 
- ) - client.memory_banks.register( - memory_bank_id=memory_bank_id, - params={ - "embedding_model": "all-MiniLM-L6-v2", - "chunk_size_in_tokens": 100, - "overlap_size_in_tokens": 10, - }, - provider_id=provider_id, - ) - await load_documents(client, docs_dir, memory_bank_id) - print(f"Memory bank registered.") - -async def get_response_with_context(agent, input_query, session_id): - response = agent.create_turn( - messages=[{"role": "user", "content": input_query}], session_id=session_id - ) - - generated_answer = "" - retrieved_contexts = [] - - async for event in response: - if event.event.payload.event_type == "token": - generated_answer += event.event.payload.token - - elif event.event.payload.event_type == "tool_use": - if event.event.payload.tool == "memory": - result = event.event.payload.result - if result and isinstance(result, dict) and 'documents' in result: - retrieved_docs = result['documents'] - for doc in retrieved_docs: - retrieved_contexts.append(doc.get("content", "")) - else: - print(f"Tool use result: {event.event.payload.result}") - - elif event.event.payload.event_type == "turn_complete": - break - - return generated_answer, retrieved_contexts - -async def run_main(host: str, port: int, docs_dir: str) -> None: - """Main async function to register model, insert documents, and generate responses.""" - client = LlamaStackClient(base_url=f"http://{host}:{port}") - memory_bank_id = "test_bank_113" - - await setup_memory_bank(client, docs_dir, memory_bank_id) - - model_name = "Llama3.2-1B-Instruct" - - agent_config = AgentConfig( - model=model_name, - instructions="You are a helpful assistant that can answer questions based on provided documents.", - sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, - tools=[ - { - "type": "memory", - "memory_bank_configs": [{"bank_id": memory_bank_id, "type": "vector"}], - "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 512, - "max_chunks": 5, - } - ], - tool_choice="auto", - tool_prompt_format="json", - enable_session_persistence=True, - ) - agent = Agent(client, agent_config) - - qa_data = [ - { - "Question": "What is the policy regarding smoking in City offices?", - "Answer": "Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. 
(Source: Page 46, 'Smoke-Free Workplace' section)", - }, - ] - - questions = [] - generated_answers = [] - retrieved_contexts_list = [] - ground_truths = [] - - session_id = agent.create_session(f"session-{uuid.uuid4()}") - for qa in tqdm(qa_data, desc="Generating responses"): - question = qa["Question"] - ground_truth_answer = qa["Answer"] - - cprint(f"Generating response for: {question}", "green") + def query(self, query: str) -> Optional[Dict[str, Any]]: + """Query memory bank for relevant context.""" try: - generated_answer, retrieved_contexts = await get_response_with_context( - agent, question, session_id + response = self.client.memory.query( + bank_id=self.config.memory_bank_id, + query=[query], ) - cprint(f"Response: {generated_answer}", "green") - questions.append(question) - generated_answers.append(generated_answer) - retrieved_contexts_list.append(retrieved_contexts) - ground_truths.append(ground_truth_answer) + + if response.chunks and response.scores: + return { + "documents": [chunk.content for chunk in response.chunks], + "metadatas": [{"content": chunk.content} for chunk in response.chunks], + "distances": response.scores + } + return None + except Exception as e: - cprint(f"Error generating response for {question}: {e}", "red") - - eval_data = Dataset.from_dict( - { - "user_input": questions, - "response": generated_answers, - "retrieved_contexts": retrieved_contexts_list, - "reference": ground_truths, + logger.error(f"Failed to query memory: {str(e)}") + return None + + +class ResponseGenerator: + """Handles generation of responses using the agent.""" + + def __init__(self, agent: Agent, memory_manager: MemoryBankManager): + self.agent = agent + self.memory_manager = memory_manager + + async def get_response(self, query: str, session_id: str) -> Tuple[str, List[str]]: + """Generate a response for the given query using context from memory.""" + try: + context, contexts = self._get_context(query) + messages = [ + {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"}] + + response = self.agent.create_turn( + messages=messages, session_id=session_id) + return self._process_response(response), contexts + + except Exception as e: + logger.error(f"Failed to generate response: {str(e)}") + return "Error generating response.", [] + + def _get_context(self, query: str) -> Tuple[str, List[str]]: + """Get context for the query from memory.""" + results = self.memory_manager.query(query) + if results and results["metadatas"]: + contexts = [metadata["content"] + for metadata in results["metadatas"]] + context = "\n".join(f"Content:\n{ctx}" for ctx in contexts) + return context, contexts + return "No relevant context found.", [] + + def _process_response(self, response) -> str: + """Process the response from the agent.""" + full_response = "" + for chunk in response: + if hasattr(chunk, "event"): + if chunk.event.payload.event_type == "turn_complete": + return chunk.event.payload.turn.output_message.content + elif hasattr(chunk.event.payload, "delta"): + full_response += chunk.event.payload.delta.content or "" + return full_response or "No response generated." 
+ + +class Evaluator: + """Handles evaluation of the question-answering system.""" + + def __init__(self, config: Config): + self.config = config + + async def run_evaluation(self) -> None: + """Run the evaluation process.""" + try: + client = self._setup_client() + memory_manager = MemoryBankManager(client, self.config) + await memory_manager.setup() + + agent = self._setup_agent(client) + response_generator = ResponseGenerator(agent, memory_manager) + + qa_data = self._load_qa_data() + results = await self._process_questions(response_generator, qa_data) + self._evaluate_and_save_results(results) + + except Exception as e: + logger.error(f"Evaluation failed: {str(e)}") + raise + + def _setup_client(self) -> LlamaStackClient: + """Set up the LlamaStack client.""" + return LlamaStackClient(base_url=f"http://{self.config.host}:{self.config.port}") + + def _setup_agent(self, client: LlamaStackClient) -> Agent: + """Set up the agent with configuration.""" + agent_config = AgentConfig( + model=self.config.model_name, + instructions="You are a helpful assistant that can answer questions based on provided documents.", + sampling_params={"strategy": "greedy", + "temperature": 1.0, "top_p": 0.9}, + tools=[{ + "type": "memory", + "memory_bank_configs": [{"bank_id": self.config.memory_bank_id, "type": "vector"}], + }], + tool_choice="auto", + tool_prompt_format="json", + enable_session_persistence=True, + ) + return Agent(client, agent_config) + + def _load_qa_data(self) -> List[Dict[str, str]]: + """Load QA evaluation data.""" + qa_file_path = Path(__file__).parent / "QA_eval.json" + with qa_file_path.open('r') as f: + return json.load(f)[:10] + + async def _process_questions( + self, + response_generator: ResponseGenerator, + qa_data: List[Dict[str, str]] + ) -> Dict[str, List]: + """Process all questions and collect results.""" + results = { + "questions": [], + "generated_answers": [], + "retrieved_contexts": [], + "ground_truths": [] } - ) - result = evaluate( - eval_data, - metrics=[ + session_id = f"session-{uuid.uuid4()}" + for qa in tqdm(qa_data, desc="Generating responses"): + try: + question = qa["Question"] + ground_truth = qa["Answer"] + + answer, contexts = await response_generator.get_response(question, session_id) + + results["questions"].append(question) + results["generated_answers"].append(answer) + results["retrieved_contexts"].append( + [str(ctx) for ctx in contexts]) + results["ground_truths"].append(ground_truth) + + except Exception as e: + logger.error(f"Failed to process question: {str(e)}") + continue + + return results + + def _evaluate_and_save_results(self, results: Dict[str, List]) -> None: + """Evaluate and save the results.""" + eval_data = Dataset.from_dict({ + "user_input": results["questions"], + "response": results["generated_answers"], + "retrieved_contexts": results["retrieved_contexts"], + "reference": results["ground_truths"], + }) + + metrics = [ ContextPrecision(), ContextRecall(), Faithfulness(), AnswerRelevancy(), FactualCorrectness(), SemanticSimilarity(), - ], - ) + ] + + evaluation_result = evaluate(eval_data, metrics=metrics) + df = evaluation_result.to_pandas() + df.to_csv("evaluation_results.csv", index=False) + logger.info("\nEvaluation Results:") + logger.info("\n" + str(df)) + + +async def run_main(docs_dir: str = None) -> None: + """Main entry point for the evaluation script.""" + config = Config.from_env() + if docs_dir: + config.docs_dir = Path(docs_dir).resolve() + + evaluator = Evaluator(config) + await evaluator.run_evaluation() + - df = 
result.to_pandas() - df.to_csv("evaluation_results.csv", index=False) - print(df.head()) +def main() -> None: + """CLI entry point.""" + asyncio.run(run_main()) -def main(docs_dir: str) -> None: - """Entry point for the script.""" - asyncio.run(run_main(HOST, PORT, docs_dir)) if __name__ == "__main__": fire.Fire(main) From 633c50c3347b733905e2b934161fff9fb218f91b Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 25 Nov 2024 23:14:06 -0800 Subject: [PATCH 88/99] include readme for eval --- examples/E2E-RAG-App/data/eval/README.md | 98 +++++++++++++++++++ .../data/eval/evaluation_results.csv | 29 ++++++ 2 files changed, 127 insertions(+) create mode 100644 examples/E2E-RAG-App/data/eval/README.md create mode 100644 examples/E2E-RAG-App/data/eval/evaluation_results.csv diff --git a/examples/E2E-RAG-App/data/eval/README.md b/examples/E2E-RAG-App/data/eval/README.md new file mode 100644 index 000000000..ba042b9cd --- /dev/null +++ b/examples/E2E-RAG-App/data/eval/README.md @@ -0,0 +1,98 @@ +# RAG System Evaluation + +This directory contains tools for evaluating the Retrieval-Augmented Generation (RAG) system using RAGAS metrics. + +## Setup + +1. Create your environment file: + +```bash +cp .env.template .env +``` + +2. Configure the environment variables in `.env`: + +```env +# Server Configuration +HOST=localhost # Your server host +PORT=5000 # Your server port +CHROMA_PORT=8000 # Chroma DB port + +# Model and Memory Configuration +MODEL_NAME=meta-llama/Llama-3.2-3B-Instruct # Model to use +MEMORY_BANK_ID=eval_bank # Memory bank identifier + +# File Paths +DOCS_DIR=../output # Directory containing your documents +``` + +## Running the Evaluation + +1. Make sure your server is running and accessible at the configured host and port. + +2. Run the evaluation script: + +```bash +python eval.py +``` + +The script will: + +- Set up a memory bank for evaluation +- Load your documents +- Generate responses for test questions +- Evaluate the responses using various RAGAS metrics: + - Context Precision + - Context Recall + - Faithfulness + - Answer Relevancy + - Factual Correctness + - Semantic Similarity + +Results will be saved to `evaluation_results.csv`. + +## Analyzing Results + +For detailed analysis of your evaluation results, you can use the Jupyter notebook: + +```bash +jupyter notebook explain-eval.ipynb +``` + +The notebook provides: + +- Visualization of evaluation metrics +- Detailed breakdown of each metric +- Analysis of system performance +- Insights for improvement + +## Metrics Explanation + +The evaluation uses the following RAGAS metrics: + +1. **Context Precision**: Measures how much of the retrieved context is actually relevant +2. **Context Recall**: Measures if all relevant information was retrieved +3. **Faithfulness**: Measures if the answer is faithful to the provided context +4. **Answer Relevancy**: Measures if the answer is relevant to the question +5. **Factual Correctness**: Measures the factual accuracy of the answer +6. **Semantic Similarity**: Measures semantic closeness between answer and reference + +## Troubleshooting + +If you encounter issues: + +1. Verify your server is running and accessible +2. Check the environment variables in `.env` +3. Ensure your documents are in the correct directory +4. 
Check the logs for detailed error messages + +## Requirements + +- Python 3.10+ +- Jupyter Notebook (for analysis) +- Required Python packages (install via `pip`): + - ragas + - datasets + - pandas + - numpy + - matplotlib (for visualization) diff --git a/examples/E2E-RAG-App/data/eval/evaluation_results.csv b/examples/E2E-RAG-App/data/eval/evaluation_results.csv new file mode 100644 index 000000000..09b235f37 --- /dev/null +++ b/examples/E2E-RAG-App/data/eval/evaluation_results.csv @@ -0,0 +1,29 @@ +user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy,factual_correctness,semantic_similarity +What is the policy regarding smoking in City offices?,"[' productive workplace which is free from inappropriate workplace behavior.\n\n## Smoke-Free Workplace\n\nSmoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.\n\n## Drug-Free Workplace\n\nYou may not manufacture, distribute, dispense, possess, use or be under the influence of alcohol or illegal drugs in workplace. This prohibition includes prescription drugs used improperly (e.g., those not prescribed for the user). Any violation of this policy may be grounds', '45 |\n| Policy Regarding the Treatment of Co-Workers and Members of the Public ...................46 |\n| Smoke-Free Workplace .....................................................................................................46 |\n| Drug-Free Workplace ........................................................................................................46 Disciplinary Action against Striking Employees ...............................................................47 |\n| Political Activity ................................................................................................................47 |\n\n| If You Suspect Improper or Criminal Activity on the Job .................................................47 |\n|---------------------------------------------------------------------------------------------------------------------------------------------|\n| Use of City and County Property for Business Purposes', ' City resources or divert employees from their assigned duties.\n\nCity employees are prohibited from using their official positions to influence elections, and from using City funds or resources for political or r election activities. Further, City employees may not participate in political activities of any kind while in uniform (i.e., part or all of a uniform they are required or authorized to wear when engaged in official duties).\n\nViolation of these rules may result in considerable civil and criminal penalties, as well as discipline, up to and including']",No response generated.,"Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)",0.99999999995,0.5,0.0,0.0,0.0,0.7310965383185614 +How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?,"[' Accrual of Paid Sick Leave\n\nSick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.\n\nMost full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. 
Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.\n\nSome employees may be entitled to accrue sick leave under Chapter 12W of the Administrative', ' must complete the appropriate form and comply with the specific requirements for that type of leave. All requests for leave of more than five working days must be made in writing on the appropriate form. Please see your departmental personnel officer for more information .\n\n## Sick Leave\n\nPlease refer to your collective bargaining agreement and the Civil Service Rules or contact your departmental personnel officer for more information regarding sick leave accrual, usage, and notice and verification requirements.\n\n## Accrual of Paid Sick Leave\n\nSick', ' leave under Chapter 12W of the Administrative Code. Please contact your departmental personnel officer for more information.\n\n## Sick Leave without Pay\n\nYou may be eligible to take sick leave without pay. Please refer to the Civil Service Rules and any applicable collective bargaining agreement for more information.\n\n## Use of Sick Leave\n\nYou may not use sick leave with pay accrued under the Civil Service Rules during your first 6 months of employment with the City.\n\nYou may not use sick leave for reasons not set']",No response generated.,"Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)",0.9999999999,0.5,0.0,0.0,0.0,0.7089057204017655 +What are the three categories of employees eligible for health coverage?,"[' health coverage:\n\n- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive ', '02.\n\n## EMPLOYEE BENEFITS\n\nThe City provides eligible employees and their eligible dependents the opportunity to enroll in medical, dental, vision and flexible spending account benefits, administered by the Health Service System (""HSS"").\n\nPlease note that the information in this section is subject to change. You should consult with HSS if you have any questions at (415) 554-1750.\n\n## Eligibility\n\nThe following employees are eligible for health coverage:\n\n- (1) All permanent employees', ' of the employee organization, is available at the discretion of the department head.\n\nWhile the employee is on leave, the employee organization is responsible for paying the employee\'s salary.\n\nBefore the leave begins, the employee may choose to either continue or waive access to health coverage through the City for the duration of the approved leave. 
If the employee wishes to continue his or her health coverage, premium contributions for the employee and any covered dependents must be paid directly to the Health Services System (""HSS"").']",No response generated.,"The following employees are eligible: + +All permanent employees working at least 20 hours per week + +All regularly scheduled provisional employees working at least 20 hours per week + +All other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)",0.99999999995,1.0,0.0,0.0,0.0,0.6871705187744059 +How long must an employee wait before using vacation time after starting employment?,"[', you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.\n\nAn additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will', 'ing officer and the provisions of the applicable collective bargaining agreement. Contact your departmental personnel officer or payroll representative for more information.\n\n## Vacation\n\nThe City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the', "" department's procedures for details.\n\nWhen a holiday falls on a regular working day during your vacation, the holiday is not counted as a vacation day. If you resign or are separated after one or more years of continuous service and have not taken all of your vacation, you will be paid for the accumulated unused vacation.\n\nRequests to take vacation must be made and approved d in advance, in accordance with your department's procedures.\n\n## LEAVES OF ABSENCE\n\nLeaves of absence are governed by the""]",No response generated.,"Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)",0.99999999995,0.6666666666666666,0.0,0.0,0.0,0.710133331048373 +What must an employee do if they're summoned for jury duty?,"[' you would return to the workplace to work the remaining two hours each day and continue to take Friday off.\n\nYou are not entitled to a per diem pay from the City or County for which Jury Duty was served, as the City and County of San Francisco already provides regular compensation and benefits to you while performing jury service. You must notify the jury staff that you are a government employee when reporting for jury service\n\n## Witness Duty\n\nIf you are summoned as a witness on behalf of the City and', ' 10 days of unpaid leave during a period of his or her leave from deployment. 
Please refer to the Civil Service Rules for details or consult your departmental personnel officer for more information.\n\n## Jury Duty Leave\n\nYou must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report', ' give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.\n\n## Work Schedule While on Jury Duty\n\nAlternative Work Schedule. If you have an alternative work schedule and perform jury service for an extended period, your work schedule will revert to a regular Monday through Friday work schedule for compensation purposes for the duration of the jury service.\n\nSwing and']",No response generated.,"An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)",0.5833333333041666,0.75,0.0,0.0,0.0,0.7250633971841919 +What happens if an employee is absent without authorization for more than five consecutive working days?,"[' work and the number of hours worked in every work day.\n\nAll planned absences must be requested and approved d in advance. If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick', 'ation\n\nIf you are absent from your job for any period of time without proper authorization, you may be subject to discipline. If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to', ' period of incarceration .\n\nPlease see your departmental personnel officer if you have any questions regarding appropriate uses of sick leave.\n\n## Notice and Verification Requirements\n\nIt is your responsibility to notify your supervisor r as soon as possible whenever you are unable to report for work due to illness. You must t keep your supervisor informed throughout your absence and notify your supervisor of your expected date of return to work.\n\nIf you are absent from your job for more than five consecutive working days, you must submit to your supervisor a completed']",No response generated.,"If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' 
(Source: Page 19, 'Automatic Resignation' section)",0.49999999995,1.0,0.0,0.0,0.0,0.7395137950936732 +How long is the normal probationary period for permanent civil service positions?,"["" and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.\n\nThe department's appointing officer may release an employee at any time during the probationary period. At the conclusion of the probationary period,"", ' of eligibles to a permanent position. Permanent empmployees who have successfully completed the probationary period may only be removed for cause.\n\n## Exempt\n\nPersons appointed to temporary or permanent positions exempted from civil service status in accordance with the City Charter. Exempt t appointees serve at the pleasure of the appointing officer.\n\n## Provisional\n\nProvisional appointments are made when an eligible list for the classification is not available. The just cause rights of provisional employees are described in the', ' the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.\n\n## Reinstatement\n\nAn employee may submit a written request to return to a vacant']",No response generated.,"The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)",0.99999999995,0.6666666666666666,0.0,0.0,0.0,0.7124925328695783 +What are employees required to do in case of a catastrophic event while off duty?,"[' whatever role you are directed to perform by a superior or by law.\n\nAll Disaster Services Workers will be issued a citywide identification badge. You must have your badge with you at all times.\n\nCatastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.\n\nCatastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your', ' less.\n\n## Disaster Service Workers - Your Responsibilities in Case of Emergency\n\nAll City employees are designated Disaster Service Workers under state and local law. If the Mayor or an authorized City official proclaims a local emergency due to actual or threatened disaster such as earthquake, fire, riot, etc., City employees, as Disaster Service Workers, may be required to provide services during the emergency and subsequent period of assessment and recovery.\n\nSimply put, being a Disaster Service Worker means that any time a catastrophic event-', "" healthful work environment.\n\nSafety is every City employee's responsibility. 
All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors.\n\n## On-the-Job Injury and Workers' Compensation\n\nAll City employees are covered by State Workers' Compensation laws. If you are injured at work or because of your work, you must report the injury or illness to your immediate supervisor as soon as possible, and no later than the""]",No response generated.,"Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)",0.99999999995,0.5,0.0,0.0,0.0,0.7465800852696091 +What is the city's policy on accepting gifts from subordinates?,"[' a copy of your department\'s ""Statement of Incompatible Activities,"" please see your departmental personnel officer or go to the Ethics Commission website at www.sfgov.org/site/ethics.\n\n## Gifts\n\nYou may not accept any gift that is intended to inflfluence you in the performance of your job.\n\nYou may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months', ' a governmental decision during the past 12 months.\n\nYou may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .\n\nIf you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. That amount is $420.00 for calendar year 2011; please', ' recipient to believe that you are speaking in an official capacity when you are not.\n- You may not make, participate in, or attempt to inflfluence a governmental decision affecting a person or entity with whom you are discussing or negotiating an agreement concerning future employment.\n- You may not accept any compensation, reward, or gift ft from any source except the City for any service, advice, assistance or other matter related to your City job.\n- You may not solicit or accept anything of value in']",No response generated.,"Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. (Source: Page 49, 'Gifts' section)",0.9999999999666667,0.5,0.0,0.0,0.0,0.7311572952124511 +What documents must new employees present at the time of hire?,"['\n\nPrior to employment in certain positions or classififications, departments may require a background review of criminal history, motor vehicle (driving) record, personnel history and employment records. The requirement for a background review will be published on the examination announcement.\n\n## Medical Examination\n\nDepending on the classification or position in which you are to be employed, you may be required to pass a medical examination, including drug testing, as a condition of employment. Most medical examinations are conducted by the Deparartment of Public', ' these documents may result in loss of eligibility. 
Acceptable verification documents are listed in the information pamphlet entitled ""Federal Immigration and Naturalization Service Requirements,"" available at the Department of Human Resources\' (""DHR"") Employment Information Center, located on the 4 th floor at 1 South Van Ness Avenue.\n\nAny applicant or employee seeking to have the City p provide documentation to the Department of Homeland Security (including the U.S. Citizenship and Immigration Services) about possible or actual employment with the City must obtain', 'ingerprinting\n\nAll employees must be fingerprinted. Fingerprints are electronically transmitted to the California Department of Justice to obtain conviction records.\n\n## REQUIREMENTS AT THE TIME OF HIRE\n\n## Social Security Number\n\nYou must have a Social Security number to work for the City.\n\n## Authorization to Work\n\nYou must present documents to verify identity and authorization to work in the United States as required by the Immigration Reform Control Act of 1986. Failure to provide these documents may result in loss of eligibility. Accept']",No response generated.,"New employees must present: + +Social Security number + +Documents to verify identity and authorization to work in the United States + +Take the Oath of Office + +Sign acknowledgment of no-strike provision + +Complete tax forms (W-4) + +Complete Warrant Recipient Form (Source: Pages 11-12, 'Requirements at the Time of Hire' section)",0.3333333333,0.3333333333333333,0.0,0.0,0.0,0.7086797649178207 From 30619b67aab0962b6f123e13fc33703f8e6a882b Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 25 Nov 2024 23:17:51 -0800 Subject: [PATCH 89/99] remove external chroma, using only memorybank --- examples/E2E-RAG-App/gradio_interface.py | 36 +++++++++++++----------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 0e924a335..0df5046fb 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -22,19 +22,19 @@ load_dotenv() HOST = os.getenv("HOST", "localhost") -PORT = int(os.getenv("PORT", 5000)) -CHROMA_PORT = 8000 -DOCS_DIR = "./example_data" -GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", 7861)) -MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct" +PORT = int(os.getenv("PORT", "5000")) +GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", "7861")) + +MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.2-3B-Instruct") +DOCS_DIR = os.getenv("DOCS_DIR", "./example_data") + class LlamaChatInterface: - def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): + def __init__(self, host: str, port: int, docs_dir: str): self.host = host self.port = port self.docs_dir = docs_dir self.client = LlamaStackClient(base_url=f"http://{host}:{port}") - self.chroma_client = chromadb.HttpClient(host=host, port=chroma_port) self.agent = None self.session_id = None self.memory_bank_id = "test_bank_235" @@ -48,14 +48,15 @@ async def setup_memory_bank(self): """Set up the memory bank if it doesn't exist.""" providers = self.client.providers.list() provider_id = providers["memory"][0].provider_id - collections = self.chroma_client.list_collections() + memory_banks = self.client.memory_banks.list() + print(f"Memory banks: {memory_banks}") - if any(col.name == self.memory_bank_id for col in collections): - print(f"The collection '{self.memory_bank_id}' exists.") + # Check if memory bank exists by identifier + if any(bank.identifier == 
self.memory_bank_id for bank in memory_banks): + print(f"Memory bank '{self.memory_bank_id}' exists.") else: print( - f"The collection '{self.memory_bank_id}' does not exist. Creating the collection..." - ) + f"Memory bank '{self.memory_bank_id}' does not exist. Creating...") self.client.memory_banks.register( memory_bank_id=self.memory_bank_id, params={ @@ -106,7 +107,8 @@ async def initialize_agent(self): agent_config = AgentConfig( model=model_name, instructions="You are a helpful assistant that can answer questions based on provided documents. Return your answer short and concise, less than 50 words.", - sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, + sampling_params={"strategy": "greedy", + "temperature": 1.0, "top_p": 0.9}, tools=[ { "type": "memory", @@ -152,15 +154,15 @@ def chat_stream( def create_gradio_interface( host: str = HOST, port: int = PORT, - chroma_port: int = CHROMA_PORT, docs_dir: str = DOCS_DIR, ): - chat_interface = LlamaChatInterface(host, port, chroma_port, docs_dir) + chat_interface = LlamaChatInterface(host, port, docs_dir) with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown("# LlamaStack Chat") - chatbot = gr.Chatbot(bubble_full_width=False, show_label=False, height=400) + chatbot = gr.Chatbot(bubble_full_width=False, + show_label=False, height=400) msg = gr.Textbox( label="Message", placeholder="Type your message here...", @@ -215,5 +217,5 @@ def clear_chat(): # Create and launch the Gradio interface interface = create_gradio_interface() interface.launch( - server_name=HOST, server_port=8888, share=True, debug=True + server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True ) From 0bc12573341a9ee1d598dc5a026318a159371100 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 26 Nov 2024 15:42:46 -0800 Subject: [PATCH 90/99] changed to app.py and add GPU flag --- .../{gradio_interface.py => app.py} | 5 ++-- .../docker/RAG_service-template.json | 3 +- examples/E2E-RAG-App/docker/RAG_service.json | 5 ++-- examples/E2E-RAG-App/docker/compose.yaml | 2 +- .../E2E-RAG-App/docker/llama_stack_start.sh | 28 +++++++++++-------- examples/E2E-RAG-App/docker/run_RAG.sh | 1 + 6 files changed, 27 insertions(+), 17 deletions(-) rename examples/E2E-RAG-App/{gradio_interface.py => app.py} (97%) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/app.py similarity index 97% rename from examples/E2E-RAG-App/gradio_interface.py rename to examples/E2E-RAG-App/app.py index 0df5046fb..4c9b16138 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/app.py @@ -24,9 +24,10 @@ HOST = os.getenv("HOST", "localhost") PORT = int(os.getenv("PORT", "5000")) GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", "7861")) - +USE_GPU = os.getenv("USE_GPU", False) MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.2-3B-Instruct") -DOCS_DIR = os.getenv("DOCS_DIR", "./example_data") +# if use_gpu, then the documents will be processed to output folder +DOCS_DIR = "/root/rag_data/output" if USE_GPU else "/root/rag_data/" class LlamaChatInterface: diff --git a/examples/E2E-RAG-App/docker/RAG_service-template.json b/examples/E2E-RAG-App/docker/RAG_service-template.json index 0ffa5b6fe..8fc294303 100644 --- a/examples/E2E-RAG-App/docker/RAG_service-template.json +++ b/examples/E2E-RAG-App/docker/RAG_service-template.json @@ -5,5 +5,6 @@ "host": "localhost", "port": 5000, "chroma_port": 6000, - "gradio_server_port": 7860 + "gradio_server_port": 7860, + "use_gpu": false } diff --git 
a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index b29ce0c3b..7d4461b33 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,9 +1,10 @@ { "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "/Users/justinlee/Documents/Code/llama-stack-apps/examples/E2E-RAG-App/example_data", + "document_path": "/Users/kaiwu/work/llama-stack-apps//examples/E2E-RAG-App/example_data", "host": "localhost", "port": 5000, "chroma_port": 6000, - "gradio_server_port": 7860 + "gradio_server_port": 7860, + "use_gpu": false } diff --git a/examples/E2E-RAG-App/docker/compose.yaml b/examples/E2E-RAG-App/docker/compose.yaml index 5a6d38d49..e6b3f6cb1 100644 --- a/examples/E2E-RAG-App/docker/compose.yaml +++ b/examples/E2E-RAG-App/docker/compose.yaml @@ -25,7 +25,7 @@ services: restart: always entrypoint: ["bash", "/root/ollama_start.sh"] llamastack: - image: llamastack/distribution-ollama:test-0.0.53rc8 + image: llamastack/distribution-ollama:0.0.54 network_mode: "host" tty: true volumes: diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh index fc07bec3c..072e653b0 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -1,18 +1,24 @@ - +#!/bin/bash +# Sleep for 45 seconds sleep 45 - +# Print a message indicating the start of llama-stack docker echo "-----starting to llama-stack docker now---------" - +# Install required packages pip install gradio -pip install docling - -python /root/E2E-RAG-App/01_ingest_files.py --input_dir /root/rag_data/ +pip install -U llama-stack +# Check if GPU is enabled and run ingest files script accordingly +if [ "$USE_GPU" = true ]; then + pip install docling + python /root/E2E-RAG-App/01_ingest_files.py --input_dir /root/rag_data/ +fi +# Print a message indicating the start of llama-stack server echo "starting the llama-stack server" - -python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6& - +# Run llama-stack server with specified config and disable ipv6 +python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6 & +# Sleep for 30 seconds sleep 30 +# Print a message indicating the start of RAG app echo "---------running the RAG app--------------" - -python /root/E2E-RAG-App/gradio_interface.py +# Run RAG app +python /root/E2E-RAG-App/app.py diff --git a/examples/E2E-RAG-App/docker/run_RAG.sh b/examples/E2E-RAG-App/docker/run_RAG.sh index 581a1b0cc..6e68b528e 100755 --- a/examples/E2E-RAG-App/docker/run_RAG.sh +++ b/examples/E2E-RAG-App/docker/run_RAG.sh @@ -6,4 +6,5 @@ echo "HOST=$(jq -r '.host' ./RAG_service.json)" >> .env echo "PORT=$(jq -r '.port' ./RAG_service.json)" >> .env echo "CHROMA_PORT=$(jq -r '.chroma_port' ./RAG_service.json)" >> .env echo "GRADIO_SERVER_PORT=$(jq -r '.gradio_server_port' ./RAG_service.json)" >> .env +echo "USE_GPU=$(jq -r '.use_gpu' ./RAG_service.json)" >> .env docker compose up From 70e5678417d273efd9db80a222befa6e5a32897d Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 2 Dec 2024 11:35:12 -0800 Subject: [PATCH 91/99] folder restructure, update scripts, change readme --- .gitignore | 4 ++-- examples/{E2E-RAG-App => DocQA}/README.md | 8 ++++---- examples/{E2E-RAG-App => DocQA}/app.py | 0 .../data/assets}/RAG_workflow.jpg | Bin .../{E2E-RAG-App => DocQA}/data/eval/.env.template | 0 .../{E2E-RAG-App => DocQA}/data/eval/QA_eval.json | 
0 examples/{E2E-RAG-App => DocQA}/data/eval/README.md | 0 examples/{E2E-RAG-App => DocQA}/data/eval/eval.py | 0 .../data/eval/evaluation_results.csv | 0 .../data/eval/explain-eval.ipynb | 0 .../{E2E-RAG-App => DocQA/docker}/.env.template | 2 +- .../docker/RAG_service-template.json | 3 +-- .../{E2E-RAG-App => DocQA}/docker/chroma_start.sh | 0 examples/{E2E-RAG-App => DocQA}/docker/compose.yaml | 3 ++- .../docker/llama_stack_run.yaml | 0 .../docker/llama_stack_start.sh | 4 ++-- .../{E2E-RAG-App => DocQA}/docker/ollama_start.sh | 0 examples/{E2E-RAG-App => DocQA}/docker/run_RAG.sh | 0 .../example_data/llama_3.1.md | 0 .../example_data/llama_3.2.md | 0 .../example_data/llama_3.2_vision.md | 0 examples/{E2E-RAG-App => DocQA}/requirements.txt | 0 .../scripts/caption_outputs.py} | 0 .../scripts/ingest_files.py} | 0 examples/E2E-RAG-App/docker/.env.template | 6 ------ examples/E2E-RAG-App/docker/RAG_service.json | 10 ---------- 26 files changed, 12 insertions(+), 28 deletions(-) rename examples/{E2E-RAG-App => DocQA}/README.md (94%) rename examples/{E2E-RAG-App => DocQA}/app.py (100%) rename examples/{E2E-RAG-App => DocQA/data/assets}/RAG_workflow.jpg (100%) rename examples/{E2E-RAG-App => DocQA}/data/eval/.env.template (100%) rename examples/{E2E-RAG-App => DocQA}/data/eval/QA_eval.json (100%) rename examples/{E2E-RAG-App => DocQA}/data/eval/README.md (100%) rename examples/{E2E-RAG-App => DocQA}/data/eval/eval.py (100%) rename examples/{E2E-RAG-App => DocQA}/data/eval/evaluation_results.csv (100%) rename examples/{E2E-RAG-App => DocQA}/data/eval/explain-eval.ipynb (100%) rename examples/{E2E-RAG-App => DocQA/docker}/.env.template (57%) rename examples/{E2E-RAG-App => DocQA}/docker/RAG_service-template.json (87%) rename examples/{E2E-RAG-App => DocQA}/docker/chroma_start.sh (100%) rename examples/{E2E-RAG-App => DocQA}/docker/compose.yaml (94%) rename examples/{E2E-RAG-App => DocQA}/docker/llama_stack_run.yaml (100%) rename examples/{E2E-RAG-App => DocQA}/docker/llama_stack_start.sh (87%) rename examples/{E2E-RAG-App => DocQA}/docker/ollama_start.sh (100%) rename examples/{E2E-RAG-App => DocQA}/docker/run_RAG.sh (100%) rename examples/{E2E-RAG-App => DocQA}/example_data/llama_3.1.md (100%) rename examples/{E2E-RAG-App => DocQA}/example_data/llama_3.2.md (100%) rename examples/{E2E-RAG-App => DocQA}/example_data/llama_3.2_vision.md (100%) rename examples/{E2E-RAG-App => DocQA}/requirements.txt (100%) rename examples/{E2E-RAG-App/02_caption_outputs.py => DocQA/scripts/caption_outputs.py} (100%) rename examples/{E2E-RAG-App/01_ingest_files.py => DocQA/scripts/ingest_files.py} (100%) delete mode 100644 examples/E2E-RAG-App/docker/.env.template delete mode 100644 examples/E2E-RAG-App/docker/RAG_service.json diff --git a/.gitignore b/.gitignore index 3d80216d9..a93fa8f91 100644 --- a/.gitignore +++ b/.gitignore @@ -172,7 +172,7 @@ Package.resolved **/faiss_index.index **/faiss_index.index.json **/chroma -/examples/E2E-RAG-App/data/input -/examples/E2E-RAG-App/data/output +/examples/DocQA/data/input +/examples/DocQA/data/output **/.gradio **/RAG_service.json \ No newline at end of file diff --git a/examples/E2E-RAG-App/README.md b/examples/DocQA/README.md similarity index 94% rename from examples/E2E-RAG-App/README.md rename to examples/DocQA/README.md index e9219072b..c7f6ea623 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/DocQA/README.md @@ -1,6 +1,6 @@ -## E2E-RAG-App +## DocQA -This is an end-to-end Retrieval Augmented Geneartion (RAG) App leveraging llama-stack that handles the logic 
for ingesting documents, storing them in a vector database and providing an inference interface. +This is an end-to-end Retrieval Augmented Generation (RAG) App leveraging llama-stack that handles the logic for ingesting documents, storing them in a vector database and providing an inference interface. We share the details of how to run first and then an outline of how it works: @@ -12,7 +12,7 @@ For Mac and Windows users, you need to start the Docker app manually after insta ### How to run the pipeline: -![RAG_workflow](./RAG_workflow.jpg) +![RAG_workflow](./data/assets/RAG_workflow.jpg) The above is the workflow diagram for this RAG app. To run the app, please read the following instructions: @@ -21,7 +21,7 @@ The above is the workflow diagram for this RAG app. To run the app, please read ```yaml { "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "${HOME}/work/llama-stack-apps/examples/E2E-RAG-App/example_data" + "document_path": "${HOME}/work/llama-stack-apps/examples/DocQA/example_data" } ``` diff --git a/examples/E2E-RAG-App/app.py b/examples/DocQA/app.py similarity index 100% rename from examples/E2E-RAG-App/app.py rename to examples/DocQA/app.py diff --git a/examples/E2E-RAG-App/RAG_workflow.jpg b/examples/DocQA/data/assets/RAG_workflow.jpg similarity index 100% rename from examples/E2E-RAG-App/RAG_workflow.jpg rename to examples/DocQA/data/assets/RAG_workflow.jpg diff --git a/examples/E2E-RAG-App/data/eval/.env.template b/examples/DocQA/data/eval/.env.template similarity index 100% rename from examples/E2E-RAG-App/data/eval/.env.template rename to examples/DocQA/data/eval/.env.template diff --git a/examples/E2E-RAG-App/data/eval/QA_eval.json b/examples/DocQA/data/eval/QA_eval.json similarity index 100% rename from examples/E2E-RAG-App/data/eval/QA_eval.json rename to examples/DocQA/data/eval/QA_eval.json diff --git a/examples/E2E-RAG-App/data/eval/README.md b/examples/DocQA/data/eval/README.md similarity index 100% rename from examples/E2E-RAG-App/data/eval/README.md rename to examples/DocQA/data/eval/README.md diff --git a/examples/E2E-RAG-App/data/eval/eval.py b/examples/DocQA/data/eval/eval.py similarity index 100% rename from examples/E2E-RAG-App/data/eval/eval.py rename to examples/DocQA/data/eval/eval.py diff --git a/examples/E2E-RAG-App/data/eval/evaluation_results.csv b/examples/DocQA/data/eval/evaluation_results.csv similarity index 100% rename from examples/E2E-RAG-App/data/eval/evaluation_results.csv rename to examples/DocQA/data/eval/evaluation_results.csv diff --git a/examples/E2E-RAG-App/data/eval/explain-eval.ipynb b/examples/DocQA/data/eval/explain-eval.ipynb similarity index 100% rename from examples/E2E-RAG-App/data/eval/explain-eval.ipynb rename to examples/DocQA/data/eval/explain-eval.ipynb diff --git a/examples/E2E-RAG-App/.env.template b/examples/DocQA/docker/.env.template similarity index 57% rename from examples/E2E-RAG-App/.env.template rename to examples/DocQA/docker/.env.template index fb7718b92..dec896449 100644 --- a/examples/E2E-RAG-App/.env.template +++ b/examples/DocQA/docker/.env.template @@ -1,4 +1,4 @@ -DOC_PATH=/path/to/your/llama-stack-apps/examples/E2E-RAG-App/example_data +DOC_PATH=/path/to/your/llama-stack-apps/examples/DocQA/example_data MODEL_NAME=llama3.2:1b-instruct-fp16 HOST=localhost PORT=5000 diff --git a/examples/E2E-RAG-App/docker/RAG_service-template.json b/examples/DocQA/docker/RAG_service-template.json similarity index 87% rename from examples/E2E-RAG-App/docker/RAG_service-template.json rename to 
examples/DocQA/docker/RAG_service-template.json index 8fc294303..64032030d 100644 --- a/examples/E2E-RAG-App/docker/RAG_service-template.json +++ b/examples/DocQA/docker/RAG_service-template.json @@ -1,7 +1,6 @@ - { "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "/your/document_path/to/llama-stack-apps/examples/E2E-RAG-App/example_data", + "document_path": "/your/document_path/to/llama-stack-apps/examples/DocQA/example_data", "host": "localhost", "port": 5000, "chroma_port": 6000, diff --git a/examples/E2E-RAG-App/docker/chroma_start.sh b/examples/DocQA/docker/chroma_start.sh similarity index 100% rename from examples/E2E-RAG-App/docker/chroma_start.sh rename to examples/DocQA/docker/chroma_start.sh diff --git a/examples/E2E-RAG-App/docker/compose.yaml b/examples/DocQA/docker/compose.yaml similarity index 94% rename from examples/E2E-RAG-App/docker/compose.yaml rename to examples/DocQA/docker/compose.yaml index e6b3f6cb1..bded3c99f 100644 --- a/examples/E2E-RAG-App/docker/compose.yaml +++ b/examples/DocQA/docker/compose.yaml @@ -32,7 +32,8 @@ services: - ~/.llama:/root/.llama # Link to ollama run.yaml file - ./llama_stack_run.yaml:/root/my-run.yaml - - ../../E2E-RAG-App:/root/E2E-RAG-App + - ../../DocQA:/root/DocQA + - ../../DocQA/scripts:/root/DocQA/scripts - ${DOC_PATH}:/root/rag_data/ - ./llama_stack_start.sh:/root/llama_stack_start.sh ports: diff --git a/examples/E2E-RAG-App/docker/llama_stack_run.yaml b/examples/DocQA/docker/llama_stack_run.yaml similarity index 100% rename from examples/E2E-RAG-App/docker/llama_stack_run.yaml rename to examples/DocQA/docker/llama_stack_run.yaml diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/DocQA/docker/llama_stack_start.sh similarity index 87% rename from examples/E2E-RAG-App/docker/llama_stack_start.sh rename to examples/DocQA/docker/llama_stack_start.sh index 072e653b0..114688abb 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/DocQA/docker/llama_stack_start.sh @@ -10,7 +10,7 @@ pip install -U llama-stack # Check if GPU is enabled and run ingest files script accordingly if [ "$USE_GPU" = true ]; then pip install docling - python /root/E2E-RAG-App/01_ingest_files.py --input_dir /root/rag_data/ + python /root/DocQA/scripts/ingest_files.py --input_dir /root/rag_data/ fi # Print a message indicating the start of llama-stack server echo "starting the llama-stack server" @@ -21,4 +21,4 @@ sleep 30 # Print a message indicating the start of RAG app echo "---------running the RAG app--------------" # Run RAG app -python /root/E2E-RAG-App/app.py +python /root/DocQA/app.py diff --git a/examples/E2E-RAG-App/docker/ollama_start.sh b/examples/DocQA/docker/ollama_start.sh similarity index 100% rename from examples/E2E-RAG-App/docker/ollama_start.sh rename to examples/DocQA/docker/ollama_start.sh diff --git a/examples/E2E-RAG-App/docker/run_RAG.sh b/examples/DocQA/docker/run_RAG.sh similarity index 100% rename from examples/E2E-RAG-App/docker/run_RAG.sh rename to examples/DocQA/docker/run_RAG.sh diff --git a/examples/E2E-RAG-App/example_data/llama_3.1.md b/examples/DocQA/example_data/llama_3.1.md similarity index 100% rename from examples/E2E-RAG-App/example_data/llama_3.1.md rename to examples/DocQA/example_data/llama_3.1.md diff --git a/examples/E2E-RAG-App/example_data/llama_3.2.md b/examples/DocQA/example_data/llama_3.2.md similarity index 100% rename from examples/E2E-RAG-App/example_data/llama_3.2.md rename to examples/DocQA/example_data/llama_3.2.md diff --git 
a/examples/E2E-RAG-App/example_data/llama_3.2_vision.md b/examples/DocQA/example_data/llama_3.2_vision.md similarity index 100% rename from examples/E2E-RAG-App/example_data/llama_3.2_vision.md rename to examples/DocQA/example_data/llama_3.2_vision.md diff --git a/examples/E2E-RAG-App/requirements.txt b/examples/DocQA/requirements.txt similarity index 100% rename from examples/E2E-RAG-App/requirements.txt rename to examples/DocQA/requirements.txt diff --git a/examples/E2E-RAG-App/02_caption_outputs.py b/examples/DocQA/scripts/caption_outputs.py similarity index 100% rename from examples/E2E-RAG-App/02_caption_outputs.py rename to examples/DocQA/scripts/caption_outputs.py diff --git a/examples/E2E-RAG-App/01_ingest_files.py b/examples/DocQA/scripts/ingest_files.py similarity index 100% rename from examples/E2E-RAG-App/01_ingest_files.py rename to examples/DocQA/scripts/ingest_files.py diff --git a/examples/E2E-RAG-App/docker/.env.template b/examples/E2E-RAG-App/docker/.env.template deleted file mode 100644 index fb7718b92..000000000 --- a/examples/E2E-RAG-App/docker/.env.template +++ /dev/null @@ -1,6 +0,0 @@ -DOC_PATH=/path/to/your/llama-stack-apps/examples/E2E-RAG-App/example_data -MODEL_NAME=llama3.2:1b-instruct-fp16 -HOST=localhost -PORT=5000 -CHROMA_PORT=6000 -GRADIO_SERVER_PORT=7860 \ No newline at end of file diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json deleted file mode 100644 index 7d4461b33..000000000 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ /dev/null @@ -1,10 +0,0 @@ - -{ - "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "/Users/kaiwu/work/llama-stack-apps//examples/E2E-RAG-App/example_data", - "host": "localhost", - "port": 5000, - "chroma_port": 6000, - "gradio_server_port": 7860, - "use_gpu": false -} From 8d699f22403daaa5dd5d64eab1000e3af122874c Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 2 Dec 2024 12:18:02 -0800 Subject: [PATCH 92/99] made changes to UI, gradio, seperating retrievecontext and inference --- examples/DocQA/app.py | 89 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 6 deletions(-) diff --git a/examples/DocQA/app.py b/examples/DocQA/app.py index 4c9b16138..7d83c058c 100644 --- a/examples/DocQA/app.py +++ b/examples/DocQA/app.py @@ -16,6 +16,7 @@ from llama_stack_client.lib.agents.event_logger import EventLogger from llama_stack_client.types.agent_create_params import AgentConfig from llama_stack_client.types.memory_insert_params import Document +import re # Load environment variables @@ -29,6 +30,47 @@ # if use_gpu, then the documents will be processed to output folder DOCS_DIR = "/root/rag_data/output" if USE_GPU else "/root/rag_data/" +CUSTOM_CSS = """ +.context-block { + font-size: 0.8em; + border-left: 3px solid #e9ecef; + margin: 0.5em 0; + padding: 0.5em 1em; + opacity: 0.85; +} + +.context-title { + font-size: 0.8em; + color: #9ca3af; + font-weight: 400; + display: flex; + align-items: center; + gap: 0.5em; + margin-bottom: 0.3em; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.context-title::before { + content: "📄"; + font-size: 1em; + opacity: 0.7; +} + +.context-content { + color: #6b7280; + line-height: 1.4; + font-weight: 400; +} + +.inference-response { + font-size: 1em; + color: #111827; + line-height: 1.5; + margin-top: 1em; +} +""" + class LlamaChatInterface: def __init__(self, host: str, port: int, docs_dir: str): @@ -131,7 +173,7 @@ def chat_stream( self, message: str, history: List[List[str]] ) -> 
Generator[List[List[str]], None, None]: """Stream chat responses token by token with proper history handling.""" - + history = history or [] history.append([message, ""]) @@ -144,13 +186,43 @@ def chat_stream( ) current_response = "" + context_shown = False + for log in EventLogger().log(response): log.print() if hasattr(log, "content"): - current_response += log.content + # Format context blocks if present + if not context_shown and "Retrieved context from banks" in str(log): + context = self.format_context(str(log)) + current_response = context + current_response + context_shown = True + else: + current_response += log.content + history[-1][1] = current_response yield history.copy() + def format_context(self, log_str: str) -> str: + """Format the context block with custom styling.""" + # Extract context and clean up the markers + context_match = re.search(r"Retrieved context from banks:.*?\n(.*?===.*?===.*?)(?=\n>|$)", log_str, re.DOTALL) + if context_match: + context = context_match.group(1).strip() + # Remove the marker lines + context = re.sub( + r"====\s*Here are the retrieved documents for relevant context:\s*===\s*START-RETRIEVED-CONTEXT\s*===\s*", + "", + context, + flags=re.IGNORECASE + ) + return f""" +
+<div class="context-block">
+    <div class="context-title">Retrieved Context</div>
+    <div class="context-content">
+        {context}
+    </div>
+</div>
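+<!-- These class names match the CUSTOM_CSS rules (.context-block, .context-title, .context-content) added near the top of app.py and passed to gr.Blocks(css=CUSTOM_CSS). -->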
+ """ + return "" + def create_gradio_interface( host: str = HOST, @@ -159,11 +231,16 @@ def create_gradio_interface( ): chat_interface = LlamaChatInterface(host, port, docs_dir) - with gr.Blocks(theme=gr.themes.Soft()) as interface: + with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as interface: gr.Markdown("# LlamaStack Chat") - chatbot = gr.Chatbot(bubble_full_width=False, - show_label=False, height=400) + chatbot = gr.Chatbot( + bubble_full_width=False, + show_label=False, + height=400, + container=True, + render_markdown=True + ) msg = gr.Textbox( label="Message", placeholder="Type your message here...", @@ -218,5 +295,5 @@ def clear_chat(): # Create and launch the Gradio interface interface = create_gradio_interface() interface.launch( - server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True + server_name=HOST, server_port=8888, share=True, debug=True ) From ceb10a8430221760d2c4f5379a4363754e330aab Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 2 Dec 2024 13:05:04 -0800 Subject: [PATCH 93/99] change readme and workflow diagram to simple version --- examples/DocQA/README.md | 25 +++----------------- examples/DocQA/data/assets/DocQA.png | Bin 0 -> 41391 bytes examples/DocQA/data/assets/RAG_workflow.jpg | Bin 55117 -> 0 bytes 3 files changed, 3 insertions(+), 22 deletions(-) create mode 100644 examples/DocQA/data/assets/DocQA.png delete mode 100644 examples/DocQA/data/assets/RAG_workflow.jpg diff --git a/examples/DocQA/README.md b/examples/DocQA/README.md index c7f6ea623..601dcb275 100644 --- a/examples/DocQA/README.md +++ b/examples/DocQA/README.md @@ -12,7 +12,7 @@ For Mac and Windows users, you need to start the Docker app manually after insta ### How to run the pipeline: -![RAG_workflow](./data/assets/RAG_workflow.jpg) +![RAG_workflow](./data/assets/DocQA.png) The above is the workflow diagram for this RAG app. To run the app, please read the following instructions: @@ -25,33 +25,14 @@ The above is the workflow diagram for this RAG app. To run the app, please read } ``` -2. Inside of docker folder, `run_RAG.sh` is the main script that can create `.env` file for compose.yaml and then actually start the `docker compose` process to launch all the pipelines in our dockers. `compose.yaml` is the main docker yaml that specifies all the mount option and docker configs, change the mounts if needed. +2. Run following code: ```bash cd docker bash run_RAG.sh ``` -> [!TIP] -> You can check the status of dockers by typing `docker ps` on another terminal. - -3. Ollama docker will start and this docker will pull and run the llama model specified. The `ollama_start.sh` control the Ollama docker startup behavior, change it if needed. - -> [!TIP] -> On anther terminal, you can log into the docker and check which model has been hosted, by following code: - -```bash -docker exec -it docker-ollama-1 bash -ollama ps -``` - -Check more about Ollama instruction [here](https://github.com/ollama/ollama) - -4. ChromaDB docker will also start. This docker will host the chroma database that can interact with llama-stack. - -5. Lastly, Llama-stack docker will start. The `llama_stack_start.sh` controls the docker startup behavior. Change it if needed. It will first run the ingestion pipeline to convert all the documents into Markdown files. Then, it will run Llama Stack server based on the `llama_stack_run.yaml` config. Once the server is ready, then it will run the `gradio_interface.py` which will insert document chunks into memory_bank and start the UI for user interaction. - -6. 
`gradio_interface.py` will show a public link. You can access the gradio UI by putting this link to the browser. Then you can start your chat in the gradio web page. +3. Once the service is ready, open the link http://localhost:7861/ in your browser to chat with your documents. diff --git a/examples/DocQA/data/assets/DocQA.png b/examples/DocQA/data/assets/DocQA.png new file mode 100644 index 0000000000000000000000000000000000000000..4d922b9e4e334658c1fa34fa666ad9c2a8bb3c28 GIT binary patch literal 41391 zcmd?R1zgqJx-JaUiB1U#DFvh^Al)Th0#c%+64D^u-Q6Xk(jcOAcS{O_fD(dqDBT_3 zmD2Zv5WosM8(EpY-wvp#V$_8 z#-?Ls#iVbdV`QUaZp&n0Ua^X7UvhpS-O~*ndwlm-Q{Fr0YBZA)6p}ru(Px^ zreYTb-=!_=49vivU>N*g;UV~+8u*WuMVpmVo0}GV6tS{0Gf*|ql`#QNzst$a&BV?H zhHuMADapxFv5A4tW+tWv;15XyJyT1>En>ztmgZpOE-O1L6AR)WF!)f%P{+pPw?`mm z>R@1FYhr2fYcXsbOgv2B*@z!(^>oY(ehst!8qu}1(KoR9H3Xi=M#U~c#UcVOMf}Sy zb}j|*#X<+X`=@m713@sJ-)^G+YfP1gl}*Coft)ytskAwVz3wAfDf7R7+Ql=?og{5^ ztc>L>^?%Ap-|2ipENthq)OY?h!o|Y#Ym!Db7wRbn?L;F|IOo`8a#61P-Jmr6?2m2 zwqRkCQgqT`WIcbPgN~W~xy*kl)Yi`V!h#L-f%MP6TH4qdTN+tf=$J|TIVx&nX>Xx# zfKVc^Wzj#ck+rn60)wnz@Tq~Fo%6XV>Db#@f+1r&bF=ebf4$R6=g*a%FW%PD-bU|y zVt>7WbNkb=F#_@9ry>01b-*hTj)_>yud9B(;2-PrPpwKCfQVpc;|!i)V_>FZXX5bt zQ$WZ%w`ik3u0y=+U(q_a=GCV7_zm}0k^RHX&_wI_6XMy)z#56HTr1^ zjCz)4mLS+6;+u(uiJgfKP&A0w7gt_9@@Ht${uS}SwSP?G*DHR__P61`n9OfO`j&e3 z=D<_5t$up>uh@E5U&l`8Uw#A=6VtzZ?Zt(nx7OCUyo7 zt#p1Gxg&_zKv4gRV}R6SXK)eG5%2iN=JS`Pe11FEc?>wu25j8t-yQ#qt*jSg#=pl^ z_VWw>eXJjm+Yz4kr*X3VH$*Q3Clfoh^GJsHtPVc2FmVD+BEE?u#EgiGKffS^{PT+i z2q%b}7(so(_VZfA$gf*DxY!ZbBL@Dw`{%d6x&6L@jR^=#Kg$ZlGk>l9KNCfP%n)L; z`)5&<_t$m*jZySpG3!5#`@SWp4#7gsEt{41Z%v!^BHdkB?5_o%--VFCKM&a#FZ&s| z|Hupfc$(~>%>1tn^XCfthhg5{LK}p7CQvK?8Cib|@QCI9<|qFo5b4-j{VL@QO`H&! z{(o(3|Amq1@6>nk_4mMZq0zqyOc#R}YW{Cc%gSm-T$Y?h65K2@_Y`@Z9?7xts$7)u zzsdW*BQ9OUm_Gu}Z;|OA1ed>bayFL#Okg<=82>b|uyG(>`j5Tn!t+31^GBJ*@^4;_ zov;5N`Kh?2o+6@HIWNinC1(Xq257cG4J!KY=(g+^ofslp0li)n55N2EMbYs$etR+a zKjOE4Sx*15OVH{6-XHy4J$={4(!!2W!b1PARrl-a-zq|7plP6Q6ALSQkgNVEdVjm( zV)VD6i!R>M-tHgV_IEM;&r%c7g8dUy`Q`P0;pG2?0iEljWIVSFF3w+X{oOJyqWs@j z#)S#}`&b6Yd1>*RWr*1r=-2_^=U2bV^jlT0V`gH6K#lZ3iHE4(L=hr30RV@{ z`48qM`ud3W`!99&r&j-A(f41dwm-`MbG5NvIQVaRyYSuL)pnune;Guo^p`69*`EImUi9CAU!3Ere`2ZX=c@b{ zaMkmh5HJVgu0J4<|9wF9@6gV_@NpoRf3{Ec7eL5g3i_w7{3l|@zYh-bm&-c$g#Y`4 zHv5H>{~Jc_pD^_QVjue)qW+CB@VgQI{PlvP@au>38xZV{U%1_~?TH>FBr+svaSq-NTDdQ~?UEK%Y0n;1)hSh2Sj6z(+oa3Hq41}0iQ4{L11GPt3#GV9J{__hBaVq0 zH>>>G#&yZM=62nE+_PTC@v}STqk;>|?z>v9Yh{C$Zi0e5MtKMO;Z^g;A6eUp^|H)I z%gxz?v9KVb!Y~*zIT_SXI2u-avOBulMr1i$Y|v%)C0lv-U?F9++J4DwrY&4l9`@Q# zc%;}MPq#HhrOL*DZ?l6k?{4@_hv{INuSM;*5+3kM2-L<(d9)QOiTwH04lK38C*E?& z{3FTE^Ec=pz6s+r{?z#NOSXaM+0g`v$f!|@d#*;64f#@8^$a(Jh|8?7%yg(aeEMju z!BXZqL(#$3!sr{hgk0qu6^C)>1@&*$_5)7?q&yz2|1dzz+Y7M>vM?C`>HZ9t*)YrF z62ox&@1L4_co&HI7T;zx4=eA`Mq)Xw)$J{J(xjAno-ApXuhk!YRaVtB0AgTNM3dN* zfhK6-{2`0xpvL8ql03_a`f~dvm73kJrYmE%y%>7jU$T(M)?2PC44>>xSZ3afD;}?N z9i`1mjCqhFvH8$P1{3Y4C=pZAkw|sjTkT=Y(r)k=eD^S`(Ba_;Dj_6N@bK-<;+L+f z4fGpq@E?PrvosiSw{Q`!*1zIiP=EMA@bEk3(e{9XIk>Fttl?muXrZCX<%M2^b;}!vm9MLo z!B{~zA-w{7i9q!|moaYY10{z(+WRu$Mb4tz0;`z{ATs;#jCQcrd5hvPa||iVTy_7bq8(f5qV^18k8(LH&Oqm_!?)ta^H@YeoJ z8S}n$35)CZ8$8?z1ZHPr73BGfebU-r9xa3MW>X@L#zKz{4 z*}rK`6Kz{gjdUDx>6g(XvoIG@hK?73j> z#h=8EN1VEo_=e2I>yI~O_-4Y`bJa@>2h%0cs#xG?uanWBkYtoI;9-sv8R9VLRpj{} z?7jRYQla%y9#I_G<*3NySTJbN2XB9@rOAg10&@{cfH$pK(tVoeUv59bDic5oe zNjpJS7UHf^Fz-|fFHFJUK>a4n?8{#)u+|w4;l?tv8O$*S))Y|SCLLIi%4yjbEKRN4 z`KkrUWhMh{t%GfWg5P~2g^CigNmgw`HH)y#g&gl!+x?&*Jq}tF zlKs{u>AC&AZ`DGzHfT0bdSEdW7!`#He4H`hyu(p|J;`?bDNRfhN*jy?qlAc?)y%&#MQwf>G8?GawHOPe 
zdqV;#<%n;sc0itI3cA#n5eR}HW5!%yQ6V0^%CHymrR9&@6Qz8sm12TNI!ZQ;hkkgZ z5OKfTzn&MWOSz=JC9(Pbt-+2o4CWn1CS01RRp(MATU_V5SKby*Wb}IMHXfT%5Ao7y zML}2hXo0qn#7uC^=GNN+w=Q_y^lNDfXmc16O*gzV`*B5n@NA&^z+wWJ7?r3O&tBy# z7RBAQi3z5fcF%Qd&pqUM;c=rUaVlWn1T$X5l#GQ5aA< z)%<0gE7;!RKc^F&AyvB%948b^r|;)<-jO3!wP*E(CdPgpb-AMst%dRL-xb@X5^Yp~ zj(Z%L0}*b`X3x-c(D4;uH5wRg&UR89IA`K<-UTyABop3mr_Zl!mgpkibth!|JP+-_ zrK9p?V%^z@z9r~Z*&Ox8E9sUT26EdFNQ}>p={SeC?YywbEM>xPa*A(W$`#|(1kXVy z6Q;<@*Q~kuu4&FS1zGqZ14R48;#>8|Tg40wQPyCr4h~2dy^T6c4_uTC6;_z6QyZh; zfm@m<2wXi13z=yK`zd<5c8!C1+A~ThKC{*`OpyFq6$3RkYH?K1 zO?>Yv+l6YGd9aEc3Qk#&!W*#2Hig?GNb*GeQSYK3gGhIz9QQ#E$NOUfca7TN(ojLg z)$7!iQn!IXuR<&B(0=gFMv1X0CVs$zI7c>uEIiLu1|C;H$9W$tMwY_M(&A`$MRV{G zj)wFm8ZwSiG^1nxYw4aWB?@T2L@G$x<%S<`&LCnpO>ihapWz^@X0Kk_n_QgihcwC|drD!dJ1dI+>A;6+I z7am5UG)P9?;?iqF&jC50@MggvK3SpNj|ZlMxi2vYS%mT3!$ZMjtK$%pg~C916%HO* z^`Y)Eqs%|N7lI_XuV0kJY5JUDp9+OFa{PU|XWI><%H_(uPlXV8% zX++$=PVlyq?fA|$G0fof0g!K8e`Lnb`jTGuKlLPC@3ZWG9W0k3sHx~E`dYQ2Cf&Zv z(ofk#kb*2GCGt_{7MDqJw=K#$=exwmvGR91Z@F(PO7vZwaw1!l2PtA1#9L+J6RgFd zNG6kSo0pX1Uaoq^Z_zNzu5d!s<)=+$xetb>9n;7@kx{KmC@sa zt+*T9EiOGuL!}l5sM2xQ;RtcR&f_=9q3+^W^m`SJEOG$@TP*-T~V^GGApO<(nv3;4i!bzS>+BZj}kacV!T3Nkd@Jj3isWS z_5k!7IMKIq5~xIkmsF(h5Acad3d4oI-|QAS;#8kUvQt)PfK*yy*&9qX`>TL7r<*c# zX-fFX*8E46EO`Hdw=t)WSw_?|=A%Jh$tR4rpg!HXZT{78!;2lui08)opmVkq%0UX(&X zW~f5sDouwGgLk!WjKusJJEAGgmxqg1;gS^4X%ka<-1CFn zB;&Fb5HBmbm>b4aa@BO^y5b{M7QT#CiN;}Ai6`d*Ivn?52I^<_w+p_3PqKs2lEicssrEVIg+DUAD1q3mQ0CJQUNP8uS1*Q1On!c- zP{w8c_4ccX0Esjhmbua5=XblCUCd}yJ*?06l>qi3LkSN7mMN*YWU{LsYw!wbai&`e7mGJtkMmu2y1zP#m<1zPSE2xc~1*c4srk4Eq)j!wCsPKH?$(;HFw013}<1Vo?6wgcte`{^jCK1eM0|wYq26sv#z$*?qOTgyjU@ZZJD|ywJBHDo zVzCAwHVz{5d{6aV2KHJ6Y}cU9LYeRy^f`mNF0hVb=K6gKVv+~U4-npW&+5li6vAt9 zBn(T4fcFKLFH$g_URU7PL9`ol7U@jR<@KA8O zessUBjwpz2vR}I&Xmh9=Ui}`VS^_%Pocn1$(cM9CwVyd(Dnx>KZx zz>vUf{~~Yn1m2Pd?K&dzyPh5_Y^Cu&6$`@R+3IG`dzdAU=)wKpsxIDBgc*P9-w&by z(^mZK^2&Ws|0T7TivTS&$frM7ot?72N*tz-gS^Su{etzqgn5kkC17%WpGsV+1i&pf zv1ut&JJ}36wnal=QBw|k-yAE}8qUT)#51Q%kGg2UaKjrTg-(rKSF5s4KsWrsizvD& z-JYHzM1>pJK3w|}g#na9{SEp{Eh%hd4SQ<2SNgqzM`j_Gi`L{(#1Nw-`{!kr6Ix7s zolSWAS$~+n4VL3!9pH$68R{r%tAtLW|0|q zwkh%JFfZz^XXFEDv=oKlRl_kWXP6(CGw|VMKkuNppK%w1fLS}Zs96X^Zh!6e#ib;3>&q^-f?L=AG|k z@jJX)$u5hud`HVA>|7iGV3?Nw0IF^?;89=G#z0Fw4BF`U!uobn&oR({#Xfj^JIC%v z|86UR_Umn}7o_-|m?aMGiwV>+DPGD&*N{bxJa$Kn%J;@y(Xh!U=^+yyM|mVR9pp^U z-oVFz-gGG6;drAx>odOPd*P~>GmuvcqNCU#uNWQ%$TGu&kv#Ur2c3Z}#kz?_UiIF_ z;yL`Dq3K6*yhSwybTSwL_XO#^yo;&EEXyukenS8(p>=-`gxi%M%80_8{OS){9tQ>p zl!|iJal*YKj`8szqE7-D*O8_G9zP0}&KF#_-7gdF7oZ?`@S1X?Hj>^j*9m^+cU_)k zRksyw37N?roN2MJg*-m|^-Rj|6>GeD&P^}stsgC0R-SR=1wb*%vAlIf?GkX64XX3s0(}if$L+86=S-3ZDBy4LsaDvtf z;k7{9U!!N+pVk@S1)75ynl9LLgo~nN#M5f1tP{@8!2?3v!+YU`#3zKZLj`e*#lOHPu`U@4|O;Gje4rD1*fYdq+ z62cF86pNY_uAk{@;v7!IbA2HcijQp-%FhPySv8y6=U8aSVRzP+gi z^I9fjaa`4qG?NOWy2=cqq67RO%$+HlqH>(rwwo0NK2h>q2w0NCls^G}lvA?5tgZS9 z0!K4Sc5aJyliCFkpe{fNZ127bzD{$8AXzQNZGLY^+w9`&%Gj^aN^M^ z6#~#uD&B(f?U9E4%dQH5oK%+FyVr~e{Up*J6e(`KF+!fFZt?cSY?$u=Xjumk&NCZ- zr#|QU!$o=`0hdzB11y^`c8O>=h7C8%4&*DuPY@_5NhfnR%>?B^B^023rhGAxHTUaW zb`sBo5>R5kimXd)0_cD*)ZiU3%@U|xOSVoES@C<%)2)=C9gS}vIU$}0UD{?F)2$Is zQq7>0$%~0TCiU2Figp>ZEqnor|Djdge93Nslw)x?si}LF&0M0;$#9XLBTrg*c>X}4 z&ij?ix7LcUHqr=TcLomY;ck9yl_t)DQXGIbl+Sm{H{fh^Fz@j<-4ftH3kHut&7JuO zGI2X1qG+%?ftz{VUGa*KKN2(pAgngm(Vl;O@HB5Q@3WNP>l-q>KJ@YcX+RVGc)DWg zSpvG-!3h3Msl9KLp}of+9b5d>-R5cJUd>XBuOq+!hkj|A5&DAK;-sFCkXTQU<*T~N zY9&c3COm3vG4JdS>xHks5QWeT#Xwfoc2{+4i>EX3?=VGNsD;B>=(9jP+p? 
z92`d}Erz>4Ct<`U;rAZ1y_9rcQLEPQ>lyI>0TPK!ic6Q#tq2~}W|S+9%k8SmJ@2Hz z`?gTH$nwKgkswCI=e`*O#7{34=50pd)ys-6rhx>X5(mk|#pPtBVnX8-=Xe|wxYo+_ z5`pR_@SLO#8vwh~ul7CjRABJXu&?YOBH9v~^ASB1y)7l!E~!?}`-a?u-q++uf9CFd zg5^!V9snU7q}MAg?l@F%_&r`MG3sS9zRvd`vb#h~VEdC8ny6gns~yJE@7<9Rrp4Qr zzF5WmIC!AzKc&E?pHkE%+B^Y@2{~w?xQ)$+i5M#i&>AJWC>Mm>B~yZ@WKc1SLOk>l zNqb3P&^p&QQegK5T5KDJO@uQ7ya4H-VDu$^oK+eNOdv*EscyWd3?L*H15>Rot!FO= zt~4ta@ur2tk+L6^W#>d}_iH_i2Cd`TCul@Np_|R;|7xX`J*(bG?Gwfwi;M}kt&a|%+^vj7w#YFO1u+}ay3*BHM-}TM@y?w3LV9PFsw_@rv`{){(yA zM4^r^iTifnDt^tvQ$JS>0{aw?Lz4;-oeUiMF)5>x^)=74(UTW4)#6lBT-8RMo&d-g zd)I{6d;z#iQs&~Q_UFRBx#!&q^hNsI*E5Z>YwcnmIMe7Cxo^5{&y02iPYzo!iN8k1QURw)~h=ljUiaBM@ws@viKRLd}^FXh1?5?MeX=($_@L|2ZZRk#+as_iMXkk=3PK5&S8ujZzX8oiy8`;*TR&lb$cEs7 ze>6z=t8cM#_yr%Rw+U7{^pnO4C?qd=%J>8dv-Q=AZ&nN~ zh23D!1z`7c#+XPV&H&l^64~n;Ui+iXoy(TcqoL_fF>b}#%DHNNnsx??yBZn* zVBFZl?^l!YpqBD*tJ^0PbFdmBmnDL11U3%fPuE|&$;iB73ajP@S+zj=N z?91uPY$iz?+7<(E!s$sTT!VzJQiyM!96eF8*@QX48+dHi;wsy3xe~svay!`URE_bc z5r}E9niAy$SfXZlWhVy7ozI#K^RB+cI7B!1_*VDVNhUzOW5N2~r&XHh$Ao%NrETMn zn+HcOoA6x2X>cOg3k|ucGMwqgh2=NABl@iW+JolU!Q_XR z!Y-@LzcdPR3~vp0zMeyL_lmsu56b*ITAv`RC!ZX+h{aiF+~=vrnX~H|-ZEy=c^q#@ z#;*r@VZ=P4XnaSm-%!XWL~-yg)=ggJU)8gF4j3y(~Rv+@Cae!Q7zY%xyx)q3mqw~_!N^@ z(O52}pj%WfVF?6c1|vrh$pf%C$@EgQ?f$s!8kry~3fflLkXC}VBZ>N^`~FQaB{(!? z4Gof*yM(2fzfZldvL%fZ?Y7!m*fh`ATHQ0K^MB(gOTN^)~_!7j$0+9`dKfr zSe+Se&|#~__e?*dEg#nmn(r7Sx*U@?aHd!$K1q%R@T*E{U;sGc$=;)73D3hI^31}+ zP3eYG|8O)HuOKO(U@wC5Ayaq$5rWPMt%6qd&5=s6=e&w7jYtiR_k+D5IK8#dH8Nrz zTQu7*U_WE*a!S>;J)N~@F|Pz1Ni;A(-(oAdZyimEbhUa~5GNQ*fEkSo*NPe)t2?bb z>+{u=>G2q&eXj`}9;y#yw%4zgZkl6s&Jp0D#-yNn!R4ONpr%RPRr|c25J{lw^JM>m z3*a>~OdH!Q>PvqOp9(*58pS87)4Nl&Vv_9_W?qKaW8m(fSqw$3l6MJr>W1Fdql~;x$QFio$bRL6KAJG0(X_@`B^_Q99F{UU0ij+ngc3;;Ec=;fF5;+7r=kr)&PLEq5ZxeOu$}T|!M6Iv0%gf6Uak zw9XP}yY(@Ccp`Fymcu*|F<{V%*UW) z=-Za^UhZkKrO>^3DdLhp=)Z3B^n9(}#3u*u7|@C-RvZjeA`lQ-tdl@Co7&toqiN(rrJGUf_$*&V<) zJ9}x&pXD;P8VnCsmr>?-U#dKsE^2Hek`b6-adcgGZhzrDm+Xub6WQl&RFtZh`$V2x zgIm$X>f8HFn(J^&#|vjL;PcKd$UB$D|zZrP7>{ zf-OA^6d|mRA)bTIT|A%e>DFK<^v)2}=yw z(HjS4qj4@OHh+KC)omzom&B?O8Rc@QY#5IG&5`?74Z+SVr>7AF^Y|I}YgsUz+hS!) zsn89;rkzNr`l>H=7sI7n(Tkg$Gv?xxk#^j18(hbWL)CcbB(Ct3#9J}jV2eS4^nFJh zfIn^MdhZ4DUT$khYyeYcvr#|PnP{kCmMj;{NJhVy+xUUA71+rHu<(|_vWd_EIdv#pZxF6m- z{c3i}MHcpq5Si&<1przoo!$2|?#D3W_y@D(Y83ZWEH_9ykXuEXeESgg;YG%Q3cN{J z`1$dZ@3eY(v%Ag!?AGWuSCBGmQnXKhOBzhaWKn#)Go-EgfK1d3TMlgN$B1}+GB2pz z3<(Wnz=n*7dxf>`BgN)4Dv;}w2RC;}PfCMb6wPI88%C*PvD8ZYu7+2(s%*h|!}oEb z13X2s!p#^4=h*<*ma}Yc)*X=$@O`og9Rt}(3@Hjhhd|dcMbj zTx3c9y)-UG&w%3kEo(dG%1OwAd{Y22sY=X~?i8U3s?X%3M!uIZ5+xAZxb}QTG0-A! zGCmTa8NK%*0E!N0ASlhVn>v4KrXRE#{W=*mMRJ_ESVv zF=CuX(q!e-0?N5g?5t%%r0%GJ$v7QRQ!e&2n>Z3S!yw&SSB-|!66Hv6> zjK(}1MJ_hRUu!Zz&24zYx9sVc;slH8PJ?N~6J`tkwph0@C$yZvj}kzZhL&G#&QA7f z4m*5g5^n4z-LWr5V0|jtO0=$^^&xK1eb4W*ZOq+$Ae4WvG->T}JUQT8|Y58eTbpg@onlMB1lS?67;)eN=J&hLX}yz zH#J!k>556p+maj^C?fuyw_lc>*6Q7m?NAzVZb{-b7WH*4K(QPXRqZ!Ru4-1;IgCVl z*LhHWY?rX!K#)x?1+!`%0bHXAX%_fZoRp4FwyRYOcQUTn>gvFS`ZU6n<6_mM1Xb>> z;zkL)JczGh1R!9!PG{>Qw5H%<@wi!qmu#)k$Aa#MJdSZQkwW9q_8shgH-vV;rXIc- zgpktemi%*;O_9P0=LHgOyTS-<_nj{mBtNBg3`iV!k1+Kb3av?(@-Ho;Da%+XmO|rW+S58gAUg$$2E1jegP3-&InZ=V#0Om2Z9-YQbDmw(tNCm+8c?G>5 zGX)56g(g2S)0qkOCxP^E`!Hx!egZ@`KcCoRCkrgTTQkyv2?5QEI%Wi1tBB9~mIL1O z92VkZG%c@4Wi}5iCeCC)7~f3(3DzbQ7RMj^!D&_#LWiq@Y4OQF{0JzQ)^DMw?F65z+>Mlw?!cn9%y*-g zgeclA`dGJtHgcBT_yvDeV>1e^6tBk;mKy^N*!Zy~MW2mFqu8Ie-FSvpu>mJ-Mteu? 
z&dxX2aI#`4QQe0-+5so^iYnn-*hCdf^|R5FjI64JO^ z;QOsW^r*)I)(ehr#iV?25_e1{Qcri8B=!$?UCo>{KpL7P($RyEPK^RmSS+eXJI_Kr za$+b3+Bohyo0Q<$n-oRa(%UJ=aKmenvwFc(#aFP=*R5xKQ1J<+1WzseoSTgpRZuAp z#x(YrMAM+TGvliq!PIg4*bddQ`^ymm-fp&G?LH!;yoP#H!aRd|$hx+^R*BthUV6p& zjE@LO$wVg*TMrtM!z=9GliBav+YJL>932%5NnswyksWL(4!jr|k)3Q# z4PEYY81kNduLwG+8E?*;{j1nhh#k&w-g*@D=UAqL&gi~~Eli?(z^JpkD=6iWJrli0 zJ{G*F^YGDd<-%+z22b1lubAq4{A81IPS@9ya<%HdN~b#1)dD}kpD zQDTdGp#&e@WB?q-Ng4Ci1kSig2N`Bf?z6uGG z%NltLm{@#dqO=_XFPielR z*m}`TSA@njX@=lJ!4?FEQn--<15Gk(Zvv2L1k@5J#BRx^*Y;6 z{6cT#6|Q-axorVxrk=sQCl^j@73ZT>J-ppVXv?xaf0CR z<2gwII(8uh*NoKVK7Da0AX_+WT$3<=#UPf}R=W!Adl97DN+rrXW2eQf7LJAs+0VO< z5;h4FzZ~mFifDd@id-rw5o{E^KiDV$3vmxeTh@nzoGD8otRf4c)bGM$k2P0SrvjWS zl}@XH$4z&I;S)wDQ$Avs)tZe1s04EW?cqvzh$C#WSS-x4mu z`|ob^P4$`PyNW82u<69)GIK@C=Z|I{o&X}l^M?KG?PDOR38T7haQLA0%NjuWl>u3! z<7-1gQ^cf zkF>MPOFl)ARmd}@;I*?PiV<=TNxu1HUuHFr>A7e zj`9Lr5AC4lpro)~YIvd)sYZV_NT|yI4lL1&;BD)uz1-Bk8PT1{J3w+$d}AHD89`dV zkY8fZg>O1ioO!e}ak|TWwAXO9a*9z!;{#K-B|jk0a!L!o!G0Gm^n^6_)e~#Zd%Jm4sqKcpE`v%u&n;&;+G%j}fW%6rgCU0Sdt~Xk{JYq`GUGxU%A#oy9=~ zX8?JdW5S!R7UQKjIE4EP>q3V)w!*O{G$^3=e2@fdsSS*tANYhIN z91*~PggffsJKmh88_HE50Hk;`z%DEXd?cQ(Q&1=yB95$l1jV#fyL7vZZHkmrgm`ucl;CvkiM69FoeFPr9=U z3SRp6=52i#cfAqd3r!hY39Lgi3yVLVS=+*I&9{*f>*Q#)rdOVCg(HE-I^72g;Pf;-9+iF8`RGbrn1?!+*}Cx#-b5_wC0(7N4i*he;pu<`QrY!Pc-W zDXkyu!NR2QVsRkuhuhZhaARe#sg4TKtZ}eU5O0fuzXF_@kT>HQE8u!M2Em1nhqDHMMFc66k?1Kd-<v9j}3c?>Hr~0p(fSRr^mw?F(pw6+^wWhvTJcW* zaGMsQ;YA8KDx4N$m9Hhf*9DO^YSI>mdv12pVE-tpRwo2l7zfwi(~%X>kf-Ob8w9dc z0RBEMWR{#f`EeN8qj2_wF^M1iGn@aU`?I}bCE`m)4YJ)?0H6T_&!?3InoJryTz?r0knSHDN zlC6@r$R-P*vOAOp*-?{}>wv6WH3t8ec-5C+QTnTZ$IPvp2Fz6}Rh-xxarSQRzSH-# zK#k`%!C31cO=cEh{WYf?yxtz1)!Gkxp403T*oZjE1z^BZ5#P%w1shL`3c$2C@wD)g zvw(jtywlyRQV0H|&ev!57{CO}F)~bwrAfxw6FaYr@94EhlmQa(T|yN5Iu^iKGcgm# z24-;kslZ1;(_-1{!ga~kKe@W#s9(86D1KD~RiEe+z#?L5BKsGZ^LX9>KN$|dBwb~w z%9M$h-?0}Uy$|KG95+jx)v#{Ga{v^9%cTTQ%O?fKseK8 zquD+`lOq2Fm>vt&)(c6Fce5uHmzP24ozf^Z>`A=Qx|-8VOP0#Xi&D64YzzqSdm?e} zN4sSJYN!+f2d{S1F5}UH^R_xLER!rCVh7NCcezuxyoc`Po}ZJ@6h}~@Q4xn&A`wQp z<~xY|<$6`wK2l)_E~J&L&}@W_@5k%d2E=h>H(YFrJh@LrRzH4M1eT=)4M7FzwTujD^hV&-(pA454jGZrC>4m zfJCu0)#ycTm-8VrH1{f`_G%`#!Gi|387H1MdP z_ktQLt!Hw7J0EjZrq8tM?h`;+0uN*-VtL}F2@VieSwt*i5`tn4`QeLK3D{sd?2wr= z)Q#NzeaH;`P5RA`y=0&0?!jIMHNIfgTizYDtN@6Q3jPUhQCDDl(PYYR$91;Sq@RkN zpH28Y76u67DcFP`3#Y}FgWp{0+@r>o_}YopNOu?ZIv5H6gvg?@2|BM{~`X%W#SegRrn`u&?NwHOd;FSAhN2poq{1Gmqu>D{p z!9x;0hf;75m~Js5McC-AS^9Ut#%-gPeL+TgjIML@i!M0qk$qsi_ZU7wb7CsGaTOr( z(0*~Rw|s#6S_a~es`d6jOMmZBu&?J#IQLh8S>@lp;{hRwE-s^%6z^M{cL3gb#6iZ9 zwhzq>!Gqio_To7NhhZF5bPd`*BBkD7@{ui1ESG)7A#J_R9%il-3PMX_o_a~!{$mPg zx>sWW29X(HOOz#*|4=b*+hL4Jv6fO!#p<{X(Qj+D&jJU_0vbGkR@fiaqsovV4wmsX z9D$mNkATFWSdK)jE}GE$LmWNP9^W8i)c4eVdZGi#w*Zj z$2AL3{IHLSJsrwl5yPNky8%UUQm)?#xClbe7-QD1WYax_?FS;ol8;yes7Chm8xl}#r+iY3#C=zJ0=jqkVrOpd3fO9-g4uHfFO!-lO`z0lXP~PuC0Lq9$ zuMSYcBgRsk=U-xfFROlm39~wInD2h+@L5q3adhA^`6Lkp9Dq;eH2?Zp5Y&o@sDR(O z2hMNNLW=_%0hmv6!+kbdGS~C$bn{Cu()RxN-tx!*Q~kbfg)=CYlrG;>#ug>xg;we- zv`{lqf(GXW_GAn%_AE>{_4^$`<45p`EBBROV|5@y(C`_oUOxe+B$!O?!KRJvr;sPA zyx@!}<aVvGv0#@WE&VjsgI8%1p9r{Y7lOYz03~CJ*K7{3Fw8)e{gc&rH9yE3WZNonuluit zG;G6b&?vAWI^te@1_+*US{gR6y;6plwM%d59>8XiA(yUPv&*N}??Q-8ljBzBF6;>{ z3BOZe*Q3y`0kBa-7vv|bMYg^_6UhhJ{%TqJ`L)hDe0x=eNB{+}4|%$ELSm zAr1^7D&25Wp>a@ck{aEJ>veZ|vAD8KF8n$voQRu_S7nz>Rqp?3@2#S$?7H|-Kv0ly z(@HmrgfG%ah)OD;G=d1yohqFIDv}B!seq&)r8J^6(kLyc8|jqVqNvz0T!b&P+ZbeFr?gU5g=Uhhwf4Vj zhM0t)VqHFcx;t%|*aI*uRA86F{fr=|4kcD!cinKgkOP=QgfWqic(=`o{9iFxlJ8wnM73#4W0-Kn@9v&V0v$t9;IpU=RKAs~cg;g8Qb z9OLI&*D2Lek3r)j;RAvh90({RCh?Hkq8eKTo(__%hp6tCQQ|Rh>SDjR3=DlUB+vMH zjc7Jk% 
zp4CTzhTfuK%ZppB?$i!#+ZMhy%SUxBf+yiL=)y`y#IEq!Gu!G>`K{BO~u1P2|ZS71P_?1omVJBkr z?`+IxhZrOVYG;`PDsGIhUZ{u5ue|*F<_UdUH{Q1NLh}8QHiPe6$mS%_9;FZD&vlfv zF>I!~(C2t|)<9qV@f`?3mGS}A9LfBYodpk;e~fZhslJurz_q16|8>#OS&Q&`6xfAC z(-FLK>kHBlA`J{7c0TuD=)Eb@RVUB~CDWVJz97bzqhw>eD5=u=vE*OV5*_T6p~2ez zfMQTd`K09;XB}Qbj>v#N$>FE)JSOxA!^1^A*UYDzwV-08_k{;yxCw2KapQ>TAEdw| zRA8eeTq&MTrNyWG;eY6R8$1u5R($|`a5~-%q+MCeKd;B{X^X^SOAVdl2vws<;1Vfe zgv<#QX*md=T$_Ym$Nk4)6?+x7-}sbzVCS#`I|u$vV;MF=Gs?5KKNKJqr~?f!`jj8c zCJ{OSWyHQYfU2$g^mBijOrpV~Z308^h$CH6Dfr6&%x8W8c6jvdM-VEsXmaT0l} zkPu#qO`G?;!6ipxF-v*$lp8UvQf(8^_jmz4CcHn8sv?=G{XovmO|Zbw-&z_!k}l1C z%O8d4V+Ai6QgYoEDAQyAZ*%aTzU*Xam9$^|jwg{*cHGL*!pXy8!HvqfnRG zhB3K@yG`dp#3pBz9aERZnOluJjaQYwGp_!=RP$FI7~hG-pYG2Mv>ol256Yp`_RSie*rHyjN70&DB{omN{;qz)eF%wVDUwtX(Hq@GB98r84S(0~ zdk;OKpRn-jsnCb&rK%24*qh1iugAHV(hoAx4FRKg6!bmFE6Jk>Qq)?cV*@o~0Zf5O zX#!x3RwfT#UGvo|=R5M9BBTf`04R9k8${P(1FIAZD67i*HRQ%|QxNKwC|Z$RB;f14 z*CsU&i$3QTCe*eZ#;{2%L3OS=?|cd`qzS z{Q)yHbpj?(1K=Ytt`Bww>kR>9OCBZvDN?$Xi~daYr^*Cs=*%f~d6uW&T96L3~4}U#Qwz*B}*p;r=q`;OBJb1Z5v{no9j9)->W+NmBgth8dVvQ8o z#&z5@mXA)-p1kv$V+pF41;MG<&iD(7`hR8Q^!+}90+GIAhwFA4Yqw3q#!>no^fX`s zecWUudASjqbCdK0o=R)`bJk)V--42El<6eEO}=syIsUKJ77O$gSerxWjE>4Vd19I; z0e_gtDgJlgP~Cq%-4m2V6s_|J-bO@h-PL>dUzagRMI!J8Dh%)pbdVDdu+Grqfmc0B zTXX!an9r2~1*{c~IxP=`^?%SiF%S)qKbgT=>4oEEla~1AI_&TkejLNGqu)cY_Y-S@ z4MMC}*BE{SZ#n(%m;HbF7R?IoiLW%qO2@b`#Ah+uYy+8!fAH|n(7EDO<^3;2ND@e2 zUb~jMW~64D`nb^=LmViFnE_9ojivEwO|=6kA#Hh6++*3m*a)Qz#6ZtuHJv((6f`zQF-=Nm|*q0kHKnb0UqQT$VKWNx)Oa! z+YwguN)h6~M&ibuKOIksZXVenVF*|dphg-f56}U|_79ek zRBngGdh!@dK2?j>8CV9ptukZLrWY^llb}>Qy20&WNFL-W@&?FYjMGfzb&OPr8>t+k z3R>cphYoomNA{!BNTwF$i!-mPAiwhFS@_^v^Ib+D{YND(D5r+W;mDNXX48px zqws|K^_@rNWAum}`C|kk%Dl~W;uq!IldY*~>Lry!toTHj+7N|2fWxIol{3^ z`NRLT?Qr|p6ACvZoidfE9!mt%T*F@bnu%s6b@zd-PwhE`_F-nR@xv{)Qb zPfp+FP6bLW)9OJluY%<1wo&OiBtZ}{)TxKRhlaRv& zKP<#F?)1ME3{hiERMM}`E2OokcHHWJ!g1ib(H2Xk{S3y{6TSLJ?p1PCNX`62T8OaZ zgNz4}<$WCrtevaTszvvVKb)3BSVoa|(r4}VzS5&XWJ7~^_1(4iogBE{n_icS!=67L zP*OVd+Zf>7kM^bXtJl;8;FlOnjm9jKygW-2uK$ai1e zc358{;iTSamXzbQyb(>vfvPimTJ^0Y?~I|&?jwlgZG)$Y2?-3HkG+1I$#AAfjhkKd zq?io$7{MJH=7Y!w?ET5Vk4-`ghB&Z*17jF_2=*<2*7;giFV|Hs!hg*nDW?>wQ8yn^ z4;MB3ceF-sw7YxSh__}Fluhms+EY9cxSmjKo@RNj(URghbqwFVHeXZ0d#~I$gijCL zV8@sdu7zUdRvW3m19rQZ9-0i2#DS&yL%&hCZW&U)L6F}wO1bB7E+68${R^~nZxPjx zLRPr}&92(tZ3rZWnDQLy!l&~VKp{R3Y@$uzR94qzod<#C-y#a%<^ zzq+F5#0w;hyU}8>p0j@^CBcM$9HCEG$g6PSi$)_ufQyv>5xocfUqe7)-@(p~ zd^4r&#IJ6HD^D5X<*+=Q#1ET-0^J@ubsqp$Zrk@i*u&`prb&^2DrMMTyO zoqC;_KMJ+o&!}y?WKTvZ`_H{pS|yJf=zq}7oGoF-`c#YWx5$(2yzc*nGDrmOMX4iz z-wte}0ON4k{C?X>QUv6M*QCLhgM5Aug#S)fP0e{x2seg)t^+C+pxN5q9xpKO7iq!j zNbC_BEzDF z%O+?&N8uJs5gdiwWL~&|4vvxWCHs}oiq4Zs^y;)rwOe-(`(OdoXQiW#)t%+^9S7hm zVFp%y{AG2JG&>FY@o%4!S&w#e$k8iGrTy-AhvA}S1Ubv4Aw-}In}wu$apZLU(0_JbYxxZySkc_RD>Er23JCj0KOVfPpm&3_>Ehl&@ zen_ZfrExQ7=MKEKI(p!K9MOP*_xX)`3`z=L^4WG)n!G1~OObx?q4-|Qh?-C>Rt!&n zp&r~ZM?I%g96JPU@NnWlJ)QTqWYzSPQ_W6=C0;mJW2mMAgnH z$=&G*nm>_OdG4ZC)w;4=?I($VM#Eh-)g{;9C$=}}0zVVWEPHQBs4C3;p6?!zxbeZn zOZ}*hu>nE`w&ek)kL0x=pbicrWo-u2GGXyP>LE>l@!QOqmUfqZWPLf9V3?qEc_p(Oc&Cf z-HAg-#P%&MxJTad;tz6a&`;X~KaWAjI-I4|w|cPAN>=LwS?fY!^^)r{PhON5sw>k; zLCqBx@_D2?w=r4LEt!aB?MjOHxO<#*?&O1LpB))!2yTsm%S7e)AHM!$Lf_WPCf`2M zKagF{7QFYk-fC6j>OpPVl#pt#W1+6n(YZ@A6D);zrt_#lWUSqi! 
zZ1!Gl^?f$|HA+u+4C4j%B&oJ03GxrR-@G6JTq6&x;lx2P$P(Bz5+PUfAXkj#YO4g{ zeL$AFXo=E%3b0V>g?ZD^v$gX@V($fJGjVLbU%5NnVt5EWb}6+>hj*YJEXDqHYJCVj zfU#?L#dtAZ8kf{6rx}7t$+Gc_<9+;&H}I5T%#(JHM=x(Kj!MU-_ZMd7vK^IWv)SxzA-P?=NUrC-_UuxVBc55CKR@M0tHqnn z05IF7>hWQuEY|mIl%ea`_HQwxRm1)z`QpQvjeF}>t^~Wb?sjahP}VoU#-Y{YMvNS%(>kBvH%qYpn9<|kj0I}csZ z_h4+pZ|a+@JlWFyHdw8?n9Co4{jQzVoc+`&IeJI-jhZlNOpM=<2&6BEEe!E=$73!v zRYg7Y3g`W>dKkr9@@TfGqNgsAt8Ufd`T!3Z(`xKY?bW38ZE}smf|bN=-5?3|i@i=S zpa_$Hh-qbvU2bZND#v;Ya!23g{|Zy7&e*wuNX+ze2m!e>C2dfI&JIGT5k{E6?6V=m z?nPesPemuqRAH&rw(CM@%^^j?+gPn``vaU1$94{J_#M{>*RI)}YJ z1yMiUj`OqSBWt-k$v(@^8@x}|`z_(o=&~jexE>E^1H|zoC$8-E`_11S-N)~OMJ3S< zbLwC_>ZejygF4#RW2%oq3$nmtapYU2{Qn8%H+wB=)KpDpXq9T7?)vf>s#t!Bo=LpQ z7-Qb9n&p7owAXI7sry;DiJCByD5|;R zCZv|BwB%&91eXt>+v-8@bup$merM>V{9LY(tht5D#{S>5LlHq5Z{9Oa5SwwJ8vZRSvWfDy~|}!cX`Ed$IB2+rz>n_T=f@nR!dvBc_`bagKVSC z!K|>0mY7{uE}PVpR-oc%Z-M(67UR%LUWRPb?}a-&gl_`nvC4ig2ilvA1$(GupSMbx z(xKNC{^xZlm0xGtzGK@!UUQOEip@AwDZz9JV567i`E?|L*3j2B2~Q=7n!UF2xLLDx z8D7}ww4d20H7iV8wr*dD>5=H9UCYyhE4|K+IT?Eha)xT<3&k;>cxT0~2-3S~#h{iD z_&z?%)O|z};~Fl_%DBk;LTcT9jo7;321~w=|E;R856E+U+0tF|Tu^b?LrF~3#(Vy} z%hY4FLog?#p9y)N*x026EyMWO!dP}(V-2ricm2Ue(ew82UvfFqiNrm=u5FOd3Y&;# z+;8H5&sqKVbI99Rh0Vh`#~L5<-lTCwhJDa~nKb)$WevRS9wbO63@s;#JAD8EssgKR z(f-^;r4)eaB?T+0?9@hWgwU2^>mf_IkI;7TJzPNMBZvWJQF$X2`*#UU7YhK5Y<_&K zcs;A>i;}5d240aVOs=Lo5wuDP|vd{ z0FT8d-A(x~xMmR*j`qE9;nzF*~o_j8C6QYZvw-yS42vis{=(%^_|#5x@P{+z^g zvYUcvQ2Au2Qc543L3id)s&#UxK{dDyhaV?+&4JTzVeiv#1;;wSBdaO*1^o{8Pus&+ zUP9A#w(RLt(!}M>;+`v$lw)4u1t9Kt0L8M&VgaSzt5AmP z{oHVAwjDK=`DbPWCk|YcUP~vwl=8?cohdS^?34MzgTJNofi4^mk5vPm5x~m7zbAc% z#cPHgXb!h?6yMX0t>kD`kwpo98KZvIf5A@Amztng9xQqM+hVy95!LEN%hd7nNMmRs zzXq&-^J!N%@XwZ71dje(Mb+JZTIg9^ksG;mhXmK*W8T#Fz>aOG>|nbTe0Tu)$MoLx z)9Qy$MN_NsF*^@q{UNKZ+$E^^p$*V;Kn*Bc&GkT#Wc`|}9@5e!bAoywr#N}VPuvs6GzeJ^rNPda~1J61dOW& z)TL&*jvV`;PVrchY69o2^Ump25n70TG#iI+q!{RO5# zrLsfSz`;k_xkdM;;Fyf7C^Vd^wAjYjQD)=%FO)b;hj0`X*UF|YAeJ%l3WSC`KiMF^<+ z?QiJq?#vgZcw@S?y20ne!hfefEp~~{qd)r?g~X?8vm_bT4dhX}|6R5m&>7a82f~&; z6a*wHvIiR=Xoh~TY$)zlkb9!gD^2Mq3i7|Te*>}?n2y?2|Ue(kO()^^W4%L^8CI+mi}*7h7o?n zYyHx$p>u$^)mO65V#y<&jwULg*=F_hED1_683U7JeH3JLK?ckndNLOCDmF(NIi6ko zVQ;oh%^kk*aW%&CR&E5bOvn(|1Nnjopbv%5{e|+6){rS7kVI59_KObDf)|4`R9ETopK1B8>dZpXy&Wb0 zT}Aec`3bVqlL}X<>{9hNQ*WP?LzRHTkD6gA_nEucsdDm$$h9(Nv&lI7r(dgD zVGlG~@CvzJy_e>t0mNt&{O83V=Z8ljs)s(*s7w^7UCP%(UUbfPL#4UJjiW%=JC7bWs&;K93w$?K}f7M^Vk4l}>yu0^ zjIYr{wvY72?lGLdqqSQ9IpnQAZ%!Xn0~`@Lu818V3m8W{43~772Rp%E;jTd2SjfvS zbln((RDr?Z%y5q=rE`y-19cJH%-uh}gjn`W?341l$L(-9uz6$wB%9O%|1W zL9jdzx?cHsU~4MDq)K8h+G^yhVqnN^#Lk`JQAG zXsc;$>^M&7zXQacCY|B;rdRY7eH3qt2}26~psijx{u_Al4xSRDH1E&4q%6ybU$St= zYc|XAihN3iW{hM4`Kj*rxeBb0Q=fKoPflO>9yHHeH7szu>Z{<7ym#ir*2(c?;%>M)F%s-{NiRzfwl1A)2wi9K-&>8;H}g<@|LMwgrDqIN%AzD`E2mmYy1Lo& zA(AdL^Z(TU+IE*z+hWqaI zwAQ}STrJ*&8wq|K1CQk=**EX1O-LHdfLSYQ9Ebr^Jf&LQw7ZFy=&zf;UBd?T_(HsZ zu_e9JH(g=0Ktq#$0E*4#k7?y=3%jcDwb+`IlwOdZ?Gq4a9tfU8dbj!{8Sf9Jnr=5~ zN>1WO(Hg0PE_B$Iy)>0OEe=n3fWw|EoF|x@>9ydPPlbGkBIh0FQ?Fn{=4_94CH%-S zLMo~L2-`0>=8qv3UAh1mUacFh!S)c)OUXf2_48CQdqaLJ-amwv-U!njAd&GF_YTyV z`VVdej{S>_R``(tZ0nFIF~!y)m1~pOg9f9tS`bbH|}({%Cd#$ z^jps%IW7^`Az-%m1tmdsDjL?Qz~9z#>k!o868TaJ%oX{QX2J7hjK|e!`q**FmnPp_ zWkoETX{s`qu98o8z}9?m=w6hE-{$sLUJhLGzH%t`%1}daCs?FsrHoS0hg3t}yc0N2 zQUB#p_;iI;55VF_S{pH;Rj#x{`pslzKmE6X3!7K4=c*s(e~5(|$541|Js$N%gVB=B zr297M%!kUurniH1bE5dmG}%>B+%xmbQ@#U18%+DpZfhXu0 zkwcRMN&PPLhouxE?M~5$t=4Bdim`$_rux;2vZqe9yvd=Hhussz+4O+>y4T@Cnh6g9 z@s)hQyL3$R6fgPR)>>ktHZMn~H$+ewLu_5OvW> ztvrE;P~A^F=D5xa(> zd`?cI%qD^;>03Zw8+%LeQ&>2gj#wlIcyRCJ{v_)&QVQVTQFo 
zXOroSAcrE(;k_&m3@A)lrWin~pnd_ol@;(-Qb)Fd=h~cE9hrCgu1G4vw%Bq~a|3dU zOvIrY!Q=(Nyd)B5JZqIrF)J)BksrSfCH#$3a!~L z!ryTGxc;mkQOit-Ly*HX=IlQwje8)U7x-_=3}D-p?!e_Ny0K6B@6(U{w{QY9smE>7 zJC{g&g^vc{u9KX-)SZ$snx#_b2o=ypzM4Zmr$A&dz5m_7YS=W4*%Ekc#ef1#Fo;;+ zg_NdJ22%I?O|*LY5PbRz&W#k=L8f|;@kZ@WC^D^tW1@cl34wzMB)J_&KXf7=7t~+> zk}-hUh75pJA4Ae-EdM zNzWeubH|QTDY6V$O46>+o^~VU-BvaFo)COeoE4o&to6beOh2Hnryx`! z1g{i%cf#ptD;0?DedK3Q+l;x56;RH7s6JfiJz$qO9FZQx z?A5D(6_!5Z&L8zDl#1<%ugA`s>*3{pYKsm1Wn0)#Ajl;NM^CE_E;0|5-Tn z+Z>s%{7QZZxSQ`5(1<&qQ@GFcuutf<#Ig@k2g1OQM!h!hr6uU8t1^wgnFcvk_kq(` zMKAd9ICv!}^VGd+o2Y(Jgh=OBJL2|3cipDDHnyG(tY4Hl({m#xKNqAGM?XRw7!0 zasHE^@V((wxcvtRBV*h~wa;N%?bo6*w#2=K$Cs6q!n$XyY(z^fJ4?`^c;4RC-D~sB z@)x(f@bxRV%CFZs60VoL+tCv+&Q4`aB<^L9ROg)ea+(b?g4U_HL*ZmtL7 zo8y|r5TxK1r4idA+2hra)V)* zt7~#j9H{myu{B6e(p@%qZ|#L;M)nD>m@U-y1Vx{h!F3rDP(;XGGFB2NYhTpciS7&3 zHnsu_`qUE6xI@58Js;l{ox%#4)Q!pFu+&9m)SmlMGcJaMtmhfwid5i=cm+&6rMij4 zBfs~SYp3iX7g~_H-~@Zu-@MBkZ_ODL#gFbnBU~OVsP`jC|7y=08zyn6jr5$AhbzQ^V59sB_x2 z48fD}C%L<}D@nD!eE=Lq+22hk6FR^A2^zgr{btdCPC!RYYwAMz$RbEKUd3O||2-$M zLw2*Sep_@zn?mcHSk*s0(=m0=b+68+${YN8emVe;@E6r5>{j}_J3jsQ9CQjaMNj&! zSP9kJ=yg8N%`|#&qdZrqdk0qHmhi)}Kg7-r4w1Jhw-h=!@)XkeNHE!rgoJF-Qux>X zQg}>~X0ys+|EbT9FZX*(g=RtA=E7TM3H*flW&m zjv!X$-)qjGuD61^_G7}ZmSe%D)*~{*Ey4?-E^m$r{I)*h$<=6M?s0~Zu>JDU=c69_ zbg69&qkwkCo}mC^(XIQ!xE(2w_l>RR?^Rdr#fmgybZPGFc=A#K^JkO4=&iy=2q4$oW|fX)?0NE$9Fl~omJFW zSzA}*n~?R(J>f>YKe=Rl*>Z}%M10B5;1jm1!@Xy>A>Dz{=?sHyKGy-j^NcDHx~+KT$I=TrWG^}%0}6yVI_RAZ11s2hvU>EuyQ9xCU{ z(tS2?GOm5J{@0W9KGNUX^kjUeX6~&q9+v(L8kH!0vlJMAO()dPbnP)du{;|U%tLa~ z@=Ly)`2&zJZPisxsqm%@I7f(h6^3d=Z&RH9(n$2xgZ01j|W(-3nq?RBJ|#k-V6T%!6VQc?xMdWtQeZTU;$-j5Kni@5Sxmxjd~?zDC$W&H6h z>u;st?ppu7I}PK8*!470N2b@k!jbVYN(#w}x(UUX)0qe_;HG*caW`EXZru4?h@Wo3 zB)}OJBJm8!aKZAD#CO+jXO_+hW|?F{>h>+eEj!>|jne}5P5brfdlq@@X>d6xr9_Pu@rsO3 zbxv4}_<1lMk6iX&fZZ1$sdJ4>?fhz0FFRjQ>LYgNg^M<=Qt2L|g@apPMdBcw&b5d{ zr5Y1w5!^d%2gxdiiB`PJ1&0y$geyhT^e6Vc4stwNY>Ah-U<+u?|Ce zJzi$Fc>sve)|ms)*X@#i6y5+;#>yTJ7(^q`LmDe<>vp7b|ABb%;On+5^BH?@EyIt^ zH1!Q1qn%Ns4ZqA8a>90HG`*|Z4ck9iEh4H7;$VTUQb?m7kkA{0%i8!#FWkUW(*`QJ z)xMW}ak=!iFHGb>5$X*H)WX{&N5l$aNr)?QPnGfB9D)fyt8om-D&+MIrvZA*CbiI@ zZa@9}Tk}^Uadoc$F8ll$pY@rWyFd=Pj$3@25A!;I+sZ0?dOu$(>Q%dSUTPiRvJX=E z0cr0;iS#2`dipVij(*MQo)UH=`%ChK;r;jo-8Srafgpx#TfX?blT}%0J#Pt}Yhwn7 z;s(xIjMY6}UCapnvvf=-QsP*->Bq3#3k-Pw zna}l1QoM4if)#CQ;cZczrsottTdAKjw*=e28+dW7hCd{Ne+NB{cOkwIOGCVp7#F0v zPDy~txrP^~;lp(qMvF@L+c%Z60#a&5@(cLC+yjUv*D(R|MX`=GmYbP%`&ro14`DCn<%1s-C|miG%q4eEA1JiN*<0~hE#|H~FL>_G z1fv&+$5XGaX%DwwmMZ^a<$MZ9{{7GV{`2L^ixG*rdiAC5OcB1ee0?Txh2pTq)xK`2 zgF)>eHWyNwoA_F0x^p|UzK8eLr5p?KN4V{mmpp~DE|9)sNeRXy+<6KgUf=3!1n)H= zF@I~qd($?`(C-7Q+hEc5iG|7WGhQBNy+2>^-Nla!qrQSiK*SNyCKP?zornN4!yH(% z+HJX)pLn=nRV&Tty+t4RwKc!MV$*liX{w)*%-28nlMBq~0eBEvdeH~)b{VQ1k;4&o zCx*X@H#P7oNMT(KTUo)igus1p)1+xHY4r?eo z9SQQr$;_4b{zkZ_>R68b z_U8{xY|N{JW7BdAKin{3m}r;Pl}@w#e*Sj(e%?5t&X1J~q^wICf%s5ikWfN}!9`~b z3#E@;GcmT$xZ7L!hs=H*CCA3t1?|zdBhQp&T5L0&>`7rV;t}|&P1aK!`|bO)^v7L1 P_@{DB{c54Y-Jt&q_K~c> literal 0 HcmV?d00001 diff --git a/examples/DocQA/data/assets/RAG_workflow.jpg b/examples/DocQA/data/assets/RAG_workflow.jpg deleted file mode 100644 index 2b17f301f1486d818a2dda1884499fe92e9818b6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 55117 zcmdpe2Ut_vwr*@FRY5>Q6-ely^p0D45{d}~q=`T%0YWdLpl<0Mk^s_`-fQRxO7AEo zbd(mPDpi{J@Z5XOVV`!(d*8e7doy3w8kv1%&bh{zWBgog0HFE1TU%=%_(7RsSua6L1yf$ZOi!-#Pv+J+?;iq}zF%BNT+WHlK^_a@a5vhB6zjFFz z0k{DW0Cj-UujfDgeHxt70RZV;0N_IOU*jwi0D$TM0O01tU*kZp0f4Li06^8iU*rDz zpE#Sln*XV~i>KG~*46;PRt^9_Z3qBd{{R3`82>5l^zu*ocIz~W<+NW;rym=D1HcM! 
z3jhH)0xSW-r<5q*4nPDT`EwGW47hOqSNL_ka2hU>UHlcUkda*?BfmmHL4JjtoPz2q z6$K>?B{?}YEj7*6Yu9M6QBcucr@MCjl)m<>l5@XGUbuMWwBR*La>~=#|6=(04nTY5 ze8mNc3+Ffi=V{Mfpgs4q6~OjuuARSd?lk=QaW9iyxp;~E!ufL)r|D+b006S{r%!O1 zitHlAB?__&fOF?BT)aef`3f!Fbq@NQoFZQFBMgjE()aWY9>i5v4UymA5{1VqAE< zstc!mrM*B4xC_|T{B!(&<$m#bXm?`44;WqOUO6}d6Y1iwl0ak3=N%#_gVzVQCu3C5 zM)WxGM9uK<@Xodu(3-e?+1#DqUq0h60BmZ{TCNP`H@1&H(|DZYT(*lIF|)L90wI69 zSkNXH5LKX^f5dEhz%iH7+%b}a^o=M!jzI$GO6WHj6^mpG)-YZRL7`rh?D{FlJ<0kl zRXqT-LAU0?Az`JYtnw3qm|5 z{}>kf-EUta;-MG?C|h>x$?)wr#7XGTa6leU=h0vW*M=A~KiCB3!>ae>xA%7$kT3FI z{8z^7zn?eOMd+IceEGt~W28QRI#Tr0T&-KWb)w&*)LZAgp! z|FF}`qP?Fe->qsd$@DCXj9nT?F0dy{7d2@(G0ulR|9WovMeO7L6`qFt1aIWL%}l&) zX;Qs@vd->@1&v6s?Sbr|b*i=MKtfP>Roy{trLXSd!~S!~(@~EbHItGo$@eK3e?Ko& z+~|lg(>pMzc!B&4+F(^V+SgaGlE%O1{J6)yBq_;YL=n06nc+TN2@vD4?PhnF)BQG? zO>|k%EiG3=4N$+AoyRoUy)I?+6R*B8!|IxH# zwUS~Qw!sY<5(%`R2(vmhi!jyb4^|9CQ}a@>d*9f80(kf@;x(+EPZ5Bb$BSDp*8cI> z{byp&E@lQe|D5sXm5f0$^8Nh6OM3sf{KbM0C%^nCjhj4k`DIADVsIl(zQV0XX`%V= zkiY#+5i(%ab{Erk${399ImR19Gj=)bFUk#6dJOs3<_L3GHCuZEoH^1vdcwr;JBUm! zam}ZATQoL@aI6-22=YvQ+nFsk@mu@kbHT&UIov=wScHn- z3!Cn$a_p0Y30dXjT#}?4%pL6Q3{gm*TpxV7Fj;iab_<|gIWt)PwrgD?)i?6!Q}1#{ zHoAN=g&_BUthi5Xwth*n&u1_@zKe)QgK4qRl^<2MJ?&S0CK(+E$e+oxtloS&?O^n@ z9Sw84Z@M`Rr%m(gbSjo>>w2i1oi0qc)pKjquurY$y=}z`r_rOC_jdM}!cOAH+=lFt zp87nB70A{G33o2$#i_`k|8`Oy;!t>jX>8a?$EeIWR=2r6aQJPE!tJWoxcl2^7rqPx z1Qi91$`+Z`k{|QijQb06~D*d(8-fnK{>`3LmwHiCRqHu=(wVuxA{l8x@ zNH+$fb>t_3bV^*Ie%2>A^)fCLHsXCe?)vJxTDLsCY@kVw6^*+vLLV&!@8eTcABd)W za5}!J>(hB+rg2j05%X*Zq7B_2R&4V}#omn>2b*_Yl|POQNYbq85$1!!?BQupL@ZOF zeEV_IOFfhRIh~t+C2%-D$Qr}>K+Z-m^z-AM>H6t_@>{f3v-nnke zqJ|C=Z0EjoUD9@gR1{}q-ia`F$drArIE8Ge>dIco%K+m&a#I$6oQpu2m@iW#bKU9V z6D5~gZdpb>tY>8P!RPx}<29r>i!fM_#6;c|sU!(;KRDAcmP?e|o=*or?@G z-D1+D`jTcx%z0gv9vmGsv^9u+oKtKAi~G>itsE)=3gNa^O1(foHa>Sp_Hl`7t|w`O zWMTIuALt<@-0qs>(riyKaI=MJHmrt;Dh4LFnBdW%RQ4MWoqQtv`}?Gh8|>^365ciQ z3E*g7E-hqK5~74g!-Eg_d=kMxP*>Ga=gGL){^)h1i3@Mn)myeE#czD`tTxl%yu9LM@x#HhWNXS`tT`X>|5jT5+j`>@F-Yqk zQWh$oDS@@z!OrHXlEBpzAStcOKR98X@b9jOrw0p6-_39Hq`3v6M2t#$Ma(V>ywXg> zcPIG@NN>&OnXZ`6VP;aFA@nAL4e)+qeJc6$$FNWz_4f@?o|Px6v&r@KX6MUtzCH74 zdYaR+=(;ozZ(E#d3SIekru-jrZJKJ%E|*@VR5j(e1qMPa{7R(CDiG5i*#+F{q^MFqC_?_tMo0Y_o zk^RYVu^5MNqoOX&3b4OcP2~H!p>A=)9K>EYvj`#z4XagCQyqvtsU}q7KH3b}Sj?3(=Yt@gpM#l>h zV&YvJl}{^dFOqbu&TvgADDsOiDB)cG&MI2;%66?TI6{#IYUnxD*%PmRSB8ez!SOmL z$;=qzZAM{Lk#Er`pTLXhas(@=1!6O}TvSX+5Ry;*J5PX!fI@xVvIVzq|3K?&HIpR) zx}BulKFPd@gzGU~P(#`}oG6$2*Bgpk?Dh&7h06{TSv6y?S}89NH#3EB&&cUW=LGj* zq4eeOB!Bfmm4^mgi@jvp=S|~WI0{7KT=?!gF?G$8fOr!|f&HCVLq=iNhpdM50oOA% zppaM(*A+uxPN$;p$m!++aF(yO>7C`$=a7HHndpjwega@$wR9rx9%+1-Z0wf1eRn;* z!Bg4LC|vet%m6|bz#-c06i$-Q?u~i&hJ;h6nwgU%h7h9N2RM7(a|;Qc?XU~{TXnAKdt1T#xV|2bp>E2=L1^ObY!M<^=#A^k&=_& zE#Q-ho#hvsPB(*HQXBkut^>{i9y}ykjmLOMW(cbXk9>1!;(lKh5ofSeLJk>XIMpL7SxV4*YRw6o47#XME)EkmlSss0)mR{E0`@tHg7#lANzIDLIGlfzU{Wh6Q3 zn#b>B`m%3;_g_g=fg`6%ZoQ3O-RLDj5c9HGYA@d3>M5!+a__SmPW??X8yBH?!xdU@ zoUia6DN4=Kd0&tn6T&JeC*(^Teo#F#F|vRQ=wmsGP~np%wzRlWuHk^~CCq77!99+b zdV~nG^bksiWSDgY1PuKG2Vyl3+&C50rzGNqKPPTWro2+FPK%O&#h@p8$@rO!vlpH| z*3KhlCdzT@SOk&Bg1StfsS9H_7#y$fROx0`y6qY>Zd@X|VNJ(S30ldQgfwBL*yS|I zzScLUd}zVfwV6$BHNmylD5d{2?_*KVnS8y!`~-*|b+DYSjK5!rXIDgcnDd&|ih`3@ z`DoY}>e^=K$(IMs#l}_w@1jwztY~4T`<0TPj8n)mcj>5t54N}h7m_uXX}CP7RU0z~ z3N}pa<=;)ko7hR(wA}VZsvg<)EX+~n9zuK~C`1)wdF$U{eu=pFlPWQK7rv{STJI

z6{yC&YNmTInDH=pummv1$9bzwdT?Ysu>JLD)qe5CnJ4Wx zl9ZaNT2pYWcw;4H9m(?BS47eF_iAFXjHp;z-iG7g-M!Vo=@_Jx>6-XAOxP#*yTYzp z(2=r@Iq5b~*yXNvD=;F?WjHGf6c52II~8;atM)#yc2TYr@yS~^j~$#FKn;6&G7xp& z+&MJY2~Ku2ZQv6W&xtrK(gD*JudMAneP4J(5N@Gtd|$d1*_j1ewJMObPKpJ4yD@|+%Yy1A`1SVKy!nZMRLIicX_P5pi)c0Q36|CSn$Z8bVuJ} z+)xcS-h*xx&k#>2YKEpDG-8Qav;__AbNg$L%7rophgNdk?D_qE;?`!!pJNk~h;#9%qT30z8{_5Im1t#TI zOu@S&b(Y5|Yj_lH8P0we6nj`cd%>eAA1CxySvFgXzcQ;#Ael2PnbY4QlO@>~a(&8gIUf|(Row2tJ1eCH&MT1{&vVOYfKRLu zgXq`jR(K#P*k$ps??Byc7JDUyxCO$s%^qJa?EL5hp}th5R=@5{btUp0_t(*L^aOro z>%p+RAssj?s^zvRD22>88>Jr zIXEY$nmKIJVpyeqnR!s{HBnkku7r}q=UY;xS-&}wuha%a5cOSXSdU@1x=EZH%;5s1 z{zw4fqd+}D@!uqBMb~u_RjP*W*evFcJf@L}s1^3G#{vl@YeT5X+sy5nTQHvpUkEH@ zgUjHRqvp(QXQTZxuI~44gPjWtm%|Efs46=em05mPWC#8ALu<(fg4ACnmJe%?ZbtkO*$I@{&w6^)OBFvEg8KDe7m z6DQ~^tw8wx}q_89v(!X97EoB?s81%O+Af47R2DPbb2gv!JA5Oh400=XltJu z5$WdU^=!HG8Rz;<*GHeyWLu6DNzhC*=cdWrpRaLplaf(;JeWmi4ChdRtw6E&o}Md6 zj0y`2b6}!@RG`zIZS*j=J96E>{~@nd2T~jJ=+#=O{i_vINHr~)T84cVbrW?jt=3>|6`hCOt8)tmn=u)$&8m zXwM$~jr;>RkBm7+?=EE=_k3k>MyeEW?GLREs2{}7NPqrk+PFV45_0{debt$nXpR$x zH>Z4L==qiU$-#?vn+dm0TR4ww_G+`#I=tJ_xbf(;Qva`^?MXY&qkN~{{f(0Gti<(? zbb-`%gC+{Lf=f3#U6V)ESC3z=IKJzb=WZVY1_0smDK?Q&(@25N4sReDt4 zaqEknoKse2Rv;o)I*X|uVHCkKDcp;fJOB%jy#ryFm#Yp<(;79MD6*{}8Xb~E+WJIza-#%S*Q8KSoM3hv{Q zNY!R?v(Lki%~mN(`AxON4E7;!n3C~u@Dh9t-!0GZSiyio6h|e|R zX0}k0N%qV|d&A2#Z}?61cu03?^XL+F{l7xq0_Wi3LG=DPd1%PZHp#fuUT2nr>7EjY zVyjO^i`Um6ozinch9zOb*>%^;K84IW$Z+r@VFIwC8l2KF7D4|K5yjXheW%|Q^Ry(P zPc+|dl0Tnhg0J156m>#dvn*xREc%9fO@6a6;lWPkiFy}fu-H_Su6y<~lkp&;VPr`= z@A9<;!Jb=BB>Ds^-`iRr7cczK9Ys(iyt(4xOT)*9@ntr#v)e1{VJSYk z(U&V=d0&9P#C0%Bh@X@qOc-2jRWdV7< z)8De{e4H2s6gfP&9tMxuLJUG6356dZ27*u<#s`_6ua$#}JXs@E!L?i5mT-P1WUTSE zDVQr8Hda0^4aq6PdZKB}?WQ>Un%%;;DPb+e*`3Vk4O!IzzFMfYyOeg~oyFLp z2{B^i$I6_dN>Chb+4+DsDkfy%$K_gG45eJ=Ol$<{>*#}_=?B_v#ABW}UB0(PgJW<) zl;JLT+*)0#SDMk`Vfev|{RI=J6@L0+A0zqxt(ahOqHA$JUjrwLEiv;UppxBC+IkL; zr@Iy)h_~tkna(Fh-Y#7VWpMX(DSg;wJ=OnO(2Bw3$Dp;lMq!UjwM^&QHFY*+?JVDt zds!-KDBqSKN`KiWCbC0huY2O@s!%?%pofE(ZEM!=*L4pK`@ZoEeVnjhxAjCz%_p8D z|6J;WF{0uz(nhxWb^x6!5$=ZVPO?SaCKD>yPp)QuxI4nZ)+w&S7bbeHCDURQH6Yg3i4 zhB(mmD5M1F52NU2N4jMX4nU71YT-0yzjHgGwnLn!9U=lUyGF#dfC)T=m5uA%(jwg= z6O;~?WK#~m1tnW<++Vug*U_(%#8A)g5JYN0fFxf<6|QByt_xWJbAQ|tbLw6%d3;w* zy%Lu&hQd=J?qqS`-?w)QZNArIP?4>6nDoK!aDth-XzpaigK|o0!Y^B=@)vAVX_GwP zwmCgA<+>S@U@3&E%wbZ58L&)Bv4-0^6Y+(~=t~n|8m?w0!>Y{h>{o@-N9w=3qgORc zDBtvy^sIrj=sC=A9-o=@R#7d%ga#)%-g&O=^9>S_p`xt-c4f4?Y03dpZ% zkZt(h+SnA=+8BvxSkdAqWir*$)nTFMG|NWWzBodut2YL0HrS->qmI%(A0IhU<1TGX zR<3M4k-wailX+OL?bDNkElp0+SWBp76C-T;79cNM!iT%xl$It$@*UPEeouu><4Qk* zX~eUqFceB3PPeV3Ss-tGXZ_ofwBtYH!D4^5J zU*KTq?e@3xPM5p3j(qGz)#SoD3Nm7&>xF4Msa<*O?$3j37rCDGn4hFW8aN zMMclOGB;6RWS6^=Xy_Gi)K}3hSm!&ic6@s}MCb0u0fOQ{`FfrAfvK4nDDO*|DmNau6dq!E9hZmJ})(V#-IUI6QLj z49!K;b7!r;*iW>@O8(e;h;LMuDsVwvm(I{tL~*c-Vg)*gTIS=&D3DAJy30;zC50(r ztjRaLMn#^=mXqQ+tIFCU)q~no>_ywE4bgOyYoI#|2Nz?Rl9lRu`z}qzM1ORFV^Sf7 z@dX~#aWL2hEA{R|M!BQ>E2>FJ?ea<5I)+*8s;<3ao}zM4B80n=U1QF2Z5TdOOi?a;{gOn4Q17B3fKxRU8N&zn+IwPeNhvMEl4 z>yQ$VW^sA!Z=ug>^PhokemmkO?X}|7JZNv~M(mMgh#G>?-~{>;o-aEc%av&>O>BLS zT61tPl_Xy6`G_X6XR3OuFvsc6?x(N2m5e2`)Wr-O{M6U!G81E?9g9JG)R=%uNkTIl zzk`=_yNH|*VM$%oTCZzl1>4LiJrU7<%x3XG_pM51&38~vr$o-H(VJ`c>=bR{R=ve1 zCgxOojpXq~*N(PEjM?BGhTyI@w83%EjQP5X0IZCg@8o4jJS z7QFE)S zY%tg1j@O^8sX5Yxew-`gBr0?hGGFIaspS(W!$ zU*>_7$`lz@@TbES9XkxOu##D~lAMY%>7w?6*QU&#-`>DPYGN zc>cSTpW#RNgnK(kewK+n@lc~rWKv8?iyth1s*wxv!pv$}DYE~TRNGH7VRTY*4nDsr$PQOCD8RmiD^#u_6`$4K2GLr(KP6E@!0q zf!p;v>?b@tlWVIqw%$FG@uIqs_?5L|t5BD-s_tz|zH@%!P;mxkw|t#TUt~n2vjFRd z9V#-Cg+&Hkn?vBlh3pnoHD`6{2Sgg!ISFGYv>W}paT8`hh<~u-o^T$o?FcK4tY%DB 
zI>K0rF*6GS&K-u{L{{G%x8{2gK&oT3@))V~HEmbWWxizrixc5CYAF_%o~`m%#}38O z>Sgy5{dlf~%q%iJHsraMgyZV^Hbs+j)rPvx9$gu{SR~=z+V7&PS>R6Zl^mLxS3SRz zh{wEX-Lu0eaL>xVm?%4o15lgYd=oQ*Cb}IKnPZY#+P`)8#`Bjwn`40p@*CS5V{lsK zc-dKw#*rZoBVBA^X*N>&OUB~hsBjQ*vOe)c6(T1ofS@DIP4c*VIA{UAW54}QufeQ3~Ls1E!)PeM#I)whe!9bKGS zP9i#uTdNd3oi~u=`r7XU429o+`7m@e$gZiMyk>m+F5=KTUQ8IJ##cjx-Al0C9e*g` z#7In?5Cu0UF*F%v38OJbZ~fxLkKNK-&=Kz;YJ{z05N%W9Qn!&(gY{aAOU;O3IkOdgiRH z9lTnzl8Uzc?YpAbG~vl(A)yXhhH9!Xf-`YzcsmoqL$4Jt9GA2J{PL$I_RmN8>J0Hi z^j@lYxDDU9%rgN@aD_@w$qGC{(su08Ig$9583swcJcOWd zILSJ^{+weG>L^RSNy!&H=TBN>+ zuBnB`$(1%(xA`;b!Cw904CuUQHHB5Ub;eeDFzD&uc@#-U^@%h6vDvUs&Dx;SY5iz|_q&DkQato4DSdt_F+hAzHH{P=w3K&z_z zq}{Ii8mOdLc19Q-!YZI-0yP-VG6_+EvQPBW-^ukNTP!v>wfMo)s&UBB0TbP#0?)py zQi9{+cjAZDrV@CSOgzw-+k#lr#qJ3J=i{FMim^Tm0ZPuXghrz}h=zF$a(?QQrC+odsVLAquRdZO$8@Ekf5qUTQp z&4f1wJ}?^A6p~Lx^NZsGO=`{Fk+Hv6S$k?S&?U%gHQvyW?f@AalGJe;FIMn*K#irh zBBC(LXsAS*U78 zGghE=m`&3o`Xkf11g-)6k>4W1D;cs~Q{s~rZ{Op8ut z3Gb4vCU-32cp~PKJ|*l9q+@5=#`cA zr-yYuzrz4bh6HKR9-7#7-Mp-a&21Z-q2bo;o#h(6_^NciR*x}tJqaP}Nq#P9x=`_O z@1%RkrQWYoe}$O-y@mZReDxRc2c9gxWYL@4BYp%)^HwNdg z#xm?3U1ScH5JKVLbO*FvUg0g4U!U=d2o$ie-D3ePW? zy?bGsZQo|bAVV!_TKcVbChh9)fvZ%;iZQBOtaj+GSeaPq9fXG+xa#9hNyA~BNlZ^p z_fouFL_t+q>Zv)ohL%Vx#U8-nt8J*{02sKgEH&S!`Zy73PXx3(p`Qr`NKd zHcqtI5fN!lH{T}w{%A_2hz&T9N64uLb3nkn2WespD$35~&)CGtFi!<|dv_cgn`)D7 zW%MOq_3|Oo#9v3JR+~5wiy=^iN$&xjMnkKd)rzU!0&v&SULJmboh^*p-9uMRGE3oR zGTGY7%@og_+A`t*@+FV(mD`&azF*xRoI2O4DRl8@HDK(G1)^ z`D(Sbal{u5$AuuYF6J1E=Qg=N@#h}eeSY$2{9o5#1J8M zYW8P4s{I{#F8isQ=7!HvM15GFcZwC$7WvA9Z@SofnRa>*L}=gcW7>L-N=XUyusJ4bD?_rp6&Dt)9>q~uuE+$yh zH#=MRTyG-)m+?z2o-KXOl)!|*hOr#`aOa-@ZIXPmQF&i-59=(%fx{&w7Z2oebiD=*KkdhoXNj5G`rT9H-o^*h#cwziVZ+LL$L3D4RoS4b# zB)OAL+DpqpRHtFh>HxB|n#C|p41q_JtZlYlT@~N%XY*QkD^PbE;oZ8zBc-xx;PKKW zmJcZ8GNo?jUsW~97luKLy5(gtQ41!-3-8`=YNN@Rd+LQr$*V18HR-mrA0`sb%N>~# z7)Midb4$GsbKi#Gi>FUM5^}>stOoIk;X&D-FZ8c!;+!7IoXqr23>&M_ukS)4Cg%;@ zZ(0avj=W`~V2;sM6vSh>3h|CoT?;!{EtsNOOzLMt?wOePg{Jos?1dt9(k4HD|EiG` zVd1hmQQ}eSR35zkrty7TmQW?QJ@Fn_rH%v`Wd~+k0bJ$&)_1o`lv>EQ*vii2ZN3aM zC53OoCVGQ^Caz09b0GRYD$h~X1B2o$ffQ%qMxuP~>)&#oa1kIE%oii&!uYjLe2 zwm!6@>|3nZXKfX8n4STl&DUvE zYPzv4w^QMU<3;V*c{`Vj#cH-Z*X%e&!8~cArDyT^kJ!FT=NzI&9u^7u)}5vKU!O;Q zbqjr-t%Y0}?3jFWhEC51+$mCcF|lfLtE^xu5Il|henZNQN2NrMk>y!?zvg$kNeb^p zNq>I$>)>g_t$V<<$9=#3c^&-2`&q2osd@7MB%<8@B&T{V&DO40%K8~W0%_AY^{npu zyCwtA+mCMts9`pG2;W^4d=+wZ44~dyOP#e_%%>=`zwQeG=ugWO@AfnMD2-?LF^iI> zWn;CT#Mm_++7xxgIDR9^ndo&~TuHisfYCU_(|=q@gQQJMKqKvgG>t>1be!!IH+ZXP z?&3e4lx%&ygmmD7Z5jzsc_o6ng#9(lgkO^45Gb@^Qj`ZBu6pjQarduK`ioC*%$=GI z=#z|o5GD=OhT=Ru5@@x}2|^Fg$0lKKf@;!@1te3T)M~LzNpL3J&WlVY4ylT1b+h+m zm&Dx!l1i;*7>#(rtcVrRLU;Lhl{E+(Mcfd>5XsMtuueO^<0g93su5UaPMU;2Nn+@- zvLfz|W0|Ux?wKR)jaIYgW0>T`+&>$s8y)Ii&B$t}%l--QdgD`lm1>S%Z*gCnIjN`n z*4(;H%-|2Or>dkl-xjrcPdQ9&cDF{oGlEybD}l!Pd4oq|O?mN~`p!2^-Zu6~y*_b6 z2YzqQ2V;3e^SHlns4_n8xmTVuwrn z>^(IphZa-ci@wyjF!OQ=FOR(P?F6@eH`Y%@cUD-4i&NQk6Z%cr3Etj1NLjf(_e&ih3pSa368o;bjm^dsrIEtTRb?e^3 zOELC4R*i|qnI98hg~jnOrI#E6E}k39(7)#D@@IPE$h=gfiGvK#s4r zWqkbcip__2zgyjzDmSO5_x8^)M>~%Uri+5oyk4c){sd&FnitjGB~RTW5sl?UM!Xq( zx{BBKCSomBxvGm?^_3EkU$Dh~-%$CSq&<7*!GP`q5Y} zjNREf*O>S;oSLsjLQkkUY_P@FK`<>Z>T8`Ibjbw!F^1{-k=5(ogYUG0y*~lJQ&s+$ z^lR>0E1fhf8Wl5y(QNkQxp7vSR9PuHl=~M>udYa7y_upl42&TB@HCFahgODBPltaG z8X?tTkqA-w^}Xv?Wm{7c7?N%v?$c{)`?imT&!g1l67F?ExZ*)+5cY|~)W~hb*c?{U zy2usv7Vb9}1blj%om4wA8qp*YnVvML68uF1Tf3863+A@l`t8YTI7vFvo_du^ia)0@ zjCE1XyH z+}$7kOQ9@MdNs>@6iRwoZ&>Z%Ko2*PE_Oh{JyS9?L1YxL57Ms>`)xbu}BE3OTB7h zg6Q%Jce`?oEhetrI@zV@4hep%ig-II$Na8jEg2Hd@**CaZrp{NBusfFaSx7XY3=fH z+ZjNsZ2{hWl>U7S4T?upNfA~Z)K 
zVD?H{P&23oM(ulZxiQ)9Lj&XAJNq8I+d5xPXn*C&Z;G8*&1{FQ)5kMgbv*DOO+od` zH^59fxme*_7ht>++UvxvHzYkKJ7iJSTskPtJgo&L%9$& zM0Eo?gMU!71Mkdf!E6&4;s=LwZ<2N*0h#v$r!!VO+c2-VF8ZW=oXlIx3KUg8x&wgl9ye%wv$NROXG? znMsr#+>P_A$YLLNNZ-W!@-^ha0a8UhKYy|#7yiN~t`clR)|Q9b+(#N<==eW;B)ELY z@n#OJI7O)3Vm_F!Xs$J|0l+IRFEPRR|h`C{as8efz zx$%wzX=szy9kj+OmOaJ1F|ioow4VCg0K4nq;(oq*3$<8C8@4V51PTtu`cCgxp@Y{9 zCq1kAi1Y*96>n#RtYU+^T;x`Q!%N*aB=OyPV}c28dLA)FFWr+Q2pNiYkee8bnlB$J zwzUjH=e%hb;GPX3F$IO62}_m??Jz9e7X3>jz3S&SgD5pWQA8KtR$$9jiAw#;__8a365)5AGc4d?DW!@sGxeBpJe7)GN&q^ShfX zs~L|O?<|=!1P)GW?G~rN44~JrqznmmVQXvO`KljUYi31}BPYh8M>l`D4em;nJ*$#4 zIB*nLd-R`ABTL&1!dp)XeX(E9ZWNHzOLBKNxUUpz8n)-UHbc_lP)T%8jD887F@iwE zFsB}k>>NpGTSJBHMU$9LaZT1?=(o+X z7%vt>MkD&Zd`n!_(CP{O2~Zj&co1g!D3*=r(n+S>5TI8K&Dj5_V?B5By>DGD7~#uE z(WIb0`}4{P?Kfy0+gm@2SKc{+81?iqVLt-jv?se%=JaL?K@?He%C=`^h0?44a|Z@Q zB6X;eB|jK7zfGUrHX1cF&~P-7i4l>eY69*JLoviDm`bJ)KPQ~`^mv5ePQ<%E4A}p+ zRPu+~{{6?lUZsC7!PL}I{NjC8V^Xgt&EtAz2K!@nj?&kKnmB7_M#YQs0TQhl*c1=$ zA(fYi2y#v{tgkiSlM6wbQh{{^4KSkz6V)LT51!`H#JT%nLvx6!@iDB6C!=MC92p?# ztVDfnf!rfyP8Z9~2UDq0TkQ2|r|*aVY&a+0Fvo6uA2oW@@8h;Hv*0sjxL_#m!rLkf zu?=-QsF7Z<^E7I7tkcRGYnX67WU2F#;2&J}X}~%RiE$_S4C@Tfy32~nBByHHIPIq1 zkU*t#5Usk&kP`7C3J5kt#aivuL@NmR?^*J0)pvybaLTpTYRf=LXN>!L_+P=E)EDE0Gy` z2UJZX+S&oBigSIA(y94hh1wa2nCc%~(Mw_*x0tmGzdIgJ)}UDZ{Oq)BgkOHtPVuSU zqb__-fuZ4R9J%89B2EJlU5rD(^5pRf_zC!Ug399KJZWY;d4>S3@@s?{GQ6xe# z#35IFe`KS9NQf=8y172p^--*LI}H=M`WQC_bKppCOgpzSsc@S%YD@Tt0BH59kd=A}Efz#AU<{LZng)B7_JVsh|AoIZW z_zwnHs^=F5L{ehI%i`zgvYL#JLtL&bj&{C0a%zuWKvT=GL7Ba?-jGc1W!XZu?}fkk z`-%Cl*Co$HiA3aj5hM}jaV2;+HjaBME*(TDxbn6_KSv850wez6e-r(1Q!#6-cxitt zv8hB=(M%)rUDvXAnjZN=nm|U{llj^cc#u!Oc&0$jL1x82==Hxp$0;BFdV4iLARZ->+auBLlFuT=R@Ay_m%EGK*)$FyV{zZ9YN5`=`h%K& zr@cng`^XMN_`=?$GjCUC#x=wZ697naAf9sz5mqLF>{0lX@%0E}4 zzxdl14PBwzyiVy4_SvS#3#UhvJXmT^Ge5lB@@%uS?Nr1#>xMvA%=C<==*KOa=b=S^ z+4i03*1wz-r{a%%U!@X8G*0pvt#uvC43*hMK0Y{J&bCVo!TCVkY%!4wad_hdj&red z=*fea>G}LNY`&3Q@f1;F(zlGjDVkt$%3wZGuuzhC7O#@VuQr`zsQex zHeAmgrsK>};u)w#=aX1y^8o&!$C}dCB%mskNYzCs(ts4~)8>X9slAIps9nW7lzfc2 z_hMG!*_TfbSD0*(n{uAx>#0afR=!mih?4gyf`>Qk=r`-blCWcDD^jV)V(R78&j!^J zVKrG%@x#@$jGNc6x?$m~w`C{#$VCxYJ20J2ny^WmGI0EGR3gk$j%Ms}-;SM|_}E)R z3>$(Yfi?lxjj`6+0_7eTKeRl#ps}sU{y1gMP%qM&sgAoqyKdBLfBpQzLRn51p|ZS0 zMGO%MEE4Qk8%$TsVhStl4L)}L?I@we zWUC}pz?)dHi3U^!PyMjfK&Dg@*POcE$BMw7{3c7jprl2Y(l2zI%kZ2#z26f(T9bNU znUH(eDZuW#yx~I&Y2IB=3v))*&m)4L}*LZHmByH$q3 zBFJ6ow(^;4(&5$35>FWqHK*0C22Xt6b~%35?O$&b_jPUEP;W=R`MMAp&Z8NMdV|Vx z<^1t^=grjD%N7R>hR59z^*IrQYNL8aD59^G4-+G9wMV9qz3-P&ktC|}wjfOtSG3^o2vNT_C zL+Dj-JAbZYVHRzgkrf987wmZc019{izAbvL`pY=edhdIq&4%wR`g(y!NO)|<e5*gI>lHTRlx&H2n{iW;Qk?gkEmf)%8y8eF{vG*j+o?J)>f9UKHj82?DEWgG76 zjf6jMh5a_BIar7&DGPbrj+Zf54Pd)q$viB8(Pmdu8<5fLVJ1h6_-cEABegqq%h5eTu9H5SNa~N@?tv2f5S`h@a3`GrM zZ$JyCBz14EN8GA;4JBpMLAB6nD|#pt>pDkXFl;eNjZzXN+`U#~#!g^#wjMMsX5Gu7 zhPXR8^aPvd#->qG0*jlRJvr?{WnJpc2+xn?v?qs87jqNgR(h!nK*{PA7>i6L+hD!m3ecE z1{o4#L#jrYMW}=T2F9bv%N1?tp{=^|ssI&@1*GpG2Imz;?&2?Vx#d|Y;3c!nJ=`cH zX&;{#f7sP8h=ORM*d3=OUAFkkm)Uh4dQ%!w?0&)jB3mHrahf&qet1`{+pSCP%(*1a zdN+xNxkh@oQBB^SmPqV1Kar|A&azIVx%`UD`I|FQtlNGx8*%ld7mi_A(d{P~8BQ^nS zr-sN(6(_C12dreb3-iNshqlo>D27CmHWBC zEWiB0>UZLz$)17+7R!+YEaR(SB1Mmf6};*A*zb6Z-50s z<{m8-mh{fLuwgFa=U~&hqRk+cP<`hF4_jmP2!#R)K?Xoy-2wDj@hSa*O>*TRuG~+- zUsdf5?tWj2Xu%IpKa$};F?Nv^!T0Bv(yms>Za*${XtgoVqL*(S=`g?$Bi9H0N|m4b zd(&5x2{fpHqx)`U#JJoY+C3T@2gKB)CkNu?VWa$)D~d~s^58Y=rEy`joaO?^zCMvF z_h4dG<%B)~a#UW8bF#iic&#L+5Pp zs>j93lGM~*&zqp$$=IIg;>qc~(GZZ+{(Up0_Q~^#4ce~>6nm|c>#~zZ2}Ky zr0`5@9_0r2s&_(^)>3I=pOq~ZCiximw;C@0dP10ZsN7JNBJzuKoPArxu|BGbC*&gM zzv9YIMkxR11w7SKQT9sR7_`t2Ru8ng(GIl) 
zN$?FL_y}3VYTP=Ni)-~Nu1@EB!ou5w`NWHW2O{E`rRSqKOlWm_%6o<(JbZg-%`$bg z9c17u+3 zWn-=9^scgcx^HA`B|_jc@B5)K(9i-kOezxPwy*%wcr2YUsiM9L-tK#zXlrEW_PxDa z$6ii0up*d__$0I?GymMtQ+HjZ6&s9{N?<-iRZfKu`xKJcJgL-QmGcq4xYl@ndPyxw zD-evqCf=3a(F5|n*<#d8@LmVARn(nh$raYO$qgtB*Y9svGf0EqDK3LZUA1qpu;8_K zAf<{rkllWxt}SPBId*%Nl+|}1kIS*-gc`}MWe&cG`Y}=GCQi)~|Caf6VS+;IwVU&q zYTKUNx}8{-*n0G`cB4NEx;|a0sc!l9Ee;!#CtQMcvE!RXUm9|b<&k*uRzAUdT5t|c z%GhT_OR`1Vr^LDx(ez&qZW^~jO|o7x4ZAp5;pG_DTmVbN^})pPB_m4on}gYc49U{% zzR50F3chkq)b1g=N^g|v(~dD+))X=As!8?N8gC&cNVdm^Ak?ClvtwdheE8A?k|NW? zdvZ*D!qRaok7y`xUv^iy-OD-xTS21agwZP((q#Y>x={lT5hi!I%VP41g3WC7byJr= z<7;oUpa`OFK)S}u`-wOEWB2Z7n3-KX+hFJty3%ILUq&=n(N=>8g&xoWJqv5wOII}! z?*+f#<=uZC+4Ao){r@d}z=@Ik{WSKZ3y%udX`3vrzQZ-G?4|Tqz5*JcS2dSr`JSqARr%t?zx=*`6H`n3*F*(>z1@XBNy(qL`@a}A>wfHO#9vxnGkiz& zM4Ze-_{)=0oBxI#;jdr%j~D;%C;y+0*?GLv<%8EQm%|WKIkZht<+~JJ-Vy%$&pwGg38I5&dA-Az z{3*M;nfvU0mL*GmZ^U=DyBW0{er$DYrj#oP_O82*JgrbY{t$0H%WA4^`EAQ10e_QN zqgBmVjq_p{Q_Uew=|(+Qu0CijJN+-ICU@isgeF2b zFZuOdGd_wYa$7qf*-T3^^STC555*QCsH94zvc*b>$yQL!fnOVG%*2kaP>B@+o}RmJ z5x=J5V8zxRA(0F1t$471!AGbC1BPmlQia5n0US{$e7da1{6A?D$|eq!lz-A702QYS zx5_#62bL9s?O)_>>8GsPZuVN9-ES!tZTrAGbZ2-w<=dXci}|OUn^;}j?u42>e&Lhu zT|=6yN6f*nhtiFPYEFInGWu?pg0?lK+&QpJQBPg@bW}9FTXAN|=P?+z2)sK~7v(nn zJ5kg>94BFQe^!hCK8NeiH~PPLM1CieqSPIq$fUmkp# zKxKrD(G8h)=cy0x>6`9z{7^U_y&RCwlej+X88e1Oa0gv9O_g=z@gro@&H*5rwI4lZ z7SZB?rp{sB3~Sot>RwUK(=apoh7wm0UJb=bH&%*D^T`RqlqGZ*n?ftWh2qcoXe2cQ zT!gKAU7QqdMc!HVs53XUW&LvJjfr6zqGT-6&OOgw2bn0RI?1W8`^h1_)GS5c(UhHC z9JZF(R?jthG-XP~zx;3G_vE=hj$iy+#6DanQ7K~RGf1cRCw=Vn{{>)f#wLzHe@t5N}JMJ|9pkMxe)jyp4 zV)!ZQldaoxW#Wq!U#jXk>3x3rm;a7+)qP`eb^aHJZ)V@9X@c!P!qWV1Y8?vu{`>C( zB-+LQ$70*>3JYKZxYj=}JV;}S^3zjEbR<(ohX6><}uuA5{!{#a)MvVk@ zrPoaHde8EQv+Nb%@Unx{FfJ+tKBPKXMq)(_<99OJ{QFkvWLR1GM3nmf@4r38HIxtS z_A~pVK2BCT%JJnC81z5bvcKgB9foUy1>PowlM*yj0f3qH=&DQlF22|8t-c>c9ZI~E zd%jKan>`(2F|0h&S(F(pq**VhYgPaX6fsAu-Y1j0Q4GvKSZO{yOn2Nd9Ro}ZZ*8O{y%)g0fGHO?f-Wq$IHXaPj9MNe> zY0oF1L*l}1j_7njdr*8fpQRfJq_oZylmg#O81kU!#o$>9)Hw&^1Zo?gAdvD`J6m$( zon6~8B}rL&&E|2#XxkqZ%N;j|eQ;8%n;dv07DxBYl3DTrKQ2)Ku7)i z?725n`y+Jt56FidHkn)joxZC17Mju{U|vkQzqZq~6EvzGBE`Ccd%61Z?@#uRCt&H( zs!~t-u;(0csR#u|AHIq>2#=|F*rtdgX$FGE3Z+@$|y1S_V@;ObSC{bF=M7;JoO#!ufpj2#DTJ zwpzNiG^W>Gcs2(sPLC|JI$KHyr63I7e>RU@hR8Hp2YrjoY{9h!WqIQ8x=i#< zYw@*bf0a|>v?O;pz!Fr;oReW_I@*L7FqU#H!^;5U1vEol;=Tb=&1TKfaJ`@*`o^1$ zgiiu4ZocUDw+jg&1zpJ^TTd%*1W}xkpHhh!eE9EJ(#!OD{Re!Rq5L(%2p)5zxlVA_ z^Xm+D3-$9QZ-80ZwFcA1rDrDbL*_3&>0HflTIz3rTpb?sOPStnb=Ijje9=D3_L?t3 zKAT0m*VS9aDU65xDqpAR5hdxh$`KR28<4iu$~f zV?>vvJ0=*TuN|$Q?G<0Tdv(b<*UQd$DqsA3KS{q2qzGwc#A~W%m6$ARVH)61MiGLW ze{&VtB-(y01^uK^U-JK!#QN})blXRJ*m2b0Pnu@gQ`yIKwz$6GOuep z<{LM!y)PQQXHd??17TDnu%Z}Palhg!6`rj44;v&06*eXF%*XP8{MLs6Gv8rN`W~HG zbsbNz0PH4<9>YtrdwqSONAs85b>CM&gdAU%Nl&x ze2e67pV@t(U+Njm?I(>JNeVln3EI1sFbGkCD{V|qR~T8eI07W*Nl$JVzH;W{1zc$x zju3htN}^*PHBY2FcRr*R3`&+#+&>8GKsqUZ8wGu44CpZ#TV1B221*6-h2w!y<=5Y` z`g6vDh6&jH8Lk7Lww+I2;Km6rDt)Ms^0=jnzpX35-<#yuWs^sBr3fC1rurnwF7g{5 zc@@jj3;(3SX|w*M8COmb8kH}fFv}7j4)hJnj976CJBVfZ3_DI|dUQ%ai8I&fu#U^g zWz?i;Roxb&>`1wCz*_brBc!pDm%;zhVNPRcxH74*vDxz$uh;IKBnJT(ZDp}t(v3PV zCK5UIoLxAToK>Bid-E3)s}^ZNdY@}PN^x^$rbH7E$y-9Z{Gw*?DA?vBp#YF0F++)@ za=XAf&o~ebLWsohC>&RO8)rvF&E)AV&mRDtKtbA`HGCP?)Z^S7BsvYG$Mplm=rNy^ zXv&!RV*@2+$j~VG5Jrl;b8pQb3JS{}?%7yhlE(p*?3<$M&c2~N`*L|9ytVUCZ6(7~ z7XdjB)+qv4wit=J?zc?Gv{o9{~&V$v`BK%PqsKmEn9jI+C ziK^w(fAa=~TZd@QKzn^d1;A_=dsS6?yWbR$L32JnmW#pZ#?yPhK!)A4AHd&(rbpxm zv$HMIjFm~Eey#X-6!Qae5Xb0pGj)dmhkCL#UVIvsVGz>3L$Zs)zQE&pG&)}HTjV~unW=8l@HL@+sQfxs@&qKlV#S4pIeO?Fhpx#b+%&Cz+ 
z)`-O2h%v1tV^F|^vlwUjyyk*SZNzRDZzto_#tMIe1kVZ(&D443Z%OwjrRt5=?&oh+=bZXGaKY%QA#6^V$ z1i8R$FZN#VgL-U>X>)b~O21Bdh(X>4qch`Gib4aNtDWiZ(MOBQRxYIqAvBGwm8&|Y=t-KW%~HU7n?26<8|jO zCZ=fAFgQ^O0x0TG&a1p`hrw>+LQ@whbGJpi!3&Fd<%FL!F}0;86us2#43+QttC_o^ ziVo4fQ5_rZGY=e1rAF1r(#hofV9b3J1vXeSSirE$3oCp3>cRO|Ro~isKtWxp*rZfP9tfK$ml zXhYykvSa;!bSj3-9A|_WzNCg9G=lb{m- z9!pZbVS$mV;c3S+l5cSSWmMtC@7nh>DAvIb8(E(;KK;Hy_Yjf!lV+Dk-KKCw-`;9a znZH2BUg|)L-|vnqMWN@MWO{@gM`IT5*E)vjpEQeAt~g(O;q5${3*5^`uY$bU{dG-L zL!u_?TPzK?or@S({UF+lrThn zw3z5%Mk{WHotJ4Zql(KVE?WrZ_#M%*IVB-)wRAa|LWKsptChZ&r9*R*B&IFw32hR} z>md?+pS&yCy~`9yp=QotsuO4f$gme4HFJ~e-AQ5n=jZ=9v}9eY?5MY}@3_IQigQL# zGnHOJ^#<@*`C)5R!b59XJ4+iW5hsv}BT&Q%qCs}wI9!P`w`Ft-oAPUs#;b=hf)v0K zGgWCS3=bc7RVJAbF6gc)+1shwKMcz4ul%wCL)7P0@Htf8N<42(*hV22Hb7A4G_DTw zb|LST8O3kzN*bDuzxnC+O0*Nb4Aez=}PTmh{irK1x@U!W4axpyT1ilZQ;{+9U1=0-(5~w(}7PFUAq4 zD)~!`w5^_MVlr1qi*I2r(^x3jek4LR1}MFu!`rScABZ-hR!hKV^ZrgRHt`a(uv93} zy2?bTC@=0Nau-!g$`o*m98BlkMX~4bqJR>t3-|1}c~$*1Jb1zG1g8Ie5|&y2J&3 z?YrTb=-$V@hU$9N#^GBGSS_d?GlHL>7E3Y(JCNH;;Ixj)8UOvX-PicjJkSajZAAi0 zozqp?t!r=|h0iPu@iN+>ThJP_wYYO#MCybv>aVeVzAqvF}@O)c1o5`we2qEvZXquQQ4p?r&4R8PqiWdA#aPYiu&T;gR+-#@_86RAe}p_W>ofobZj z#86B@IF>l{!n{XhA#b&Z;APZ~JG&NlMX}|i8Sir(=$dI1S6s#|?b7L_nvaBabIZbc zjIp;Sv;2Cr2*Hp+@|IKWB`=TNtU_+U5~LfE5r( z0svr;+dT$1yImYH)7Pr+M{pU&@iPf|TG07ZPzI^v$Txcf=cYv+xKLP|+7OyWWF0#A z_WM^Do9#7(5Q0TD-W?#T37^H7w}Yu0kbO(*q4QKAv2buwiYKY{!MDXQY@hJ$3^*;m zM?-JJf^5M2+)QE>L(xQGWhkon8lB-Oa1=-ZI;k|Wr1Yk$CqD}l{A=1Q1dK&eahs^< zD_MVflz-Crlds^Pgie5F3zV*YCeege``YVMHX5F%*Kx-DH}xgah8TQmvJf98=-0GW zP0FA#@sa!}Q{0ZH&;E1?LTvIGX|?Gii>V@Zf=3^;aw)3Id3`Xl5QRoIt)RvnI3NiL z!)#dwPd_IFTUp$wLIFT27!V20`~HGhbSSUt{loKaAO4}cv{x%Unxv&=dki=9Owbgt zc7+sjXe+0py(}xy3%i-9C#YRBfkVhnUfHRR%E%l97O!11(<+cB)$GytSs4lJCHp0w zqZ}yyq`^n}#tk2m;8emaj0%Y1FrPoyOIyj0Yx~)Di<^=Jinq}t(Et@|-h6x`Gxmac zSOR7mq%`~C?=r7;+sJHeXmt_YUe*{oCDurH#@cb+$~RPtR0UF&DiQz=xWrx zU=8`VmaU=ldG^l5uF|~0kCX*t-Q3PffPp3u@E#=%u3>#qbhBP0&^JhsT6(5T>Ot20 zdnx*c8U-Ye8tje$ERF)cM^%B08X;l3ztaD`2Y(-&OZ+a2Z(LlE8M(7*r9eD7(Zy-1 zbkPtI1J{#Y6`Pk%gC=@3f`*YTT&wP^MKsMGN*_g{Lehp|$bO3fqG!&2u{wEiX;29} zG|Yo>{Sji#|iN6ocG#JpanlHwwkh z&`HgTZrz9~+_#~Y7yav@EGj+HnU76`jF@T*LBenXQC(_Qg6C%|0X?kkjyNSXVJA{Q z%f&q|E&IfEf&p9OVPu*|z}`b>w8a;4SI2F?_E*}2ESjGkf#v6b*?zf6ZtkJ8@o~lE z{-)mha|rd$%Bx3w$w(y_O&5*Lew#vY>1}zJaSj>l8!_j;-A)0?JR%{~M&k=uy*x_R zZ@|rKYZApVf9>R7+W+CTcP}N;Sex3)Vv9W@=nb8k03$U`-$vWiS6a)$C(}Fa8jBfC zyt(*S_QJ>#di;HaRR>qZ9lwL(3EXLkm#;Ut6R*dS=5?tKefC1k{2~7F&K}X|kDl22 z2unm1l#lDCl3bD;*fViX83)vT=^!Xm{(M+nC$$zxSRI)dLd%fr?ZB>iXLtoCM}H~3 zNq}jNw=JOb$IjKm%kI4!kc|xi5%0QBW-ONfc@SD1|H&BqSA#%LAKwLBvanT&H7yF+ zUMZ^b68~~goDY!A+AtAYQjN1zZ$D_T*|^jT-h%rKjU?(?y;$;^{f+$f<{{Dk6QN{Z z|Ioj6aYo)>kp1Y`?bL}yf>C>(bi2CgX`;QRvq-67@`!(HWZCD#H>qoyCCs#-uo>0Z zox@-=Mql3g&9zg-pig@)HbslKiB=!4u-~^82PpCl`mnwl_M@0tGSN5iA*9k_dHqP-)W2Hdk@x*WA2tOz@Fz`glkU;IKY~b-$6O2?rU=dD zS1~_!3ZAh0%2>kC;-Jjx7@j+=_YUvhBviBMI>vwLnZeq-0#$uu`^GH*D-)c3yKysJ zTdh4!g|-Ht?fR}g$j=cpe<5Pya3AKV0KSuxcn@ z-CG*<61U~o+xHc~+Mn}Vc}-=+#TkA9QPTPG^5u-tLvBjs2&eBn;u}iJn~N-SHT@5R zS_-Ql3q%BWeBzzTFfMk%w|VRm%?k@2LtP$)emg0bsY#0cNsnL{<_{3XdKrPa^hjN>JBt1$SGm1pCcoJSz6uLif z4m($xckw5UQ1jfBi!Lm~zOO%N4xnwlW8{1{J+lMRlG?t$A&L#6SKKz57#Uu*oETZC zOHpzM3w5<8sfmP}R5g=Z_}i;rTw9E&u`gZnmiDEl6nlqM2kp@eSPIIvecF9)Kgawk za+-M$DD?GD>ElY-=m~5+C zt){oE2cI#v{gb0P&MdrKNx>D^?kppp;L-_^#Mj>6WR{qlJ5~PSrv9^m9#n;mN11>L zEa$5AFKI?#GmYs4S@+rZ!V#?VaAlwqgkoix0_;iA+%2hX_%cu=vR-5+t zW;xM3Euj2ETFxt}Z4*z^xCKnGrHbHp#2ro3C>*yS=v2j5DZeNRcUnx)G;72%r$iKiFMh=7hw{FM$q&C-T({y2PcnTFAO_@9pXi>RH&ExBV)tJ#cK35G z&AJWu?iZ9{L(z5=D3;`vW|bAW3iZ2)P&_IV7eOl~nYq&-T?~86X(METWK!X~&isu- 
zzR5{#(QabV+vkV!$?YJ{K=U+E^q%bMJp=6xsl5I-xb=Ckckt}NW;B&UO4H03NuR1v z7w8Z!flb_E*~jzUFrBGodJJAC74xOQ;3m0Mbty5%pquOuPFpxueeX44;(St<4O?KMU>dW3WTfRd0lVTY`Cc9i@PO{r067%Zay46FPls-JezWpgzM8$#;SM2 zQwmUKhM;TJf^if47tYwnR!I&9rv>dR<;<4;iol;mjf}DLskYK>m!uT-Wz>6 z=9_VC!;RAi$1cDFf%>IE?J9po1C;z^ext7Ij7e|mTPL0Aiq+Bm>Y!TezYhycnOl-)z{s!&cvfw5>M%@WRLtNFdREW@C_v-P}8Iw z!7LrS(Dhx%zKtt8dEyWfQ|g*Q$ain`B?gk$A&Rl1oaOUti9XeHqSI z#`KbvEC#^_8*M}B=&NJHBKCgfVc47O0jG`E8!7zn#W?~jZsmlpHv!#a1rps&A{B%h zKrWLqe2k;WbG;U6Hz3f+lu|ubMqRA<$j7NLli^1t?n0wi?AnvB0KKlogr{@pS*RDm z(>&}JX-iZb`8tK9@|$?(hdBj{q!~JM6KDPDH*t9kW1s6Yfm@LM+?0-4fob_km&+ z$dK7D99iLbDvm-DL=*uHdx` zk=%a47#P`o6D@{l`~fUCk>yM)nbzAIAx($Eb~*ID?e0mh7JmpA8P!SAU!FksYwTI z=buJpHs+G8soG#XH8MiPy#zOqZw&kLuCo>u@f!)I&;D?8(O$k*!-Xv6t;0Yq2;#__ zMcg-ljf-={Y`%W9T{9umF6AkM=IKaeDI*r+_BfRet_>_VOF%{=7mQ+%B_>|@&hmAD zlIm&1OJ_rWPPWizm&o6~W4BSc4n|YQ4BBQR#YjKFvfR;yaaRuw4Z`yfMs*&^A+q0Ir2**2i3${)K1+G@&+T=_7l!d1Lineg`vaB8yt59 zm3LUL%*?oZZpJZ*;;FTMEF#mCF>$Y{y5=ClsBp?F_?S!9sjpu_Sq@IxtCG6by0+hi zGm1)#W;J0gf$-R2sARX^+VA$bfbJ_)#VW5Qv@plp;soLdX7l1KE(z+bT(I;he}16~ ztp{2N<+VuDAjYYxsix^MWQAb-2LHAI{L^3m!PGRYX@n)h<=PHV6{nNc4;9Rx&jkFW zS?~rPQ;UkH0J=YjU-A;#xGtAq8#AiBaJ!bzR;h7C6J(+Hlg38N@FfimZHD<1y`pb0 zZ)*Bgs81f3m5WxOiS+%bk68%)x3)PD0*3-4QdEtkhDn zIv5+>vYij(v^9J(5qABw*44Q|Jc`?@TGH78-6j)%iSgVH!Ahy+AY-3>-y{3NUkpd$1+yv`(8->t3%Yvqc7_=FA0hp zOOk;~ac>*hbS}M%_M|sD`K6$_PW~|pjBM@In7+!l19DXkc-%MDXYztsC2&qenB5h!JeW&$ zQxGEwik274OCWdg2lsfW+>#*f>X)ZY#0!3PJMu2E8+PiPm$^PrQOQ*4IoR^ten3d` z3r{7-pw@_;4k-fx~&Wn-b?QhYAM`~VVR;76{!Z9mEN zZp=|ka$c~NbWDG_!snP(O@Ho{_}|}vSj5B#;xZSLa$P^nT9S0wYa6#rjlz^~QiDu} zs+qTxJ*v$KTaHJh`lz;qW(rYHF$d7BTSNkup76V4;iB#F-Gv8eGSDseUpQ<>-x_;2NR0-fy9BKk}e@#&Bfi^(x@)97S7gl$=xl( zs)nsbtxO9um7Hpi72)j?42rsFtcL>gbr!(7({uBxH{-s)ybK;3r6qRhiawVf`T?BY zs=}WRU*@i50uz9!O2M<-{M?~8Ug(>D=^@q;!dHXFuYLWl?U(GFs~C6o$kW3|enI7d zz!yDE9$+gx*jAF{T7{`l9f$_c&ja14Z0f6zPJ`#En3X$+tpaPyu`TR-qT*qbUa}Ta zJ}f;CQlqTo&?xzhI6r^R07 zbggH4V(Q1pz>T;{TAo`eek&DBJph$H#%oFK0hz7af-+Yk0qa$Cp+>tfa)zS={ zk(-@p*INYnbs~$ zX|ja%kF@2Z)!@b**c%A2o)jf^elWpMoh{Kh@ z*t#;wG2XDM&MxQx>HV3eIL!q;RyKVSZes<&N@YepS&i^Sy89ogL?<%kxmbM#p0tJ( zie9<&Ja%3Xp+1+0>3zeMx5EkX5Sp_EMZGo3mHAh?ItaDu^ zBct>2t)g)OZuRDM#bJAE+sNuyMT?Oc25666f;cn+K06F9d4d`{Xi1IM5>r;A+a9E4 zeVOZVFNU3|#ijVZP!EGvKNy;!z?;W&yIM8Aq-WUEB2y~&Juh5RAcW-ek~Xd7D?`6H z86CP@hUFk;JleRn?qme4qd?KDFWmMIM^obKH z(W+z$v%Q!OG6d+&sn}~}Lm9Q-Ly%_>Ikt6S_59>RGyufdk@_Wa)LhecO5bxyfYgor z-pjRlUFa=Q!;EnR9ZK~8+$~#*kw(9k4#hB1gWCYT`0*#F2;QmRH`*nZshKpuF@yE| zq>6V9FEj_O?}#wwMt_mxlvu!aN%9*7Ru57OH20&W@}m8`Q@EOFKQz0z7I&oFB((Lr z^V(f~WK;az41uV3#L89j89O=o#a&+*y>~%C!dR5xCaPX`^R(%%#n;Xhroz$Kz$Gd0 z`|`G%##YdrVhvXhGIV~&gpj!K!yQR$R{<6RNX!k4sy=LZ{2@{wZO?kTYI|pgj+VGl z<-=wv{fy*hZdeO(9H+WrWhiNgu9he~+CpevX&sE-{z2n(L!G9FXL>W1Ss~E#c{+2a zokQ8+^Q=VGAQyA}FcnuFel4m_Qd|hB1xYM|Bvt^I++Q7`jB_5Bc{DO^@LSW{Y|BQ%S(bdfYd z>~A=f)5@fpG_QP_>`xj&1_BTw1IUUY;oMBHO5Qv5{8_siZ1NHjj9#{=$UVDSx32$UhcVKCC60d4i>(e^_ z!wj_Xv~2i+)ei5O-0Rk1U_;wU{zRvY zArmPc1OG;2r>Ur{nC@v;q^={{yj?&OII5<4*X(!Q@DIlgzQq7mkBl5I>eJ^qBcwP$<*vgT+a}T#tj?Oa~@FVpa0}0A|=ffc-85^?2}m zPi)v<(|Oq}Ld4&^_*HkqU1+49Y>o#z2AU2V0&6viqTm z$7*xewnKqr^qUfk7Q6fZd?*n zYHy0|ozC@z@YD3QovWjg#t%NcXaeJ_vp!QUt6{Jm=`+NzE_*v_1-JhHdLYhH$HBWB zUcD(={`Dt~M30YO)}^IdFzbyw%k0bV@@B^5r`v1?t29ARhL9Fv5oFPf&pOh6aNdnu z{nyv(KJljDJ1X*mC3;2%{d^wiKO(e0$9?akJqXcNa=6f!lt$gI$4FJ-aa+QumC=OV ztKj>fOJ41W>{!oypoMl;wct5&Ed@3wCLWdU0%`}|DtW^Q)c^x|&C!-NVoPFQPm|*l zE1yGJH#FO}V;Ddc#kW>mb#+={(k}a%fP0v%poFxSLGV7#djLYE;>}tET&jK!R4Z?9 zvDVngu4)`dmC5?4#Da{5ijoE2Ij$HJ7^j!>V_+qts$QoDCE244kH#eN@UBm*%G|wG z5`2A~?Vho);gZl1)~1?*aJpIhOLZnNZ&HO{ce|^iO)0<| 
z&iif@B*jRFuV#$xWtCGBy2^77u``b=|*f8gAzvnPTB*v*~m`8i9|%(t8KwLAHa(>BP|M?(q=|d+QMANMLhErw z(U(|_3Uv907l``78NU<}NzEP|p%TrkwH~J$iL%(YW^FlwL^RT%yN3S0md=Y`W1Lf+ zDbAv*HXnY`ZMkqz)pa?aZ5@l9WyB8xygeA{xyGOIoPQnF%RvFt&?Oa#;7*E@^7ULpPemPf*nL;@3T56sMh+ zjiz1AS8FG14|d~ zLQJ8UR=A}*6<^?EDqR3`ryiY?V6rYAh#oE8m`BMM4{F};yk@>(~BIT$6TNb^f|oOnHA0_ zbDMX0$gz6dTToW`9O39l>dIic;Wz8*M)yy~>yoXVL*MfO1{`Ln?YV740^nm|xT>2= zMopUPvvB@>otjDX8*F^_h7z?fFN@NDJ+>ZeFXzeh1N3b(BUB}Zjh}&b%>k%X9XAOg zf)HEDLb>7__=;xV!}-TqhlJb|-xCuluQl>(M)w+B5`Y`-YBc`w`5E-R8Y7JCYjF0( zaHE>wljy&h+t`7B%;}U#$tfrb0*{?4MPBYaflM~S$ICRuk zY)IX-&0D~MUZ`p9?TE(gLGKQHt81oQNQd`U9|59oHBRWORi2>Z?`a`pw=rYZz_wHr(L%Q_aUXd_%TFu)P zz=)ojv8Q=Lyt@JPKv~*~elEK3yYxFYsq7y&n!~2=0#2E7T)2S zwR$%7^513e{pa++X3xmVz!HI}Hr1IvOG<}!hO%3aU1iSFud$4rU@o`MaOeoKbOo8o zad$^JesN2h;w5)KPAs-BElj@kdXU(BO(z(fo9ZA6imD%sV${~IK6-nvS~d`$bIofHo$KFH5cGmO$j zTvyQOEB8PR2-&skTt(dOINwXiS07bXRr76Q7a@y>GX@yx#an#J$qiCWx zgwSzq9OKx+6LgTuYT?3CC8zFbFZAVmF*%3mEV%HsOQL&c=iH4(XJ z9Lx#8V^#o^{$KHCasAT&qvAftv+qeYxx*&lvbGgdm$)s90LCPX zsSu?Zch+c{lm!Hynt5`|N;voab&qh7OIAG|3FsJOr7m+AmrpS#Z$hH`!L>j)Bpvv{ zn*%2&!Yt`k?4+0F_C}qbu)JBX#4th77+?<8b{9JzGEcaJU`1{4=3l%=iN(&DBVcx= zNlg+tPbY`AeD358t8*>0qrY&8(FM$-o*C#X&!ZTl?ZHR^V`Si^KaKu5-MCqn)(dy3 z<%>io`wW|TWb8DId1f8j1;EVD`_n;?2WTllHC40(=q5+v;)1zloToZ!SS>#{FS1&n z5Aq}t9`rzsAmXGn$-^Tt3zR=DAR(uJ$ao?&`5MylMim*+?tCL(Mv%0DBVdHZcl3sn zWW_KwxM-9H0@h6G|Hx}`>zwnyDn4AcO0kx5V-VwPhVjR2Sl!){QR-8&fog?ntFH2q zLtVFI!79=p{?vCG8ZIn5Vz!^uNkHWD#R93zkop`z4D&{XD1hR~9iIvJ*wZoX} zv|CMohgB8wC2#rP4a}kz*f#R})cui6Y)`2TS#;BKrGhY1!)BPoe+OZjn?z4n`~kwG zf;xW(VLrJLecn$^CFaM5pllbZJjT7?GTbTgHm20=K1gei2UycxHv*WBo8v-ac^fHd z{1%ncQ;(&CaY$hV90;Tm|E4L6qYm+>H~-Z8UligT_Uy^!u?0`HUu*G5v|Z)te9pLO z>S{FuXa>xN>hF2Et-@a`WqN0Y#=cf2iFn$Zh^a2()&bma_$LbPGgv51-A5{RT=P;K z=iI+ii}krIT&@Js=dFp9i~KSv$uwB;|Fm}=P)&7R-p7U&5mc&x5PAth==Bj0OlT%Z z4`QJkIu|nX5N|an^|kJ7AxoG z-sD`)J?GrB_uc#d6VYX=%E}XP%50B(stlb#b0)BJnp$vhu#Z2Gx-3#E5<2mW=L~dS z4K`~u>#X&HZ>L~{6f&zXF0&N@g3Ulv6S6@faX(J_mzC15;DJBqTL1FT{b}P@A*Nrm zQhz_bQ{r+}hXmT!z)Ox^j=T2L#iFXvwd{f7#32;VLTj`-%1YJBHwF{h&(Fo#&l6Ce zM*66D%vEQXiy~tFi?Pk2&H`RclDfGh@F2=7;O+5(cU;y{^C|8aB=# zo70&MiQ_xC>Oor<663x7Z}C)MDS-wts90__U@JN;t}qNsAn(>Q2+C-$RRAT%F6+cs9&wHfIN= z*m$hY>8$)b?0rY>#A_EX8-+xbPDAhg+XDM738&?<6iK3N*U|EoiH`_yn_}L6VY~!3 zXLogx#GVwX{$9BgYO=}xIjgCwTxe2RA)jow`8 z^F4;xN^gE5^`!d}z8dMLJ5#G8u-zXe-3K$gX#Cc?E0@RB?;(!#y>Hwv=h)wbvf1RR&Zux#Bw01-=1*3{;W?1`#jL;hh|LhqH}O)SaTod zM3M3N?-V=yyLU%0V6P=rE(yOi9Sxcbk3b2FhPj;Z0VqRT(OD@miz>uC{xHsDM#&Jyp0g|n*umH@TG761|ir@4j#|;Za|>YDLSvb!TSAgL*5hXA9?vcuvc5k!xl$|be(B4j z?-Z?q?m9GN8?CD@nH?v*Wu~}GQT(`FWOg{;WhvJ)Z=$^nZ0bnS5<%`;@2VMnL;*dH zwdT7sx4-VNt00btPHdkTnn5-=$W3LuAfSq`Jlo;7+Qo7lZyAL?7?OJax?^(zMzWEX zt6_s=S(LQ+-Ap_(?K{Z|W!q(>D100*fch9&ylb?0$50jJ*IU!iQ?Jbs@g8PbqfLay zd2E*^;c*^N5A37AvpYO~zhpdypZ4-Z=4N|5KtSp2dt*%ZBuxxH0e$lam-4c4z++-q zH}QpAu2=V0`r)c&t6RIQi%ph7eea|kb!$SGM|Dpo!~=Lmh=XbA2`Fh`>N6-KS+q`p zSbgZv()~rovtdI>?c}qf3+D?e9;m8#k^uV!=+j`Zzxfr?Z@8>sp*Yg602YJzyCbVv_0y9=`gf6{g_>*S*pBnfbhY1T=oIPo@yE^6u^A^#OY} z?1gAJk(7Dr?8|!Ys}EyUbWYT#&2*H27N89ULxx+jSaoXs=YEm zDV(u>gBqzELoL~rHA&Xk_yF@N#5$RXE;*=TCUZZmzn>i&8Y(lH>0SZ7^5+N*Vv|9o42 zuUFR}bu<2`tlKd;y(gmCbYW&mR zNFN1S&z!6fkvl-WYaEh8yst+gO`i4_txbFcY6eB*zz=vUjksj?zf)A6S1@&5GFiML z!^+`0K#CX48ryO3;HMMCxuNL-_AdM8y>90r4BQQ`1}JjPEkNHhu;N~d6R{EvJNy}q z&z`Cr5Ew6O)ugKU?#T+h=Nsk0fjHf0uM+oRn2i&`MIoq-;gwVlht^l=3j61(B4(R{ zHJXR5s><%ADvDII*t*?07zFg$>x#^{1khv$L|TlzF0v)3`r9~}-%2uwCMRC}o;qUj zFFotz&ln!Est4fP@cN+IUhM+jUetgiBPr}yXXmR!44O`9h%nz?4>qnH;8J-O{KbGzqwXSeqi5Q~u=1&! 
z8HjutU1H<3Qu8W6;Ecl4HwIf_?g5Z@ZV}qvy3{v(l09#?+NjQQy-$;EC_7IazFdp|><8eq`z@f&Ut`UbuE zr^*?{hf=mHbbd$b?dxP0)0BO9D8L4 z-h(XViOAveDq$jht)iIeVxa0g=a7lK@4j?#eliRnY@B)d6&cO5!IJz-rBk%C0eMg#Po2(wp5 zUE+N{GD?9M8U;~+N(6cET>!Rx zrC?&u-xix8NQ}SPTD$a=K>5@wH`m^}a`oGhy_nFN;wj6Qa|s8fw;bxlcNt@Aic>Yg zC_XgDlUJh>)|N?TifIX>fP23k>D<2?yJFVJO2wu1LM{fE;LycS_A~*wYF63&MQV#7 zdhtpGvgnek*@TR*@}*9v3v|wrqTyV3;BF0T097@Jj~bmFSvy81i98R$BKygIL?ZF* z!+*C)|MG`_W6k!nBre(aF)zDNnRmpt_}1grum0OpeJ_SHzEi;bcgcAVR1`o;v* zY_5tsk_{rz1${GZJb^5tm4BF}5xK`m7D7z)SzN$Cq)Lo%aWBx+;T@Amy;R>6xPIb|*l~Avo?6zEr`%=A zP)p6|4gxqMq4fz9WQ5B9E#*v=2TBB3Cbw1Tf2BhTw=J6w9MiPbxF#%LAa z83@D*sW?dU4&}$c$B@N{6I85w_AZR7Q8zNPJ>D^4jeog5-z#x#i>9&%UAJ1X+Spsl zXCJ0Ioo9C&r;x~4fM@e0jhBgQwsb^v-;&cp7rVdtXr-YGx8~#?k~O&mHqPMAik((p zRSxz;LGUzj3{qXraxK*;vbU$86(`pTd z7_rLUn3H#Hj}{3+FoVQD*&inWiG*eW~AfY@4C8>DHddTj%=d`hcG>gmGx zwF%ufqO$18l zWnS4S)ouK64twaAX($i0ECBX>g(42QZG9=fsxiomA5_ipvZbqJ)ii;iigB~&_I3BMP&1Nglxg%19gJP` zY|Lrw^)#FEJeGz^j19G4tBkpPjd%T#O33}TFrm7sb0f8kUukFA-C-hFc2O}knzz-?QZ=d$xv_>6>!TUl#ASS;3Yj3;Sv zuk)%jP*}0u&PwV)sAW>9HMeSQ(y38TC2MKXPFT_(h#*ErdYlw~)pDjzxZR!Yj+e5t zx+uT3+sg)}Ve!DChe0==t#3vlgE;%u4rcbY6nr}~|AFcBZ-3RRy*T5uXX?Q1y2ZdT z;cWWXQ{D+>nnpMd;e_uL6_$z3sw<)*q(&3Exs+t#TKS_zmNJJXJBE7bTyUO`9ylqb zI3?IL)-Hfi{R2c}oRE{?Guh_RX{oud*xkDE;68=`RzRUn`>B>KC8r;*94((xt6EFn zGC|;HqmY{uyO-YKO)0|*+$9I(`uc`^Rqd~`%SF_qM`W_p?KYV}AQbPi42jtfkt7G! z1H~-&%YO?0B#`GG{#DX#<@~qRBqu;$iOQp{4EY?Z+-yjC?79gArst<(yry`Xppp$0 zi_R4&7|g+^;$2pb4x1jMsIN6i+E}>oE|V@TR9~$Pi5aLg6^}q}7Uw|xXwRj_86A9~ z?Qedn1dA{8s{*GLz8@V^6-hg6!$US(hX}pIzZ`z0H1(< zOTL7okhxO>ht}?W1d&T79|;{mwa}Ll!P6|7K`g~l>Qy~i)8Od3q-P0u8Fgq{piStS zbhBg0$QxXm)QGgHn{G#BA8}($Q|pv->f5d?IOBzwjcABQ)j~=&*elll>Iw_S@VS=Q ziWw{{OK5_Wx~w*tk2gWgF6)0eKHg%hU`nfVHTdEg*qA*ash}B5ctX$ON|BqNl5>4H zrpH+{=4DS$$^Cp-5*%%!GOVJ$DHCPk2(5NW=IPAPZFD5n!SQ?X(i?(Ji9qo)zIWXz z+v3jRcvT1=^|_KU356RI{SZR|tmCWa7hrle0GQV{HrdPd_|&%~L*)oBp_}PWp-EL> zn&RX%>|yg`ulb4(qM%oHA|(N~*2!N)No=%zg5hm@?->n!noRpPT=~>ZT(OwuaYlHm($j8%q>^Jl(zCb-0lX5JAo^9!W;)~SP?bWzTmoVw03gQ4jYP7l zKEn7u-4dKH50t}k*Tf^x=r99~G*L*uLyuq2D8F$8bZ-QSMEXyie6nv_IR#f{nLoJN zwLbj>s(xX&@wDFFwg_W_ZrwC$g}6r+YayrV&QQoB|1>85M!?QrAGws}WUeO0lQ$A54l{#R5N?({xvp+N%LBw{bXSk%30 zZhvl$%0&NsL!7~S7w=eWa2n@xayoQ^Z$t|OMAl2S`-P;J`PZ-5AAA3&m~wwpBwB8C zp$Sf$fSZ%dPQHG~e0wU&(o9Iu=&iIt(c(v|O}_vojTzUbWNnz-+veZo`PmXnso1q- zYM!45l67(8XvmMumw;~x8I#SzPM_KXjZ4P);fD(Db{^bCg$Eb8lL*4y#mbS@W4US? 
zD^$6>#E9pCY@*O){*1hd4o_@i+n)0&+hDid+cDF456U)|clwivit}~8tQLDY5%C2^ z(`ehJY(si3H;ioM{PP`YvPP-UR(Rlv#s?CS@oTEwI*BFD_BdaYw`CPDg=dk zMS82zHbEj|2L_$}10z!)Lwex02UCC(g%`F`inIW9aN{>Et1r02DiXb*e=bHs+{EwE zosAr1j@(ihooH&@^|MpVn%vS-R#luOJ;cZ@$*=kJ4i9{)eU&D3qZb>C6>AD2A}V9L zq-l#D7}U?JlH#|REciN-!8Ewr1>;0YvJQP{WGFFou>UA~Uv4X|rH=AduN!hHgO2JW z%NUVBO&PM`gYuX$(5*6#&Kn<6=P`2*xisQCn2X}rU=g@s_2opk&Bds30nc^P^1>pe zsy6+VFbxgOZZq#LIVD4PeyA+tQ$Ll~nO@|=T|Aa%iy9pAIROGhiHaV-Dwtt&rm5Jx zDA+{vcCYi)@hJrKB6CfK42C5t5KEL=c_K^OX#A0V&OQ|j7APGXvF_Q|h=8Xr$hz5O z=xPj)P-M*BGiv6q#f<+kfPa*JWkQXWvDP~X2iORqf)G*;qd{&l^V_5DA}@5k zWHSQu*u|zxcd|~#mL)#F)6b0}i;xqWI!|>OoA}O5wH9ucTmP_Ge@3`|Ng6kklqfIq zEuMVIpZTA|=$_PF6e??VK%3p))2|NrJ1Y}A^|PG-nGx&`(;oF$(}AMlF>@Ib26shB_1?&+f&>MKLkKf4YXBL&C2Iq z&FkskKuI{SFX)NWoz%1cFat4c24`%N-q!g}S#BdJHc#;!-#$PSfhkrNB9w?=&)Z)`@< z+iAz*qt`Ee*eu?sReqy$VlAki_3CEYCSC1a(yTIheF_dL#@y{8N-&>r3_k1>zVEbN z`FUzMblUD&S7V0mNZsp%x^ai3D=)atb3mlyTmVaEFA?rjvT(#sT(Ly3tXah{B`jCj zL!d0|!ICa7Mpe^IOy#MSrm)+kRP00|FFM3w2IK~weWrFU@7 z*eD===Gn!lmdFvbal|E-j{y>NLP^HfpLA3z9(SsEWFQVk>b65tRl0Mzo)HQinNLTz zCXEg_y+6PoUqmM@HPN-H8vEoG&8J1w8tx8dy}cX5b%(_llW(|+PzTs21U2OH@#%yw zv&P*Sm)X7Kea@*u&Rv!vE9$_ZRzqoQjy8!~5@Ee1v4mx|;a&E`P(N!7h!PlO#e?M#LH4?!Ql4R5)gS2^Xmozhy^j#_wGCD1_C zkr^61Kbgy&v|RU84Pthi5d-H~54tUprO->Z1hPQ)95%o?+sg<`sB_X}=FzF*-+~@46J$_yJwaXqBX4z{jAdNmK=D6`_A}0PKX~@AHV${=r z8g<*g=bvYwzer5t_?^@7Zzrn1_mlt5>5zO5T;F17yw`6Lc9GXvT-Wn$Lzo0scxr+t zb%0{^7nJ!SBX%}n_2-Da+^PXUQ%jl0L+LBQrM-4(2s+RA%xHa*MqgV153Gbi&6G{N zsI%!-XmY>$?N>={%o|S4;Y{t|Q1Rd>o2Qr8iP%F%jvO%jOC!xoW*U1WoVt25Aha)~_^dB~_+|e^HGC zq+&IKYH|xAzS6eBzf(M16x$cDen9g!*)DTtpl>+_*qt>7lBjs2wL}c>v!fPnV^gB- zC>RFphTDwQTs;rY#$luc=9}y-@4e6$w&hwLb9mn%zQugLcl5(&PjQWrO1^qo>v%w| z+T*@e2(wrz{(^!ZGN(Y2j)1faUlB%4^{!eQEmY83n}aXgT^@{Vzq`)GuV@C74Qrmi35d;f0JD(d>>lB>Bkdqh&Pk`?+CMzZ!E~-@wFw|dv zbhjw2@lvDmh#q555qc&g1_YIN*BDUdN1)ZdYAMHf8vA9tWGFGiWEw{_YvQV>)-4~T zaJP0=MAi$Ilr?4M1Q;S{95IQk>LRbFOR@Q>8bdi)SAgio2IxfEsC|Zq&!c5NFwQcG z-oCCry)?qMy()H!&c-Mzr(g^W=^3&nkdaYro{O)hnW~1Uf-;bVPDFb_?2P*POYkn| zf|!OXWwcCANpEc-5)8R-h0I>ML$pH(Q!}4sEE=zySZ*n_^CUx1Mg~E^`{ES`fZXAjkvQ$h z(7kG%u-my-$Y)lnYJT1g{t$iRAywPYfw`*on;Z&$ zzIYeKcjCDlj4ljbR4rqmr~3E%vG}y!MMG7o0=Lu{=Qj*XIlU@5aWeT4RX}wRY%m)W zIe2Y8Ipibm@gz(!eVtgO(;#utI_9ozn}bC7bRl&bWx?y=%Q9AA^Bs{X_qxlucg5+U zLh8;c%(W++SON(<5$4XUw`KY!#M11PnzLY<@?C0!IMJ=G8&6(kf%7me{CWHVyrneb zahZK=v1XM3`+ionjXj>>+1VNCm;XFd{Ibscg&<8ftpCy1<$S@;g6ed=l?#0jzM+A) z%{a=sByH;sJj?6yW*ti@6Y~o%LS4a_2ppM|^5yYQv_~3U3fl6Y(2cA=(H_ZC-2Vmq z(EqFKUp3<|PoaK>ar|I2ojNg+-%Voma;R&AIW%OUE=o+1 z-RHB0v@~PSvVm5&4viyP6cJGm>d5yusht5-^sv>%Qqq#TZK;GNy9S3w-4rQ>OUN&k z?n{lj=mzjuc@OVaHd-|y;(DaZe67OUUBEolP_-vtk+GpvVcnr#=vi-wk1`~-$~m6X zJ!mr7$lEK7MeTf3JnOh<&5X8}Ye99SHvRLyQj^+*nWFLH;_R16PE)*HFk|PG4z!z4 zfO0r2vT2W^xqrduj=u4dSiI{lQ(S}^j{A-E1#ma|R;*o0(p&BWo*4uua#54d0T745 zsKP_+u#P6$<%#l)NeHHjb$}irdFs|?SY8R6Tc|AvLXERoznmG0$1=(8(Zr=gkD0$- zoApbv3E?uQslb1HzXE-j+rm9MYT{{uRW2zE%#yCc575q{M+--ua}7Pqjd}I?s1@Yf zd$?$kf2!24SL%E%Wzi74Mxvjp23w^u<09ZYGOyTQwYsG0cR`eMH~hjgmQKBZ!73?+ zX9Bo-;8f@MVl_+Os`I{8MNsL)0N)rU$%>1r6xkk}>`nXQE_q>2K+3PKsIfVyDl@}f zk$RS|DkF+LFPh#))6@8Zu@%g6HyA95UAK=U^nGG~YIQ?+=_@Jy`m|1`h1Q$8el9se z^wnH5(JZTL7=I*uDHmo80*Gouv zz;@?xSVy~rfB}phaA=q`v)OnzDdA?=Z6|COYx3tvZ*i)`+`dmP!Iiqt!%+q9my!Xc z+frLvqU!3lN-&f@P#v<5K_-+88})65-vBVy603@@=?_EOz1!N1hH|+=Ki>TDtVxxe zrl_$;!}!%JP*(np*Wpp!@ksZx=dlDs32FBcKGC;`Zib)>4-LT}Wr3v?jgitS4yZu& zDl{9*;4Gh!&o|#>YguccW#ek^L`eD4JL3+c&A(1Hm#`(aM?8r5hK-}>Uu#GiqBl6& zfSRS(6_FfY$-gmx%0OWry+KKr}DHV(Y?s)|LY^hZbNtn0;c{d04m?=EEGPYY4E~aF-!Z)^i6b zdxthFc;8Y;%s$4$3GuMTc+%_M+o~#)fc}25n28%+B&&vTFG)(Ss3YBQ5Yitg3 
zO3=qoYC(Ui%+d8Jly=E3{E9QTTjP?R`9gy&j90*1$je<=)cy!(mo}zfEyG^CZY|hY z4)@~!%#hbBv?I2Ly{)b+u!dH*N-zO0k#h#((n;tj<})w#Q(u(}cvv-2vcMH4CO%KP zT8Xm?nE3IF(a!o+i9f`F8F7Fmu5VhA6Z=Qr%X8R63(e?L`MD3?v#e^@v)*}?VUeN_ zGZepOT5uVYJUm4s#I1?N#zCuZ@Z2PBviJjCKQk3~m5h6#SuP6>sMM%97xZUF-AI@+ z@zZvN=JaZa_rzc~F9EITr+juI49i!m?z%Rq0p-Ulj51cR@oDWyt` z<;{rNlLhYfzEXm_xKnc4(~0Z$=rTZeV}4#7oxI@Kdr^1M^LcC%H>}bnH085~s5i}5d4i)(jOHFaXl5_&~R+(m5wVrT5Z#mKv;yW({Y$u^&i zqSTMWxXdoJUYCqCdqmes&*XsqV(vQMRXHfMF(aKo&!3)`qm_JZ=0j{yL6~MHW+)V+ z$%`*(w<`QPkpE&JeQZ?PmhFY|CdXCQv+RO+uE11W^h}V1hSU}c5`|_**3fDfa`kjQ z66cQ(p;~40wloH@z|05b>>97)s5%22gd(Wj=@aQcuH@+Q-Y@N_G zh0h0X_l5ItoF8$Vy{jsE?V07k9y_OK{b?sGM7mHv5a&(y2;nBV1sRg&nraD98^4A zm3?SN;)DwX?i5k7$9@_~227Z=1OI5r{|dj|AHVh=Fa7cag$%v@=|2AW`tMnHtZ3&& z#)SnJ(^nnttS!ZTPNWMd1u2dFwgP4Xb&kWliwzxEp~k21vnEg>l>|P4Rj#dj+eW*) zY4kZbjFo#_v2D?!w5UBfztFzhye2hiVZ^T&0#ij=Z(hN`zTnz+ZwHu>!dto_&HnTf z7pBxhy^=Y0yR5n;;J0PUUJv=)@YYG>HWaCvMGV5Q{gs9~ zb5WPWL~_(aXZHBRLwp5FxgE2ywEcI%$(~2%JG$46ibAeptKIBC{1uuRN_o1~BBL0q zX_^dI6pSmEQY#hjMhi>8fea#z zD-^dztna}{4o`ENY}<8(l1{0V&Z{MxQC8uTG75E1hgc;O@85ld)aYoP0+)$dkF0Ox z1{Ol%UguzG_K=d>cAaN$$S46zU)2G}`+0ADt*!yX&qelozl!H((@{5+OS*f7oeWMYDk>ZfrR?R09wdkNFtqx;R!HifOrPVz9E+RDn%-M$`XPE=In4}rD zPX)qF4=&CJ=r2sl+wF{veru-8Q}&|_vBXc(q}hSL=Au@}K%Y%f(Y-T2N8&+h-nIdE zx1UgdEE9`4Q~mdT`%ZB|kp-va zztOLFm`QQ=8}u7#-}O6%JBplAaO#lv(A`h^J4KV&cZ$u&6m0Wqi`A=xN0Vf>QUdko z>@SnsE61ea?-YF$6#hYM(S!MlUy-=udx~u_-PXgngK9F9ikITV|C;*b`HrqltIN*H zXv Date: Mon, 2 Dec 2024 14:37:48 -0800 Subject: [PATCH 94/99] minor changes in app.py --- examples/DocQA/app.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/DocQA/app.py b/examples/DocQA/app.py index 7d83c058c..19b44ae2a 100644 --- a/examples/DocQA/app.py +++ b/examples/DocQA/app.py @@ -216,11 +216,11 @@ def format_context(self, log_str: str) -> str: flags=re.IGNORECASE ) return f""" -

-        <div class="context-block">
-            <div class="context-title">📄 Retrieved Context</div>
-            <div class="context-content">{context}</div>
-        </div>
-        """
+<div class="context-block">
+    <div class="context-title">📄 Retrieved Context</div>
+    <div class="context-content">{context}</div>
+</div>
+""" return "" @@ -295,5 +295,5 @@ def clear_chat(): # Create and launch the Gradio interface interface = create_gradio_interface() interface.launch( - server_name=HOST, server_port=8888, share=True, debug=True + server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True ) From 54a1919c65475c893d86d0af76257cb843d0b165 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 2 Dec 2024 15:44:36 -0800 Subject: [PATCH 95/99] change app.py, change docker compose to use ollama 0.56, updated readme --- .gitignore | 7 ++++++- examples/DocQA/README.md | 17 +++++++++++------ examples/DocQA/app.py | 15 ++++++++------- examples/DocQA/docker/compose.yaml | 2 +- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index a93fa8f91..bb94808bf 100644 --- a/.gitignore +++ b/.gitignore @@ -172,7 +172,12 @@ Package.resolved **/faiss_index.index **/faiss_index.index.json **/chroma + + +# DocQA /examples/DocQA/data/input /examples/DocQA/data/output **/.gradio -**/RAG_service.json \ No newline at end of file +**/RAG_service.json +examples/DocQA/example_data/chroma.sqlite3 +examples/DocQA/example_data/**/*.bin diff --git a/examples/DocQA/README.md b/examples/DocQA/README.md index 601dcb275..fe3ca8145 100644 --- a/examples/DocQA/README.md +++ b/examples/DocQA/README.md @@ -16,25 +16,30 @@ For Mac and Windows users, you need to start the Docker app manually after insta The above is the workflow diagram for this RAG app. To run the app, please read the following instructions: -1. We have main config `RAG_service.json` inside of the docker folder, please change `model_name` and `document_path` accordingly, for example: +1. Copy the template configuration file `RAG_service-template.json` to create your own `RAG_service.json` inside the docker folder: + +```bash +cd docker +cp RAG_service-template.json RAG_service.json +``` + +2. Then update `model_name` and `document_path` accordingly in your `RAG_service.json`, for example: ```yaml { "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "${HOME}/work/llama-stack-apps/examples/DocQA/example_data" + "document_path": "${HOME}/work/llama-stack-apps/examples/DocQA/example_data", } ``` -2. Run following code: +3. Run following code: ```bash cd docker bash run_RAG.sh ``` -3. Once the service is ready, open the link http://localhost:7861/ in your browser to chat with your documents. - - +4. Once the service is ready, open the link http://localhost:7861/ in your browser to chat with your documents. 
### Overview of how the RAG app works: diff --git a/examples/DocQA/app.py b/examples/DocQA/app.py index 19b44ae2a..2d076fc8c 100644 --- a/examples/DocQA/app.py +++ b/examples/DocQA/app.py @@ -26,9 +26,9 @@ PORT = int(os.getenv("PORT", "5000")) GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", "7861")) USE_GPU = os.getenv("USE_GPU", False) -MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.2-3B-Instruct") +MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.2-1B-Instruct") # if use_gpu, then the documents will be processed to output folder -DOCS_DIR = "/root/rag_data/output" if USE_GPU else "/root/rag_data/" +DOCS_DIR = "/root/rag_data/output" if USE_GPU else "/root/rag_data/" CUSTOM_CSS = """ .context-block { @@ -80,7 +80,7 @@ def __init__(self, host: str, port: int, docs_dir: str): self.client = LlamaStackClient(base_url=f"http://{host}:{port}") self.agent = None self.session_id = None - self.memory_bank_id = "test_bank_235" + self.memory_bank_id = "docqa_bank" async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" @@ -173,7 +173,7 @@ def chat_stream( self, message: str, history: List[List[str]] ) -> Generator[List[List[str]], None, None]: """Stream chat responses token by token with proper history handling.""" - + history = history or [] history.append([message, ""]) @@ -187,7 +187,7 @@ def chat_stream( current_response = "" context_shown = False - + for log in EventLogger().log(response): log.print() if hasattr(log, "content"): @@ -198,14 +198,15 @@ def chat_stream( context_shown = True else: current_response += log.content - + history[-1][1] = current_response yield history.copy() def format_context(self, log_str: str) -> str: """Format the context block with custom styling.""" # Extract context and clean up the markers - context_match = re.search(r"Retrieved context from banks:.*?\n(.*?===.*?===.*?)(?=\n>|$)", log_str, re.DOTALL) + context_match = re.search( + r"Retrieved context from banks:.*?\n(.*?===.*?===.*?)(?=\n>|$)", log_str, re.DOTALL) if context_match: context = context_match.group(1).strip() # Remove the marker lines diff --git a/examples/DocQA/docker/compose.yaml b/examples/DocQA/docker/compose.yaml index bded3c99f..9a8748873 100644 --- a/examples/DocQA/docker/compose.yaml +++ b/examples/DocQA/docker/compose.yaml @@ -25,7 +25,7 @@ services: restart: always entrypoint: ["bash", "/root/ollama_start.sh"] llamastack: - image: llamastack/distribution-ollama:0.0.54 + image: llamastack/distribution-ollama:0.0.56 network_mode: "host" tty: true volumes: From 4dde367a8e0542643edd18068157b964b4a227bd Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 2 Dec 2024 17:16:57 -0800 Subject: [PATCH 96/99] added model table --- examples/DocQA/docker/llama_stack_run.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/DocQA/docker/llama_stack_run.yaml b/examples/DocQA/docker/llama_stack_run.yaml index 1fa0f5b6f..3d67c8200 100644 --- a/examples/DocQA/docker/llama_stack_run.yaml +++ b/examples/DocQA/docker/llama_stack_run.yaml @@ -42,7 +42,11 @@ providers: provider_type: inline::meta-reference config: {} metadata_store: null -models: [] +models: +- metadata: {} + model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: null + provider_model_id: llama3.2:1b-instruct-fp16 shields: [] memory_banks: [] datasets: [] From c95d48e0ceac4fb1c2ee9a5e67e8c1762fc94da6 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 2 Dec 2024 17:34:26 -0800 Subject: [PATCH 97/99] add GPU compose.yaml --- 
.../docker/{compose.yaml => compose-cpu.yaml} | 0 examples/DocQA/docker/compose-gpu.yaml | 56 +++++++++++++++++++ examples/DocQA/docker/llama_stack_start.sh | 4 +- examples/DocQA/docker/run_RAG.sh | 10 +++- 4 files changed, 67 insertions(+), 3 deletions(-) rename examples/DocQA/docker/{compose.yaml => compose-cpu.yaml} (100%) create mode 100644 examples/DocQA/docker/compose-gpu.yaml diff --git a/examples/DocQA/docker/compose.yaml b/examples/DocQA/docker/compose-cpu.yaml similarity index 100% rename from examples/DocQA/docker/compose.yaml rename to examples/DocQA/docker/compose-cpu.yaml diff --git a/examples/DocQA/docker/compose-gpu.yaml b/examples/DocQA/docker/compose-gpu.yaml new file mode 100644 index 000000000..26d9d9e97 --- /dev/null +++ b/examples/DocQA/docker/compose-gpu.yaml @@ -0,0 +1,56 @@ +services: + chromadb: + image: chromadb/chroma:latest + network_mode: "host" + volumes: + - ${DOC_PATH}:/root/rag_data/ + - ./chroma_start.sh:/root/chroma_start.sh + ports: + - "6000:6000" + entrypoint: bash /root/chroma_start.sh + ollama: + image: ollama/ollama:latest + network_mode: "host" + environment: + - MODEL_NAME=${MODEL_NAME} + - OLLAMA_KEEP_ALIVE=24h + - OLLAMA_HOST=127.0.0.1:14343 + volumes: + - ollama:/root/.ollama # this solution synchronizes with the docker volume and loads the model rocket fast + - ./ollama_start.sh:/root/ollama_start.sh + - ./RAG_service.json:/root/RAG_service.json + ports: + - "14343:14343" + tty: true + deploy: + resources: # enable GPUs + reservations: + devices: + - driver: nvidia + capabilities: [gpu] + restart: always + entrypoint: ["bash", "/root/ollama_start.sh"] + llamastack: + image: llamastack/distribution-ollama:0.0.56 + network_mode: "host" + tty: true + volumes: + - ~/.llama:/root/.llama + # Link to ollama run.yaml file + - ./llama_stack_run.yaml:/root/my-run.yaml + - ../../DocQA:/root/DocQA + - ../../DocQA/scripts:/root/DocQA/scripts + - ${DOC_PATH}:/root/rag_data/ + - ./llama_stack_start.sh:/root/llama_stack_start.sh + ports: + - "5000:5000" # for llama-stack + - "7860:7860" # for UI + # Hack: wait for ollama server to start before starting docker + entrypoint: ["bash", "/root/llama_stack_start.sh"] + #entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml" + deploy: + restart_policy: + condition: "no" + +volumes: + ollama: diff --git a/examples/DocQA/docker/llama_stack_start.sh b/examples/DocQA/docker/llama_stack_start.sh index 114688abb..2544f7068 100644 --- a/examples/DocQA/docker/llama_stack_start.sh +++ b/examples/DocQA/docker/llama_stack_start.sh @@ -6,16 +6,16 @@ echo "-----starting to llama-stack docker now---------" # Install required packages pip install gradio -pip install -U llama-stack # Check if GPU is enabled and run ingest files script accordingly if [ "$USE_GPU" = true ]; then + echo "Using GPU to ingest files" pip install docling python /root/DocQA/scripts/ingest_files.py --input_dir /root/rag_data/ fi # Print a message indicating the start of llama-stack server echo "starting the llama-stack server" # Run llama-stack server with specified config and disable ipv6 -python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6 & +python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6& # Sleep for 30 seconds sleep 30 # Print a message indicating the start of RAG app diff --git a/examples/DocQA/docker/run_RAG.sh b/examples/DocQA/docker/run_RAG.sh index 6e68b528e..6495cd36e 100755 --- 
a/examples/DocQA/docker/run_RAG.sh +++ b/examples/DocQA/docker/run_RAG.sh @@ -7,4 +7,12 @@ echo "PORT=$(jq -r '.port' ./RAG_service.json)" >> .env echo "CHROMA_PORT=$(jq -r '.chroma_port' ./RAG_service.json)" >> .env echo "GRADIO_SERVER_PORT=$(jq -r '.gradio_server_port' ./RAG_service.json)" >> .env echo "USE_GPU=$(jq -r '.use_gpu' ./RAG_service.json)" >> .env -docker compose up +# Run GPU version of ollama docker +if [ "$(jq -r '.use_gpu' ./RAG_service.json)" = true ]; then + echo "Running with GPU" + docker compose --file compose-gpu.yaml up +else +# Run CPU version of ollama docker + echo "Running with CPU only" + docker compose --file compose-cpu.yaml up +fi From 860febe78cca81d4bc279bbec8a20407c0639d05 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 3 Dec 2024 14:48:34 -0800 Subject: [PATCH 98/99] changed var naming and chromaDB docker --- examples/DocQA/README.md | 22 ++++++++------- examples/DocQA/app.py | 27 ++++++++++--------- .../DocQA/docker/RAG_service-template.json | 9 ------- examples/DocQA/docker/chroma_start.sh | 3 --- examples/DocQA/docker/compose-cpu.yaml | 17 +++++++----- examples/DocQA/docker/compose-gpu.yaml | 17 +++++++----- .../{.env.template => docqa_env_template} | 5 ++-- examples/DocQA/docker/llama_stack_start.sh | 12 ++------- examples/DocQA/docker/ollama_start.sh | 1 - examples/DocQA/docker/run_RAG.sh | 14 +++------- 10 files changed, 55 insertions(+), 72 deletions(-) delete mode 100644 examples/DocQA/docker/RAG_service-template.json delete mode 100644 examples/DocQA/docker/chroma_start.sh rename examples/DocQA/docker/{.env.template => docqa_env_template} (63%) diff --git a/examples/DocQA/README.md b/examples/DocQA/README.md index fe3ca8145..7b8cfee31 100644 --- a/examples/DocQA/README.md +++ b/examples/DocQA/README.md @@ -16,26 +16,28 @@ For Mac and Windows users, you need to start the Docker app manually after insta The above is the workflow diagram for this RAG app. To run the app, please read the following instructions: -1. Copy the template configuration file `RAG_service-template.json` to create your own `RAG_service.json` inside the docker folder: +1. Copy the template configuration file `docqa_env_template` to create your own `docqv_env` inside the docker folder: ```bash cd docker -cp RAG_service-template.json RAG_service.json +cp docqa_env_template docqv_env ``` -2. Then update `model_name` and `document_path` accordingly in your `RAG_service.json`, for example: +2. Then update `model_name` and `document_path` accordingly in your `docqv_env`, for example: -```yaml -{ - "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "${HOME}/work/llama-stack-apps/examples/DocQA/example_data", -} +``` +DOC_PATH=/path/to/your/llama-stack-apps/examples/DocQA/example_data +MODEL_NAME=llama3.2:1b-instruct-fp16 +HOST=localhost +LLAMA_STACK_PORT=5000 +CHROMA_PORT=6000 +GRADIO_SERVER_PORT=7860 +USE_GPU_FOR_DOC_INGESTION=false ``` -3. Run following code: +3. 
In the `docker` folder, run following code: ```bash -cd docker bash run_RAG.sh ``` diff --git a/examples/DocQA/app.py b/examples/DocQA/app.py index 2d076fc8c..3eff8fa52 100644 --- a/examples/DocQA/app.py +++ b/examples/DocQA/app.py @@ -1,6 +1,7 @@ import asyncio import json import os +import re import uuid from queue import Queue from threading import Thread @@ -16,19 +17,18 @@ from llama_stack_client.lib.agents.event_logger import EventLogger from llama_stack_client.types.agent_create_params import AgentConfig from llama_stack_client.types.memory_insert_params import Document -import re # Load environment variables load_dotenv() HOST = os.getenv("HOST", "localhost") -PORT = int(os.getenv("PORT", "5000")) +LLAMA_STACK_PORT = int(os.getenv("LLAMA_STACK_PORT", "5000")) GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", "7861")) -USE_GPU = os.getenv("USE_GPU", False) +USE_GPU_FOR_DOC_INGESTION = os.getenv("USE_GPU_FOR_DOC_INGESTION", False) MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.2-1B-Instruct") -# if use_gpu, then the documents will be processed to output folder -DOCS_DIR = "/root/rag_data/output" if USE_GPU else "/root/rag_data/" +# if USE_GPU_FOR_DOC_INGESTION, then the documents will be processed to output folder +DOCS_DIR = "/root/rag_data/output" if USE_GPU_FOR_DOC_INGESTION else "/root/rag_data/" CUSTOM_CSS = """ .context-block { @@ -98,8 +98,7 @@ async def setup_memory_bank(self): if any(bank.identifier == self.memory_bank_id for bank in memory_banks): print(f"Memory bank '{self.memory_bank_id}' exists.") else: - print( - f"Memory bank '{self.memory_bank_id}' does not exist. Creating...") + print(f"Memory bank '{self.memory_bank_id}' does not exist. Creating...") self.client.memory_banks.register( memory_bank_id=self.memory_bank_id, params={ @@ -150,8 +149,7 @@ async def initialize_agent(self): agent_config = AgentConfig( model=model_name, instructions="You are a helpful assistant that can answer questions based on provided documents. Return your answer short and concise, less than 50 words.", - sampling_params={"strategy": "greedy", - "temperature": 1.0, "top_p": 0.9}, + sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, tools=[ { "type": "memory", @@ -206,7 +204,10 @@ def format_context(self, log_str: str) -> str: """Format the context block with custom styling.""" # Extract context and clean up the markers context_match = re.search( - r"Retrieved context from banks:.*?\n(.*?===.*?===.*?)(?=\n>|$)", log_str, re.DOTALL) + r"Retrieved context from banks:.*?\n(.*?===.*?===.*?)(?=\n>|$)", + log_str, + re.DOTALL, + ) if context_match: context = context_match.group(1).strip() # Remove the marker lines @@ -214,7 +215,7 @@ def format_context(self, log_str: str) -> str: r"====\s*Here are the retrieved documents for relevant context:\s*===\s*START-RETRIEVED-CONTEXT\s*===\s*", "", context, - flags=re.IGNORECASE + flags=re.IGNORECASE, ) return f"""
@@ -227,7 +228,7 @@ def format_context(self, log_str: str) -> str: def create_gradio_interface( host: str = HOST, - port: int = PORT, + port: int = LLAMA_STACK_PORT, docs_dir: str = DOCS_DIR, ): chat_interface = LlamaChatInterface(host, port, docs_dir) @@ -240,7 +241,7 @@ def create_gradio_interface( show_label=False, height=400, container=True, - render_markdown=True + render_markdown=True, ) msg = gr.Textbox( label="Message", diff --git a/examples/DocQA/docker/RAG_service-template.json b/examples/DocQA/docker/RAG_service-template.json deleted file mode 100644 index 64032030d..000000000 --- a/examples/DocQA/docker/RAG_service-template.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "/your/document_path/to/llama-stack-apps/examples/DocQA/example_data", - "host": "localhost", - "port": 5000, - "chroma_port": 6000, - "gradio_server_port": 7860, - "use_gpu": false -} diff --git a/examples/DocQA/docker/chroma_start.sh b/examples/DocQA/docker/chroma_start.sh deleted file mode 100644 index 69279ad17..000000000 --- a/examples/DocQA/docker/chroma_start.sh +++ /dev/null @@ -1,3 +0,0 @@ -pip install chromadb -chroma run --host localhost --port 6000 --path /root/rag_data/& -while :; do sleep 2073600; done diff --git a/examples/DocQA/docker/compose-cpu.yaml b/examples/DocQA/docker/compose-cpu.yaml index 9a8748873..9b9ea0bad 100644 --- a/examples/DocQA/docker/compose-cpu.yaml +++ b/examples/DocQA/docker/compose-cpu.yaml @@ -1,24 +1,27 @@ services: chromadb: - image: chromadb/chroma:latest + image: ghcr.io/chroma-core/chroma:latest + env_file: "docqa_env" + environment: + - IS_PERSISTENT=TRUE + - CHROMA_HOST_PORT=${CHROMA_PORT} network_mode: "host" volumes: - - ${DOC_PATH}:/root/rag_data/ - - ./chroma_start.sh:/root/chroma_start.sh + - ${DOC_PATH}:/chroma/chroma + tty: true ports: - "6000:6000" - entrypoint: bash /root/chroma_start.sh ollama: image: ollama/ollama:latest + env_file: "docqa_env" network_mode: "host" environment: - MODEL_NAME=${MODEL_NAME} - - OLLAMA_KEEP_ALIVE=24h + - OLLAMA_KEEP_ALIVE=99h - OLLAMA_HOST=127.0.0.1:14343 volumes: - ollama:/root/.ollama # this solution synchronizes with the docker volume and loads the model rocket fast - ./ollama_start.sh:/root/ollama_start.sh - - ./RAG_service.json:/root/RAG_service.json ports: - "14343:14343" tty: true @@ -26,6 +29,7 @@ services: entrypoint: ["bash", "/root/ollama_start.sh"] llamastack: image: llamastack/distribution-ollama:0.0.56 + env_file: "docqa_env" network_mode: "host" tty: true volumes: @@ -33,7 +37,6 @@ services: # Link to ollama run.yaml file - ./llama_stack_run.yaml:/root/my-run.yaml - ../../DocQA:/root/DocQA - - ../../DocQA/scripts:/root/DocQA/scripts - ${DOC_PATH}:/root/rag_data/ - ./llama_stack_start.sh:/root/llama_stack_start.sh ports: diff --git a/examples/DocQA/docker/compose-gpu.yaml b/examples/DocQA/docker/compose-gpu.yaml index 26d9d9e97..f7a5b6fde 100644 --- a/examples/DocQA/docker/compose-gpu.yaml +++ b/examples/DocQA/docker/compose-gpu.yaml @@ -1,24 +1,27 @@ services: chromadb: - image: chromadb/chroma:latest + image: ghcr.io/chroma-core/chroma:latest + env_file: "docqa_env" + environment: + - IS_PERSISTENT=TRUE + - CHROMA_HOST_PORT=${CHROMA_PORT} network_mode: "host" volumes: - - ${DOC_PATH}:/root/rag_data/ - - ./chroma_start.sh:/root/chroma_start.sh + - ${DOC_PATH}:/chroma/chroma + tty: true ports: - "6000:6000" - entrypoint: bash /root/chroma_start.sh ollama: image: ollama/ollama:latest + env_file: "docqa_env" network_mode: "host" environment: - 
MODEL_NAME=${MODEL_NAME} - - OLLAMA_KEEP_ALIVE=24h + - OLLAMA_KEEP_ALIVE=99h - OLLAMA_HOST=127.0.0.1:14343 volumes: - ollama:/root/.ollama # this solution synchronizes with the docker volume and loads the model rocket fast - ./ollama_start.sh:/root/ollama_start.sh - - ./RAG_service.json:/root/RAG_service.json ports: - "14343:14343" tty: true @@ -32,6 +35,7 @@ services: entrypoint: ["bash", "/root/ollama_start.sh"] llamastack: image: llamastack/distribution-ollama:0.0.56 + env_file: "docqa_env" network_mode: "host" tty: true volumes: @@ -39,7 +43,6 @@ services: # Link to ollama run.yaml file - ./llama_stack_run.yaml:/root/my-run.yaml - ../../DocQA:/root/DocQA - - ../../DocQA/scripts:/root/DocQA/scripts - ${DOC_PATH}:/root/rag_data/ - ./llama_stack_start.sh:/root/llama_stack_start.sh ports: diff --git a/examples/DocQA/docker/.env.template b/examples/DocQA/docker/docqa_env_template similarity index 63% rename from examples/DocQA/docker/.env.template rename to examples/DocQA/docker/docqa_env_template index dec896449..2a45408aa 100644 --- a/examples/DocQA/docker/.env.template +++ b/examples/DocQA/docker/docqa_env_template @@ -1,6 +1,7 @@ DOC_PATH=/path/to/your/llama-stack-apps/examples/DocQA/example_data MODEL_NAME=llama3.2:1b-instruct-fp16 HOST=localhost -PORT=5000 +LLAMA_STACK_PORT=5000 CHROMA_PORT=6000 -GRADIO_SERVER_PORT=7860 \ No newline at end of file +GRADIO_SERVER_PORT=7860 +USE_GPU_FOR_DOC_INGESTION=false diff --git a/examples/DocQA/docker/llama_stack_start.sh b/examples/DocQA/docker/llama_stack_start.sh index 2544f7068..a7af16be7 100644 --- a/examples/DocQA/docker/llama_stack_start.sh +++ b/examples/DocQA/docker/llama_stack_start.sh @@ -1,24 +1,16 @@ #!/bin/bash -# Sleep for 45 seconds + sleep 45 -# Print a message indicating the start of llama-stack docker echo "-----starting to llama-stack docker now---------" -# Install required packages pip install gradio -# Check if GPU is enabled and run ingest files script accordingly -if [ "$USE_GPU" = true ]; then +if [ "$USE_GPU_FOR_DOC_INGESTION" = true ]; then echo "Using GPU to ingest files" pip install docling python /root/DocQA/scripts/ingest_files.py --input_dir /root/rag_data/ fi -# Print a message indicating the start of llama-stack server echo "starting the llama-stack server" -# Run llama-stack server with specified config and disable ipv6 python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6& -# Sleep for 30 seconds sleep 30 -# Print a message indicating the start of RAG app echo "---------running the RAG app--------------" -# Run RAG app python /root/DocQA/app.py diff --git a/examples/DocQA/docker/ollama_start.sh b/examples/DocQA/docker/ollama_start.sh index d16f30959..d38a90c79 100644 --- a/examples/DocQA/docker/ollama_start.sh +++ b/examples/DocQA/docker/ollama_start.sh @@ -4,4 +4,3 @@ OLLAMA_HOST=127.0.0.1:14343 /usr/bin/ollama serve & echo "Running ollama model: $MODEL_NAME" sleep 5 OLLAMA_HOST=127.0.0.1:14343 /usr/bin/ollama run $MODEL_NAME -while :; do sleep 2073600; done diff --git a/examples/DocQA/docker/run_RAG.sh b/examples/DocQA/docker/run_RAG.sh index 6495cd36e..3380fbe6a 100755 --- a/examples/DocQA/docker/run_RAG.sh +++ b/examples/DocQA/docker/run_RAG.sh @@ -1,14 +1,8 @@ -#!/bin/bash - -echo "DOC_PATH=$(jq -r '.document_path' ./RAG_service.json)" > .env -echo "MODEL_NAME=$(jq -r '.model_name' ./RAG_service.json)" >> .env -echo "HOST=$(jq -r '.host' ./RAG_service.json)" >> .env -echo "PORT=$(jq -r '.port' ./RAG_service.json)" >> .env -echo "CHROMA_PORT=$(jq -r '.chroma_port' 
./RAG_service.json)" >> .env -echo "GRADIO_SERVER_PORT=$(jq -r '.gradio_server_port' ./RAG_service.json)" >> .env -echo "USE_GPU=$(jq -r '.use_gpu' ./RAG_service.json)" >> .env +set -a +source docqa_env +echo $DOC_PATH # Run GPU version of ollama docker -if [ "$(jq -r '.use_gpu' ./RAG_service.json)" = true ]; then +if [ "$USE_GPU_FOR_DOC_INGESTION" = true ]; then echo "Running with GPU" docker compose --file compose-gpu.yaml up else From 8ad89ada93ae25bb7195ec11e01f6d434ea91f06 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Wed, 4 Dec 2024 09:59:04 -0800 Subject: [PATCH 99/99] update requirement.txt --- examples/DocQA/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/DocQA/requirements.txt b/examples/DocQA/requirements.txt index dc847762f..d680aa671 100644 --- a/examples/DocQA/requirements.txt +++ b/examples/DocQA/requirements.txt @@ -1,5 +1,4 @@ -llama-stack==0.0.55 -llama-stack-client==0.0.55 +llama-stack==0.0.57 lxml>=4.9.0,<5.0.0 xmltodict>=0.12.0,<1.0.0 pycryptodomex>=3.8.2,<4.0.0
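
A quick way to sanity-check the stack that `bash run_RAG.sh` brings up, before opening the Gradio UI, is to exercise the same client calls that the `app.py` hunks above use. The sketch below is illustrative only and not part of the patch series: the host and port defaults are assumptions copied from `docqa_env_template`, the `docqa_bank` id mirrors the bank registered in `app.py`, and `memory_banks.list()` is assumed to be available in the pinned `llama-stack-client` release.

```python
# Minimal connectivity check for the DocQA stack (illustrative sketch, not part of the patches).
# Assumptions: HOST/LLAMA_STACK_PORT defaults mirror docqa_env_template, and the pinned
# llama-stack-client release exposes memory_banks.list() as used here.
import os

from llama_stack_client import LlamaStackClient

HOST = os.getenv("HOST", "localhost")
LLAMA_STACK_PORT = int(os.getenv("LLAMA_STACK_PORT", "5000"))
BANK_ID = "docqa_bank"  # the memory bank app.py registers on startup

client = LlamaStackClient(base_url=f"http://{HOST}:{LLAMA_STACK_PORT}")

# List the registered memory banks and check for the DocQA bank.
banks = client.memory_banks.list()
print("registered banks:", [bank.identifier for bank in banks])

if any(bank.identifier == BANK_ID for bank in banks):
    print(f"'{BANK_ID}' is ready; documents from DOC_PATH should be queryable.")
else:
    print(f"'{BANK_ID}' not found yet; launch app.py (via run_RAG.sh) once to create it.")
```

If the bank is missing, the `initialize_system()` path shown in the PATCH 98 `app.py` hunk registers it on the app's first launch.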

zOJAhTyr5$>IwG=@HDo8s4d1V4{eJCE-_q68_|l(<=>1FZ-~RLJT(<)1clCu>#!MH5 zKPOVb2^45J{^I?#N#VpPnakTzS9x`YtCL4sZV;LtH|K`-vnRH@zPQ#N!c}1}ViHNEX3jB;InpA%_$DZp7G{5||fzTGh*}UUsxFOLyQHEa;<+$59?TPOh z+aOJcHfi~+_Ey|1y??OQ(3>+)o5Oj$t{D!)9ZG+<*ZOk;z_O26I#(NL4yovI0|ADg zkDN28N}Ig3l9b!#=KWfLP~O$_W|do&TDIt+n4sQM%?)XH{ZyW&-&&L9%N>a^yL+6p zLDjX)?W_`b^YWO;kr*4-iu75JYyDa!m+BB0tTXgfmN)k!e?kfh)rO&3ldUkw)QiTk zu`!_$ThEOLg$mx}_7}{aiG$0^4!+ZG_SFQt!hm8*6`@&vLrBdq_d@B3o9w@xwd5$n zuQ^a^(zf{7%;ZnBON)GvXX-!0D9{xrdM~dK64@KsHKfy+_>(*=E)@~g`NnBU;#!Ba zSXbecq9^w?9%*U5xkR^5!;MMl#x1XpqyRdIvhw<$CW~B`jl4^@=B!`v7r|>7xYH$> z2vuaLM3#@#MDjg)MrtST6GE090v%*`+Nq}0p+yBNj3#>kRXV~kh5X){HqPM5?d2n* zfVT3jK=7i2U}!&K#)x0SH#I6)f5MJE2*1xAY3F(+_uZbaAS;_?AEHo>%C7^W8vdNT2v&a&dzcTW1JY`>jRR7iFECw zzHavYbXw)|`bQFdsL+#{O_d$};MRA=PUAwFTcTdBSA{I3qTY}uibXGu+(ga8*fFQ( zu~(Y89_Nt6BZEy%U3cDn9|9nq*M07+|wr7+6lCyWwrpS2D(=+%9^UgZ|j(kI>PyOx5OdmPbsRQD)Vd<6F z0?(&INliLP*AlGjI83>cP29aR4BC5zC1xBGT~wtQtT{fi&GboKq%emh9#w3+$}(zq)+KIgDOJBrA^wJ>fBf8e%ayccL7z<$_AKGsbB?#^ zdT9d+NHms1sq3Y0^>vO=mCbwD5FIX7*y-XSuFED3j|TN#7)p(o$ETU)EocsnkLiXM z!YpQ;Z%KpmM0$LHSGJ7}i@RcNe*4ovxObK;;0jA(Vq0+8KGkx)!}BaV4B6dT7oO{F z|K)NWNp9kv^9QL4b7ZY^!Xg9EW|5fdm$ZV|O6-$WBETwVs745;!Oa?mO=2pjO9>VtMK=nur%yaut>RCOaH_;e=B{p$ z>H={eJmPlaG(if~{11&`+u+z)^SC$FZBF$^=hMc$q6NZ82EZCi90uQHn+%QHK5iq84udfkW3~w+T za3^TiHtrI&o;~o-o=Xp47c_A>0ySY?5omIG<~(CmVOpDJgut~#z?j^B6MKKqrbppP z-v<{zX6^TkE4(j_GSitgB;iQe>=6XAri^^33ViFb;~M(+pYBVIV7D2N?PN!V9K#H<$L( zIo@282aa&WVZZc{N(+8`by}G@ld%I1;kbajQwsCHKTbQf9Rcl(_QKk5s+Y1J)t%(Q z##q{EHTgJj1^5o&e!KNHJTav9@wP+cuKEHj$!A6L&ZLd@5Cb@`bSo2xtDGG+^Dugb znRsXWDmdiYjNd$WI5u*y7c^q+i&Jw_O34?$Y#gVld5OSbWs~75OP4z_)4&q6e90Dl z`sW05fL;I$P0ye*-u3JX8Be5}G1Ilgm}(Y$S<$Jix0YMdGM!Q$lKFy;nTEYACtrb3 zM!$J;IbgIN&CN<-N}vi$#uN+`N8YIKd&kz^@U(X(0xo%{Wc6D2qPI)6tMaqc%b>Rz zq1Om)2J#Z`E~IgobTlj(&*EvwOpel)i$JomZ!{0+Q@7e>PSLgvIqoj2f+&eKRTN*Q zDzWe^F|x$FGZ?-pqJdgKGxE|EK&Uzm3^H*q8LI=r_%HW<{aGnas(2gbC{sPx^Z2vF z9Hb#tyto%MoJ1m71d+%;FmV_pbXkf^f=>b6RhxWr$PzwoVu*-C5*Jh5PjB-?_aa1v zDZz?ph42*TWZ4F=Nhz4Qai~DnKV#B%HI_}QHrOik>d*vA&IIj->SyJIc#PY)^|-o8 zQVEotiBHrTMEGNNfvyxc{u=>`7cXQQY>Ow%n@FCG-(+ekE$^CY(AR)IfU_a8v;)A+ zwRyTwFg738FeA5MtV@z+(89Kwkqqj(WUbCIvdUV_j;Cs1>Iz?ojQC02RKNcfb&iQt z@p_TVdnT{|hA~Xl4EctuER#4dOZ*|CNBSD>uZPXd3B;N?49UX{6k~3rv!AVg;|P6s zycUahCK=BnA6ojbd(<*~8|JpeQ#wFNa?!iHgM7LE!|)53W~GV&$zt zY4t7=GB8^2pP8?l5bAe0Wk&JZ;2_iGFZPPbuPTK=vwB(FB}|SDi{;r12xI=@p|qjL zeqF#45!LXE_t!lMmTU3GAA{PNCVC!d-Ei}2Ds3S@?6`4rLiL2Ty0@#oe2{m2k=_xPE01(a)&u*AJ{oeVKRDd8oMvy6b1?5ZT9Ej* zEvB-U@`9ch8^rHeB+cJ!Ttxc#9BUtrDafxEx2Rg->DO>mw~>oG6Ib(i$=W1@XUP~W zDzm^*Nc^f~=LU}#y-2Q(>A79KVpZB{&!?C6zFn?clR)3e2|=jmqLh6(sCbM~YVO>P zk^mLHC0WJ>=+nVIi3cVRtB4A=2;25M8Ipyv@YA_aX&moEZ^3v=MqC^ht}e3?lghRr zKc|^9Gp&Dm{hIxtlQf7U-Q31LxymXJ<0d$oAEp_{+b%AV91VgsHgXyxA=v?aQ zgf3aw)OOx9y{XvqT4qP&zC3wIiWFE_MAZstnY5ZWiduIU3V`xBOg zk|WOUw(B?rB@9oQ?I)oHF@DOoL*sx^u|8jkC0yORg2a0M(QRq*9>;5aZT~(m<#-wY zI$`jQ{7!nU_bWJac|u2?q001v?La5skJv5;lmc-3*dA;@L{@(P;^>!%7q4Gl9#5DZ zD|zw0rStKz@m!W>Ga1jmpQG^f*l>rqyiB&dFI)sM%dsmimpdh`h4j6Z$c%=K$uW-SXKgH>)$_w9LdAh% zQYj_v!ow8I(5eoay?Dl!K ze8`-XNoqiWvAlG3J{aBJaQ~ChmxkbeM!xQl?H=B{59y%EY_c|E=c|g7(^~G&5j}ve zd~@E{pFyYv_8y$K3?D5cu5~rVm76w&D9$>|!Ej9v^fn(OGMNO;)F`PxTP-t|B#9S> z%VdTZh8w@#x_I&Xb8W?J^U~s?F7Kt`X3c$ifeq%?;X$>B-ug`tw7zg+s=$4x^WVz{ z7aMiD!1>))rFW^J40rWbgIQosF7)R_GSx`3Y6husE3`>9et)_ru`h3+UIL^cF_{bIhNqT3IG(Z*As+ zOx-$)VfqPtYjq)<~wQ&%X0ZVhySC=SF3YRf+t`lCs@xrh;Pp=o(&Xm*OgeW$U+Nin5 z?8B%KW9Bx~Q&RDGC2%GH!Ke#KHSVS?dkDO(6G#cWl2X`SWpGwHJIl~-Cwow&h*(`( z;Y5b!#nlLIr`h)hBjU|RJ40JK{9A*>o05exr>c!jNOMo>mPyrfnNAstcO`XnBY2%H 
zj}26y?c&q1l@)PJW5(w^a*`o^XNz|=K0?k%n0L)!(nbdAI*QWk;w|O_M_bT05F8II z;UQ_MAe6bxqJL4k1RwKE3JTnHv#%apA7rE6C@01Kvlzt?P-5+1f?f$0R+ucih zaZj}*)#r(L#a)L_+j=#2a^2I(=$O!M@MIhod1>jE|3&k%H7O z^PIa717q^e8DYspR~=1^fG}BgC+UR4FM!pjs!-VnCG2!plV9zaY2G#xw!M$3PnOVV z8@f$ScvGcN$IjDj1qMmWX3~pHFk{=(K-|UWKP*<{kIoq*^7x!(ne*|M@G6G3dfr8l z3Heqc#QXRobbD6w`~dUrcAtNOzSB~`RmP^QD<07sY42FKWRsuni5Cy@H9-w^-zX_* zQ*#4`Fjj`Cug5e)F6A1&Rdv#@6?Akgp@e5jAyfh0E z8t_$F+h6{;AU%0?BtTPPfT;WIh^v~9S6@3wSb+Dm zrErg5l7GCbLd)x1Vfet;StDDr^JM=MF(+2;*kFH>13!+~dM<Wi$+xzpJzG{QZ(rBeDJA!= zNI~d8my8p;Z_YZgeScqix_}!&Lbrf}nV92{5tod6kc7PA*ptq=B4wLr4Hg1&XX>9y zB*p6XR;5*24aQvtmc^s9;yYv%+#qHKG7lG-Bbde*#3qb`6C%MJ_Dmo_J?}M5d69mUcqfl9mLV%fM|&EiP=kiZYzzpm3pX&1Q{Hz&$?XI2K zMS26Owq=sq-YGP$#4z~|+0%t{-Z%st|p{0`8rb=fyY9xx!`xS+S{t zqJv`ZYs4#}X6yu4o0s%OrgUZ9Aq!yP6H=W9YDQED17gReaOWInUFT23mC#*}zT^{Y zS@?y|LCPo7(^Gbu^uI~iq&}V-=P!}=QE@R{4~3ULDjxEeDs~qWbd0`RvGDvO=9}h< z3|J!$RL`lNSbM$E;{&^JdR1U>W%k^q+=4}MRnvJ{MAEcnTfMy)sSX_9C$OV?gCoX| zU6mW}kqpk0=m=p?>U8_Q6(Lu{D<`u}qO4(M3?$(<(%Wj})l6zV8Vc3+ZV49lpgN7* zT>J^;F}N>SI=$&r2|a1Cg}g%vDL%`bJ<7{_ri01)`T_b}(cu>M4{z^rXRI`j<%=Ln z&9w`R>+PhxcJ@z>D<_#xw+&f4?Z~otXDP7BHK2NHqgR!9@sjK*K7(&>_#5@LON}$P z1dco_6(?JIfZ9D!ZrJt?pXX--!cR*ddnGQGlq4%INgTc~FZ-6H4>asF(H;N|%78=z z(~~q)gRDYcCtq5-%2N?eHhXJ#ZML=dE*Sp; zYzOt$x4TJR&z=l+^lu3DkmL!aa*k~*+k{crb&0Z20d=9FnP3M2Vmq)}pdqJgK(~4J zB&VlI)#SO;<%F&$NG4H2M~xYnhq>IbCvjF~3$2t3WpOExZAwuJ1Bw`e#NOCPny){~ zb>#4}G{f0vJhr6xOcv#&3p&G=-WRDlJ&FZZq%Jg0VJb{eq_^WuwvQ)awUBvT%PkY= z#y&-xJI#J9$A-=+ygTUoW|2vSA3H-=e33x)oI@xc!`hpQBU6k`$Af!mLf?chQN<17 zVgX(@o)r$;{!V=oT4=`@uJ|``N>ry9k7^3A{;jS(d4pa=Rw2w6n|Y>$pSNJ9U_vF? z@+#CXya$NiVGQ5hfQ`xX=+U9Gge=jKlhG>#1ajA$rGJcBUm)2kA;nt(Ib-bcbkgd* zIL5Pl9YpHtCiYPa`ReRMpsr0E?CSDC?K(H(Abb$x6s8hatGFx5XF0>IA3-bn3Hn<% zWHN*JocvQtkDF}vnXF&wj2MuYFA^w_E{<%?_Ix7CjvlfXV{l4u+ z2UN+l5*SktKU9M^BB3JjG$vT8^k;+Qz&ik4~R z5pfSx=#W&tQ~Oaz%Gs1Wyo71xQy7oy*yC}qFllbXHEfd&Z@U>vR!ELM9J=#Hd@x96 z9PQ1wAkG6ZiwxayB_@bKV*;42#D(X$9>>d6B)OXN1!3YHj*tf#SvX#ztM0`0+<}}O z7b|eq-RClAaKO~H2Iue2X`_lJjVZc9eS`kP&2^$!)g^s_*dDk{f@HU^mA=lP>W#3- zIsa6-OZ({yl$|hOtEz#lMS1#tN z1$<4$jdD?mmZ>n|JbA}*3YpV5QNO;QLa(#e=I3#>(sK>n>Ez6Gj8i#QDZF}+zcZ(2 z;OIB`N0ww5#iv6+M>!#S!%@OxvT`HqVXd_>(=ddGz<# zC8S77w#iwzi2yS9_p>J@K0MqsSf9Kn(k>^e+Cb^U`c8uA&DoTpV?VaDrP|K&1mi;su zFXvp#uAphKB|O+lR(MR+OAvbPrIf9jEw@NCabxPSa$StdYS)({zJ5y2EqCv>~Q zND(XpCAAw_kC%?Q{_Z5p8PO5011~K9xMgLJ&U)SPrMov>r0VVr7g$UL;f6pb>sO~J zec)|L9aQIl7}aJP7pkdO|5Mq8<>ffF+@Ke|&kN&E7pEfc%IpR$`~paA$1D7Uoa0ZY z-oIRq9X{@={^SAt9Jf^pR!v5rb!)LL1A{|CbHlHrMUKW>30jYEj<@Cnhce&5M+t~i%+%9KgeUz(OfY%zF z-pt@tn5D$4Pj3gm;?y#Tif>7_JWd;H2&{*uLslHJmObsdP3jD39gSKoCFuHDun$OG;;JC zA1yE$-)kQ>c;c1(3ozaj@#4|*pMr>CZTd2c6BTZD`snEKIz+02S>&_A#9sh;$A(=i z?{t3MxzBMC*dS^;X(89IK@%^BQA_{$6L|fr-e;(+NcgLkEoy;-Nu`BsX!vAw(Omv5 zmcAFM!zMi{VB0TT0hIPv-}|dOyEB;WPbV zjSoeTA$j#l4vU5NQbLad-*5Yoy+iy3=EiH-lum7{?HDkzO}9}XYhU-6T6SFT5?p#a z9YM{HI8Kt+a~BH-U!gn{B8d%18~*V6G(rv^SfRk3G>>}zNcB&1?4NLkzLk#CpEKP& z>!@geoBvKE@%47NS>tvTZ-L35$Ri9a4!U5)Zi+F~P$WvklBOHrx#Qe?y{1?J*(@qD zoV{imlvp}j3R69Du3qr$OvdC8ljf&r^+_no@~iV$K8}f3s42bzz;EdFhHm9O&%Lsy z2iX0L{mz;F@-1KOF-S$~kvXQ<;jcEbq`A!YHksd!B_%Q8Ew)@vd@l#HR*o;_271B0 zMMP(7fN-KrWL*HP{7S8^11Ad1JKB~C5`cJ1H1-xsxeq*jdDQW0M6-x{i80Q?hP}5B zmeYhu9J}aJ#K-qlt0p9wo8#2>bY4a;8EK=zO{ut0wJO-PUjDGmSXxwjokP*0UEe=y zWSgJ$DkyAQUQ`lc948vvz3T{TsxiDuOoubn@aZej! 
zDpK}^$P3*3L1E9?t=lk`o&JSZ#+d81Z!RadvRyMry}%FkzRNTTP&`gbW$NDIl&rwj zi`)?$TG1+DsL*hpvF2ZS>=y&E5!k?Z{&tjJGVnGFzEH@|Td6iCyd(2@LLa;aBZ^ei^`0oIgiZ3lOfyS?hW&SWDu z{v5VTFZwaQKE|&tJ~dwgDEf&)jM9W$9`CS_U+>pxkmGTY49w2HAUR!v}k&k>O|YjYO%=uN*_ zZ2?*CQ!*YKjg!Xu9~EJ}A2x6?<(`kSt#VaEE=d%B9L1Cs=n+ukWTT-O6b@|qm@4ot zwnf{*YDiC9S%qSX5;{v7{|UIZe-c`?s+fndFzTrZ3ep@+#uvHg<`r58e}Bp;Al#GP zH6Bh$aWfurGk%lh$R{SJbJilZxAfhNGa^+r!NxEhEqY#2PcX+9uWcQ4F4LH-R$ygq z)rVI>`$fwuDn`bZ|8|tgyj4zW$PC-A&eSX}TQ|yDl=S^xJ5mm8N)WX7I z@J?gMD=2`&vXqP6DFU*cvM5tFYVrJ?BrJAxj1A0=NgL8qhz~in6pk;SaMDDXYSmG& zSsqDO>XpC!i>3Q7w3T+FKO~uam>D3u_*5UdGFwzs~ z-k4PuvHy;1t#>7VnM(&D8ik5L*zA^(n*S1>4E$a)*&D@r}kky52*er_fFxq&N z6rT84nj-(K8-%co&8?qXbB;3}T|`9!U_P3)T~HJynBf zr3Ra()u&uOuGOc|<`Z0*MO@0Kh@Jyg{8%9hDc;E^KLl5=8d=Y!<=EsF>GWme)~FMb zE16HgsDQ$7Sti{IovIYhlQ`&%u1?WWGw}zz#|huKER~piPP(R9uF|;SId$(&-m!s; z$jw=aQl8>@X4knJh?HIsT5@y(Kb7#@YyUXPpabd9Slq0!ekO8$Hz~zlDM^!>yXb+6 zdE3@`Ix3={a^jIUb8Ue0eji>{5p)_1(rH`^VLCr4mKPBR=&N*cV$Tg<-1^Zwd!xIm zw!D9;mAjAoEuM(YexlYWuE|-oGBvCSg|RdG#syRdL~k9PWC1( zD6L<`+bNkMXH1iCfT(>UR|{0tSx~!nuiy0hArRtnuP)|UZQ6EZYze?E)rz4`LgEsc ziaXx4N6xaWF3!MW%c;Nc$6`ETD3i)QYJm>k}JMw!#U6XlSn z?QwaweUFwN)1%Qe|2(x>kC{zF>GDFuE4i4?huIhge&Yg2ikWBG)3`B)l4?ul#1Ht> zLZ>1VeE83+UMn5?!TVAsc>RvrlGP<&NOiUnTwG06c1+Tf=c(~TfQf`ViGFj6;*Ah! z(wHARQ5BpBP$8&s)}51mDdz0V5mZcYi}m^iFj!D&wW^NB&GkyCV;-VgY|{|Vx<4pU z5Q)3BTZvz@3^q(@8ayY{tfl%s45}*&E-0ntX;wVo1}TKOqb#q|w5#~}C&Q7FW0}4L z67##f>&H8B;8DWk09?@?;JbD_D6`@n@wo8pT%Jnp0_-G1Vd) zcq5!|NgpxrKEIJwdWH<id?Dirop`$T}aE zO`J4SO+c39w)*g&wkl}Y!8VLQ-%wN8eVLoSO}ak?ocaY|jT?WCt70$6V&=wd#`0d9 zgT|JKN$t$D=5S1+B;*L^A)|fAXBafKHF2LCEeu(Ri18~44vxJ*+O>U3bWJ40$o>kwSSy)4UN?m&RI$?B-oI8WCH&iKl z8wQbK;w)25)OMjt|2GBzf1Mq)`#;%C{$3yKzw-QZ#V`Of5!x+l_|fqEpsI9BJ4g-D z`MJ`$@A;Etw_kvlkQ3BJ$2kM>1dQ~t2g_Q z@1g^~5WUDars%rMS6xR;N!QiK48@UK<>Ew$UW+l_%2CU7*(2Us#R@-7?rq*n4-hG{ z=dg<&uyA}Zlmb3kZ3GW{|ArOYQt@p^SdVL9~3-1uL?hksms-YeoY;_F#49z zCCf9#Y$bm#r&TqQ^2ppWalvJGwrJPe$wnhEN#pKDKeejg^VHK+#L!>vUff234n3DZ8yOa>t=P&iF=GL$t^ut?Do-w~e^3 z9hNC2!h#br_&UEB;|m4jX#dyl_BsPgXOD5`Y?!GgwRF!?JH0zT+|ZVHe{H@0_)c7M zjD5ANzi&zP7p!OYg3@$RhCid5&zgx1TGVi{$kx_m*dYE#%I)9<*ACsdY4fX_tQu;!#pepp$uRlczCe9D9N8ly5KFWSrxC0#x{4S{0jOp-Q9~1djP@v4rmPXTG zXjjlDgR!96^xG{h1@Gom@@8r{U8f_b?8x3ZR)b1WBO`td+okvFnNvBEnv7d)?t2y`ct0q}e1$iv3%iI9e>-Y<-Q+T95hj}B1J%~NW%xPm zk@R$Nl(WnJDW}S1k8@|_t6!s?>#h5o#FRTF@V(K)K4U36)!ocBfq5BJ@aWJ(^+pQV zICh%{z5vho=9+kIw|N8%g!A+FPupMHEC-JzSyuT2k7&+)oJ2?V%V~OmQn(&t=$ zb@uE`2Hv5><3ao;GIeCk*TR$Sj8a=yFHiZFRdU{So|_&5u1$oYXxiz?pwar|R6m%M4iBIvZbDL$WT3C8a0*)Z|;KO!UlMNbZZ|NB(v zkg^U<&E}BG?O_|f!zw+0km?Ao+>2Z;{6^oq>eG5v^PX(8$|5AYQ<5T?r0i+72o#m} z%G0QJ6hE$ATjX*FJa0Bp3y@*H%((k3ZgH!F-z`772Q3{Z5f%12rDUty^HS~vm1}M{ zJ5)r^ccnwXw_}mb71@)Xk;Tra^<+G)!#UfVc*$2d(W)+HVR1-kVdP48DSsmKhAA{i zDmWl3eiJ#4kSS?#66k}>o2>%QDdu}&E1<7GKP5L%ZR8fR*!wr-PJGfFGA$AtuSBMUc#-(7Yv{}ha)C?GEPT~8rsBCPIX8q8XAJ$y>%R!c*7JW! 
zx<4eMa?H{@drhMLw}CUv5my!KNS>#|oGgZRKS`DFz0wQI#0i4L$&#j<#Qb^m;AUX= zN8YI~FL1kggvPiN9iasLk+BRWGYd$Z9RKRbsJy2V1Z#!)s6<9XJ(F%hlHailzeQ?> z{B75CSbbh&`osHkt2*l|L(^aE99xg98vgFg0JIjmU4CmiYd>VH_jECBT<_z^35?3s z^DPr%*TNK*aD8xu|`N*NpF;Y_HQq*u40!E3WYJHG3Inou#l`1$>PJ@NVl6gf_-S=H~s>#a#hxTx5V}b zp_0~yd@9irWnEZ~rv5Vqev;NmlDe&WA{3JLd2L58@~tz(5~%uR)P8}KdNas?Hyd>~ zze2V*7Q=&^+X;sOzD`^XvwCgb%O^@~4IOVW7DN~-`c1Li>6{h^@f&@`(Yp5VR{r0# z$vjx;{u=&+?El=P^WRfe{P$A*70JKd|9$$bC8@?$9m~mD)LP{syk*8JaovumqDkJ~ zk}lh%68Z?jtjhh-EO+ks^6~78INSG8(~d%K}ZOy^f22!ki{lZW(wgl>-+ffZ{$3-TOj<)wQb=Q7^t$O-6FazxQ z9ogP>gqJvRuxeLt_}jxGuAAKdTP_QHN^c`SkRDQ5S1-~Kw{f!JZS-2uY{Z)`OF?NtBJ^+yhqB%WV@-}AiA-Mjzi9CVfI%TF%J zm3?~WSgbg+;tnT#SJSY+VSyFwl@kl0?>y}mrQPj6l%>y150Mdw3SHdHz<)Z)nBmFb zZ+<&1(_*|nYTV*Z31gbyBTO`bnroiKJ^%Atz9ny=fC4_#%s@0_SA);nai*#~*K^~t zi`t*fX8r-&e;siYe#p|MH0qF8^XA`|`7f<@{#4sh!_rFG_$4aF88fBMr|RSs)r>EW z5*KN`s@L873lN=G>Gbewov|?@6mM}Ut?Xv>&>hA%Y%#Z*%7mDs;Bx3EDNW04YzA#9 z7%3@f^qgFa2|tjDLoYVAi_YrUNs5hD;PuXw;UB@Eja@B4IzwpV=5M*+&yDiS)`8c{ z?Wd2$fle=}QY>Ho@uzR@f!@Dcj>@j*rFLhH8a+W?G-GH(??6#d2eEWMxpknJf^~(U zqC)b*B&8OM(c6J~_(uS1@1FLnJXU9YXHe87KYqpkqCU);U^Mj1wgs2PK!mtum2{GE zw;8l8E2QI$J~V832*tp4$_a$%N#Sd=gjCWcv;b{8vVSV_e!$H?<8~C@o)Fzp9rU6r zH2XWP>DkmoImk4QDDSrK%-c$nodb%^guKS;&mg3rI(oAVK{FMUgR& ztJO`+>h#!k@QHd$>?V8O7uI}{Uw|C>AJe}8OINrnybZsT@wP;s==2Ca!m9ZQDQ~Sc>v3va~Uu zg^b;Zj5dDb>TZXAdr6Ii6EGCEehe1Tl-U=6x6SUGH%?Zw#w2S1K~g~REE&A8y6%ux zk2q6~T8&^IuvZjtteikzZw?b^IvZY17EHF}QbadvV^m}JUaJ+TRet8q$hG!be!Zq% zX!A1+wVIhyV429^?f8|c5Iy)D<=ks3+vs84 z?X3x(Y})<#N+q>|I!1vZUQ@aGMT54)2~whRQk)K+aKvAR4i62av9X;PZH^0FwGCuiD4K6pv5xl$GGLU)ymCOiu*^8bnh&!yd zIp%7YlB^Aq*N1$y#?BB(qtmfegS@H!nF%5yklP51k~0SFAoP3N^@T`Ru!-C4v$vc>jwTf_Y|y( z!K$KYip)?x7)NB}wZm4<_uiQf@;~3ean|!@R$i&qIh2Dik!zvv{3HLx^o^lT&Qa~o&<>HIr zWQnRx>8MqPuz2b`D&X4-Ell-Qq5l{k% z7mc2};wafwg&pzGdJylh734Tlp&dB)?L$8z3YmF3se2|NwZa&jr|Zeg)QeL{`4}8p zlA&E<@*?Pw&3--r}&rDcUqaFH-F2Vcjf7U><8G6w1yb;kG4mJWxQAiIGg1 z%*TPgO=*bLLf}D6#7=8;txzkT2omq@inw~nkI$>a#QcxF1RX5!f8-_TzzNkMFG1mn zciI-O{(O6P(RN5yv^KYMweaarSE~lG?-VxRQGqc!OG;0IcZ=5}b`K({+%-oo{tKd&4rT^j$i2E|I*rp=$NVh^XddXI3y zgMg}APjd$_hw4Hk}|VV7wapc#?7>OWX&i?ZKpDVyF_PvSH~rt!}LS$oTAFH!%&s@v8Wa`1}qJ zZ(@7#(+W`+f6Y3^#xxHXcuEJR$9-FQ&;f_j_L~5%EF)Xsz!&r$T2Z25+%5^`)|n%& zmabI)p}k$_Ef&68gMtpcbsQR`A_M)ZPnnSgK1%V@1|_H^3H>*|0J>!8`w5GW?`0JV zsh6Hfxp}Dw#!e=Ls$Eyjl|_CHrx;YXa-*a4)$YAe$E*Z0Yo|0;3EO=4q_{uQ7u|@_ zBatqgxl=mEG8Nxph)Qe-&x{>pVA*DaEZPM+zaff~9uty|C(kb^pZ|wx@W&DKzu)cr zexd#2Y=3z}|9k22VNMX>&;gL9cXwz${9JxlzOmBTWM@-xt_UvMfm&-MB)C0_jGrHbH1UsCz`hU24 z@35w}?QIkriV6xSNC4SD=uLW&A}x@FBB4o_76=fKrl7Q~(rX&MC?pVi2k9!kOXy9c zDovE86ye5w&iQTjIeVXTzVCkbxzD}xkFeHatvTl!Yt}K|@s3>$x!wwb-IJFHQ*?33 zx2X_!P6G)NH?YlL;O^IW_i%2s&2qQTa?o;85~JWVD^OuiRHVI+=4-nT+%fLd*)C27 z4H7t)#MR(%jh?AG-|wGt_Y{_)QPFtj3|+iSe<3j4kcW&0XyygsmnUnk$t%|%uvqLw zc_h&kN)@owTn6}tkBl#UG^JLs%j6WQ{dNi!^y!mlDi1&QP|)fY{uS!F{6_idH%XTe z@~dv%To=W`o>g;?l-AkRXF1ufiXYO9{*n(!XGxQSf+pxhK5#U7S*tcUa&@(Zs-vEAea?Xn}}7_#nIcqFvG8!GfT5$TP)u!I;pLMa$+&>(8&pqWW_d^tR1tnKGo_pXzEAmsh%- zzvp?rYq<>Y9;LNwZ?H=!CGoi0c}4SqmxQ`KW;~`S&z}Hio|xiK`fQ#*u{Qtk&-)^B zqvZPE#P9xjOaH&OG5Et|Lf%%KP?}P({7z69MISjOZ6v%&eX~y@OFO-H;l8iO=bRC7 zBwd5k{W-Y-CRx4~o&fuVXP!r@w@)(O-SO>D_)^)zfOzED^3W>l+AgCnW!Afe_+;D7 z1q2laYI17KrD}e4LZIxzd$W*dis}qM;7@*W=l9VYF$EMjD^<-Nom&*thIMRy^!k;- zaMgy|{yDHc0xfMg7GXvsO`>;Hge^ZIaV~LeM^X6xN|e9)^-SE1O-(iz9_zEMrF0?B zUxt$rshLw*=I{$fvdeg6YpP56)-i>qlO8jxa3>@zEUN61@YTw)jqpyB>5+{0x2*y* z43|n>=cWhRv<2aJC!)h?gl`XG7XtL{1vRYh%=%nIbKcwbqi{|*o2pco3Gnyi;eYds z-ck~SDpHCByVgvyS2q~NT}fNQW=fFBF9- zn0qzvY;s!+vd82YP9@F>i$(Sm8wc5+ElwGK+4(gs7P+5tx_lvpU|UZ8X;aFnQ}Cg3 
z`&5d;@BkBU)l{qphq&I_8I9>#0ZXwSGbvQ;gBgo#VF8W#`-seNcXJ2yG>5Hlo=I*3 zRVL&vDJ;!bZLTJ=QO{L<+KKH{jo8Ok!NW5u?%#{Y1ays`-M+e-l^d)AQ@m&3DNR4t zUiM^5Q77R*e#zFqqTj&lnBv}-@4mZ9_Ov3O8~~)Aj;R*&4h3v5mmO3k?2u27Ho@5V zURRK>17V*hbtNR<4BEU6$;G^!Gn(N_%S}tWUL%@6+bjOK4wF)5fSaMQ zCZMDTkww>PLe50?4>;B43o;5fcDPn*OVh%Rbm{aoKP4(Y?=#^e`BE3BDa@I2iqN!w zngVo9wA4pTd>^1^U+nZrL7Mk zx$xyVEl&6526zxE5jXXk;eMx~u`TZL@4-c|t9Ar@6~_{@#_OUGxT(H1F$rYy+;YyM zQt4iZbHUp3^s~1f?}T^nRw{;rOnsB0Ggc0jQlQt!CDkV z^H*eR4^i+t6M_Qw|yQi>v(sS|wlhsKC ztmH8TY(*w#NXvuuJo$0sU2bLdCxd=-_v(5#kp(6{7)3e|llV2u*pz^887cP2E?+Ab zKm8->W#fW6wqJjekv?p1Rr>HA_qeo~URW*m@9J~Hp@ZdjwLp}!~Pyw zlj0nEXhUL?xqQIi@>le0|F69zW$he*J#!zIZ#w^kIH4MBZ=~gLHB(aT^Hbo*No$z~ z0Ged(Tge*-F&zSPPI?oM1{o|MwcVxl7q>Xxh-Q}wOc0$@XwurTont8$`&sNQ1oIdf zZ0-d;d`CP!2^GH}k@H+wUUdE8$W%Gapn`Oh`QZl8jkn}+CsOhzI$QF(IZv8t3aocH zPfJo_LKCeqC`JH0f?beG`3tS!Z`xbF~wEc(MGt%=}IXqgu}mhAZj|C8lJIo_k9hY-EnjElY9@o51(^+F#|SE3PO$ z_?%O?mBJi2khMG3Yi|*aH z?DsZ1mfpMVvkOAd5|{oaR%a>^S+<-h6v|&Gsw>S!5R0GSR6I-=0BD^v{tMaAcGF1b z=$Tl2WEiVIi&#x$U_-xS-i>aJU+C_*0C7|LK8pEZwKy zR03X<{!n@QsqAq@ebPC~rJAJD&LI@q{L?f9)1yY9y*U~G@leC>f6w@ssaz}@V|(V8 z!8DOKn{;g-omkhB_ZV}dhC*McMW0q{f5KXg&W`>jclIUsoDmisU!?AG26sX$eUhg< zitIK}DIIFMtXEXVWZLp@)L`e{0?+WpbCWu~X3+q9nQmiPs91;=rrxi>bu=+m*X|NGiAx!(dSr!tio}T+q87o-?2oK;F*&2e$SmwY5m2Q zP*dWXFB=Kq%GJ9yrO#?B*mmoQUu|XlCUGS6`YR}+TF++6iwT5|Nu#;uVcO}J{mlJ* zO@c47Jw!6<wey377!=r)7Z)q)$P>4{7J!Il+-<$)ww?d{kEkRA8|DR|q7 z<6_PUZT)}Gz3}wIzJ2zDeak|k95UzC2oJ!OQWp%5$(BE>SXMHIttfkrj*86pTphD~ zo`2*u^DXe0f=H%)|1JFi<4PWWPcZBKhKl*0*79E+{I_};D*L5cjWnbH-M|_g^kU)a zloU5V@M)~xz2!i-{nK#mm+Bv+lA!&J69bI*<{3~9Nu%3o@L}fjw#5Zsr_@Jt^I=w> zZnIxn8&|N2IT~{8OR&UvcY9)xkgz4?7Qify+o1QH7T@n3~e2nZJ9;ywXr;BFVn@0=S z!q*gZ8L15}g&RfRT}u+t`xeFa*Ga=iZ{vw6#}prwLR+$_4Bvkol8vXa-%jfg?bL48 z__~zbWgeEX+i*+~lpjQs59s>p(wAE|rNoujQ21oO23O9EeS1t%)7C=@++Zc9E=(yQ zbigj3wML{TjzAlvrgosLzFiilxG-AHRwW(Gl;N`>cg2$vj7nnDsj57t(1PNl*`rO` zCsy@EFxP9Cc}2Eabb)+%V`1U^TKcQvlEnQW;~*Oun!H(4bhx%{ybecKMJI#7)_f6H z^yB^mkv6aJmOw6KM`5)J*1}ep;%c`JT`83LQkcsMrNfzv9fdew&*!bEYkBZ)GyLjg zu3jJ1rFIrDZC_0yyUb5j!NNY@Kdpy)Fdt_1ZgQwRFwjJ-T~KgBs6ZoeEw9@K$R(`t z5uu$-#VfHX7kYtIiP~r~Ru>4qn2Y9fO&k89vK~{&-~wroy_=e#sPHsVRx8;&Dmx2! zz=T$AMdGBeNnC19;+*#qHqEk~Vygm(964m#d@pmp6)2>Wx}q6*s~HSMj8~y0IqY`VQmd{p1EIqED+<_ErKSJOGgSte0WWJ}Q@xI(GnRus91c z;nud@RUh->qORN4;rTTOq93)2Dw_&y45T>Uk2TxJql8tGqY})d=~>g0UagWvCC0^R ztH(d}H<6sQZ>gOepf1zPk7#L}D|H5Ne_W6?sM^2hGt7LBzjZq>8}F)VU3^r&iYzYA z2zVKj0~qz!{ITnRE6ur9M5bU6p5ro=iJj%>T=0;tLDq7XLBL69D`UN&eorCgHxi#4 zq-s7pfMZ%rHearS*Fn&TV+wWdxyU37`%;D_xZSO%b0Tb2#}r|5{zP9C+rYze=Epz> zr4>kt%)pe$4th3cFXr3jcR%=F7Z^T?ogKLDGfQV#Ut+o@WW_PF-$^p4x%4Gz%~@#} zvGSsC?&p4t={IPfK^+ff;k_jpubn-UB%dO1pWg4(9D#y;AQ(xwO6);| zflo`|x)@vY+_{L@?|XPaQUty+rT?y9L6=f;YMzHFEMo^(O6&9R`h!yu)_~&dWOPEE zYTu^~i|AvD7L8!ZYPP7rHzU={0L8KuQ_cN0BhkzAnibtJgYNOhEBemDd>ag&rJdp& z`N*0?bacAC8B>B717=mRG%R1&`K6T-h_Hiq`6ADr?Q7%hhJndqkoa zSpVXqDe!{lMo9JD>GzV);S#$tAe&|0NqilUr<_Xh`djofeR|qig=CXg=v7l@#g`g? 
z+ExDi;(y)TQ#x9#+M@r{_~2JydCK0z_6jpTo4#S2O^M9L2xy0YTMe#qAr8_ z9}M?EX9;eMfJiPaGxIV#i=C9-3<)^a?3hB$Uag#S{G;4ZXTX?nx}EO`^pc5p(Ro$G zwV}_V5xH+4N=Zyay5f4PRYxv@*Q#qfGsTfvlWDGFK@WHxdA~aOB<_l9U(7CQ$b~cYgazMN)qzD}yyP<*;{MXeoQRP*TSI9=#npYS3WL z57=I8rnZZB%}rV5gm?X|o5?=$l~OMJ5*@G!9wvVRLmLjb9~JcQ4*S@T#gA8N&bg+2 zjNcL}5lFi5Yoy&^ntqvpo#3uhQR}dG0rm;EJRyBmA^AaXpL(79KWhE&{<1|?%{&(9 zX~m3f2vu@b*HHNbqH1wn{u-q_YXP(eR}xr|@k3jET+}bQ349u*z{t1oKx>LE8#Y$c ze38hIZQNrXIz=Kg&8!$zXWw-#nA&iMVR{?dyQ3rQsV{h%5|57N=NNNk9&dCjBqbPT zm3Kv6*qBI3+vIZV)w|i{nnpmqeES8}^8h=*6tH~7JC}~vPZ6 z5Fbel`O*FcG%e z`-Bh0U~Y_F6{*Oa7EmFhZQ{24Nq5MSu5K=M^XZ^I#-_xT2#0;OC<(7L|7_McGy5?qsv8ZI3rR+BSF@Og^QmZ7i;F8}a)?xbzT6;5HVfFAl-+3Y ziB|1WP z|F=V$a;`R_Crd!{^)CLbaf69=a}yMsuGme4dMW;x;+O(tVfTc|#g50+3}tsCURyxb z`(kHj`G;G%uL@fdWYZdQlnFefd>bxk6oI|EahTxJFzc?C{Q7kz=5$$aX}p6C+VolC zJPa}qNNvn?az=~EOrI`0XqVUaLf`V2n^v(#MN(p6911_Y z__PbPJ0do|xF^KTLq*PuJB``B{+H_i^94@XI#OkwG%l-BC-Ad6qllZQ=>t-3aW0>k zK{T63^?{ z`;Ra*$kCFu?a@<3gIZEFJg4;sQytRwM#L`EadbK7Nduui-EZfMa-EIGK!uSu92}Qp zBpkoYSJH7(mo#61A|)okyhtjca6U4B>d(aX_dU*udPfLdoZY(ZQ2K5XV3DjVA%{^& zY1^=qF^?ce65-HtH_x`DT*&3NSY27Dj49@Ab1awl=%(*nHOyWZi+DQB@-D8!9p z{3@KaBDTso)?E-qz)dU7WP<1H|b!?l;F}4{}wTNN$U< za!6KXa*8R`hBBTE`EkNJO;)4+72^3PTH~(}&p*)`CxU}KJCXZaGEmRzL~xKRCjyF) zcZUCMan?)cnNnTdgPW`Ad_!Wa&6UXv4?ijN9M(lwc(_Dze!Z!(H#EiY?upPMt;^F5 z&FZOd;=6ZGLfC_z9R6Kqe>C&|3!X}5#OM}cm0)HHag~q}EYb>{QivPW1m{BaemqwL zIs9lw1E(~SH{2l>yfNj=lH9w(vRquVa_aqbjXzg>byM;M6-1JaAI}1k>9LZJ=cAzV^#l0h^sAERRKU`c2LYc0|G@3O%c44k|I7~ zzVdfhKK5(NQATPgX(Ku93zO;K<>72e4AeQU4WpK8Bi(M5_Crrxyp7TjyK!5>Ty0Ze zzDwZGr;QW^!G>E`N%V?4mbQH9QYU(J#%#`g8q>y*_ho3Sc2s#J6VEIJBLwvS8dns>sQRUwI$k zu)eYVQHt%N+per@z2-Q}i=Z!r!+mv7CI$-JBzouGSiFOBKa zR#c9q&R}#gG>j!l7bh{lMMJslUC-c@tI4ya`kcKhH#MUv~?xV8z!#HyIbtB~fI2 z&*Vi9dhYrV{$>|=hw19tYYOkmJt=87BtqFR&ZxIM%S?MhC03}K?P;|c4*(Fu&?F9B3NSdPpy}pzx$-PJQ@Xy%DAcsTss5; z6TwwtoJqij`I=l*K5eUbo~%rpbF|^iU=Ii1Rl(V6{@Zmo7keh3BNU>-x;<9JI8PDU zljkOMi;RcsC3*|8-=tJD=X%txjCHu?mAwb4EHDYZ#DJxlzHy+#`=T&4T*2QBaBOZV zU-|N@WLpXqUOSjTUEx`lcO{kBBJZw!dbgT)zmXVqh~E;WZbq?h!BSx(nuxa$t&Zs1 z*Y%nMuWVm8@-j#k2~p0k3mamVxeRMpGbyN=OGS!^ffqb+POz{cke$OUOU+}dmxHBT z_hA)}E?P>|fK&5g<7}|vXe?Jn!#lLBoQewFQ_HETT#gx=G0IXK0HHO$F#An`n;bE- zB6CoxIHf4s%t}FO6k1Xn&N6hMqe!MuNu(W_N6IuBCUNm|aG?uRC0ErGuJU*82$Occ{jmcaW%sG z+Y~nmOE=}+&QN54ex$diW_I;*#qS@Q>ux)_RML3T}c^u+2TDz(+N=Tk|!2 zHg~-8;+SGsFX9z;h3$~N7@%>tsN$`Ta)A2bwI0wMUDLE(=0AFvTwQ%WW1B^xEwGb+ zCAB6wVq!=qaq?n{kR3d14c$|#VF1=c)7Qq@j@5?lKGcmid~Y}}-x4> zHI>w94a|uS!P@13D{{FTVTv$4ru7;Ma$otG+{a6cF=E<&z5~;^%W)nzRCD#C&>g)j z%be5lEg<`*P}i3o6sJb&(DP9Xf;J*{*9+fwMbz{d@tfHTjdj3a)#%O)xfBQ%o7_R3 zq-fKZRLjV>8`^!JUYnGgkPj(qvk-h5?r;-W_1=Si32h>=34dV4YV!jXAqqEj&RWWHT&8ZQi;$;IRX$8acSvBr_>Oa;O* zcF2|I`{+F?ly$I(SJ(K#EOoULhYI8Z7sWU2_wgFGke9h1KJ(KdR@R^C_GkQX!>mCQ ze$4RYA5*+Kckj11|NGBhJJ6Bo{LfpUdQcXzruWoLxW2#3@U0}rPOEs)6KnfrCyu6t z@>c{8oxG}YyU*hxA;;k!-!28S8tin$#Y~9{U#z81Es9Mn2ps9Pr$BnuJJcMy(q9hF zPC>rP77BlmIXYk9wJp#!(MM?T@L4liHh5fv4+9N4bYT>uNa@I3F+F0ZM#&ZRJ5Jgn zw1CJwmaCQN_xFOYu{u~@_O%f((R!5OO`8t0@OoM7$(VoU;f?Bpbi9?xuCFJji zhU*Sl$o0I>%Ixvv_cOj2PR|-D=DzJ6%<*ngHb$&nv~iXbANP@iuKPI{R)6YK;uVRn zu7W;!!{(g2%~cvd^x|H^?ldL?Yl_n?3KXKAf-yS_HzBeH~z5~&NsdO+jsg; zo3hh}Sq+a-MG?j%!A&~oQL)oD-0FLBumQwtaG$1fl@(Tt6ZYpCXZ#8sj#lS9rtp6p z`6Gz`x6H|L8_N{V3?gB3>G=`K7jzeyoG?7}76g;7#}l~^z_hlbuaIPTwl^PmlUQZ) z%YLo6MBA2=U^f#<5FL6KfbHEDx-l5PC@v6?^mM(j97#a!ka_7|a%+r$(rT;5p3p5k zylLht-|Vcf(KA7&;8H%OpgW>ex^vTJNnXQA-cmAibkHe8<23!_Qszr(%C>=Br2L^S z{hn#ay-z9DPGVAvg#6I0C#HHicyzln!$XiFuykmS(e$OnvC3hWf7ve9d^>vTUXR$1t*ZB;b&cGy**Gj7vLHdOV0tF%__bf(ZIsmj@Y+k 
z9aTxw6php*QEFPn2=j%YUUTCLyF|4f&|ogHmBGF^sqYeji6$@&{Hk|3)x!Vz0pmrUs)I003L1`C<3aLHZyA>3?G99hVa{Ht|W@%&h+jk3}= zegL^sNT%%i+t;gFIqKJ>%Dd6_dI;p#OqWgC1A=V&cXpCuCwH{hC ztojQrEzO<3gvb2c_?<-3oIL)ym;4J<&Hv9I+sfGuae8#k2P)_69(LDYE*T}f{>h8l zn#pgqW_RH6O@!cW^L2e{h`j)B-Z*@g0MY495`t)(g}UT~PgNUByl;-sR&4+3OnSXJ z7?wPqowLYUUb?vqY!mjEZcks&2jMUa~{eJD;bdNJDRK^UE=DvEOq?Y`Qp(DZC%Y{x-6n(Q9%vGKsyP zamys#3_k5GBwTEuogvhc3$_lKhrr084#mYdI2AL+ZzvXirOi%$S!~Lg2at@H#Td_5 z)MA61^r@Zw#m2Hj$ja?OT`mRA)O;5bMp$2x)@_z~jzZo(d+w3h_27@CUw`;^TB;q2 z06MHSJ^2mm$LOs(erXSMz_DUWKC11ixt-{qkV$@W(1;G%bp_&)7x?l&>CP%qK#^NjjiiNUMXYfe=CGd-H915fW zx}y*7@##pddJ+p#y(|uoLM40=JGD?$vZ7d|oD{S|8S6x8kwhoawzs!uzB019(GO6c zR6q&eb`k^EhrrOtVOl_nFgDU3D|HaDu7Lx;4dKKg0hai&Nl@RL z6H#obc&ek3te!o@8w=wVV4v(&~xWO>G#!=lh zsfW(;s+e!C<)%G6cNLSh5hWDJU!5 z1w0FVj!JBd{1j(>j`S0AbT>agZCq)+I|hR^vyH2l(#n;IkEQ4r06QUy*k~Zmg^2Aq z%4n6Rv22Q~II>dOZ%DIcOZah2!6QJP)+l2hSP&2If1FbLxaFN4LEZRrzA`Gij=9H! z>fYq_yH?)d$67bEjKK(!I5MOUOlR1U$!UiFyjY)B`c?P4G{LfLE#NF)(Wr5gltSbe z+ZtgGqp*!ad`cAIi6{D+eJ$8LcgBsTwJFGh?6+k*m+{37(qKP!c;WVmv`zWIlLr!Isp)J^+*7n6Da^*l#C~lgK*iA|Vv8W|uF8yXEJ_%G}^t z$t%r9ZApKTqx$x>z$HIEZ!Mh>{w96u97ro#-5V#=>B^1>>*Rv#Xi5+#lyecnc5zoU z&o}p!v*X(1NUVsVWQJfo76Xuu6FWGcv2U2VxS#8;N{CsNtn()185ueU0(ut4 z6xT(Za%4jW99NMY=(l*npjz-R(Q3h7P8+bY!>@-2cenc~U-W(UcDTE#QHGT61ly#(ba-2|~4!rcl}j}kjxyEZh&AXtys&+8UT)X|l+t_%gg!DFCI z9kd2%d)mYm<*Mj#2*)DWoA;O^JeHINsmQ-!AZsA!7GaQ=TjLnA0P*Qz0WIVwgQXD4 zUI=eB$b(25llYY>r*`Vv{QO5!6Omzv-d&~?NWO(ou#1_6W|}AMt1@PExQ?XL1MFIK zx^<=mdzSqr>8m(Y4%Tb5rgFXgsxJezL+;4P=l9yR_&v6p-9T~v!ly)EyOvsG04L&k zVVQfr%qk?lvJB)`qu+ zKUK494uq6^f(YQXk(Y+FT852d^oTx z&MPs5&d<&2f#oH)vxcQN2LLW(`pVr!s@aEW^Rrz@Ank2CE}lGJ@q0%0ad%fk%sB7a zs&o!x=Bed#2Z%RfCEn?B(^ac+#=HT}6H)aURNE=ZXy(ms!FGGv`3kb}4)~m%3P|AN z7$e)!!onKg4A~`BF^6DPyEnU>boxyvBR8X#vbWrcUDp;)BX&BsJO`@R8 zcVyIUHd(rb!3>3%>PfixGzwRcWCTM=nPCE8VLdy5Av@xy=aroP$!62DJg?K^R~nZT zUH9P*HNjEQisX=S%l5quajVPLVYk@q(oFInMW5$FEO{n@uu>55#b0jz+LFNFXXK!a z9ttBdLksFjosRAog-Cc}ZPd?1{`H~$J)~gB!PLrdm%UoTlR^_qg;dd``<2)3)fqwH zl7zDv=!;r0ZIce~Ru;4+J60uf`5Y1>8!Vp(oD9Nn{I`8JS-bKdTpp=ZLnn?Yu0_if z?G(&VN@*UoD(J}!fsZK)9AB9vOxR5{4Xosci(GYDca>t!=M};{vn#c`$J^Ld^PEUe zZM7m54n`LrQ}Bb#)=PsO|5556{|0#2iPDiNvgAXO>@T{vGd$s<;+X2`U*q7Qr@3L< z_?Rtv25OJg?5YzN^t~h$kDKUz{F;>c1(jfrtRAnLxqtMP&stPYPw6VrA8F>`VLQ9C z;&QM%dVluTYUhzlyZ%PTrrmz*Yd*s-?8A4WO)OY1tA}*mu*Ne!o^&J|SVR`o)b7as zu}c4Zc3Q86>M*YDRJsp+#79Y8`KE2AG?JkVS>tQSS0>3erj~Z}#`s2d{w&o{nndRp z#f~)xHz)ktSkCpR{S?LUxg$qUl>H&;j#$Z!5R}8kp)|3!@<&-LCJ!rnZ_Dwm_cGW( z(OTYQnxV-~_wpl0(S)FXuEEc9Do{#B_ry%TEaFkGaZ$n*`#|empkbe9!XDlF5JqV` z&i+Swo?&mRtAeiF(UK|0>w>7YPTk^972OEQLWjd}pu^D{rP;UHT@2{q_cLndW~mL! 
zCiWQNzqAd5j8ExB%LG;{mkp1x^`yk~TBC5*xg& zxALvx0@PhsGLar^Qd#l|eYeF(30KEu9BLm5fA3W@6O7vRBEQ%l*thge>E^UAf%*7_ zqR13;A|JR?e|JfYAYQ?~q*5R~%PF9h@gHE)q{SHVl?^euST}Q z>1=q3TF=eA2@|X`~WetvbwF%u*sva*PT=<+RSdpSH;GNVk+3s;xqph4yXpi&g zOIK~D`UkMoYQFVw#$n1j85VW_UeM20_)p1tqL?VOT*SovBk^KcW!$Ra5H3vdsI7Cy z0~C|-0b+v6wLB|@E7(WF&(K36VKTv7Rd$Qq=oXl^`bdHp)0&H&)v88N`OKSPJ4@DB zr%%mZ7z)dj4_`j<=n620PCe|}Y`5jiaA1e8C`D*tkN zHd5EX^hp8J!+1?G*E(+fnl{-cJ*N6OJ!e*>mH|FTbl_2@)$84&@V06j#sn|Py%<`S zw4Mjy!H+b@i{r9AUTR{_IUGF?TlFq##J@ty_#@-GzNEHA-gW0M7-fp9&KKt7(5IZE zruilIZRk~h@+>hepu9r>!*mOx&i@ex@LS!^#47sn6UC5%i{%U~Qt^kO?@idNQj`JM zO5Xi&GBgbyaQYXzLVpuDq+J5QzEzVBg?)xu_%kS6eYdcs7nu1?{OaLNrr^WNUA4eu zb{e>?;zujDOr@JTb|G2s)Z>ELJ7l)Fvm$Wp7LMLKLY=~ldPh5WfkO246NWHyBy;|g z&bn%U2*CRX5&oAVqo;S@q9JH1BgY9Dq)X1T*Hwma$w)zG&UUb@~}1t^OfAw)!> z4$;<%LnlqR%un`0hOYZLwIpx-$(c;_+o-4-Obv>lhA_QWA&ABCMu~l)TNfP`KqR;# zK=~SxhD5{C-c4@J0h%L#7H0GL8NH-rX^HKqq~tNvaQhIOCsKP{R%F zK*Xb+-CK!hEHKhjT%3$SIk@B3oj}*fIry>5Ny4!=_hY)RUC0EIs}@UOgK36E_4E_N zo`>qp>kJ=`H@G*3%->jtSwsh^dSP+5S%*BD9;{qbLzqAR9CBfQfTh<$gB_uJ*Hv>a zyA>1TTAFoz1C!@z3n_PaS4hJCXyPJ}K@MacK2}Zr;HhHrHkLqMLjl3$MHCrvc7FSh zb^g;c^&FwzGegFod(Xa4{^;4es$r0|(7utb2gI`(bwmn*)MhuzJub)l>lT75Vpv-h zj8yfdLUwEA_g8OxR!?A1wX4doM@ST20`HIsw>mn=_*t6!7vSP?6JNI6&TD%ECbZTV z-m0+A$r_AOJ3X&V?=={!EnY_@ameG?HlVo53zv+vN%hnzH9eHR_SD^3z)V64_bP|Q9U=~! zc5h|MZ8xgqb2g14V?!Zi#Q&-HL4T|q&Rr}#Lvmp0EwQqEPyAMk5Ktq(D8R3%H4u5Y}1ShS}u-%#cagG4_q;JC( z)$w4>M#;+hdXk=9RkoG&0^Ou{B}11A+Mf(!Ok!fwLk0bS;d6obBqkcJOlEygPd&_V+h%V>>+eiEd;rUL`p~`gK!8Q4(?m4C=WQb1u z0zS!`iOC+X}zsrT5N$Ws3U#Nfa&H!C9>r5H`$@fO4>vm zYNgsJ1XH zvrOW50Or*aOay9w0-3nU-$D*I(&Z5)piXqpANnZERTyHM>;-kMx`a`gj+!`15oJ>5 zBD!SkQ*&`=PNwNFd0xDFCBy_S&Kt{$mly;C*?&T#zzi*F>O*SkyI1}&+5Dr?{OjLS zuNzOtFmhMWS6YJF7ItGn%gxc;DCH8}(js%l(YPsz=D0oHybSHlt|^{%eCr zrnaAJ=fboe-i+UKxDGt&pcCZ3D9e_$%|^S$pBnI-F@&)OeP}Z#0fhCDnInJ<{h8I_vY1`R3C(!=B)4 z`Y;^dP{yz00#@YEDw7&R0{7+1=jfja`8Cc-Gjvq+HM707c~>VY=V{;(f3RZVp<}uP z^|U1iet}?TYb%ACA@6j(v#gh^cx=6$qm9PwDq8hj*S!2*o)h*8j|E0L+zmdaK%Ol# z=HIWul&+aYow<9|;1zpF_Fv=v^*<94{JmBEmt+QPsbOnS>yENSAq6Ox`ZG+Z1%UEE3Dl!8HAEZd4EM#S~1LB?hNdx)j2n;`|~6mCgIZ$^G+uG~-sv+YmQ9#cHX52Dg_aM!)muxM6OaWv|? 
z!QBGaXr(F8nnL@R?MHV-jw$9ht|ob98*YD?FU=fiGd`vmLS9PDI1q#W_+e8-aVttd zZM#^vz}Pp^`hiJ}FDvt?Sg$AYrmofFh4G8#p_dBwv9sTGzbF0=(f{8Cc$Tyk<}dZ4 zvq4(!y~B`9lk42)pSZL*OvMagD{`;S)^--ZjO*$JmW%PwH#SF}BnSJ+9(ZDI zJ4xzAW&-|8Qm@~*3gZWh>yqcV~U-G3RpU^+ys@BAWw`$j$VlB*)t;W6nx{H`nD1Ysf35-U!3DuZo_PIEMiMO zGr8^2jq{cnSnbTfni9#4Pv_9U_)&H=e}~l3jAXQu^p7;c<}pQm`?so_eBBLuFG+;J z;;m>KtD~8?{$q*-vZg+#OeQDe{UM5AK(>mzEiyA(S{*2ywIG|vRlh%WEBrQX;&!#j zxS}|_k1a~QXsT&onTOw_qbXUN|A;c9tcZf!P+kThD8Tz|w1Q6TATfWLsjQo)$?Jf| zM|OOTAVk}b?Ec25?44n367iez3FpK%T7){Lh$R>Qj_j98479A(I8r*Mz})#60rS60 z{{d|IpEAKY&Ws4x`MwU_H+oKH>;N5=dQbM9RU%bz{1w0If5&nAd%0Yup*ySi!1-f} zJ&B9NKfd#e-*5Z(NB;p9{qG+plTVpS??e{8f53i(dr2mG9B3-Ac)0cJmF>a{zl9pz zEm6Qsb5EAu*HWOK=gd^OZ3GOu}OUNx`OyjZZOQvYr) zVybU}q#1tGpR*KQ4Q$*(ro!jT;uBb%9Bb=-=}riMoobT#c(qnE9&x(zbAn<}!MV$g z`=9abV}(`m7Wz7#>eW;mOG|s5Px$nDthh>=0Fu zV0rM446U-O2-lX%F%=gR_M%03G90+7f~?*ZaXMAxn|g7$MpwAc+Z*gBsq1Ns)A0$~ z@+|vMD4pWy{{<)e@k8O#S51Pw#C)c{ zlV>tNt7@#6oth4NrDyWxT`q6@woSjb_x`+k3E+u8f77!Zukpq^brBsykIGGxXx%vv zQ*PxRaHUYr)3*b|Grjqc7DrP#UDm7jmpug=Lr=A?l?$e;t{EDXcyP(IXb@zuv?$m3 zBuJ{sP67<(a2T3luJqtd{72K$r){HMKOmLuL@yM^`e1C6AHA?ym+YR^1J$h^Z|u=P zl?(VfksAt0qZUeEJYdP`vK?wHuw@PvdE5>uk=`D>toi_-+Y{8+`w?eLTc#_X?XE|i zX(}7mfvcLfJ2T{1W|DF}2{q(|*d?u8CLzQjksedA1+}rS{cU${dK-OUbewZFnARk% z+t&12cDa8my^N0Tba!&*rKQGrS7(UJbXZr@&1zbMKe~g|)SQehY2SXPFMe!YD7Ye( zIS)wR)m3HYDeW6CXn`w6p^7w&nJ}q`pW74Xuv)1`a~hmW!s%HR(C$3OdibkmpUEDFd zpUCwzUw|LSP2$vn_>M9KoVHMn7zb+lXOEIxARJsMfG$q>shxaCE7-lrHOk0##^ z<+`u%?bzm5&=K6~^O?>;TJy~V$uI=kH91jyQK*`OL+|Pp0}9{Z>_h1lnye1el(BJ% zwkqgCFWh4du)Lvby}v))*p+A9?^{XtZkCo$6VbZ$C|syYr`cQPmV;d6+mV#FLqua* zG0h5Mv}|Mu3)~q#G4h;iNJgzpkDYg4y(YfxG>J?PeUcI&FZ@JE%bl(}11-1c?(JL{^$ws3#->0V-UFVF*1oKPKSN7Y4yBr`$9O4;EM?8*+Nx*h> zj2u{wi=r* zyQ7*+w|5zJ7*IiJA_yoYK!AXR4uXT!geIXkQAp@TK#CyZNRbwSKoU9vN$4#=fFLNn z_aaq9nsn(^y>ZT*HO^V<%(=gF*IoCY`%k`nYwhg!O}_X2^6kB!{XCm0oE6EKdi6nk zfeo)BDqiA*#wOAIvbj|nyn8lb-{fuyK-~v!%33{=COnwrFsNG;()Y4YTXRW6MnFbP zoNY}$T{KUWc#m~Y?|PdM|HOo;4Mu#nF_hE2J$5BgA3&N^X2)bx`ssKjE`tIF>Yla{ zO=V>7WiI(s5?*J!@*Qzw4UvPnB2(dA(d4dD-Zn?;atGKdnAdeqK zcig-^6(pNw4`^B1cXfGj9Z^LFRCSZAEJRs_+7m2fC%PKmeNSIp-8g3*?0><RBa!5X6Y0hq`ZCwx86D9VTPxn&Q( z&B7xMidIdLeph2Wm|GG*g!HW%ked=8b9lYthhNoTi3V(_iZb(H%#~&+tV3@+}!4_gT)bO$v-DzXi;N~>? 
zF(O!OS_sINOzshwx$HJ#2MGCCtmv#0J!nv^ohJrC(k3qIcmq}Oc)U z6c=H+Ue{Oyxk4}HVptSEszE*e;)J#wlACbA)0fQ)VGkcEbFnz21|_aiCyv>?L*Fbn zZzDiT;yDSh*&Wb`G!73KnUw%@=b8HwlB=)|y#VLm9B$q6$Vak@z{kt{D;KwGn2gMq zfpnP*fY!m`$D(`-De7*Wk|HDn!dZ8h!|(y2_QR=})G1H6Ht|#0ao$~*vDmI5vXsc= z^5t@f3xr3h9uFdZE>ifK>`@|bm4BIDT@gK6sYtL!3uPG|E|-Qi=>hTbJ2W0p*K$cW zs8SJ9iEf|0Cd{Fp__im&i8RDPYGwES<$C8etEs}rOw(bJH8=TyI?5LM<{tRG7jzY@ z7OKdExey0k_Z&;5`9u|VeLHhos+_yGv5F|^q1cXh19D*IE;q3Zd&oixfV9GQ@OlcD z{;Je)i-~X`KfDprgcjVaSB{BO!M2B*jJ5cB@w_}XezL)+l{!@B7Q6__jZjZFmMEDp zik-XKt0x00YGcCYH5)Y6Ww3M9II9rGGf6#7%Pid)XXrFWt=k@OR`+uZ-E8-|*qOs> z`%8@-mZ7}iN=|kWi-}ZsuK;(E_$XW(1!p#a0NP3E4`ZEprf`O-N~=2jDO0vd($CU0 z0&?Z;dik1OAk61X@ezCja!_Qr11lyjebzmXv;UTsf#?v#HO$0* zJRz)e1hdqpgO4wp;JITwUVt+rO@w#wSuG0()~@#f3ws}?6oxf1=5zxE-W$dVH4>$Q zMZ3b)0~pAihR*JVUCRu9>g5VeZWhcXYh%r}ef2?RW!flEZEwq34Ra|pvxZTp$r|jV z!gS!!jd{}Ri95ct*e2^TJ85(isoRWA8Addl-by(FyTQBKCi6-yDIIgwU!txp0QtA7 zv6v)X-Z)oW_mm{=ncLgnxAp%yIS#F{+?-zSQ}@p^4)Zqo0MjL;Qs&W{roimDXa6RQ z`I|cb3-6cpKy-e5_zoUXhJZ0fB1V;-4+@HbQRj| zteyGg{|`pVPW+pnk?t?I_Q;0|7@L}2{h}mYmUttIkdOBn8C%nl9yd#qBk@Q{vAEG5 zO%ovsy?EO&F6p}2)yikTudd*gn9FR^Xik)$g4yqsLb)LWC^J1EHZQ9f5mPf>?7Ou<^kIS@szor{}?2WLAmtllTUk@9M zsEbN*C&Y!SeQ5BT(-*l}q~^xC&Uev>Y!;u_tL3Vjqw}02p8MP}^OlwxE}DB^7J(qYl!s3FD+4om%oHDS{dtPl#@NnRcQ=DMz(Gg zlxXTxX-)n9xRqFWNhSX`V4H=T%Eq+_FytCNdjt@6L_xDA0o2pm*jtG@d?_f6HV?BT zZI|?8-8*g?{vwi%GO?3^$j;T;l7uAQB_19nR}$!ja^feJumYGSrI#bC z3XQJLGs8uu1dN}AAbK&#Voyo-qo@axFWTNGn;NQ-f5RUWP9Gm`Z@0r!1G5a1?ZRvM zon)RwWy6P`>$9N}oHbOj5QxO-#-U7Q`6$zfk5}~l`EslTS{*IJu$F>y<7)}81=yI4tK|`*NQgJ!&7atBQPNUF`gJbMAYso zDH=R8vM=m)@5Y1E!nxG)SF^pKzBnh%Zp^@KHaIE?_;XkHS_m89$uyJem6Nd~MD99w z7(0c)8g=J#NF3mIFy8BPs%I>=GZ>F#0?m;3t0ld?GkqCx_nw?UcBlZsD!vX% zDF7Fjo1hEWu`1ZF+gEEQC^J39OCfFd?wacdU0Qb0@>al{BwO;%*g|sL$hlPU*)K#q zVX61DawqUla*TRsZ>Nr{xZ0O7#8v)0dTl{5=8@<2b@7c1>c&~yJE`8fW3xn1k0X>h zB`R-UQ!*|o&Jz;oI2PS~%EcMes47f}Sf_HD%xlNl*+QW%ryyt!VjumD%B+gf(fB>% z6gfKll8iTNpEAFbL`?1z)B&SJN8nIFej0eDH{eXdi!TYlBO^2?)hk_fc ztly|0CJ7S__b8zMoif1rsW-%cjd%(0q{nwU%%#pxx3}z#rHrwML7UM9if_Pj3Wr9s zGs{8w=_WnBHN$&zuhnbe4y($D#8ft1wok$qfY)Uo*x=rqenRM{WEl zmKdSd9m){523NPhhJ?TMe5mY8J!Ehyj zq1RCevMMSBxu>I)nM4v5qZ0EOdr}X5Jm15vT53(*zrCTS;-rCWWG$2~< zVh^Km*tl^!OeLg6Q;5bPFcchach>r|uAr39Q%J(BL`8GqPK4P)sG^br&q8>a(CUq$ zk=uZG0@97hRSok$=99I>vf&Xo>!@Le-N3@4)P$5X1kB{NVc~Hj zT|=y9+k4$iZC5h6q|Km*_Ly*u=e)2Er_SuXyNq(_Fp|Azf?64=wN(qUfhT22qS?|J z#jljBgDah=U8-+-UU-uH=DM7|lFkbyO7oJAIZgI%C$!hLm(h5o=1Sm3hZMjnJN4#$ zQ5)wbw0o=%UIQRj?5} zwT{GTG zhz8(Ja|+}_Pv_gZh)*!oH@ipavpAw?<0JBP@-Fg2O3DBm5CcMp?8jjUz&rNAkqoCV z##A~BLl-#*TBs(8zQ!CWj&5@1=6+K0;}mgyQ7pk6+Qk6KLhMTvd< z?S)*W-T4xE@%KEzTu*!7lQ?W@4ef6-z7xK!(8VS$y=Ob&z6%P>2 zj)oGh%n=tG-PkG|b)UG<*S;}|rml07Gk2F|)ZAE;p9Qx;{ux>uMgt?lPpz_-;?`NCBK3>kTNapihFt(f&mRDq!cRl; z;nJ~{jnUh-V%gqM*zur~t3upGx?%;lhdL}n)4z^rYjLP{ol5f?IFxtJDSH~YF>EA% zic+oxepQXnheyyTg9Mk2pR%WE(3Edxt7i>?h3p?QEnK%O|O=2b$@-OSZ%?EO0kdR0)R*pVH?B zDg)Imk^n&6rD0Vo71V#Nb@DgBq7SlYGf|C9bXl2|a&Ro%uhI+NeX)_5H*wxCZ_~o5 zCrn3gHypwsWZ?=#1Ni3%4V!hWmwHOqm9y>n8$6Q>pA$6pyqpnL05n%~fY*)g@O^!@ zxIN55^+xkIDv^FGf$ow^Lgg(AzuYje>n#4X1j#9fT|8JYY!{V0!kp}}?~gl+of%eV zreZ8ft}}YI+He?JYk?RIf9!foD{(#JGk~a%5RC3o&$0}?KEQX)Q4d%v7Adcr1C)Go z^NxSQoV3K&UT#$3C?=tMe&gvGWwP2EvzpW2m1^?}N`sDaE}9wEonGgGGovUxQbpsi zIoA)0`!kJcRZ~aSw_&E=s1}pi@=JX-W4@koGN$0}w2p;0Hun$kecYe$B?OV8icY+J#cDlQlcfD`blTSMyMXojS9A_C$Vux63u zxnY4hxqkQjTUxo5rzBG{r6C4U9fAfxm%$=g`8+TKDmp$-qvug13-WRSC!kK*W!Z}zeNV>?V*b8kkEHSL9vazw# zsd#jE2r}JfOc9x)fhIi3DdTFobd=G%d9wcNyTW3dOUrlK*zLl^w#RiC7qGvbr$8j1 z#tGIxNB$i9AQ^;D4uUUifI>~$$dAhrlS$|DeB5RBrRU%EcwTw_nbEMEheGQVe(plV 
zs^yOPA$&wGqE+;!C4ViY`)H9I8CJAOe@oz=BW+|oLOEtIlRDcko?A5-B0az+8vy@n zVfKIafb`v())@K4X0`a^U3Lj|LcBcQwxwnOdcsKTcMi%poSi6{^j%hDUVOPUp&@Jc|pXNhGM8}aQc z)kVjwa~joO2(j+KN}teY6XTR_`Jvup?{#&>HqqxTvU@-w$^Dc02H}tmh&fWNEGcox zctC$hvlOeZLw#X?uvWUEeV~7Eq5V_;jP^2R<6EmwIxHG5b%tP07%tO4{PWBIOHNHH z11@>>9aFNm0`KdR{OHAB5{WMe9V#@i-X=~k1ZbyfPc#hh^WptUCGtvKFAXVjs?`!C zhy42RXl$^r`L)ST;>^-d=w~fTK9c2IVl1bNGDl5IwpzCtx2$_@gHoxD&ocjDdFclS zY^E0tez5$(!CS=r|IMLV6;ie?lKPF)TBdDwSD>hY z1Po(AMW7J0Z99pfeDc2{B2VE;kEK{U6pVtvEl-UDE&olD%^S8$={joL zx+B)zW!BM@n83R>$I#TmUjJ>oSa zB`<#C&~04DMREy^|HPk{|BrV+P9gHYm&W|3x4(rj{KF`$Bw7&|QzBP#N#f(ZrZ)}Z zd-E8HjF%lIbu0{Hu@%Hwc{gCA8&tbP2Upv&w|C`$d;S|0OX8&Qsrt%St>~4}q+#9q zT72uM*Up4#2?4!AD}{ZsRtprZn4)20nN%=Pe31F2Ho> z^kD9yQ(m985_MZ~j4ZtYE?#P~~Ji+q< z0!ni(>K;U15A9d;d~u`>CD`qW(u{{j{e0~mT@)c%Xi2iMWvY=4 z)AnK7g8^|#98D1%{=rNizEK%_beq4dPbN^O&5JbuX3fSh*QU|YcThxn&i9H;=^zL< z`%$6k5AjL%El8`ks6pDy{llIrJPQ%+kCG7>W^}w03{zAkg$I;vz$Il^KV%!D@yVf#P3qgG($b~ko!_WXLulL2 zCh+xu<%cuTQ-Y3>Rn!9rPm<1yGhHw1Mm6?q4)YxF*oHKd0(ZmnxB6k@a4 zO7DA2r{+Ni{2JOiijPB~w)U=kqjG|nH$;EB#E$kC4;w&`tq`&Am){qBOj6U)GGkG> zTx(tQ9JhZfFhk4bHI}J!Iaw1^1!>+*JB|X3(qEH}9X*w?^cLeDyjsb2FL0vBh@R~_ zPxxAb$!M?RGX3qFnD8&WA?mobxL11i~vqM3#CgYq-d#EJhEGCwbLm3MrH3!*(?9CZRjuC>OY;F`wMpOFMzi{aQIX| zQD`w<&6=Bpk1dg3#5HBT%D-cYl{b%0aiIXev}|~#fKV7L0T(@Nr>bffKwI%r;_!Y{ z4&6!?A;$>>K=iY-7kR)EplEN7hc+ni7bZIv{hlKy zYBeKw-7t?6g)q(}hV59)g180e!C+(mWQ2^ol3||a)PZE&D!;OTk?8v8X%A!huu8GZ zU7!;7Oe9V&+TZ>|Z&)~Cay!mo=r)K}LtKe;Dl|rpCkH#1RFBR&v$y0*Sfs&o?686n zJl?7}$rC(?a78l!4I()KGNC*McB(|MF$EsvatS7T+$f%QC%>Uoh`dI)b;{Kx??1b$ z@j%sy_wrOcC#}A^Zw8*vw`(c_w_&Gcve8SME$?GWZyUOpq-AEfeAjkSLM$l)cl6xM zu64OJA#9f0$jU`J0mzSMXPM#ZQ0$J%X|5nnyr4lMdqmPallonPwGv3XT@un?jnY7R z;4QYHxP5VeD%5yt{weja)qNTERpe97hfY)QJrx6BS?!Y;&L|ZBPrxIHlR^(uNT3k= zr0r$|$9$+xp2lyB-(9R@5_Wef_Sm z()uz=>1mZtKw|A49K>k(TY1qUNUKI;&6E7ai~l5h?&He+{Z<`*r&!=c_-K80`=Er|0=U>UL87n|kg)KKK_TYWDfjQ*f95uC>xo zr@gt;Q~juk!4{9XaT>cdoW7iOy4v`vS z7~X}@D3RVf2H+v!&zSH1H}n6T-yhSY`s?!*YD)zerO4U7OTLySs+=d~_o;H0MRpnO=7ULazA5@>gYVnRud$ zt9T-8;dXmSCdWqg1g6-K8wl#l1M-MwagY2UF#oqwQs0yF3ZXIlX+pcwwNpQ;+5cbA zt@?jBSak6D0Xo(p3lsN8OPUW0jXMrk!q+;XG_lqmp+F%6s0U^+Tlu>RSf6Wu{0WWS zf4D?v&vor)f%ZDfFIo%MHiLs*LHV6|+Xi4%Ojg9hX@EM)!uR#6JK>Hm|Fm;ptI~U& zW?hy6)sEf80{yJa^2PFIyK*zB7EH5h{wgb~(^q0~5!g1;Yl(A&$XR?>=}r?W-js;& z^%IUbw#N)~<-bII1&A+p3blt1I!WHj(vXKd$yd@({+Jy4Y$=+#@S1GfHop$hFEy{+ z&RmB&fU(LE3aVO}c?UUsL12|jSAh|ndkfYG$O@n)GKIo&CGSjeWdmGz>{u(Q8F!!(o%hRd_UE|uSy3W4rMF@trp}}|u>^&ae)@EymDr;*J~GG79M}>`&7b#d;BT?tAfs> zl!y`HH>!pDcqfgaae>1cnefw9Jn{_o`Vf0t3LnAc2v}2dC(6@A#F!NJ{>yTv-O04- zlODGKeiGHqK>0xqdtEE6OY=!|Fb@&J-^Np)=(&c##dvBcb*O+C;@ECBUW3PY@i8PN NE&Xvrx&3YYe*yTLVt)Vt diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index d85b8bf4a..7b9576abf 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -14,22 +14,49 @@ Overview of how it works: ![RAG_workflow](./RAG_workflow.jpg) -How to run: -Install docker according to this [web page](https://docs.docker.com/engine/install/). -1. We have main config `RAG_service.json` , please change `model_name` and `document_path` accordingly. -2. `run_RAG.sh` is the main script that can create `.env` file for compose.yaml and then actually start the `docker compose`. `compose.yaml` is the main docker yaml that specifies all the mount option and docker configs, change the mounts if needed. 
+
+## Prerequisites:
+
+Install Docker: see [this doc for Mac](https://docs.docker.com/desktop/setup/install/mac-install/), [this doc for Windows](https://docs.docker.com/desktop/setup/install/windows-install/), and these [instructions for Linux](https://docs.docker.com/engine/install/).
+
+On Mac and Windows, you need to start the Docker app manually after installation.
+
+## How to run:
+
+1. The main config `RAG_service.json` lives inside the docker folder; change `model_name` and `document_path` accordingly, for example:
+
+```json
+{
+    "model_name": "llama3.2:1b-instruct-fp16",
+    "document_path": "${HOME}/work/llama-stack-apps/examples/E2E-RAG-App/example_data"
+}
+```
+
+2. Inside the docker folder, `run_RAG.sh` is the main script: it creates the `.env` file for `compose.yaml` and then starts `docker compose` to launch all of the pipelines in our containers. `compose.yaml` is the main Compose file that specifies all the mount options and container configs; change the mounts if needed.
 ```bash
 cd docker
 bash run_RAG.sh
 ```
-3. Ollama docker will start and this docker will pull and run the llama model specified. The `ollama_start.sh` control the Ollama docker startup behavior, change it if needed. You can log into the docker by
+> [!TIP]
+> You can check the status of the containers by running `docker ps` in another terminal.
+
+3. The Ollama container will start, then pull and run the specified Llama model. `ollama_start.sh` controls the Ollama container's startup behavior; change it if needed.
+
+> [!TIP]
+> In another terminal, you can log into the container and check which model is being hosted:
 ```bash
 docker exec -it docker-ollama-1 bash
+ollama ps
 ```
-4. Llama-stack docker will start as well. The `llama_stack_start.sh` control the docker startup behavior, change it if needed. (As we have not yet have a working llama-stack docker image, this `llama_stack_start.sh` currently on-the-fly install llama-stack from source) It should be able to run llama-stack server based on the `llama_stack_run.yaml` config. Once the server is ready, then it will run the `rag_main.py`.
+> See the [Ollama repository](https://github.com/ollama/ollama) for more details.
+
+4. The ChromaDB container will also start; it hosts the Chroma database that llama-stack uses.
+
+5. Lastly, the llama-stack container will start. `llama_stack_start.sh` controls its startup behavior; change it if needed. It runs the llama-stack server based on the `llama_stack_run.yaml` config and, once the server is ready, launches `gradio_interface.py`.
+6. `gradio_interface.py` will print a public link. Open that link in your browser to reach the Gradio UI and start chatting.
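Once all the containers are up, the stack can also be exercised directly from Python, which is handy for debugging without the Gradio UI. The following is a minimal smoke-test sketch, not part of the app: it assumes the llama-stack server is reachable on `localhost:5000` and that `Llama3.2-1B-Instruct` is the model being served; adjust both to match your `compose.yaml` and `RAG_service.json` if they differ.

```python
# Minimal smoke test for the running llama-stack server (a sketch, not part of
# the app). Assumptions: the server listens on localhost:5000 and serves the
# "Llama3.2-1B-Instruct" model; adjust both to your setup.
import asyncio
import uuid

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig


async def main() -> None:
    client = LlamaStackClient(base_url="http://localhost:5000")

    # Same basic agent setup that gradio_interface.py uses, minus the memory tool.
    agent_config = AgentConfig(
        model="Llama3.2-1B-Instruct",
        instructions="You are a helpful assistant.",
        sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9},
        enable_session_persistence=True,
    )
    agent = Agent(client, agent_config)
    session_id = agent.create_session(f"session-{uuid.uuid4()}")

    # Ask one question and stream the reply, the same way the app does.
    response = agent.create_turn(
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        session_id=session_id,
    )
    async for log in EventLogger().log(response):
        log.print()


if __name__ == "__main__":
    asyncio.run(main())
```

If this prints a streamed reply, the inference path through Ollama and llama-stack is working, and any remaining issues are likely on the Gradio or memory-bank side.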
diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index 4084446cb..a4f58c5e8 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,4 +1,4 @@ { - "model_name": "llama3.2:3b", + "model_name": "llama3.2:1b-instruct-fp16", "document_path": "${HOME}/work/llama-stack-apps/examples/E2E-RAG-App/example_data" } diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh index 9ea363efd..bd6e68045 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -1,12 +1,13 @@ sleep 45 -#echo "starting to install llama-stack" +echo "-----starting to llama-stack docker now---------" pip install gradio echo "starting the llama-stack server" + python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml --disable-ipv6& -sleep 36000000 -echo "running the RAG app" + +sleep 30 +echo "---------running the RAG app--------------" python /root/E2E-RAG-App/gradio_interface.py -#python /root/E2E-RAG-App/ollama_main.py localhost 5000 diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 01389623d..c86e37ba5 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -3,7 +3,6 @@ import os import uuid from typing import AsyncGenerator, Generator, List, Optional - import chromadb import gradio as gr @@ -28,7 +27,7 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.client = LlamaStackClient(base_url=f"http://{host}:{port}") self.chroma_client = chromadb.HttpClient(host=host, port=chroma_port) self.agent = None - self.memory_bank_id = "test_bank_999" + self.memory_bank_id = f"bank-{uuid.uuid4()}" self.chat_history = [] async def initialize_system(self): @@ -85,8 +84,22 @@ async def load_documents(self): async def initialize_agent(self): """Initialize the agent with model registration and configuration.""" - model_name = "Llama3.2-3B-Instruct" - + model_name = "Llama3.2-1B-Instruct" + # Model registration + response = requests.post( + f"http://{self.host}:{self.port}/alpha/models/register", + headers={"Content-Type": "application/json"}, + data=json.dumps( + { + "model_id": model_name, + "provider_model_id": None, + "provider_id": "remote::ollama", + # "provider_id": "inline::meta-reference-0", + "metadata": None, + } + ), + ) + print(f"Model registration status: {response.status_code}") agent_config = AgentConfig( model=model_name, instructions="You are a helpful assistant that can answer questions based on provided documents.", From 1b02e076d94a3aaf28a4078179948e48485a1a76 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 20:47:17 -0800 Subject: [PATCH 60/99] Update README.md --- examples/E2E-RAG-App/README.md | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index 7b9576abf..f91a21635 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -2,19 +2,7 @@ This is an End to End RAG App leveraging llama-stack that handles the logic for ingesting documents, storing them in a vector db and providing an inference interface. -We share the details of how it works first and then detailed steps on how to run below: - -All of the steps are run using a single-step via docker script. 
- -Overview of how it works: -1. We use [docling](https://github.com/DS4SD/docling) framework for handling multiple file input formats (PDF, PPTX, DOCX) -2. If you are using a GPU inference machine, we have an option to use `Llama-3.2-11B-Vision` to caption images in the documents, on CPU machine this step is skipped -3. Once ingested, we use a llama-stack distribution running chroma-db and `Llama-3.2-3B-Instruct` to ingest chunks into a memory_bank -4. Once the vectordb is created, we then use llama-stack with the `Llama-3.2-3B-Instruct` to chat with the model. - -![RAG_workflow](./RAG_workflow.jpg) - - +We share the details of how to run first ## Prerequisite: @@ -60,3 +48,14 @@ ollama ps 5. Lastly, Llama-stack docker will start. The `llama_stack_start.sh` control the docker startup behavior, change it if needed. It should be able to run llama-stack server based on the `llama_stack_run.yaml` config. Once the server is ready, then it will run the `gradio_interface.py`. 6. `gradio_interface.py` will show a public link. You can access the gradio UI by putting this link to the browser. Then you can start your chat in the gradio web page. + + +All of the steps are run using a single-step via docker script. + +Overview of how it works: +1. We use [docling](https://github.com/DS4SD/docling) framework for handling multiple file input formats (PDF, PPTX, DOCX) +2. If you are using a GPU inference machine, we have an option to use `Llama-3.2-11B-Vision` to caption images in the documents, on CPU machine this step is skipped +3. Once ingested, we use a llama-stack distribution running chroma-db and `Llama-3.2-3B-Instruct` to ingest chunks into a memory_bank +4. Once the vectordb is created, we then use llama-stack with the `Llama-3.2-3B-Instruct` to chat with the model. + +![RAG_workflow](./RAG_workflow.jpg) From 25e37a7091d55cd0d047e16c2c7300a0f8c52de0 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 20:47:31 -0800 Subject: [PATCH 61/99] Update README.md --- examples/E2E-RAG-App/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index f91a21635..745fd465d 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -2,7 +2,7 @@ This is an End to End RAG App leveraging llama-stack that handles the logic for ingesting documents, storing them in a vector db and providing an inference interface. -We share the details of how to run first +We share the details of how to run first and then an outline of how it works ## Prerequisite: From a09a5d94de16b2e36213513da3bd18e031c47763 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 19 Nov 2024 20:47:39 -0800 Subject: [PATCH 62/99] Update README.md --- examples/E2E-RAG-App/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/E2E-RAG-App/README.md b/examples/E2E-RAG-App/README.md index 745fd465d..29af3e654 100644 --- a/examples/E2E-RAG-App/README.md +++ b/examples/E2E-RAG-App/README.md @@ -2,7 +2,7 @@ This is an End to End RAG App leveraging llama-stack that handles the logic for ingesting documents, storing them in a vector db and providing an inference interface. 
-We share the details of how to run first and then an outline of how it works +We share the details of how to run first and then an outline of how it works: ## Prerequisite: From a520faec8bb25d290a7f45d089001b6aeff09a5d Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Tue, 19 Nov 2024 22:05:52 -0800 Subject: [PATCH 63/99] implemented streaming, logic to frontload db, and checking db --- examples/E2E-RAG-App/gradio_interface.py | 117 ++++++++++++----------- 1 file changed, 61 insertions(+), 56 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index c86e37ba5..429aa7450 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -3,8 +3,13 @@ import os import uuid from typing import AsyncGenerator, Generator, List, Optional +from threading import Thread +from queue import Queue + + import chromadb + import gradio as gr import requests from chromadb.utils import embedding_functions @@ -15,6 +20,7 @@ from llama_stack_client.types.agent_create_params import AgentConfig from llama_stack_client.types.memory_insert_params import Document + # Load environment variables load_dotenv() @@ -27,13 +33,12 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.client = LlamaStackClient(base_url=f"http://{host}:{port}") self.chroma_client = chromadb.HttpClient(host=host, port=chroma_port) self.agent = None - self.memory_bank_id = f"bank-{uuid.uuid4()}" - self.chat_history = [] + self.session_id = None + self.memory_bank_id = "test_bank_691" async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" await self.setup_memory_bank() - await self.load_documents() await self.initialize_agent() async def setup_memory_bank(self): @@ -48,7 +53,7 @@ async def setup_memory_bank(self): print( f"The collection '{self.memory_bank_id}' does not exist. Creating the collection..." ) - memory_bank = self.client.memory_banks.register( + self.client.memory_banks.register( memory_bank_id=self.memory_bank_id, params={ "embedding_model": "all-MiniLM-L6-v2", @@ -57,7 +62,8 @@ async def setup_memory_bank(self): }, provider_id=provider_id, ) - print(f"Memory bank registered: {memory_bank}") + await self.load_documents() + print(f"Memory bank registered.") async def load_documents(self): """Load documents from the specified directory into memory bank.""" @@ -85,24 +91,10 @@ async def load_documents(self): async def initialize_agent(self): """Initialize the agent with model registration and configuration.""" model_name = "Llama3.2-1B-Instruct" - # Model registration - response = requests.post( - f"http://{self.host}:{self.port}/alpha/models/register", - headers={"Content-Type": "application/json"}, - data=json.dumps( - { - "model_id": model_name, - "provider_model_id": None, - "provider_id": "remote::ollama", - # "provider_id": "inline::meta-reference-0", - "metadata": None, - } - ), - ) - print(f"Model registration status: {response.status_code}") + agent_config = AgentConfig( model=model_name, - instructions="You are a helpful assistant that can answer questions based on provided documents.", + instructions="You are a helpful assistant that can answer questions based on provided documents. 
Return your answer short and concise, less than 50 words.", sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, tools=[ { @@ -117,48 +109,62 @@ async def initialize_agent(self): ], tool_choice="auto", tool_prompt_format="json", - enable_session_persistence=False, + enable_session_persistence=True, ) self.agent = Agent(self.client, agent_config) + self.session_id = self.agent.create_session(f"session-{uuid.uuid4()}") - async def chat_stream( + def chat_stream( self, message: str, history: List[List[str]] - ) -> AsyncGenerator[List[List[str]], None]: + ) -> Generator[List[List[str]], None, None]: """Stream chat responses token by token with proper history handling.""" - if self.agent is None: - await self.initialize_system() - # Initialize history if None - if history: - # Add assistant message to history - self.chat_history.append({"role": "assistant", "content": history[-1][1]}) + history = history or [] + history.append([message, ""]) + + output_queue = Queue() + def run_async(): + async def async_process(): + if self.agent is None: + await self.initialize_system() - # Add user message to history - history.append([message, ""]) - self.chat_history.append({"role": "user", "content": message}) - session_id = self.agent.create_session(f"session-{uuid.uuid4()}") - # Get streaming response from agent - response = self.agent.create_turn( - messages=self.chat_history, session_id=session_id - ) - # Stream the response using EventLogger - current_response = "" - async for log in EventLogger().log(response): - if hasattr(log, "content"): - current_response += log.content - history[-1][1] = current_response - yield history + response = self.agent.create_turn( + messages=[{"role": "user", "content": message}], session_id=self.session_id + ) + + current_response = "" + async for log in EventLogger().log(response): + log.print() + if hasattr(log, "content"): + current_response += log.content + history[-1][1] = current_response + output_queue.put(history.copy()) + + output_queue.put(None) + + asyncio.run(async_process()) + + thread = Thread(target=run_async) + thread.start() + + while True: + item = output_queue.get() + if item is None: + break + else: + yield item + + thread.join() def create_gradio_interface( host: str = "localhost", - port: int = 5000, + port: int = 5555, chroma_port: int = 6000, - docs_dir: str = "./docs", + docs_dir: str = "/root/E2E-RAG-App/example_data/", ): - # Initialize the chat interface - chat_interface = LlamaChatInterface(host, port, chroma_port,docs_dir) + chat_interface = LlamaChatInterface(host, port, chroma_port, docs_dir) with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown("# LlamaStack Chat") @@ -186,14 +192,13 @@ def create_gradio_interface( def clear_chat(): return [], "" - # Set up event handlers with streaming submit_event = msg.submit( fn=chat_interface.chat_stream, inputs=[msg, chatbot], outputs=chatbot, - queue=False, + queue=True, ).then( - fn=lambda: "", # Clear textbox after sending + fn=lambda: "", outputs=msg, ) @@ -203,19 +208,19 @@ def clear_chat(): outputs=chatbot, queue=True, ).then( - fn=lambda: "", # Clear textbox after sending + fn=lambda: "", outputs=msg, ) clear.click(clear_chat, outputs=[chatbot, msg], queue=False) - # Add keyboard shortcut for submit msg.submit(lambda: None, None, None, api_name=False) + interface.load(fn=chat_interface.initialize_system) return interface if __name__ == "__main__": # Create and launch the Gradio interface - interface = create_gradio_interface(docs_dir="/root/rag_data") - 
interface.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True,inline=False) + interface = create_gradio_interface() + interface.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True) From 459e00482177a303cc3563df5cb7fbba726a8de7 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Tue, 19 Nov 2024 22:07:17 -0800 Subject: [PATCH 64/99] code cleanup --- examples/E2E-RAG-App/ollama_main.py | 15 -- examples/E2E-RAG-App/rag_main.py | 267 ----------------------- examples/E2E-RAG-App/rag_stack_memory.py | 202 ----------------- examples/E2E-RAG-App/requirements.txt | 131 ----------- examples/E2E-RAG-App/run_app.sh | 24 -- 5 files changed, 639 deletions(-) delete mode 100644 examples/E2E-RAG-App/rag_main.py delete mode 100644 examples/E2E-RAG-App/rag_stack_memory.py delete mode 100644 examples/E2E-RAG-App/requirements.txt delete mode 100755 examples/E2E-RAG-App/run_app.sh diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index f5a812aff..efac2feb8 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -90,20 +90,6 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: # # Model registration model_name = "Llama3.2-3B-Instruct" - # response = requests.post( - # f"http://{host}:{port}//alpha/models/register", - # headers={"Content-Type": "application/json"}, - # data=json.dumps( - # { - # "model_id": model_name, - # "provider_model_id": None, - # "provider_id": "remote::ollama", - # # "provider_id": "inline::meta-reference-0", - # "metadata": None, - # } - # ), - # ) - # cprint(f"Model registration status: {response.status_code}", "blue") # Agent configuration agent_config = AgentConfig( @@ -125,7 +111,6 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: ) agent = Agent(client, agent_config) - session_id = agent.create_session(f"session-{uuid.uuid4()}") while True: diff --git a/examples/E2E-RAG-App/rag_main.py b/examples/E2E-RAG-App/rag_main.py deleted file mode 100644 index 1c59b7a27..000000000 --- a/examples/E2E-RAG-App/rag_main.py +++ /dev/null @@ -1,267 +0,0 @@ -import asyncio -import json -import os -import uuid -from typing import List, Optional - -import chromadb -import fire -import requests -from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction -from datasets import Dataset -from dotenv import load_dotenv -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.types.agent_create_params import AgentConfig -from ragas import evaluate, EvaluationDataset -from ragas.metrics import ( - AnswerRelevancy, - ContextPrecision, - ContextRecall, - FactualCorrectness, - Faithfulness, - SemanticSimilarity, -) -from termcolor import cprint -from tqdm import tqdm - -# Initialization -load_dotenv() -embedding_function = SentenceTransformerEmbeddingFunction( - model_name="all-mpnet-base-v2" -) -chroma_client = chromadb.PersistentClient(path="chroma") - - -def chunk_text(content: str, chunk_size: int = 500) -> List[str]: - """Splits content into chunks with overlap.""" - chunks = [] - current_chunk = [] - overlap = 100 - - for paragraph in content.split("\n\n"): - if sum(len(p) for p in current_chunk) + len(paragraph) <= chunk_size: - current_chunk.append(paragraph) - else: - chunks.append("\n\n".join(current_chunk).strip()) - current_chunk = ( - [current_chunk[-1], paragraph] if current_chunk else [paragraph] - ) - - if current_chunk: - 
chunks.append("\n\n".join(current_chunk).strip()) - - return chunks - - -def insert_documents_to_chromadb(file_dir: str, chunk_size: int = 350) -> None: - """Inserts text documents from a directory into ChromaDB.""" - collection_name = "documents" - existing_collections = chroma_client.list_collections() - collection_names = [col.name for col in existing_collections] - - if collection_name in collection_names: - cprint( - f"Collection '{collection_name}' already exists. Skipping document insertion.", - "yellow", - ) - return - - collection = chroma_client.create_collection( - name=collection_name, embedding_function=embedding_function - ) - - cprint(f"Collection '{collection_name}' created.", "green") - - for filename in os.listdir(file_dir): - if filename.endswith((".txt", ".md")): - file_path = os.path.join(file_dir, filename) - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - chunks = chunk_text(content, chunk_size=chunk_size) - - for i, chunk in enumerate(chunks): - chunk_id = f"{filename}_chunk_{i}" - collection.add( - documents=[chunk], - ids=[chunk_id], - metadatas=[ - {"filename": filename, "chunk_index": i, "content": chunk} - ], - ) - - cprint(f"Inserted documents from {file_dir} into ChromaDB.", "green") - - -def query_chromadb(query: str) -> Optional[dict]: - """Queries ChromaDB for relevant context based on input query.""" - collection = chroma_client.get_collection( - name="documents", embedding_function=embedding_function - ) - - results = collection.query( - query_texts=[query], - n_results=5, - include=["documents", "metadatas", "distances"], - ) - - return results if results else None - - -async def get_response_with_context( - agent: Agent, input_query: str, session_id: str -) -> (str, List[str]): - """Fetches response from the agent with context from ChromaDB.""" - results = query_chromadb(input_query) - if results and results["metadatas"]: - context = "\n".join( - f"Filename: {metadata['filename']}, Chunk index: {metadata['chunk_index']}\n{metadata['content']}" - for metadata_list in results["metadatas"] - for metadata in metadata_list - ) - contexts = [ - metadata["content"] - for metadata_list in results["metadatas"] - for metadata in metadata_list - ] - else: - context = "No relevant context found." 
- contexts = [] - - messages = [ - {"role": "user", "content": f"Context: {context}\n\nQuestion: {input_query}"} - ] - - response = agent.create_turn(messages=messages, session_id=session_id) - - async for chunk in response: - if chunk.event.payload.event_type == "turn_complete": - return chunk.event.payload.turn.output_message.content, contexts - - return "No response generated.", contexts - - -async def run_main(host: str, port: int, docs_dir: str) -> None: - """Main async function to register model, insert documents, and generate responses.""" - client = LlamaStackClient(base_url=f"http://{host}:{port}") - - insert_documents_to_chromadb(docs_dir) - - model_name = "Llama3.2-3B-Instruct" - url = f"http://{host}:{port}/models/register" - headers = {"Content-Type": "application/json"} - data = { - "model_id": model_name, - "provider_model_id": None, - "provider_id": "inline::meta-reference-0", - "metadata": None, - } - response = requests.post(url, headers=headers, data=json.dumps(data)) - cprint(f"Model registration status: {response.status_code}", "blue") - - agent_config = AgentConfig( - model=model_name, - instructions="You are a helpful assistant that can answer questions based on provided documents.", - sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, - enable_session_persistence=True, - ) - agent = Agent(client, agent_config) - - # QA data - qa_data = [ - { - "Question": "What is the policy regarding smoking in City offices?", - "Answer": "Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)", - }, - { - "Question": "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", - "Answer": "Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)", - }, - { - "Question": "What are the three categories of employees eligible for health coverage?", - "Answer": "The following employees are eligible:\n\nAll permanent employees working at least 20 hours per week\n\nAll regularly scheduled provisional employees working at least 20 hours per week\n\nAll other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)", - }, - { - "Question": "How long must an employee wait before using vacation time after starting employment?", - "Answer": "Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)", - }, - { - "Question": "What must an employee do if they're summoned for jury duty?", - "Answer": "An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. 
(Source: Page 37, 'Jury Duty Leave' section)", - }, - { - "Question": "What happens if an employee is absent without authorization for more than five consecutive working days?", - "Answer": "If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)", - }, - { - "Question": "How long is the normal probationary period for permanent civil service positions?", - "Answer": "The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)", - }, - { - "Question": "What are employees required to do in case of a catastrophic event while off duty?", - "Answer": "Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)", - }, - { - "Question": "What is the city's policy on accepting gifts from subordinates?", - "Answer": "Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. (Source: Page 49, 'Gifts' section)", - }, - ] - - questions = [] - generated_answers = [] - retrieved_contexts = [] - ground_truths = [] - - session_id = agent.create_session(f"session-{uuid.uuid4()}") - for qa in tqdm(qa_data, desc="Generating responses"): - question = qa["Question"] - ground_truth_answer = qa["Answer"] - - cprint(f"Generating response for: {question}", "green") - try: - generated_answer, contexts = await get_response_with_context( - agent, question, session_id - ) - cprint(f"Response: {generated_answer}", "green") - - questions.append(question) - generated_answers.append(generated_answer) - retrieved_contexts.append(contexts) - ground_truths.append(ground_truth_answer) - except Exception as e: - cprint(f"Error generating response for {question}: {e}", "red") - - # Create a Dataset for RAGAS - eval_data = Dataset.from_dict( - { - "user_input": questions, - "response": generated_answers, - "retrieved_contexts": retrieved_contexts, - "reference": ground_truths, - } - ) - - result = evaluate( - eval_data, - metrics=[ - ContextPrecision(), - ContextRecall(), - Faithfulness(), - AnswerRelevancy(), - FactualCorrectness(), - SemanticSimilarity(), - ], - ) - - df = result.to_pandas() - df.to_csv("evaluation_results2.csv", index=False) - print(df.head()) - - -def main(host: str, port: int, docs_dir: str) -> None: - """Entry point for the script.""" - asyncio.run(run_main(host, port, docs_dir)) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/examples/E2E-RAG-App/rag_stack_memory.py b/examples/E2E-RAG-App/rag_stack_memory.py deleted file mode 100644 index 6ffe158a2..000000000 --- a/examples/E2E-RAG-App/rag_stack_memory.py +++ /dev/null @@ -1,202 +0,0 @@ -import asyncio -import json -import os -import uuid -from typing import List, Optional - -import fire -import requests -from datasets import Dataset -from dotenv import load_dotenv -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.event_logger import 
EventLogger -from llama_stack_client.types.agent_create_params import AgentConfig -from llama_stack_client.types.memory_insert_params import Document -from ragas import evaluate, EvaluationDataset -from ragas.metrics import ( - AnswerRelevancy, - ContextPrecision, - ContextRecall, - FactualCorrectness, - Faithfulness, - SemanticSimilarity, -) -from termcolor import cprint -from tqdm import tqdm - -# Initialization -load_dotenv() - - -async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: str): - """Inserts entire text documents from a directory into a memory bank.""" - memory_bank_id = "test_bank_2" - providers = client.providers.list() - provider_id = providers["memory"][0].provider_id - - # Register a memory bank - memory_bank = client.memory_banks.register( - memory_bank_id=memory_bank_id, - params={ - "embedding_model": "all-MiniLM-L6-v2", - "chunk_size_in_tokens": 512, - "overlap_size_in_tokens": 64, - }, - provider_id=provider_id, - ) - cprint(f"Memory bank registered: {memory_bank}", "green") - - # Prepare entire documents for insertion - documents = [] - for filename in os.listdir(docs_dir): - if filename.endswith((".txt", ".md")): - file_path = os.path.join(docs_dir, filename) - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - - document = Document( - document_id=f"{filename}", - content=content, - mime_type="text/plain", - metadata={"filename": filename}, - ) - documents.append(document) - - # Insert documents into the memory bank - client.memory.insert( - bank_id=memory_bank_id, - documents=documents, - ) - cprint( - f"Inserted documents from {docs_dir} into memory bank '{memory_bank_id}'.", - "green", - ) - - -async def run_main(host: str, port: int, docs_dir: str) -> None: - """Main async function to register model, insert documents, and generate responses.""" - client = LlamaStackClient(base_url=f"http://{host}:{port}") - - # Insert documents to the memory bank - await insert_documents_to_memory_bank(client, docs_dir) - - # Model registration - model_name = "Llama3.2-3B-Instruct" - response = requests.post( - f"http://{host}:{port}/models/register", - headers={"Content-Type": "application/json"}, - data=json.dumps( - { - "model_id": model_name, - "provider_model_id": None, - "provider_id": "inline::meta-reference-0", - "metadata": None, - } - ), - ) - cprint(f"Model registration status: {response.status_code}", "blue") - - # Agent configuration - agent_config = AgentConfig( - model=model_name, - instructions="You are a helpful assistant that can answer questions based on provided documents.", - sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, - tools=[ - { - "type": "memory", - "memory_bank_configs": [{"bank_id": "test_bank", "type": "vector"}], - "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 4096, - "max_chunks": 10, - } - ], - tool_choice="auto", - tool_prompt_format="json", - enable_session_persistence=True, - ) - agent = Agent(client, agent_config) - - # QA data for testing - qa_data = [ - { - "Question": "What is the policy regarding smoking in City offices?", - "Answer": "Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. 
(Source: Page 46, 'Smoke-Free Workplace' section)", - }, - { - "Question": "How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?", - "Answer": "Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)", - }, - { - "Question": "What are the three categories of employees eligible for health coverage?", - "Answer": "The following employees are eligible:\n\nAll permanent employees working at least 20 hours per week\n\nAll regularly scheduled provisional employees working at least 20 hours per week\n\nAll other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)", - }, - { - "Question": "How long must an employee wait before using vacation time after starting employment?", - "Answer": "Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)", - }, - { - "Question": "What must an employee do if they're summoned for jury duty?", - "Answer": "An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)", - }, - # Add more questions as needed - ] - - questions = [] - generated_answers = [] - retrieved_contexts = [] - ground_truths = [] - - session_id = agent.create_session(f"session-{uuid.uuid4()}") - for qa in tqdm(qa_data, desc="Generating responses"): - question = qa["Question"] - ground_truth_answer = qa["Answer"] - - cprint(f"Generating response for: {question}", "green") - response = agent.create_turn( - messages=[ - { - "role": "user", - "content": question, - } - ], - session_id=session_id, - ) - - async for log in EventLogger().log(response): - log.print() - - # Create a Dataset for RAGAS evaluation - # eval_data = Dataset.from_dict( - # { - # "user_input": questions, - # "response": generated_answers, - # "retrieved_contexts": retrieved_contexts, - # "reference": ground_truths, - # } - # ) - - # result = evaluate( - # eval_data, - # metrics=[ - # ContextPrecision(), - # ContextRecall(), - # Faithfulness(), - # AnswerRelevancy(), - # FactualCorrectness(), - # SemanticSimilarity(), - # ], - # ) - - # df = result.to_pandas() - # df.to_csv("evaluation_results_with_memory.csv", index=False) - # print(df.head()) - - -def main(host: str, port: int, docs_dir: str) -> None: - """Entry point for the script.""" - asyncio.run(run_main(host, port, docs_dir)) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/examples/E2E-RAG-App/requirements.txt b/examples/E2E-RAG-App/requirements.txt deleted file mode 100644 index c04f15c0a..000000000 --- a/examples/E2E-RAG-App/requirements.txt +++ /dev/null @@ -1,131 +0,0 @@ -annotated-types==0.7.0 -anyio==4.6.2.post1 -asgiref==3.8.1 -backoff==2.2.1 -bcrypt==4.2.0 -Bottleneck @ file:///croot/bottleneck_1731058641041/work -build==1.2.2.post1 -cachetools==5.5.0 -certifi==2024.8.30 -charset-normalizer==3.4.0 -chroma-hnswlib==0.7.6 
-chromadb==0.5.18 -click==8.1.7 -colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work -coloredlogs==15.0.1 -Deprecated==1.2.14 -distro==1.9.0 -durationpy==0.9 -exceptiongroup==1.2.2 -fastapi==0.115.5 -filelock==3.16.1 -fire==0.7.0 -flatbuffers==24.3.25 -fsspec==2024.10.0 -google-auth==2.36.0 -googleapis-common-protos==1.66.0 -grpcio==1.67.1 -h11==0.14.0 -httpcore==1.0.6 -httptools==0.6.4 -httpx==0.27.2 -huggingface-hub==0.26.2 -humanfriendly==10.0 -idna==3.10 -importlib_metadata==8.5.0 -importlib_resources==6.4.5 -Jinja2==3.1.4 -jiter==0.7.1 -joblib==1.4.2 -kubernetes==31.0.0 -llama_stack_client==0.0.50 -markdown-it-py==3.0.0 -MarkupSafe==3.0.2 -mdurl==0.1.2 -mkl-fft==1.3.1 -mkl-random @ file:///home/builder/ci_310/mkl_random_1641843545607/work -mkl-service==2.4.0 -mmh3==5.0.1 -monotonic==1.6 -mpmath==1.3.0 -networkx==3.4.2 -numexpr @ file:///croot/numexpr_1668713893690/work -numpy @ file:///croot/numpy_and_numpy_base_1682520569166/work -nvidia-cublas-cu12==12.4.5.8 -nvidia-cuda-cupti-cu12==12.4.127 -nvidia-cuda-nvrtc-cu12==12.4.127 -nvidia-cuda-runtime-cu12==12.4.127 -nvidia-cudnn-cu12==9.1.0.70 -nvidia-cufft-cu12==11.2.1.3 -nvidia-curand-cu12==10.3.5.147 -nvidia-cusolver-cu12==11.6.1.9 -nvidia-cusparse-cu12==12.3.1.170 -nvidia-nccl-cu12==2.21.5 -nvidia-nvjitlink-cu12==12.4.127 -nvidia-nvtx-cu12==12.4.127 -oauthlib==3.2.2 -onnxruntime==1.20.0 -openai==1.54.4 -opentelemetry-api==1.28.1 -opentelemetry-exporter-otlp-proto-common==1.28.1 -opentelemetry-exporter-otlp-proto-grpc==1.28.1 -opentelemetry-instrumentation==0.49b1 -opentelemetry-instrumentation-asgi==0.49b1 -opentelemetry-instrumentation-fastapi==0.49b1 -opentelemetry-proto==1.28.1 -opentelemetry-sdk==1.28.1 -opentelemetry-semantic-conventions==0.49b1 -opentelemetry-util-http==0.49b1 -orjson==3.10.11 -overrides==7.7.0 -packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1731456981915/work -pandas @ file:///croot/pandas_1718308974269/work/dist/pandas-2.2.2-cp310-cp310-linux_x86_64.whl#sha256=a2434a313a5eb5b4fdbec6c770abfde859f643eff7b3a703791bf8b73e54c394 -pillow==11.0.0 -posthog==3.7.0 -protobuf==5.28.3 -pyasn1==0.6.1 -pyasn1_modules==0.4.1 -pydantic==2.9.2 -pydantic_core==2.23.4 -Pygments==2.18.0 -PyPika==0.48.9 -pyproject_hooks==1.2.0 -python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1709299778482/work -python-dotenv==1.0.1 -pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1726055524169/work -PyYAML==6.0.2 -regex==2024.11.6 -requests==2.32.3 -requests-oauthlib==2.0.0 -rich==13.9.4 -rsa==4.9 -safetensors==0.4.5 -scikit-learn==1.5.2 -scipy==1.14.1 -sentence-transformers==3.3.0 -shellingham==1.5.4 -six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work -sniffio==1.3.1 -starlette==0.41.2 -sympy==1.13.1 -tabulate==0.9.0 -tenacity==9.0.0 -termcolor @ file:///home/conda/feedstock_root/build_artifacts/termcolor_1728288976871/work -threadpoolctl==3.5.0 -tokenizers==0.20.3 -tomli==2.1.0 -torch==2.5.1 -tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1730926118411/work -transformers==4.46.2 -triton==3.1.0 -typer==0.13.0 -typing_extensions==4.12.2 -tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1727140567071/work -urllib3==2.2.3 -uvicorn==0.32.0 -uvloop==0.21.0 -watchfiles==0.24.0 -websocket-client==1.8.0 -websockets==14.1 -wrapt==1.16.0 -zipp==3.21.0 diff --git a/examples/E2E-RAG-App/run_app.sh b/examples/E2E-RAG-App/run_app.sh deleted file mode 100755 index 3259b506a..000000000 
--- a/examples/E2E-RAG-App/run_app.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -echo "Running 01_ingest_files.py..." -python 01_ingest_files.py -if [ $? -ne 0 ]; then - echo "Error running 01_ingest_files.py" - exit 1 -fi - -echo "Running 02_caption_outputs.py..." -python 02_caption_outputs.py -if [ $? -ne 0 ]; then - echo "Error running 02_caption_outputs.py" - exit 1 -fi - -echo "Running ollama_main.py..." -python ollama_main.py localhost 5000 ./data/output/ -if [ $? -ne 0 ]; then - echo "Error running ollama_main.py" - exit 1 -fi - -echo "All scripts ran successfully!" From f940eac3264501e8b3942769c4ba2dfd0390473e Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Wed, 20 Nov 2024 10:34:04 -0800 Subject: [PATCH 65/99] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b0cac4c44..d190335c7 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,4 @@ Package.resolved **/chroma /examples/E2E-RAG-App/data/input /examples/E2E-RAG-App/data/output +**/.gradio \ No newline at end of file From fcc41b46daea2a7bc27d1fde25b320f885dc96e9 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Wed, 20 Nov 2024 11:03:16 -0800 Subject: [PATCH 66/99] Update 01_ingest_files.py --- examples/E2E-RAG-App/01_ingest_files.py | 32 +++++++++++-------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/examples/E2E-RAG-App/01_ingest_files.py b/examples/E2E-RAG-App/01_ingest_files.py index fe2d22bb0..75b3e3617 100644 --- a/examples/E2E-RAG-App/01_ingest_files.py +++ b/examples/E2E-RAG-App/01_ingest_files.py @@ -1,3 +1,4 @@ +import argparse import json import logging from pathlib import Path @@ -16,7 +17,11 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.datamodel.document import TableItem, PictureItem - +def parse_args(): + parser = argparse.ArgumentParser(description='Process documents from input directory') + parser.add_argument('--input_dir', type=str, required=True, help='Input directory containing documents') + parser.add_argument('--output_dir', type=str, help='Output directory for processed files (default: input_dir/output)') + return parser.parse_args() def get_document_files(input_dir: Path) -> list[Path]: """ @@ -32,8 +37,6 @@ def get_document_files(input_dir: Path) -> list[Path]: return document_files - - def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, Path]]: """ Extracts and saves images from the document. 
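With the parse_args() helper added above, the ingestion step no longer needs the hard-coded data/input and data/output folders (those paths are swapped out in the following hunk). A minimal invocation sketch — the ./docs paths here are illustrative, not taken from the patch:

```
# Convert the documents under ./docs into markdown plus extracted images.
# --output_dir is optional; per parse_args()/main(), it defaults to
# <input_dir>/output when omitted.
python 01_ingest_files.py --input_dir ./docs --output_dir ./docs/output
```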
@@ -73,16 +76,18 @@ def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, return saved_images def main(): - # Define input and output directories relative to current directory - input_dir = Path("data/input") - output_dir = Path("data/output") + args = parse_args() + + # Set up input and output directories + input_dir = Path(args.input_dir) + output_dir = Path(args.output_dir) if args.output_dir else input_dir / "output" output_dir.mkdir(parents=True, exist_ok=True) # Get all document files recursively input_paths = get_document_files(input_dir) if not input_paths: - print("No documents found in DATA directory!") + print(f"No documents found in {input_dir}!") return print(f"Found {len(input_paths)} documents to process:") @@ -91,8 +96,8 @@ def main(): # Configure pipeline options pipeline_options = PdfPipelineOptions() - pipeline_options.do_ocr = False # Disable OCR - pipeline_options.images_scale = 2.0 # Set image resolution + pipeline_options.do_ocr = False + pipeline_options.images_scale = 2.0 pipeline_options.generate_page_images = False pipeline_options.generate_table_images = False pipeline_options.generate_picture_images = True @@ -116,36 +121,27 @@ def main(): # Process all documents conv_results = doc_converter.convert_all(input_paths) - - # Store all extracted images for later processing all_extracted_images = [] # Save results for res in conv_results: - # Preserve directory structure in output relative_path = res.input.file.relative_to(input_dir) output_subdir = output_dir / relative_path.parent output_subdir.mkdir(parents=True, exist_ok=True) - # Create output filenames md_path = output_subdir / f"{res.input.file.stem}.md" json_path = output_subdir / f"{res.input.file.stem}.json" print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") - # Extract and save images extracted_images = save_images(res, output_subdir, res.input.file.stem) all_extracted_images.extend(extracted_images) - # Save markdown version with embedded images with md_path.open("w", encoding="utf-8") as fp: fp.write(res.document.export_to_markdown()) print(f"\nExtracted {len(all_extracted_images)} images in total") print("Ready for image captioning processing") - # TODO: Add captioning logic here - if __name__ == "__main__": main() - From f0ab5cdf9407901231056728fb57f3959fca788e Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Wed, 20 Nov 2024 11:10:15 -0800 Subject: [PATCH 67/99] Update 02_caption_outputs.py --- examples/E2E-RAG-App/02_caption_outputs.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/examples/E2E-RAG-App/02_caption_outputs.py b/examples/E2E-RAG-App/02_caption_outputs.py index a48ab1fd8..8ab5cdf01 100644 --- a/examples/E2E-RAG-App/02_caption_outputs.py +++ b/examples/E2E-RAG-App/02_caption_outputs.py @@ -1,13 +1,18 @@ import os import asyncio +import argparse import base64 import mimetypes from pathlib import Path from llama_stack_client import LlamaStackClient from llama_stack_client.lib.inference.event_logger import EventLogger -HOST = "localhost" -PORT = 5000 +def parse_args(): + parser = argparse.ArgumentParser(description='Process document images with LlamaStack Vision API') + parser.add_argument('--host', type=str, default='localhost', help='LlamaStack server host (default: localhost)') + parser.add_argument('--port', type=int, default=5000, help='LlamaStack server port (default: 5000)') + parser.add_argument('--input_dir', type=str, required=True, help='Input directory containing markdown files and images') + 
return parser.parse_args() def encode_image_to_data_url(file_path: str) -> str: """Encode an image file to a data URL.""" @@ -21,8 +26,8 @@ def encode_image_to_data_url(file_path: str) -> str: return f"data:{mime_type};base64,{encoded_string}" class DocumentProcessor: - def __init__(self): - self.client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}") + def __init__(self, host: str, port: int): + self.client = LlamaStackClient(base_url=f"http://{host}:{port}") self.processed_images = {} async def get_image_caption(self, image_path: str) -> str: @@ -99,13 +104,14 @@ async def process_markdown_file(self, output_dir: str, md_filename: str) -> None print(f"Failed to write updated content to {md_filename}: {str(e)}") async def main(): - output_dir = Path('DATA') / 'output' + args = parse_args() + output_dir = Path(args.input_dir) if not output_dir.exists(): - print(f"Output directory not found: {output_dir}") + print(f"Input directory not found: {output_dir}") return - processor = DocumentProcessor() + processor = DocumentProcessor(host=args.host, port=args.port) md_files = list(output_dir.glob('*.md')) if not md_files: From 2d8f989e5b26902629c47e1ff83ca063e78ef2e4 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Wed, 20 Nov 2024 11:31:46 -0800 Subject: [PATCH 68/99] abstraction for hostname, port and dir --- examples/E2E-RAG-App/docker/RAG_service.json | 7 ++++++- examples/E2E-RAG-App/docker/run_RAG.sh | 11 +++++++++-- examples/E2E-RAG-App/gradio_interface.py | 15 ++++++++++----- examples/E2E-RAG-App/ollama_main.py | 12 ++++++------ 4 files changed, 31 insertions(+), 14 deletions(-) mode change 100644 => 100755 examples/E2E-RAG-App/docker/run_RAG.sh diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index a4f58c5e8..23e9ce20d 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,4 +1,9 @@ { "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "${HOME}/work/llama-stack-apps/examples/E2E-RAG-App/example_data" + "document_path": "${HOME}/work/llama-stack-apps/examples/E2E-RAG-App/example_data", + "host": "localhost", + "port": 5000, + "chroma_port": 6000, + "docs_dir": "/root/E2E-RAG-App/example_data/", + "gradio_server_port": 7860 } diff --git a/examples/E2E-RAG-App/docker/run_RAG.sh b/examples/E2E-RAG-App/docker/run_RAG.sh old mode 100644 new mode 100755 index aeecb526f..fbf24f658 --- a/examples/E2E-RAG-App/docker/run_RAG.sh +++ b/examples/E2E-RAG-App/docker/run_RAG.sh @@ -1,3 +1,10 @@ -echo "DOC_PATH=$(jq '.document_path' ./RAG_service.json)" > .env -echo "MODEL_NAME=$(jq '.model_name' ./RAG_service.json)" >> .env +#!/bin/bash + +echo "DOC_PATH=$(jq -r '.document_path' ./RAG_service.json)" > .env +echo "MODEL_NAME=$(jq -r '.model_name' ./RAG_service.json)" >> .env +echo "HOST=$(jq -r '.host' ./RAG_service.json)" >> .env +echo "PORT=$(jq -r '.port' ./RAG_service.json)" >> .env +echo "CHROMA_PORT=$(jq -r '.chroma_port' ./RAG_service.json)" >> .env +echo "DOCS_DIR=$(jq -r '.docs_dir' ./RAG_service.json)" >> .env +echo "GRADIO_SERVER_PORT=$(jq -r '.gradio_server_port' ./RAG_service.json)" >> .env docker compose up diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 429aa7450..e8dbce483 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -24,6 +24,11 @@ # Load environment variables load_dotenv() +HOST = os.getenv("HOST", "localhost") +PORT = int(os.getenv("PORT", 5000)) 
+CHROMA_PORT = int(os.getenv("CHROMA_PORT", 6000)) +DOCS_DIR = os.getenv("DOCS_DIR", "/root/E2E-RAG-App/example_data/") +GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", 7861)) class LlamaChatInterface: def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): @@ -159,10 +164,10 @@ async def async_process(): def create_gradio_interface( - host: str = "localhost", - port: int = 5555, - chroma_port: int = 6000, - docs_dir: str = "/root/E2E-RAG-App/example_data/", + host: str = HOST, + port: int = PORT, + chroma_port: int = CHROMA_PORT, + docs_dir: str = DOCS_DIR, ): chat_interface = LlamaChatInterface(host, port, chroma_port, docs_dir) @@ -223,4 +228,4 @@ def clear_chat(): if __name__ == "__main__": # Create and launch the Gradio interface interface = create_gradio_interface() - interface.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True) + interface.launch(server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True) diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py index efac2feb8..a854ee6b8 100644 --- a/examples/E2E-RAG-App/ollama_main.py +++ b/examples/E2E-RAG-App/ollama_main.py @@ -33,11 +33,11 @@ async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: st memorybank_boolean = is_memory_bank_present(client, memory_bank_id) memorybank_list = client.memory_banks.list() print(memorybank_list) - for bank in memorybank_list: - try: - client.memory_banks.unregister(memory_bank_id=bank.provider_resource_id) - except Exception as e: - print(e) + # for bank in memorybank_list: + # try: + # client.memory_banks.unregister(memory_bank_id=bank.provider_resource_id) + # except Exception as e: + # print(e) print("after unregistration: ", client.memory_banks.list()) @@ -89,7 +89,7 @@ async def run_main(host: str, port: int, docs_dir: str) -> None: await insert_documents_to_memory_bank(client, docs_dir) # # Model registration - model_name = "Llama3.2-3B-Instruct" + model_name = "llama3.2:1b-instruct-fp16" # Agent configuration agent_config = AgentConfig( From caaf788aa0c89e3e24ada127e81035ee5995db5f Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Wed, 20 Nov 2024 11:52:31 -0800 Subject: [PATCH 69/99] code clean up, deleted unused files --- examples/E2E-RAG-App/docker/RAG_service.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index 23e9ce20d..f206f661d 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,6 +1,6 @@ { "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "${HOME}/work/llama-stack-apps/examples/E2E-RAG-App/example_data", + "document_path": "/Users/justinai/Documents/Code/llama-stack-apps/examples/E2E-RAG-App/output/", "host": "localhost", "port": 5000, "chroma_port": 6000, From b57c87db2a5539ee04e38fe467f1d3e7b9612ca6 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Wed, 20 Nov 2024 11:52:51 -0800 Subject: [PATCH 70/99] code clean up, deleted unused files --- examples/E2E-RAG-App/config.yaml | 69 --------- .../data/eval/evaluation_results.csv | 29 ---- .../data/eval/evaluation_results2.csv | 39 ----- .../evaluation_results_with_memory.csv | 12 -- examples/E2E-RAG-App/gradio_interface.py | 2 +- examples/E2E-RAG-App/ollama_main.py | 140 ------------------ 6 files changed, 1 insertion(+), 290 deletions(-) delete mode 100644 examples/E2E-RAG-App/config.yaml delete mode 100644 
examples/E2E-RAG-App/data/eval/evaluation_results.csv delete mode 100644 examples/E2E-RAG-App/data/eval/evaluation_results2.csv delete mode 100644 examples/E2E-RAG-App/evaluation_results_with_memory.csv delete mode 100644 examples/E2E-RAG-App/ollama_main.py diff --git a/examples/E2E-RAG-App/config.yaml b/examples/E2E-RAG-App/config.yaml deleted file mode 100644 index 75ed40b1a..000000000 --- a/examples/E2E-RAG-App/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -version: '2' -built_at: '2024-11-14T11:26:34.931183' -image_name: meta-reference-gpu -docker_image: null -conda_env: meta-reference-gpu -apis: -- inference -- memory -- safety -- agents -- telemetry -providers: - inference: - - provider_id: inline::meta-reference-0 - provider_type: inline::meta-reference - config: - model: Llama3.2-3B-Instruct - torch_seed: null - max_seq_len: 16384 - max_batch_size: 1 - create_distributed_process_group: true - checkpoint_dir: null - - provider_id: inline::meta-reference-0 - provider_type: inline::meta-reference - config: - model: Llama3.2-11B-Vision-Instruct - torch_seed: null - max_seq_len: 16384 - max_batch_size: 1 - create_distributed_process_group: true - checkpoint_dir: null - memory: - # - provider_id: inline::faiss-0 - # provider_type: inline::faiss - # config: - # kvstore: - # namespace: null - # type: sqlite - # db_path: /home/justinai/.llama/runtime/faiss_store.db - - provider_id: remote::chromadb-1 - provider_type: remote::chromadb - config: - host: localhost - port: 6000 - protocol: http - safety: - - provider_id: inline::llama-guard-0 - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: inline::meta-reference-0 - provider_type: inline::meta-reference - config: - persistence_store: - namespace: null - type: sqlite - db_path: /home/justinai/.llama/runtime/kvstore.db - telemetry: - - provider_id: inline::meta-reference-0 - provider_type: inline::meta-reference - config: {} -metadata_store: null -models: [] -shields: [] -memory_banks: [] -datasets: [] -scoring_fns: [] -eval_tasks: [] diff --git a/examples/E2E-RAG-App/data/eval/evaluation_results.csv b/examples/E2E-RAG-App/data/eval/evaluation_results.csv deleted file mode 100644 index f2fa741ff..000000000 --- a/examples/E2E-RAG-App/data/eval/evaluation_results.csv +++ /dev/null @@ -1,29 +0,0 @@ -user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy -What is the policy regarding smoking in City offices?,"['## Smoke-Free Workplace\n\nSmoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.\n\n## Drug-Free Workplace', '## Policy Regarding the Treatment of Co-Workers and Members of the Public\n\nCity policy requires employees to treat co-workers and members of the public with courtesy and respect. City employees and managers are responsible for maintaining a safe and productive workplace which is free from inappropriate workplace behavior.\n\n## Smoke-Free Workplace', '## Political Activity\n\nIt is unlawful for City employees to use public resources or personnel to engage in political activity relating to elective offices and ballot measures. City employees may not engage in political activities while on duty or in the workplace. Employees may not use City resources, such as photocopier or fax machines, telephones, postage, or email, for political activities. 
The ban on engaging in political activity while on duty prohibits such activities as circulating petitions, addressing campaign mailers or engaging in any other political activities that use City resources or divert employees from their assigned duties.', '## Use of City and County Property for Business Purposes Only\n\nNo officer or employee may use, nor allow any other r person to use, City resources for any non-City business purpose. Use of City resources fofor personal, political, employee organization or other non-City business is strictly prohibited. City resources include, but are not limited to, facilities, equipment, devices, telephones, computers, copier, fax machine, email, internet access, supplies and any time for which you are receiving compensation from the City. Inappropriate uses of City resources include, but are not limited to: online gambling; viewing sports events online; playing games, streaming video or music on a work computer; viewing or distributing materials that are not related to City business or that are sexually explicit; and frequent talking on a personal cell phone or texting during work hours.', ""The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\n\nSafety is every City employee's responsibility. All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors.""]","According to the document, smoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.","Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)",0.99999999995,1.0,1.0,0.9684763208853084 -How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?,"['Most full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.\n\nSome employees may be entitled to accrue sick leave under Chapter 12W of the Administrative Code. Please contact your departmental personnel officer for more information.', 'Sick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.\n\nMost full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.', '## Accrual of Paid Sick Leave\n\nSick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\n\n| Year of Service 12-Month Max. 
Entitlement | Maximum Accumulation | Maximum Accumulation |\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'You may not use sick leave with pay accrued under the Civil Service Rules during your first 6 months of employment with the City.\n\nYou may not use sick leave for reasons not set forth th in this Handbook, the Civil Service Rules, the applicable collective bargaining agreement or other applicable laws. Misuse of sick leave is grounds for discipline, up to and including termination.']",Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules.,"Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)",0.99999999995,0.5,1.0,0.9869727487494077 -What are the three categories of employees eligible for health coverage?,"['The following employees are eligible for health coverage:\n\n- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.', ""Employees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage.\n\nAn employee's spouse, domestic partner and children may also be eligible for coverage through HSS. If you have enrolled a domestic partner, same sex spouse and/or their children in your health plan, it is important that you seek tax advice from a qualified professional regarding the tax consequences of such enrollment. A detailed list of eligibility requirements and necessary documentation for enrolling employee dependents is available on the HSS website at www.myhss.org/benefits/ccsf.html ."", 'Please note that the information in this section is subject to change. 
You should consult with HSS if you have any questions at (415) 554-1750.\n\n## Eligibility\n\nThe following employees are eligible for health coverage:', ""Coverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.\n\nEmployees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage."", '- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.\n\nCoverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.']","The three categories of employees eligible for health coverage are: - -1. All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours. -2. All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours. -3. All other employees of the City, including temporary, exempt, or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12-month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.","The following employees are eligible: - -All permanent employees working at least 20 hours per week - -All regularly scheduled provisional employees working at least 20 hours per week - -All other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)",0.6999999999766667,1.0,0.8,0.9812535196983435 -How long must an employee wait before using vacation time after starting employment?,"['An additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.\n\nThe maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\n\n| Year of Service 12-Month Max. 
Entitlement | Maximum Accumulation | Maximum Accumulation |\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'The City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.\n\nAn additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.', '## Vacation\n\nThe City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.', 'Permanent employees may request unpaid personal leave for a period of up to twelve (12) months within any two-year period. Your department t head has discretion to grant or deny requests for personal leave. With certain exceptions, if you are a temporary or provisional employee, you may request personal leave for a maximum of one month, and only if a replacement for your position is not required.\n\n## Time Off for Voting']","An employee must wait at least one year of continuous service before using vacation time. At the end of the first year, the employee will be awarded a vacation allowance at the rate of 0.0385 of an hour for each hour of paid service in the preceding year, and will be eligible to use accrued vacation time.","Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)",0.4166666666458333,0.6666666666666666,1.0,0.9792513522173962 -What must an employee do if they're summoned for jury duty?,"['You must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. 
If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.\n\n## Work Schedule While on Jury Duty', '## Jury Duty Leave\n\nYou must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.', '## Compensation While on Jury Duty\n\nAs a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.', 'As a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.\n\nYou are not entitled to a per diem pay from the City or County for which Jury Duty was served, as the City and County of San Francisco already provides regular compensation and benefits to you while performing jury service. You must notify the jury staff that you are a government employee when reporting for jury service', 'If you are summoned as a witness on behalf of the City and County of San Francisco, you are entitled to be paid for any time that you are required to serve in that capacity. If you are summoned to serve as a witness in a case involving outside employment or personal business\n\naffairs, you will be placed on leave without pay unless vacation leave or compensatory time is requested and granted.']","An employee must notify their supervisor as soon as they receive a jury summons. If they are required to report for jury duty during their working hours, they will be excused from work on the work day they perform jury service, provided they give prior notification to their supervisor.","An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. 
(Source: Page 37, 'Jury Duty Leave' section)",0.9166666666361111,0.75,1.0,0.9654143234777989 -What happens if an employee is absent without authorization for more than five consecutive working days?,"['If you are absent from your job for any period of time without proper authorization, you may be subject to discipline. If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to do so. The rules regarding automatic resignation may be affected by the collective bargaining agreement between the City and your employee organization. For more information, consult your departmental personnel officer or your collective bargaining agreement.\n\n## Layoff', '## Automatic Resignation\n\nIf you are absent from your job for any period of time without proper authorization, you may be subject to discipline. If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to do so. The rules regarding automatic resignation may be affected by the collective bargaining agreement between the City and your employee organization. For more information, consult your departmental personnel officer or your collective bargaining agreement.', ""If you are absent from your job for more than five consecutive working days, you must submit to your supervisor a completed Request for Leave form stating the duration of your illness, signed by a doctor, dentist, podiatrist, licensed clinical psychologist, Christian Science practitioner or licensed doctor of chiropractic medicine. This form is available from your supervisor or departmental personnel officer.\n\nUnder certain conditions, a department head may require you to furnish a doctor's certification for any amount of absence due to illness."", 'All planned absences must be requested and approved d in advance. If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick leave, failure to present medical certification when required, excessive absenteeism, tardiness, unauthorized absence or failure to notify your deparartment when you are unable to report to work, may result in sick leave restriction, disciplinary action or termination.\n\n## Appearance and Dress Code', 'Regular and prompt attendance is an essential requirement of your job. As a City employee, you are to report to your work station at your scheduled work time. Your time records must accurately reflect the time you start work and the number of hours worked in every work day.\n\nAll planned absences must be requested and approved d in advance. 
If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick leave, failure to present medical certification when required, excessive absenteeism, tardiness, unauthorized absence or failure to notify your deparartment when you are unable to report to work, may result in sick leave restriction, disciplinary action or termination.']","If an employee is absent without authorization for more than five consecutive working days, their absence will be deemed an ""automatic resignation"".","If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)",0.99999999998,1.0,1.0,1.0000000000000004 -How long is the normal probationary period for permanent civil service positions?,"['## Probationary Period\n\nAll appointments to permanent civil service positions require that you serve a probationary period. The probationary period is the final and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.', ""All appointments to permanent civil service positions require that you serve a probationary period. The probationary period is the final and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.\n\nThe department's appointing officer may release an employee at any time during the probationary period. At the conclusion of the probationary period, your department head may recommend permanent appointment, release from the position, or an extension of the probationary period. You may also be required to serve an additional probationary period in certain circumstances, such as transferring to a different department."", '## Permanent\n\nPersons who have competed in a civil service examination and are appointed from a list of eligibles to a permanent position. Permanent empmployees who have successfully completed the probationary period may only be removed for cause.\n\n## Exempt', '## Reappointment\n\nAn employee who has resigned with satisfactory services after completing probation may request reappointment to his or her position. Such h former employees may request to reenter the City service to a vacant position in the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. 
Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.', 'An employee who has resigned with satisfactory services after completing probation may request reappointment to his or her position. Such h former employees may request to reenter the City service to a vacant position in the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.\n\n## Reinstatement']","The duration of the probationary period for permanent civil service positions is governed by the collective bargaining agreement. However, the exact length of the probationary period is not specified in the provided documents. It is recommended to consult the departmental personnel officer or collective bargaining agreement for information on the length of the probationary period.","The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)",0.9999999999,0.6666666666666666,1.0,0.0 -What are employees required to do in case of a catastrophic event while off duty?,"['Catastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your department\'s instructions. In the event that phone lines are down, you are required to listen to the radio for any reporting instructions.\n\nIn the event of an emergency, the City may contact you via its two-way emergency communication system on your home phone, cell phone, work phone, and/or work email. Please be aware that the system uses an electronic communication, so you may think it is an unsolicited ""robo-call."" Do NOT hang up; you are required to listen to the entire message, follow the instructions and report for your emergency assignment if so directed.', ""Catastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.\n\nCatastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your department's instructions. In the event that phone lines are down, you are required to listen to the radio for any reporting instructions."", 'All Disaster Services Workers will be issued a citywide identification badge. You must have your badge with you at all times.\n\nCatastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.', ""The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. 
City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\n\nSafety is every City employee's responsibility. All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors."", 'All City employees are designated Disaster Service Workers under state and local law. If the Mayor or an authorized City official proclaims a local emergency due to actual or threatened disaster such as earthquake, fire, riot, etc., City employees, as Disaster Service Workers, may be required to provide services during the emergency and subsequent period of assessment and recovery.\n\nSimply put, being a Disaster Service Worker means that any time a catastrophic event- natural or manmade-places life or property in jeopardy, you could be assigned to any disaster service activity that promotes the protection of public health and safety. Your assignment might require you to serve at a location, at times and/or under conditions that significantly differ from your normal work assignment. As a Disaster Service Worker, however, you have an absolute obligation to serve the public in a time of need in whatever role you are directed to perform by a superior or by law.']","In case of a catastrophic event while off duty, employees are required to: - -1. Ensure the safety of their family. -2. Follow their department's instructions. -3. In the event of phone lines being down, listen to the radio for reporting instructions. -4. If contacted by the City via its two-way emergency communication system on their home phone, cell phone, work phone, or work email, they must: - * Not hang up. - * Listen to the entire message. - * Follow the instructions. - * Report for their emergency assignment if directed to do so.","Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)",0.8666666666377778,0.6666666666666666,1.0,0.9872406055919023 -What is the city's policy on accepting gifts from subordinates?,"['You may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months.\n\nYou may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .', '## Gifts\n\nYou may not accept any gift that is intended to inflfluence you in the performance of your job.\n\nYou may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months.', 'There are exceptions to all of these gift rules - please check with the Ethics Commission or your departmental personnel officer if you have questions.\n\n## Outside or Additional Employment', 'You may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .\n\nIf you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. 
That amount is $420.00 for calendar year 2011; please contact the Ethics Commission to determine what the amount is for future calendar years.', 'If you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. That amount is $420.00 for calendar year 2011; please contact the Ethics Commission to determine what the amount is for future calendar years.\n\nThere are exceptions to all of these gift rules - please check with the Ethics Commission or your departmental personnel officer if you have questions.']","According to the document, the city's policy is that employees may not accept gifts from subordinates, as well as candidates or applicants for positions as employees or subordinates.","Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. (Source: Page 49, 'Gifts' section)",0.8874999999778125,1.0,0.6666666666666666,0.9569189442440059 diff --git a/examples/E2E-RAG-App/data/eval/evaluation_results2.csv b/examples/E2E-RAG-App/data/eval/evaluation_results2.csv deleted file mode 100644 index e80a7f000..000000000 --- a/examples/E2E-RAG-App/data/eval/evaluation_results2.csv +++ /dev/null @@ -1,39 +0,0 @@ -user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy,factual_correctness,semantic_similarity -What is the policy regarding smoking in City offices?,"['## Smoke-Free Workplace\n\nSmoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.\n\n## Drug-Free Workplace', '## Policy Regarding the Treatment of Co-Workers and Members of the Public\n\nCity policy requires employees to treat co-workers and members of the public with courtesy and respect. City employees and managers are responsible for maintaining a safe and productive workplace which is free from inappropriate workplace behavior.\n\n## Smoke-Free Workplace', '## Political Activity\n\nIt is unlawful for City employees to use public resources or personnel to engage in political activity relating to elective offices and ballot measures. City employees may not engage in political activities while on duty or in the workplace. Employees may not use City resources, such as photocopier or fax machines, telephones, postage, or email, for political activities. The ban on engaging in political activity while on duty prohibits such activities as circulating petitions, addressing campaign mailers or engaging in any other political activities that use City resources or divert employees from their assigned duties.', '## Use of City and County Property for Business Purposes Only\n\nNo officer or employee may use, nor allow any other r person to use, City resources for any non-City business purpose. Use of City resources fofor personal, political, employee organization or other non-City business is strictly prohibited. City resources include, but are not limited to, facilities, equipment, devices, telephones, computers, copier, fax machine, email, internet access, supplies and any time for which you are receiving compensation from the City. 
Inappropriate uses of City resources include, but are not limited to: online gambling; viewing sports events online; playing games, streaming video or music on a work computer; viewing or distributing materials that are not related to City business or that are sexually explicit; and frequent talking on a personal cell phone or texting during work hours.', ""The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\n\nSafety is every City employee's responsibility. All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors.""]","According to the Employee Handbook, smoking is not permitted in City offices, or within n 20 feet of entrances, exits, or operable windows of public buildings.","Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)",0.99999999995,1.0,1.0,0.9536062736849935,0.8,0.9695324605746768 -How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?,"['Most full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.\n\nSome employees may be entitled to accrue sick leave under Chapter 12W of the Administrative Code. Please contact your departmental personnel officer for more information.', 'Sick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.\n\nMost full time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. Unused sick leave accrued under the Civil Service Rules may be accumulated from year to year up to a maximum of 130 8-hour working days.', '## Accrual of Paid Sick Leave\n\nSick leave with pay is a privilege under the Civil Service Rules, Charter and City Ordinance.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\n\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'You may not use sick leave with pay accrued under the Civil Service Rules during your first 6 months of employment with the City.\n\nYou may not use sick leave for reasons not set forth th in this Handbook, the Civil Service Rules, the applicable collective bargaining agreement or other applicable laws. 
Misuse of sick leave is grounds for discipline, up to and including termination.']","According to the Employee Handbook, most full-time employees earn 13 days of paid sick leave per year under the Civil Service Rules.","Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)",0.99999999995,0.5,1.0,0.948902942120787,0.67,0.9666512821564128 -What are the three categories of employees eligible for health coverage?,"['The following employees are eligible for health coverage:\n\n- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.', ""Employees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage.\n\nAn employee's spouse, domestic partner and children may also be eligible for coverage through HSS. If you have enrolled a domestic partner, same sex spouse and/or their children in your health plan, it is important that you seek tax advice from a qualified professional regarding the tax consequences of such enrollment. A detailed list of eligibility requirements and necessary documentation for enrolling employee dependents is available on the HSS website at www.myhss.org/benefits/ccsf.html ."", 'Please note that the information in this section is subject to change. You should consult with HSS if you have any questions at (415) 554-1750.\n\n## Eligibility\n\nThe following employees are eligible for health coverage:', ""Coverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.\n\nEmployees in category (3) must obtain a signed certification from their department's human resources manager in order to be eligible for health coverage."", '- (1) All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours;\n- (2) All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours; and\n- (3) All other employees of the City including temporary y exempt or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12 month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.\n\nCoverage for a new employee in category (1) or (2) listed above starts on the first day of the coverage period following his or her start work date, provided an enrollment application and other required documentation has been submitted to HSS by applicable deadlines.']","According to the Employee Handbook, the three categories of employees eligible for health coverage are: - -1. All permanent employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours. -2. 
All regularly scheduled provisional employees of the City whose normal work week at the time of inclusion in the system is not less than 20 hours. -3. All other employees of the City, including temporary, exempt, or ""as needed"" employees who have worked more than 1040 hours in any consecutive 12-month period, and whose normal work week at the time of inclusion in the system is not less than 20 hours.","The following employees are eligible: - -All permanent employees working at least 20 hours per week - -All regularly scheduled provisional employees working at least 20 hours per week - -All other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)",0.6999999999766667,1.0,0.8,0.9463983778035442,1.0,0.9263984404512609 -How long must an employee wait before using vacation time after starting employment?,"['An additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.\n\nThe maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.', 'The maximum vacation entitlement in any 12 month period and maximum accrual permitted for most employees are provided in the chart on the following page.\n\n| Year of Service 12-Month Max. Entitlement | Maximum Accumulation | Maximum Accumulation |\n|-----------------------------------------------------------|-----------------------------------------------------------|------------------------|\n| | 1 through 5 years 80 hours (10 days) 320 hours (40 days) | |\n| More than 5 years 120 hours (15 days) 360 hours (45 days) | | |\n| More than 15 years 160 hours (20 days) 400 hours | | (50 days) |', 'The City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.\n\nAn additional vacation entitlement will be awarded after five years of continuous service, usually 40 hours for full-time employees. After fifteen years of service, full-time employees will receive an additional 40 hours.', '## Vacation\n\nThe City provides annual vacations for employees who work a regular schedule and who have completed one year or more of continuous service. Certain temporary exempt employees may also be eligible for vacation benefits. You are not eligible to use vacation in the first year of continuous service; however, at the end of one year of continuous service, you will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year and will be eligible to use accrued vacation time. 
For the purpose of computing vacation, most employees may be credited with no more than 2080 hours of service in a 12month period.', 'Permanent employees may request unpaid personal leave for a period of up to twelve (12) months within any two-year period. Your department t head has discretion to grant or deny requests for personal leave. With certain exceptions, if you are a temporary or provisional employee, you may request personal leave for a maximum of one month, and only if a replacement for your position is not required.\n\n## Time Off for Voting']","According to the Employee Handbook, an employee is not eligible to use vacation in the first year of continuous service. However, at the end of one year of continuous service, they will be awarded a vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year, and will be eligible to use accrued vacation time.","Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. (Source: Page 30, 'Vacation' section)",0.8055555555287036,0.6666666666666666,1.0,0.8803037847487826,0.89,0.9834024693854851 -What must an employee do if they're summoned for jury duty?,"['You must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.\n\n## Work Schedule While on Jury Duty', '## Jury Duty Leave\n\nYou must notify your supervisor as soon as you receive a jury summons. If you are required to report for jury duty during your working hours, you will be excused from work on the work day you perform jury service, provided you give prior notification to your supervisor. If you report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, you must return to work as soon as possible.', '## Compensation While on Jury Duty\n\nAs a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.', 'As a guiding principal, you are required to work or r perform jury service for the number of hours for which you are paid during that workweek. For example, if you are regularly on an alternative workweek schedule working 10-hour workdays Monday through Thursday with Friday off and you are required to serve jury duty on a Monday through Thursday, you must work a regular workday on Friday, or use personal leave to compensate for the eight hour balance. 
However, you and your supervisor may agree to maintain the alternative schedule, whereby you would return to the workplace to work the remaining two hours each day and continue to take Friday off.\n\nYou are not entitled to a per diem pay from the City or County for which Jury Duty was served, as the City and County of San Francisco already provides regular compensation and benefits to you while performing jury service. You must notify the jury staff that you are a government employee when reporting for jury service', 'If you are summoned as a witness on behalf of the City and County of San Francisco, you are entitled to be paid for any time that you are required to serve in that capacity. If you are summoned to serve as a witness in a case involving outside employment or personal business\n\naffairs, you will be placed on leave without pay unless vacation leave or compensatory time is requested and granted.']","According to the Employee Handbook, if an employee is summoned for jury duty, they must: - -1. Notify their supervisor as soon as they receive the summons. -2. If they are required to report for jury duty during their working hours, they will be excused from work on the day they perform jury service, provided they give prior notification to their supervisor. -3. If they report for jury duty and are not selected as a juror, or if the court dismisses the proceedings early for the day, they must return to work as soon as possible. -4. They are not entitled to a per diem pay from the City or County for jury duty served, as the City and County already provides regular compensation and benefits to them while performing jury service. - -Note that they must also give prior notification to their supervisor if they are required to report for jury duty during their working hours.","An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)",0.9166666666361111,0.75,1.0,0.9595106560491319,0.73,0.9666893503074692 -What happens if an employee is absent without authorization for more than five consecutive working days?,"['If you are absent from your job for any period of time without proper authorization, you may be subject to discipline. If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to do so. The rules regarding automatic resignation may be affected by the collective bargaining agreement between the City and your employee organization. For more information, consult your departmental personnel officer or your collective bargaining agreement.\n\n## Layoff', '## Automatic Resignation\n\nIf you are absent from your job for any period of time without proper authorization, you may be subject to discipline. 
If you are absent from your r job without proper authorization for more than five consecutive working days, or if you fail to return from an approved leave, your absence will be deemed an ""automatic resignation."" If you receive a notice of automatic resignation and wish to appeal the determination to the Civil Service Commission, you will have fifteen days from the date on which the notice was mailed to do so. The rules regarding automatic resignation may be affected by the collective bargaining agreement between the City and your employee organization. For more information, consult your departmental personnel officer or your collective bargaining agreement.', ""If you are absent from your job for more than five consecutive working days, you must submit to your supervisor a completed Request for Leave form stating the duration of your illness, signed by a doctor, dentist, podiatrist, licensed clinical psychologist, Christian Science practitioner or licensed doctor of chiropractic medicine. This form is available from your supervisor or departmental personnel officer.\n\nUnder certain conditions, a department head may require you to furnish a doctor's certification for any amount of absence due to illness."", 'All planned absences must be requested and approved d in advance. If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick leave, failure to present medical certification when required, excessive absenteeism, tardiness, unauthorized absence or failure to notify your deparartment when you are unable to report to work, may result in sick leave restriction, disciplinary action or termination.\n\n## Appearance and Dress Code', 'Regular and prompt attendance is an essential requirement of your job. As a City employee, you are to report to your work station at your scheduled work time. Your time records must accurately reflect the time you start work and the number of hours worked in every work day.\n\nAll planned absences must be requested and approved d in advance. If illness or some other emergency causes an unplanned or unforeseeable absence, you must notify your department as soon as possible on the first day of absence, and keep the department advised daily during the absence. In the case of an extended unforeseeable absence, you may be asked to complete forms and submit medical certifications as appropriate during your leave. Improper use of sick leave, failure to present medical certification when required, excessive absenteeism, tardiness, unauthorized absence or failure to notify your deparartment when you are unable to report to work, may result in sick leave restriction, disciplinary action or termination.']","According to the Employee Handbook, if an employee is absent without authorization for more than five consecutive working days, their absence will be deemed an ""automatic resignation."" This means that their absence will be considered a resignation of their position, and they will be considered to have quit their job. 
However, the employee can appeal the determination to the Civil Service Commission within 15 days of receiving the notice of automatic resignation.","If an employee is absent from their job without proper authorization for more than five consecutive working days, or fails to return from an approved leave, their absence will be deemed an 'automatic resignation.' (Source: Page 19, 'Automatic Resignation' section)",0.94999999997625,1.0,1.0,1.0000000000000004,0.57,0.9676767185463638 -How long is the normal probationary period for permanent civil service positions?,"['## Probationary Period\n\nAll appointments to permanent civil service positions require that you serve a probationary period. The probationary period is the final and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.', ""All appointments to permanent civil service positions require that you serve a probationary period. The probationary period is the final and most important phase of the selection process. This period is used to evaluate your performance and suitability for the position. The duration of the probationary period is governed by the collective bargaining agreement. Absences from work will extend your probationary period. For infoformation about the length of your probationary period, consult your departmental personnel officer or collective bargaining agreement.\n\nThe department's appointing officer may release an employee at any time during the probationary period. At the conclusion of the probationary period, your department head may recommend permanent appointment, release from the position, or an extension of the probationary period. You may also be required to serve an additional probationary period in certain circumstances, such as transferring to a different department."", '## Permanent\n\nPersons who have competed in a civil service examination and are appointed from a list of eligibles to a permanent position. Permanent empmployees who have successfully completed the probationary period may only be removed for cause.\n\n## Exempt', '## Reappointment\n\nAn employee who has resigned with satisfactory services after completing probation may request reappointment to his or her position. Such h former employees may request to reenter the City service to a vacant position in the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.', 'An employee who has resigned with satisfactory services after completing probation may request reappointment to his or her position. Such h former employees may request to reenter the City service to a vacant position in the class from m which they resigned within four years from the date of resignation. Former members of the uniformed ranks of the Police and Fire departments may be reappointed within two years of the date of resignation. 
Former employees may request reappointment to a vacancy in any class in which the employee has completed the probationary period. The receiving department, at i its discretion, may approve a request for reappointment.\n\n## Reinstatement']","The duration of the probationary period is governed by the collective bargaining agreement, but the exact length is not specified in the provided text. However, it is mentioned that the probationary period may be extended by absences from work, and that an employee may be required to serve an additional probationary period in certain circumstances, such as transferring to a different department. - -It is also mentioned that former employees who have resigned with satisfactory services after completing probation may request reappointment to their position, and that the receiving department may approve a request for reappointment at its discretion. - -Therefore, it can be inferred that the probationary period for permanent civil service positions is a flexible period that can be adjusted based on individual circumstances, but its duration is not fixed.","The document states that all appointments to permanent civil service positions require a probationary period, but the duration is governed by the collective bargaining agreement. Absences from work will extend the probationary period. (Source: Page 14, 'Probationary Period' section)",0.9999999999,0.6666666666666666,0.7,0.0,0.46,0.9538981887418683 -What are employees required to do in case of a catastrophic event while off duty?,"['Catastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your department\'s instructions. In the event that phone lines are down, you are required to listen to the radio for any reporting instructions.\n\nIn the event of an emergency, the City may contact you via its two-way emergency communication system on your home phone, cell phone, work phone, and/or work email. Please be aware that the system uses an electronic communication, so you may think it is an unsolicited ""robo-call."" Do NOT hang up; you are required to listen to the entire message, follow the instructions and report for your emergency assignment if so directed.', ""Catastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.\n\nCatastrophic Event While off Duty: Should a catastrophic event occur while you are off duty, you should ensure the safety of your family and follow your department's instructions. In the event that phone lines are down, you are required to listen to the radio for any reporting instructions."", 'All Disaster Services Workers will be issued a citywide identification badge. You must have your badge with you at all times.\n\nCatastrophic Event While on Duty: Should a catastrophic event occur while you are on duty, report immediately to your supervisor or designated areas for assignment.', ""The safety and well-being of our employees is very important, and in order to promote a safe and healthy work environment, the City works to identify and eliminate employee exposures to avoidable hazards and conditions that can lead to injury or illness. City departments have Injury and Illness Prevention Programs that comply with federal and state regulations, laws, and statutes in order to help maintain a safe and healthful work environment.\n\nSafety is every City employee's responsibility. 
All employees are required to remain alert and to correct hazardous conditions and unsafe acts-if it is safe to do so-and to report unsafe conditions to their supervisors."", 'All City employees are designated Disaster Service Workers under state and local law. If the Mayor or an authorized City official proclaims a local emergency due to actual or threatened disaster such as earthquake, fire, riot, etc., City employees, as Disaster Service Workers, may be required to provide services during the emergency and subsequent period of assessment and recovery.\n\nSimply put, being a Disaster Service Worker means that any time a catastrophic event- natural or manmade-places life or property in jeopardy, you could be assigned to any disaster service activity that promotes the protection of public health and safety. Your assignment might require you to serve at a location, at times and/or under conditions that significantly differ from your normal work assignment. As a Disaster Service Worker, however, you have an absolute obligation to serve the public in a time of need in whatever role you are directed to perform by a superior or by law.']","According to the Employee Handbook, if a catastrophic event occurs while an employee is off duty, they should: - -1. Ensure the safety of their family. -2. Follow their department's instructions. -3. If phone lines are down, listen to the radio for any reporting instructions. -4. Do not hang up the phone, as it may be an unsolicited ""robo-call"". -5. Listen to the entire message, follow the instructions, and report for their emergency assignment if directed to do so. - -They must also have their citywide identification badge with them at all times.","Employees should ensure the safety of their family and follow their department's instructions. If phone lines are down, they are required to listen to the radio for any reporting instructions. (Source: Page 51, 'Catastrophic Event While off Duty' section)",0.8666666666377778,0.6666666666666666,1.0,0.9794192074279029,0.6,0.9508825814602573 -What is the city's policy on accepting gifts from subordinates?,"['You may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months.\n\nYou may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .', '## Gifts\n\nYou may not accept any gift that is intended to inflfluence you in the performance of your job.\n\nYou may not solicit or accept any gift from any person or entity who has a contract with your department or who has attempted to influence you in a governmental decision during the past 12 months.', 'There are exceptions to all of these gift rules - please check with the Ethics Commission or your departmental personnel officer if you have questions.\n\n## Outside or Additional Employment', 'You may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you .\n\nIf you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. 
That amount is $420.00 for calendar year 2011; please contact the Ethics Commission to determine what the amount is for future calendar years.', 'If you are required to file a Statement of Economic Interests (""SEI"") financial disclosure, you cannot accept gifts worth more than a certain amount in a calendar year from any source that you must report on your SEI. That amount is $420.00 for calendar year 2011; please contact the Ethics Commission to determine what the amount is for future calendar years.\n\nThere are exceptions to all of these gift rules - please check with the Ethics Commission or your departmental personnel officer if you have questions.']","According to the Employee Handbook, the city has a policy of not accepting gifts from subordinates. This policy is outlined in section 411, which states that ""You may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to you."" This means that employees should not receive or accept any gifts, benefits, or favors from their subordinates, as well as from candidates or applicants for positions that they may have authority over.","Employees may not solicit or accept any gifts from any subordinate, or any candidate or applicant for a position as an employee or subordinate to them. (Source: Page 49, 'Gifts' section)",0.8874999999778125,1.0,0.375,0.9316696175848537,0.44,0.9459774250884739 diff --git a/examples/E2E-RAG-App/evaluation_results_with_memory.csv b/examples/E2E-RAG-App/evaluation_results_with_memory.csv deleted file mode 100644 index d8fc68b43..000000000 --- a/examples/E2E-RAG-App/evaluation_results_with_memory.csv +++ /dev/null @@ -1,12 +0,0 @@ -user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy,factual_correctness,semantic_similarity -What is the policy regarding smoking in City offices?,[],No response generated.,"Smoking is not permitted in City offices, or within 20 feet of entrances, exits, or operable windows of public buildings. (Source: Page 46, 'Smoke-Free Workplace' section)",0.0,0.0,0.0,0.0,0.0,0.7311136274312703 -How many days of paid sick leave do most full-time employees earn per year under Civil Service Rules?,[],No response generated.,"Most full-time employees earn 13 8-hour working days per year of paid sick leave under the Civil Service Rules. (Source: Page 32, 'Accrual of Paid Sick Leave' section)",0.0,0.0,0.0,0.0,0.0,0.7087668374354706 -What are the three categories of employees eligible for health coverage?,[],No response generated.,"The following employees are eligible: - -All permanent employees working at least 20 hours per week - -All regularly scheduled provisional employees working at least 20 hours per week - -All other employees (including temporary exempt or 'as needed') who have worked more than 1040 hours in any consecutive 12-month period and work at least 20 hours per week (Source: Page 25, 'Eligibility' section)",0.0,0.0,0.0,0.0,0.0,0.6871347926183213 -How long must an employee wait before using vacation time after starting employment?,[],No response generated.,"Employees are not eligible to use vacation in the first year of continuous service. After one year of continuous service, they are awarded vacation allowance at the rate of .0385 of an hour for each hour of paid service in the preceding year. 
(Source: Page 30, 'Vacation' section)",0.0,0.0,0.0,0.0,0.0,0.7100602328142038 -What must an employee do if they're summoned for jury duty?,[],No response generated.,"An employee must notify their supervisor as soon as they receive a jury summons. If required to report during working hours, they will be excused from work on the day they perform jury service, provided they give prior notification. If not selected or dismissed early, they must return to work as soon as possible. (Source: Page 37, 'Jury Duty Leave' section)",0.0,0.0,0.0,0.0,0.0,0.7249350381113908 diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index e8dbce483..6710105a4 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -39,7 +39,7 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.chroma_client = chromadb.HttpClient(host=host, port=chroma_port) self.agent = None self.session_id = None - self.memory_bank_id = "test_bank_691" + self.memory_bank_id = "test_bank_122" async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" diff --git a/examples/E2E-RAG-App/ollama_main.py b/examples/E2E-RAG-App/ollama_main.py deleted file mode 100644 index a854ee6b8..000000000 --- a/examples/E2E-RAG-App/ollama_main.py +++ /dev/null @@ -1,140 +0,0 @@ -import asyncio -import json -import os -import uuid -from typing import List, Optional - -import fire -import requests -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types.agent_create_params import AgentConfig -from llama_stack_client.types.memory_insert_params import Document -from termcolor import cprint -from tqdm import tqdm - -# Initialization - - -def is_memory_bank_present(client, target_identifier): - """Checks if a memory bank with the given identifier is present in the list.""" - return any( - bank.identifier == target_identifier for bank in client.memory_banks.list() - ) - - -async def insert_documents_to_memory_bank(client: LlamaStackClient, docs_dir: str): - """Inserts entire text documents from a directory into a memory bank.""" - memory_bank_id = "test_bank_0" - providers = client.providers.list() - provider_id = providers["memory"][0].provider_id - - memorybank_boolean = is_memory_bank_present(client, memory_bank_id) - memorybank_list = client.memory_banks.list() - print(memorybank_list) - # for bank in memorybank_list: - # try: - # client.memory_banks.unregister(memory_bank_id=bank.provider_resource_id) - # except Exception as e: - # print(e) - - print("after unregistration: ", client.memory_banks.list()) - - if not memorybank_boolean: - # Register a memory bank - memory_bank = client.memory_banks.register( - memory_bank_id=memory_bank_id, - params={ - "embedding_model": "all-MiniLM-L6-v2", - "chunk_size_in_tokens": 100, - "overlap_size_in_tokens": 10, - }, - provider_id=provider_id, - ) - cprint(f"Memory bank registered: {memory_bank}", "green") - - # Prepare entire documents for insertion - documents = [] - for filename in os.listdir(docs_dir): - if filename.endswith((".txt", ".md")): - file_path = os.path.join(docs_dir, filename) - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - - document = Document( - document_id=f"{filename}", - content=content, - mime_type="text/plain", - metadata={"filename": filename}, - ) - 
documents.append(document) - - # Insert documents into the memory bank - client.memory.insert( - bank_id=memory_bank_id, - documents=documents, - ) - cprint( - f"Inserted documents from {docs_dir} into memory bank '{memory_bank_id}'.", - "green", - ) - - -async def run_main(host: str, port: int, docs_dir: str) -> None: - """Main async function to register model, insert documents, and generate responses.""" - client = LlamaStackClient(base_url=f"http://{host}:{port}") - - # Insert documents to the memory bank - await insert_documents_to_memory_bank(client, docs_dir) - - # # Model registration - model_name = "llama3.2:1b-instruct-fp16" - - # Agent configuration - agent_config = AgentConfig( - model=model_name, - instructions="You are a helpful assistant that can answer questions based on provided documents.", - sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, - tools=[ - { - "type": "memory", - "memory_bank_configs": [{"bank_id": "test_bank_0", "type": "vector"}], - "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 512, - "max_chunks": 5, - } - ], - tool_choice="auto", - tool_prompt_format="json", - enable_session_persistence=True, - ) - agent = Agent(client, agent_config) - - session_id = agent.create_session(f"session-{uuid.uuid4()}") - - while True: - user_input = input("User> ") - if user_input.lower() in ["exit", "quit", "bye"]: - cprint("Ending conversation. Goodbye!", "yellow") - break - - cprint(f"Generating response for: {user_input}", "green") - - # Create a turn and generate the response asynchronously - response = agent.create_turn( - messages=[{"role": "user", "content": user_input}], session_id=session_id - ) - - # Log and display each response asynchronously - async for log in EventLogger().log(response): - log.print() - - -def main(host: str, port: int, docs_dir: str) -> None: - """Entry point for the script.""" - asyncio.run(run_main(host, port, docs_dir)) - - -if __name__ == "__main__": - fire.Fire(main) From 4f2ee2e6d86b9575537ab0dccf94ef0a5ac156f2 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Wed, 20 Nov 2024 13:08:41 -0800 Subject: [PATCH 71/99] Changes req by Kai --- examples/E2E-RAG-App/01_ingest_files.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/E2E-RAG-App/01_ingest_files.py b/examples/E2E-RAG-App/01_ingest_files.py index 75b3e3617..379a5cfd0 100644 --- a/examples/E2E-RAG-App/01_ingest_files.py +++ b/examples/E2E-RAG-App/01_ingest_files.py @@ -130,7 +130,6 @@ def main(): output_subdir.mkdir(parents=True, exist_ok=True) md_path = output_subdir / f"{res.input.file.stem}.md" - json_path = output_subdir / f"{res.input.file.stem}.json" print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") From 14052fb1a4c7b029cf51a485d027b7a187c42f08 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Wed, 20 Nov 2024 13:11:57 -0800 Subject: [PATCH 72/99] Update 01_ingest_files.py --- examples/E2E-RAG-App/01_ingest_files.py | 137 +++++++++++++----------- 1 file changed, 72 insertions(+), 65 deletions(-) diff --git a/examples/E2E-RAG-App/01_ingest_files.py b/examples/E2E-RAG-App/01_ingest_files.py index 379a5cfd0..cea7aa459 100644 --- a/examples/E2E-RAG-App/01_ingest_files.py +++ b/examples/E2E-RAG-App/01_ingest_files.py @@ -1,6 +1,7 @@ import argparse import json import logging +import shutil from pathlib import Path from typing import Tuple, List @@ -23,19 +24,24 @@ def parse_args(): parser.add_argument('--output_dir', type=str, help='Output directory for processed files (default: 
input_dir/output)') return parser.parse_args() -def get_document_files(input_dir: Path) -> list[Path]: +def get_document_files(input_dir: Path) -> Tuple[List[Path], List[Path]]: """ Recursively scan directory for document files. - Returns a list of Path objects for supported document types. + Returns: + tuple: (documents_to_process, markdown_files) """ - supported_extensions = {".pdf", ".docx", ".pptx"} - document_files = [] + process_extensions = {".pdf", ".docx", ".pptx"} + documents_to_process = [] + markdown_files = [] for path in input_dir.rglob("*"): - if path.is_file() and path.suffix.lower() in supported_extensions: - document_files.append(path) + if path.is_file(): + if path.suffix.lower() in process_extensions: + documents_to_process.append(path) + elif path.suffix.lower() == ".md": + markdown_files.append(path) - return document_files + return documents_to_process, markdown_files def save_images(res, output_subdir: Path, doc_filename: str) -> List[Tuple[str, Path]]: """ @@ -84,63 +90,64 @@ def main(): output_dir.mkdir(parents=True, exist_ok=True) # Get all document files recursively - input_paths = get_document_files(input_dir) - - if not input_paths: - print(f"No documents found in {input_dir}!") - return - - print(f"Found {len(input_paths)} documents to process:") - for path in input_paths: - print(f"- {path}") - - # Configure pipeline options - pipeline_options = PdfPipelineOptions() - pipeline_options.do_ocr = False - pipeline_options.images_scale = 2.0 - pipeline_options.generate_page_images = False - pipeline_options.generate_table_images = False - pipeline_options.generate_picture_images = True - - # Configure document converter - doc_converter = DocumentConverter( - allowed_formats=[ - InputFormat.PDF, - InputFormat.DOCX, - InputFormat.PPTX, - ], - format_options={ - InputFormat.PDF: PdfFormatOption( - pipeline_cls=StandardPdfPipeline, - backend=PyPdfiumDocumentBackend, - pipeline_options=pipeline_options - ), - InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), - }, - ) - - # Process all documents - conv_results = doc_converter.convert_all(input_paths) - all_extracted_images = [] - - # Save results - for res in conv_results: - relative_path = res.input.file.relative_to(input_dir) - output_subdir = output_dir / relative_path.parent - output_subdir.mkdir(parents=True, exist_ok=True) - - md_path = output_subdir / f"{res.input.file.stem}.md" - - print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") - - extracted_images = save_images(res, output_subdir, res.input.file.stem) - all_extracted_images.extend(extracted_images) - - with md_path.open("w", encoding="utf-8") as fp: - fp.write(res.document.export_to_markdown()) - - print(f"\nExtracted {len(all_extracted_images)} images in total") - print("Ready for image captioning processing") + documents_to_process, markdown_files = get_document_files(input_dir) + + # Copy markdown files directly + for md_file in markdown_files: + relative_path = md_file.relative_to(input_dir) + output_path = output_dir / relative_path + output_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(md_file, output_path) + print(f"Copied: {md_file}") + + if documents_to_process: + # Configure pipeline options + pipeline_options = PdfPipelineOptions() + pipeline_options.do_ocr = False + pipeline_options.images_scale = 2.0 + pipeline_options.generate_page_images = False + pipeline_options.generate_table_images = False + pipeline_options.generate_picture_images = True + + # Configure document converter + doc_converter = 
DocumentConverter( + allowed_formats=[ + InputFormat.PDF, + InputFormat.DOCX, + InputFormat.PPTX, + ], + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_cls=StandardPdfPipeline, + backend=PyPdfiumDocumentBackend, + pipeline_options=pipeline_options + ), + InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline), + }, + ) + + # Process all documents + conv_results = doc_converter.convert_all(documents_to_process) + all_extracted_images = [] + + # Save results + for res in conv_results: + relative_path = res.input.file.relative_to(input_dir) + output_subdir = output_dir / relative_path.parent + output_subdir.mkdir(parents=True, exist_ok=True) + + md_path = output_subdir / f"{res.input.file.stem}.md" + + print(f"Converting: {res.input.file}" f"\nSaving to: {md_path}") + + extracted_images = save_images(res, output_subdir, res.input.file.stem) + all_extracted_images.extend(extracted_images) + + with md_path.open("w", encoding="utf-8") as fp: + fp.write(res.document.export_to_markdown()) + + print(f"\nExtracted {len(all_extracted_images)} images in total") + print("Ready for image captioning processing") if __name__ == "__main__": - main() + main() \ No newline at end of file From 43d42025c5afbd09d255f82f4ad9991f2b61f512 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Wed, 20 Nov 2024 13:38:39 -0800 Subject: [PATCH 73/99] kind of working --- examples/E2E-RAG-App/docker/RAG_service.json | 5 ++-- .../E2E-RAG-App/docker/llama_stack_start.sh | 5 ++++ examples/E2E-RAG-App/gradio_interface.py | 29 +++++++++++++------ 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/examples/E2E-RAG-App/docker/RAG_service.json b/examples/E2E-RAG-App/docker/RAG_service.json index f206f661d..47c9fe934 100644 --- a/examples/E2E-RAG-App/docker/RAG_service.json +++ b/examples/E2E-RAG-App/docker/RAG_service.json @@ -1,9 +1,8 @@ { - "model_name": "llama3.2:1b-instruct-fp16", - "document_path": "/Users/justinai/Documents/Code/llama-stack-apps/examples/E2E-RAG-App/output/", + "model_name": "llama3.2:3b", + "document_path": "/Users/kaiwu/work/llama-stack-apps/examples/E2E-RAG-App/example_data", "host": "localhost", "port": 5000, "chroma_port": 6000, - "docs_dir": "/root/E2E-RAG-App/example_data/", "gradio_server_port": 7860 } diff --git a/examples/E2E-RAG-App/docker/llama_stack_start.sh b/examples/E2E-RAG-App/docker/llama_stack_start.sh index bd6e68045..fc07bec3c 100644 --- a/examples/E2E-RAG-App/docker/llama_stack_start.sh +++ b/examples/E2E-RAG-App/docker/llama_stack_start.sh @@ -1,8 +1,12 @@ sleep 45 + echo "-----starting to llama-stack docker now---------" pip install gradio +pip install docling + +python /root/E2E-RAG-App/01_ingest_files.py --input_dir /root/rag_data/ echo "starting the llama-stack server" @@ -10,4 +14,5 @@ python -m llama_stack.distribution.server.server --yaml-config /root/my-run.yaml sleep 30 echo "---------running the RAG app--------------" + python /root/E2E-RAG-App/gradio_interface.py diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 6710105a4..234013df6 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -2,14 +2,12 @@ import json import os import uuid -from typing import AsyncGenerator, Generator, List, Optional -from threading import Thread from queue import Queue - +from threading import Thread +from typing import AsyncGenerator, Generator, List, Optional import chromadb - import gradio as gr import requests from chromadb.utils import embedding_functions @@ 
-27,8 +25,10 @@ HOST = os.getenv("HOST", "localhost") PORT = int(os.getenv("PORT", 5000)) CHROMA_PORT = int(os.getenv("CHROMA_PORT", 6000)) -DOCS_DIR = os.getenv("DOCS_DIR", "/root/E2E-RAG-App/example_data/") +DOCS_DIR = "/root/rag_data/output" GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", 7861)) +MODEL_NAME = os.getenv("MODEL_NAME", "Llama3.2-1B-Instruct") + class LlamaChatInterface: def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): @@ -39,7 +39,7 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.chroma_client = chromadb.HttpClient(host=host, port=chroma_port) self.agent = None self.session_id = None - self.memory_bank_id = "test_bank_122" + self.memory_bank_id = "test_bank_212" async def initialize_system(self): """Initialize the entire system including memory bank and agent.""" @@ -95,7 +95,15 @@ async def load_documents(self): async def initialize_agent(self): """Initialize the agent with model registration and configuration.""" - model_name = "Llama3.2-1B-Instruct" + + if "1b" in MODEL_NAME: + model_name = "Llama3.2-1B-Instruct" + elif "3b" in MODEL_NAME: + model_name = "Llama3.2-3B-Instruct" + elif "8b" in MODEL_NAME: + model_name = "Llama3.1-8B-Instruct" + else: + model_name = MODEL_NAME agent_config = AgentConfig( model=model_name, @@ -135,7 +143,8 @@ async def async_process(): await self.initialize_system() response = self.agent.create_turn( - messages=[{"role": "user", "content": message}], session_id=self.session_id + messages=[{"role": "user", "content": message}], + session_id=self.session_id, ) current_response = "" @@ -228,4 +237,6 @@ def clear_chat(): if __name__ == "__main__": # Create and launch the Gradio interface interface = create_gradio_interface() - interface.launch(server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True) + interface.launch( + server_name=HOST, server_port=GRADIO_SERVER_PORT, share=True, debug=True + ) From 88f1d63773441866db6958f56ee7b121ff0a6a51 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Wed, 20 Nov 2024 13:47:38 -0800 Subject: [PATCH 74/99] Update gradio_interface.py --- examples/E2E-RAG-App/gradio_interface.py | 199 +++++++++++------------ 1 file changed, 92 insertions(+), 107 deletions(-) diff --git a/examples/E2E-RAG-App/gradio_interface.py b/examples/E2E-RAG-App/gradio_interface.py index 234013df6..eec553e8c 100644 --- a/examples/E2E-RAG-App/gradio_interface.py +++ b/examples/E2E-RAG-App/gradio_interface.py @@ -7,9 +7,7 @@ from typing import AsyncGenerator, Generator, List, Optional import chromadb - import gradio as gr -import requests from chromadb.utils import embedding_functions from dotenv import load_dotenv from llama_stack_client import LlamaStackClient @@ -18,7 +16,6 @@ from llama_stack_client.types.agent_create_params import AgentConfig from llama_stack_client.types.memory_insert_params import Document - # Load environment variables load_dotenv() @@ -29,6 +26,44 @@ GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", 7861)) MODEL_NAME = os.getenv("MODEL_NAME", "Llama3.2-1B-Instruct") +# Custom CSS for enhanced styling +CUSTOM_CSS = """ +.message-rag { + font-size: 0.875rem !important; + background-color: rgba(30, 41, 59, 0.5) !important; + border-radius: 0.5rem !important; + padding: 0.75rem !important; + margin-bottom: 1rem !important; + font-family: ui-monospace, monospace !important; +} + +.message-rag-title { + font-size: 0.75rem !important; + color: #94a3b8 !important; + margin-bottom: 0.25rem !important; + display: flex 
!important; + align-items: center !important; + gap: 0.5rem !important; +} + +.message-rag-title::before { + content: "📄" !important; + font-size: 1rem !important; +} + +.message-rag-content { + color: #cbd5e1 !important; +} + +.bot-message { + font-size: 1rem !important; + line-height: 1.5 !important; +} + +.user-message { + background-color: rgb(79, 70, 229) !important; +} +""" class LlamaChatInterface: def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): @@ -41,100 +76,21 @@ def __init__(self, host: str, port: int, chroma_port: int, docs_dir: str): self.session_id = None self.memory_bank_id = "test_bank_212" - async def initialize_system(self): - """Initialize the entire system including memory bank and agent.""" - await self.setup_memory_bank() - await self.initialize_agent() - - async def setup_memory_bank(self): - """Set up the memory bank if it doesn't exist.""" - providers = self.client.providers.list() - provider_id = providers["memory"][0].provider_id - collections = self.chroma_client.list_collections() - - if any(col.name == self.memory_bank_id for col in collections): - print(f"The collection '{self.memory_bank_id}' exists.") - else: - print( - f"The collection '{self.memory_bank_id}' does not exist. Creating the collection..." - ) - self.client.memory_banks.register( - memory_bank_id=self.memory_bank_id, - params={ - "embedding_model": "all-MiniLM-L6-v2", - "chunk_size_in_tokens": 100, - "overlap_size_in_tokens": 10, - }, - provider_id=provider_id, - ) - await self.load_documents() - print(f"Memory bank registered.") - - async def load_documents(self): - """Load documents from the specified directory into memory bank.""" - documents = [] - for filename in os.listdir(self.docs_dir): - if filename.endswith((".txt", ".md")): - file_path = os.path.join(self.docs_dir, filename) - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - document = Document( - document_id=filename, - content=content, - mime_type="text/plain", - metadata={"filename": filename}, - ) - documents.append(document) - - if documents: - self.client.memory.insert( - bank_id=self.memory_bank_id, - documents=documents, - ) - print(f"Loaded {len(documents)} documents from {self.docs_dir}") - - async def initialize_agent(self): - """Initialize the agent with model registration and configuration.""" - - if "1b" in MODEL_NAME: - model_name = "Llama3.2-1B-Instruct" - elif "3b" in MODEL_NAME: - model_name = "Llama3.2-3B-Instruct" - elif "8b" in MODEL_NAME: - model_name = "Llama3.1-8B-Instruct" - else: - model_name = MODEL_NAME - - agent_config = AgentConfig( - model=model_name, - instructions="You are a helpful assistant that can answer questions based on provided documents. Return your answer short and concise, less than 50 words.", - sampling_params={"strategy": "greedy", "temperature": 1.0, "top_p": 0.9}, - tools=[ - { - "type": "memory", - "memory_bank_configs": [ - {"bank_id": self.memory_bank_id, "type": "vector"} - ], - "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 300, - "max_chunks": 5, - } - ], - tool_choice="auto", - tool_prompt_format="json", - enable_session_persistence=True, - ) - self.agent = Agent(self.client, agent_config) - self.session_id = self.agent.create_session(f"session-{uuid.uuid4()}") + # ... [previous methods remain the same until chat_stream] ... + + def format_rag_context(self, context: str) -> str: + """Format RAG context with custom styling.""" + return f"""
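
The excerpt ends before the body of format_rag_context appears, so the following is only a hypothetical sketch rather than the committed code: it assumes the helper simply wraps one retrieved chunk in the .message-rag, .message-rag-title, and .message-rag-content classes defined in CUSTOM_CSS, and the standalone (non-method) signature, the "Retrieved context" title text, and the usage note are all assumptions.

# Hypothetical sketch only; not part of the patch above.
def format_rag_context(context: str) -> str:
    """Wrap one retrieved chunk in the .message-rag CSS classes defined in CUSTOM_CSS."""
    return (
        '<div class="message-rag">'
        '<div class="message-rag-title">Retrieved context</div>'
        f'<div class="message-rag-content">{context}</div>'
        "</div>"
    )

# Assumed usage: the chat handler could emit format_rag_context(chunk) ahead of the model's answer
# so retrieved passages render in the styled panel instead of as plain text.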