Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto evol instruct #1054

Draft
wants to merge 6 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/distilabel/steps/tasks/auto_evol_instruct/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

165 changes: 165 additions & 0 deletions src/distilabel/steps/tasks/auto_evol_instruct/analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, Optional, Union

from jinja2 import Template
from pydantic import PrivateAttr
from typing_extensions import override

from distilabel.steps.tasks.base import Task

if TYPE_CHECKING:
from distilabel.steps.tasks.typing import ChatType
from distilabel.steps.typing import StepColumns


# System prompt for the trajectory analyzer: instructs the LLM to judge an
# instruction-evolution trajectory and reply strictly with either
# "### PASSED" or "### FAILED - Reason: ..." (parsed downstream as-is).
SYSTEM_PROMPT: str = """\
You are an expert at analyzing the evolution of a given instruction. You will look at the trajectory of the evolution from an initial instruction and make feedbacks based on how the complexity is being increased in each stage.

Please strictly output using the following format, do not add anything else to the response:

***FORMAT INSTRUCTION***
Choose one of the two options:
Option 1 - If all cases are evolving correctly, please strictly output:
### PASSED

Option 2 - If you identify cases that did not evolve correctly, please strictly output:
### FAILED - Reason: [reason_of_fail]
and so on...
***END OF FORMAT INSTRUCTION***"""


# User prompt template (Jinja2). The single `evol_trajectory` variable is
# rendered in `AutoEvolTrajectoryAnalizer.format_input` as
# "Stage 0: <instruction>\nStage 1: <evolved_instruction>".
USER_PROMPT: str = """\
The following list shows cases where an Instruction evolves into a more complex version of an Instruction.
For each case, stage 0 represents the Instruction in its initial state, and stage 1 requires an increase in complexity based on the previous stage.

Please identify cases that failed to evolve, and provide the reason why it fails.

Evolution Trajectory:
{{ evol_trajectory }}
"""


class AutoEvolTrajectoryAnalizer(Task):
    """Analyze the evolution trajectory of an instruction and produce feedback.

    Given an original instruction and its evolved version (e.g. produced by an
    `AutoEvolver` task), this task asks the LLM to judge whether the evolution
    correctly increased complexity. The model is constrained by the system
    prompt to answer strictly with `### PASSED` or `### FAILED - Reason: ...`,
    following the trajectory-analysis step of the AutoEvol method.

    Attributes:
        system_prompt: The system prompt to be used in the completions. May be
            set to `None`/empty to omit the system message entirely.
        user_prompt: Jinja2 template for the user turn; it must contain the
            `evol_trajectory` variable, rendered from the input columns.

    Input columns:
        - instruction (`str`): The original instruction.
        - evolved_instruction (`str`): The evolved instruction from using AutoEvolver task.

    Output columns:
        - feedback (`str`): Feedback for the optimization.
        - model_name (`str`): The name of the model used to generate the feedback.

    Categories:
        - text-generation

    References:
        - [`Automatic Instruction Evolving for Large Language Models`](https://arxiv.org/abs/2406.00770)

    Examples:
        Analyze the evolution trajectory of an instruction:

        ```python
        from distilabel.steps.tasks import AutoEvolTrajectoryAnalizer
        from distilabel.models import InferenceEndpointsLLM

        model_id = "Qwen/Qwen2.5-72B-Instruct"

        llm = InferenceEndpointsLLM(
            model_id=model_id,
            tokenizer_id=model_id,
            generation_kwargs={
                "max_new_tokens": 2048, "temperature": 0.2,
            },
        )
        analyzer = AutoEvolTrajectoryAnalizer(llm=llm)
        analyzer.load()

        result = next(
            analyzer.process(
                [
                    {
                        "instruction": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?",
                        "evolved_instruction": "Natalia sold hair clips to 48 of her friends in April, and then she sold half as many clips, but no less than 20, in May. How many hair clips did Natalia sell altogether in April and May?"
                    }
                ]
            )
        )
        print(result[0]["feedback"])
        # '### PASSED'
        ```

    Citations:

        ```
        @misc{zeng2024automaticinstructionevolvinglarge,
            title={Automatic Instruction Evolving for Large Language Models},
            author={Weihao Zeng and Can Xu and Yingxiu Zhao and Jian-Guang Lou and Weizhu Chen},
            year={2024},
            eprint={2406.00770},
            archivePrefix={arXiv},
            primaryClass={cs.CL},
            url={https://arxiv.org/abs/2406.00770},
        }
        ```
    """

    system_prompt: Optional[str] = SYSTEM_PROMPT
    user_prompt: str = USER_PROMPT

    # Compiled Jinja2 template for `user_prompt`; populated in `load`.
    _template: Union[Template, None] = PrivateAttr(...)

    def load(self) -> None:
        """Compile the `user_prompt` into a Jinja2 template."""
        super().load()
        self._template = Template(self.user_prompt)

    @property
    def inputs(self) -> "StepColumns":
        return ["instruction", "evolved_instruction"]

    @property
    def outputs(self) -> "StepColumns":
        return ["feedback", "model_name"]

    def format_input(self, input: "dict[str, Any]") -> "ChatType":
        """The input is formatted as a `ChatType` assuming that the instruction
        is the first interaction from the user within a conversation."""
        # Stage 0/Stage 1 labels match the numbering described in USER_PROMPT.
        evol_trajectory = (
            f"Stage 0: {input['instruction']}\nStage 1: {input['evolved_instruction']}"
        )

        messages = [
            {
                "role": "user",
                "content": self._template.render(evol_trajectory=evol_trajectory),
            },
        ]
        if self.system_prompt:
            messages.insert(0, {"role": "system", "content": self.system_prompt})

        return messages

    @override
    def format_output(
        self, output: Union[str, None], input: "dict[str, Any]"
    ) -> "dict[str, Any]":
        """Return the raw LLM feedback along with the generating model's name."""
        return {"feedback": output, "model_name": self.llm.model_name}
107 changes: 107 additions & 0 deletions src/distilabel/steps/tasks/auto_evol_instruct/evol_optimizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, Optional, Union

from jinja2 import Template
from pydantic import PrivateAttr

from distilabel.steps.tasks.base import Task

if TYPE_CHECKING:
from distilabel.steps.tasks.typing import ChatType
from distilabel.steps.typing import StepColumns


# System prompt for the method optimizer: the model must rewrite the
# evolution method itself, ending with a "#Finally Rewritten Instruction#"
# step regardless of how many intermediate steps it adds.
SYSTEM_PROMPT: str = """You are an Instruction Method Optimizer.

**Output Instructions**
Add more steps to achieve the most refined method if needed, however, REMEMBER that the final step in your output has to be "#Finally Rewritten Instruction#" no matter how many steps are added.
Please generate the optimized method strictly using ONLY the given below format, do not add anything else."""


# User prompt template (Jinja2). Variables: `feedback` (output of the
# trajectory analyzer, e.g. "### PASSED" or "### FAILED - ...") and
# `current_method` (the evolution method being optimized). Both are rendered
# in `AutoEvolOptimizer.format_input`.
USER_PROMPT: str = """\
Feedback: {{ feedback }}
Based on the feedback from the evolution failure case, optimize the method below to create a more effective instruction rewriting process without negatively impacting performance on other cases. Ensure that the complexity of the optimized method is not lower than the previous method.
If the feedback is "### PASSED", then come up with a better method than the current one to create a more complex and effective instruction rewriting process. Remember that the new method should not be very similar to the current method, be creative with new steps for the new method.

Current Method:
{{ current_method }}

```Optimized Method
Step 1:
#Methods List#
Describe how to generate a list of methods to make instructions more complex, incorporating the feedback

Step 2:
#Plan#
Explain how to create a comprehensive plan based on the Methods List

[Note]Add more steps here as you want to achieve the best method. The steps should align with the instruction domain/topic, and should not involve any tools or visualization, it should be text-only methods. The last step should always be #Finally Rewritten Instruction#.

Step N-1:
#Rewritten Instruction#
Do not generate new Instruction here, but please provide a detailed the process of executing the plan to rewrite the instruction. You are generating a guide to write a better instruction, NOT THE INSTRUCTION ITSELF.

Step N:
#Finally Rewritten Instruction#
Do not generate new Instruction here, but please provide the process to write the final rewritten instruction. You are generating a guide to write a better instruction, NOT THE INSTRUCTION ITSELF.
```"""


class AutoEvolOptimizer(Task):
    """Optimize the instruction-evolving method based on analyzer feedback.

    Implements the "Evol Trajectory Optimization" step of the AutoEvol method:
    given the current evolution method and the feedback produced by
    `AutoEvolTrajectoryAnalizer`, asks the LLM to rewrite the method into a
    more effective one (or, on `### PASSED`, into a more creative variant).

    Attributes:
        system_prompt: The system prompt to be used in the completions. May be
            set to `None`/empty to omit the system message entirely.
        user_prompt: Jinja2 template for the user turn; it must contain the
            `feedback` and `current_method` variables.

    Input columns:
        - optimization_method (`str`): The current evolution method to optimize.
        - feedback (`str`): Feedback from the trajectory analyzer.

    Output columns:
        - optimized_method (`str`): The optimized evolution method.
        - model_name (`str`): The name of the model used to generate the method.

    Categories:
        - text-generation

    References:
        - [`Automatic Instruction Evolving for Large Language Models`](https://arxiv.org/abs/2406.00770)
    """

    system_prompt: Optional[str] = SYSTEM_PROMPT
    # Required (non-Optional) for consistency with AutoEvolTrajectoryAnalizer:
    # `load` unconditionally compiles it, so `None` was never a valid value.
    user_prompt: str = USER_PROMPT

    # Compiled Jinja2 template for `user_prompt`; populated in `load`.
    _template: Union[Template, None] = PrivateAttr(...)

    def load(self) -> None:
        """Compile the `user_prompt` into a Jinja2 template."""
        super().load()
        self._template = Template(self.user_prompt)

    @property
    def inputs(self) -> "StepColumns":
        return ["optimization_method", "feedback"]

    @property
    def outputs(self) -> "StepColumns":
        return ["optimized_method", "model_name"]

    def format_input(self, input: "dict[str, Any]") -> "ChatType":
        """Render the feedback and current method into a chat conversation."""
        messages = [
            {
                "role": "user",
                "content": self._template.render(
                    feedback=input["feedback"],
                    current_method=input["optimization_method"],
                ),
            },
        ]
        if self.system_prompt:
            messages.insert(0, {"role": "system", "content": self.system_prompt})

        return messages

    def format_output(
        self, output: Union[str, None], input: "dict[str, Any]"
    ) -> "dict[str, Any]":
        """Attach the optimized method (or `None` on generation failure) and
        the generating model's name to the input row."""
        # When generation fails `output` is None, which is exactly the value
        # the column should hold — so a single update covers both cases.
        # TODO(review): the raw output is stored verbatim; parsing the
        # step-structured method out of it is still pending.
        input.update(optimized_method=output, model_name=self.llm.model_name)
        return input
Loading