-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ModelingLLM: Add Structured Grading Instruction Generation and Restructure Module (#340)
- Loading branch information
1 parent
bbbcbc9
commit d9ff3bd
Showing
33 changed files
with
542 additions
and
411 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
modules/modeling/module_modeling_llm/module_modeling_llm/core/filter_feedback.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from langchain_core.output_parsers import PydanticOutputParser | ||
from langchain_core.prompts import ChatPromptTemplate | ||
|
||
from athena import emit_meta | ||
from module_modeling_llm.config import BasicApproachConfig | ||
from module_modeling_llm.utils.predict_and_parse import predict_and_parse | ||
from module_modeling_llm.models.assessment_model import AssessmentModel | ||
from module_modeling_llm.models.exercise_model import ExerciseModel | ||
from module_modeling_llm.prompts.filter_feedback_prompt import FilterFeedbackInputs | ||
|
||
async def filter_feedback(
    exercise: ExerciseModel,
    original_feedback: AssessmentModel,
    config: BasicApproachConfig,
    debug: bool,
) -> AssessmentModel:
    """
    Pass an existing assessment through the LLM using the filter-feedback prompt
    :param exercise: The exercise model; its exercise_id and submission_id are used to tag the LLM call
    :param original_feedback: The assessment to filter; it is handed to the prompt as JSON
    :param config: A configuration object providing the model and the filter prompt messages
    :param debug: Indicates whether the formatted prompt and the result should be emitted as metadata
    :return: The assessment parsed from the model response
    :raises ValueError: If predict_and_parse yields no result
    """
    chat_prompt = ChatPromptTemplate.from_messages([
        ("system", config.generate_suggestions_prompt.filter_feedback_system_message),
        ("human", config.generate_suggestions_prompt.filter_feedback_human_message),
    ])

    prompt_inputs = FilterFeedbackInputs(
        original_feedback=original_feedback.json(),
        feedback_output_format=PydanticOutputParser(pydantic_object=AssessmentModel).get_format_instructions(),
    )
    # Serialise once; the same mapping feeds both the LLM call and the debug output.
    prompt_input = prompt_inputs.dict()

    feedback_result = await predict_and_parse(
        model=config.model.get_model(),  # type: ignore[attr-defined]
        chat_prompt=chat_prompt,
        prompt_input=prompt_input,
        pydantic_object=AssessmentModel,
        tags=[
            f"exercise-{exercise.exercise_id}-filter",
            f"submission-{exercise.submission_id}-filter",
        ],
    )

    # Emit the debug metadata before the None check so that a failed run is
    # still traceable through the recorded prompt.
    if debug:
        emit_meta("filter_feedback", {
            "prompt": chat_prompt.format(**prompt_input),
            "result": feedback_result.dict() if feedback_result is not None else None,
        })

    if feedback_result is None:
        raise ValueError("No feedback was returned by the model.")

    return feedback_result
64 changes: 64 additions & 0 deletions
64
modules/modeling/module_modeling_llm/module_modeling_llm/core/generate_suggestions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from athena.schemas.grading_criterion import StructuredGradingCriterion | ||
from langchain_core.output_parsers import PydanticOutputParser | ||
from langchain_core.prompts import ChatPromptTemplate | ||
|
||
from athena import emit_meta | ||
from module_modeling_llm.config import BasicApproachConfig | ||
from module_modeling_llm.models.assessment_model import AssessmentModel | ||
from module_modeling_llm.prompts.apollon_format_description import apollon_format_description | ||
from module_modeling_llm.utils.predict_and_parse import predict_and_parse | ||
from module_modeling_llm.prompts.graded_feedback_prompt import GradedFeedbackInputs | ||
from module_modeling_llm.models.exercise_model import ExerciseModel | ||
|
||
async def generate_suggestions(
        exercise_model: ExerciseModel,
        structured_grading_instructions: StructuredGradingCriterion,
        config: BasicApproachConfig,
        debug: bool) -> AssessmentModel:
    """
    Generate feedback suggestions for a modeling exercise submission
    :param exercise_model: The exercise model holding the transformed submission, problem statement,
        point limits, UML type and transformed example solution used to fill the prompt
    :param structured_grading_instructions: The structured grading instructions; passed to the prompt as JSON
    :param config: A configuration object providing the model and the graded feedback prompt messages
    :param debug: Indicates whether additional debugging information should be provided
    :return: The assessment parsed from the model response
    :raises ValueError: If predict_and_parse yields no result
    """
    prompt_inputs = GradedFeedbackInputs(
        submission=exercise_model.transformed_submission,
        problem_statement=exercise_model.problem_statement,
        max_points=exercise_model.max_points,
        bonus_points=exercise_model.bonus_points,
        structured_grading_instructions=structured_grading_instructions.json(),
        submission_uml_type=exercise_model.submission_uml_type,
        example_solution=exercise_model.transformed_example_solution,
        uml_diagram_format=apollon_format_description,
        feedback_output_format=PydanticOutputParser(pydantic_object=AssessmentModel).get_format_instructions(),
    )
    # Serialise once; the same mapping feeds both the LLM call and the debug output.
    prompt_input = prompt_inputs.dict()

    chat_prompt = ChatPromptTemplate.from_messages([
        ("system", config.generate_suggestions_prompt.graded_feedback_system_message),
        ("human", config.generate_suggestions_prompt.graded_feedback_human_message),
    ])

    feedback_result = await predict_and_parse(
        model=config.model.get_model(),  # type: ignore[attr-defined]
        chat_prompt=chat_prompt,
        prompt_input=prompt_input,
        pydantic_object=AssessmentModel,
        tags=[
            f"exercise-{exercise_model.exercise_id}",
            f"submission-{exercise_model.submission_id}",
        ],
    )

    # Emit the debug metadata before the None check so that a failed run is
    # still traceable through the recorded prompt.
    if debug:
        emit_meta("generate_suggestions", {
            "prompt": chat_prompt.format(**prompt_input),
            "result": feedback_result.dict() if feedback_result is not None else None,
        })

    if feedback_result is None:
        raise ValueError("No feedback was generated")

    return feedback_result
Oops, something went wrong.