Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature UDF code barfs on Literal #6

Closed
volkfox opened this issue Jul 10, 2024 · 1 comment
Closed

Feature UDF code barfs on Literal #6

volkfox opened this issue Jul 10, 2024 · 1 comment
Assignees
Labels
bug Something isn't working priority-p1

Comments

@volkfox
Copy link
Contributor

volkfox commented Jul 10, 2024

Description

This code works:

import os
import json

from datachain.lib.feature import Feature
from datachain.lib.dc import Column, DataChain
#from datachain.lib.feature_utils import pydantic_to_feature

from typing import Callable, Literal, Optional

import anthropic
from anthropic.types.message import Message

# Default system prompt; used as the default for the `prompt` parameter of
# claude_api_response() and passed explicitly in the chain below.
PROMPT = "Was this dialog successful? Describe the 'result' as 'Yes' or 'No' in JSON format and print nothing else"

# Model identifier passed as `model=` to `messages.create()`.
claude_model = "claude-3-5-sonnet-20240620"
# Read once at import time; raises KeyError if ANTHROPIC_API_KEY is not set.
claude_api_key = os.environ["ANTHROPIC_API_KEY"]

### define the Claude data model ###
class UsageFr(Feature):
    """Feature used for the ``usage`` field of ``ClaudeMessage``.

    Both counters are ints that default to 0.
    """
    input_tokens: int = 0
    output_tokens: int = 0

class TextBlockFr(Feature):
    """Feature used for each element of ``ClaudeMessage.content``.

    Holds a text payload (default empty) and a ``type`` tag that defaults
    to ``"text"``.
    """
    text: str = ""
    type: str = "text"

class ClaudeMessage(Feature):
    """Feature built from a Claude response (see ``claude_api_response``).

    ``content`` is the only field without a default. ``type`` is a plain
    ``str`` here; this is the variant the issue reports as working.
    """
    id: str = ""
    content: list[TextBlockFr]
    type: str = "message"
    # Default is a UsageFr with both counters at 0.
    usage: UsageFr = UsageFr()

def claude_api_response(claude_client, content, prompt=PROMPT):
    """Send ``content`` to Claude and wrap the reply in a ``ClaudeMessage``.

    Args:
        claude_client: Object exposing ``messages.create`` (for example an
            ``anthropic.Anthropic`` instance).
        content: Value sent as the single user message; it is formatted
            into a string before sending.
        prompt: System prompt for the request. Defaults to ``PROMPT``.

    Returns:
        A ``ClaudeMessage`` constructed from the response's ``dict()`` output.
    """
    # Resolve the endpoint first so a bad client fails before anything else.
    create_message = claude_client.messages.create
    user_turn = {"role": "user", "content": f"{content}"}
    reply = create_message(
        model=claude_model,
        max_tokens=1024,
        system=prompt,
        messages=[user_turn],
    )
    reply_fields = reply.dict()
    return ClaudeMessage(**reply_fields)

# Twitter GIF starts here

# Pipeline: list files in the bucket, keep the .txt ones, send each to Claude,
# and save the results as a named dataset.
chain = (
    DataChain
    # Source: objects under this GCS prefix.
    .from_storage("gs://datachain-demo/chatbot-KiT/")
    # presumably 4 parallel UDF workers with file caching enabled — confirm
    # against the DataChain.settings documentation.
    .settings(parallel=4, cache=True)
    # Keep only entries whose file name matches *.txt.
    .filter(Column("file.name").glob("*.txt"))
    # The keyword name `claude` labels the UDF result, declared as ClaudeMessage.
    # NOTE: a new anthropic.Anthropic client is constructed on every lambda call.
    .map(claude=lambda file: claude_api_response(anthropic.Anthropic(api_key=claude_api_key), file.get_value(), prompt=PROMPT), output=ClaudeMessage)
    # Saving is what triggers execution in the reported traceback (dc.py `save`).
    .save("llm-claude-mistral")
)


But replace the "type" field with a Literal, as in src/datachain/lib/claude.py, and it stops working:

class ClaudeMessage(Feature):
    """Variant of the Claude message Feature that uses ``Literal`` fields.

    This is the definition the issue reports as failing: per the traceback
    in this report, the ``stop_reason`` annotation raises
    ``TypeError: 'type' object is not subscriptable`` when the class body
    is re-executed from the generated temp file during UDF unpickling.
    """
    id: str = ""
    content: list[TextBlockFr]
    model: str = ""
    role: str = ""
    # Line named in the reported TypeError.
    stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence"]] = None
    stop_sequence: Optional[str] = None
    # Replaces the plain `type: str = "message"` of the working variant.
    type: Literal["message"] = "message"
    usage: UsageFr = UsageFr()

-->

Traceback (most recent call last):
  File "/Users/dkh/venv/clean-datachain/bin/datachain", line 8, in <module>
    sys.exit(main())
  File "/Users/dkh/dvcx/src/datachain/cli.py", line 916, in main
    return udf_entrypoint()
  File "/Users/dkh/dvcx/src/datachain/query/dispatch.py", line 88, in udf_entrypoint
    udf_info = load(stdin.buffer) # noqa: S301
  File "/Users/dkh/venv/clean-datachain/lib/python3.9/site-packages/dill/_dill.py", line 289, in load
    return Unpickler(file, ignore=ignore, **kwds).load()
  File "/Users/dkh/venv/clean-datachain/lib/python3.9/site-packages/dill/_dill.py", line 444, in load
    obj = StockUnpickler.load(self)
  File "/Users/dkh/venv/clean-datachain/lib/python3.9/site-packages/dill/_dill.py", line 434, in find_class
    return StockUnpickler.find_class(self, module, name)
  File "/Users/dkh/dvcx/tmpFunjoHv5uo.py", line 75, in <module>
    class ClaudeMessage(Feature):
  File "/Users/dkh/dvcx/tmpFunjoHv5uo.py", line 80, in ClaudeMessage
    stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence"]] = None
TypeError: 'type' object is not subscriptable
Traceback (most recent call last):
File "", line 2, in
File "/Users/dkh/dvcx/src/datachain/lib/dc.py", line 399, in save
return super().save(name=name, version=version, feature_schema=schema)
File "/Users/dkh/dvcx/src/datachain/query/dataset.py", line 1720, in save
query = self.apply_steps()
File "/Users/dkh/dvcx/src/datachain/query/dataset.py", line 1256, in apply_steps
result = step.apply(
File "/Users/dkh/dvcx/src/datachain/query/dataset.py", line 708, in apply
self.populate_udf_table(udf_table, query)
File "/Users/dkh/dvcx/src/datachain/query/dataset.py", line 541, in populate_udf_table
raise RuntimeError("UDF Execution Failed!")
RuntimeError: UDF Execution Failed!


### Version Info

```Text
datachain -V
0.1.14.dev43+g9bef8c7c

python --version
Python 3.9.4
```
@dmpetrov
Copy link
Member

fixed

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working priority-p1
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants