forked from Hoper-J/AI-Guide-and-Demos-zh_CN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
app_fastapi.py
35 lines (30 loc) · 1.06 KB
/
app_fastapi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Schema of the JSON body accepted by the text-generation endpoint.
class PromptRequest(BaseModel):
    """Request payload carrying the text that the model should continue."""

    # Prompt text that is tokenized and fed to the model.
    prompt: str
# Web application instance on which the route handlers are registered.
app = FastAPI()

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the causal language model once, at import time,
# and place the model on the selected device.
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
@app.post("/generate")
def generate_text(request: PromptRequest):
    """Generate a beam-search continuation of the submitted prompt.

    Args:
        request: Parsed request body; its ``prompt`` field is the text to
            continue.

    Returns:
        A dict with a single key, ``"generated_text"``, holding the decoded
        first output sequence with special tokens removed.

    Raises:
        HTTPException: 400 when the prompt is empty, or when the prompt's
            token count already reaches the total length limit so nothing
            could be generated.
    """
    prompt = request.prompt
    if not prompt:
        raise HTTPException(status_code=400, detail="No prompt provided")

    # Passed to generate() as ``max_length``, which bounds the prompt tokens
    # plus the newly generated tokens together.
    max_length = 200

    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # A prompt that already fills the whole budget leaves no room for new
    # tokens. Report that as a client error instead of letting generate()
    # fail (or merely warn, depending on the transformers version).
    prompt_tokens = inputs["input_ids"].shape[-1]
    if prompt_tokens >= max_length:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Prompt is too long: {prompt_tokens} tokens "
                f"(must be fewer than {max_length})"
            ),
        )

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            num_beams=5,
            no_repeat_ngram_size=2,
            early_stopping=True,
            # NOTE(review): GPT-2 style tokenizers presumably define no pad
            # token; passing EOS explicitly is meant to mirror the fallback
            # generate() applies on its own and avoid a per-request warning.
            # Confirm against the installed transformers version.
            pad_token_id=tokenizer.eos_token_id,
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": generated_text}