generated from aniketmaurya/python-project-template
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a0e4f12
commit 6e55c54
Showing
4 changed files
with
74 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@
 import uvicorn

-from .fastserve import app
+from .fastserve import FastServe

-uvicorn.run(app)
+serve = FastServe()
+serve.run_server()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,42 @@
-from typing import List
+from typing import Any, List

 from fastapi import FastAPI
 from pydantic import BaseModel

-from .models.llama_cpp import LlamaCppLLM
+from .batching import BatchProcessor


-class PromptRequest(BaseModel):
-    prompt: str
-    temperature: float = 0.2
-    max_tokens: int = 60
-    stop: List[str] = []
+class BaseRequest(BaseModel):
+    request: Any


-app = FastAPI()
-llm = LlamaCppLLM(model_path="openhermes-2-mistral-7b.Q5_K_M.gguf")
+class FastServe:
+    def __init__(self, batch_size=2, timeout=0.5) -> None:
+        self.batch_processing = BatchProcessor(
+            func=self.handle, bs=batch_size, timeout=timeout
+        )
+        self._app = FastAPI()
+
+        @self._app.on_event("shutdown")
+        def shutdown_event():
+            self.batch_processing.cancel()

-@app.post("/serve")
-def serve(prompt: PromptRequest):
-    result = llm(
-        prompt=prompt.prompt,
-        temperature=prompt.temperature,
-        max_tokens=prompt.max_tokens,
-        stop=prompt.stop,
-    )
-    return result
+    def serve(
+        self,
+    ):
+        @self._app.post(path="/endpoint")
+        def api(request: BaseRequest):
+            wait_obj = self.batch_processing.process(request)
+            return wait_obj.get()
+
+    def handle(self, batch: List[BaseRequest]):
+        n = len(batch)
+        return n * [0.5 * n]
+
+    def run_server(
+        self,
+    ):
+        self.serve()
+        import uvicorn
+
+        uvicorn.run(self._app)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@
from typing import List

from fastapi import FastAPI
from pydantic import BaseModel

from .models.llama_cpp import LlamaCppLLM

class PromptRequest(BaseModel):
    """Request body for the /serve endpoint: a prompt plus sampling options."""

    prompt: str  # text prompt forwarded verbatim to the LLM
    temperature: float = 0.2  # sampling temperature passed through to the model
    max_tokens: int = 60  # cap on generated tokens
    # NOTE(review): mutable default is safe here only because pydantic copies
    # field defaults per instance — do not replicate on a plain class.
    stop: List[str] = []  # stop sequences; empty list means no explicit stops
|
||
|
||
# Module-level singletons shared by all requests: the FastAPI application
# and a single llama.cpp-backed model instance.
app = FastAPI()
# NOTE(review): model path is hard-coded and presumably resolved relative to
# the process working directory — confirm before deploying.
llm = LlamaCppLLM(model_path="openhermes-2-mistral-7b.Q5_K_M.gguf")
|
||
|
||
@app.post("/serve") | ||
def serve(prompt: PromptRequest): | ||
result = llm( | ||
prompt=prompt.prompt, | ||
temperature=prompt.temperature, | ||
max_tokens=prompt.max_tokens, | ||
stop=prompt.stop, | ||
) | ||
return result |