Skip to content

Commit

Permalink
Switch path to router.huggingface.co (#1188)
Browse files Browse the repository at this point in the history
  • Loading branch information
julien-c authored Feb 6, 2025
1 parent e47c3d7 commit 658e1b9
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 190 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the

// Chat Completion
const llamaEndpoint = inference.endpoint(
"https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
"https://router.huggingface.co/together/models/meta-llama/Llama-3.1-8B-Instruct"
);
const out = await llamaEndpoint.chatCompletion({
model: "meta-llama/Llama-3.1-8B-Instruct",
Expand Down
4 changes: 2 additions & 2 deletions packages/inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ for await (const output of hf.textGenerationStream({

### Text Generation (Chat Completion API Compatible)

Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support Messages API.
Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://huggingface.co/docs/text-generation-inference/) on Hugging Face support Messages API.

[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)

Expand Down Expand Up @@ -611,7 +611,7 @@ const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the

// Chat Completion Example
const ep = hf.endpoint(
"https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
"https://router.huggingface.co/together/models/meta-llama/Llama-3.1-8B-Instruct"
);
const stream = ep.chatCompletionStream({
model: "tgi",
Expand Down
1 change: 1 addition & 0 deletions packages/inference/src/config.ts
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
export const HF_HUB_URL = "https://huggingface.co";
export const HF_ROUTER_URL = "https://router.huggingface.co";
4 changes: 2 additions & 2 deletions packages/inference/src/lib/makeRequestOptions.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { HF_HUB_URL } from "../config";
import { HF_HUB_URL, HF_ROUTER_URL } from "../config";
import { FAL_AI_API_BASE_URL } from "../providers/fal-ai";
import { REPLICATE_API_BASE_URL } from "../providers/replicate";
import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
Expand All @@ -9,7 +9,7 @@ import { isUrl } from "./isUrl";
import { version as packageVersion, name as packageName } from "../../package.json";
import { getProviderModelId } from "./getProviderModelId";

const HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
const HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;

/**
* Lazy-loaded from huggingface.co/api/tasks when needed
Expand Down
13 changes: 7 additions & 6 deletions packages/inference/test/HfInference.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ describe.concurrent("HfInference", () => {
"HF Inference",
() => {
const hf = new HfInference(env.HF_TOKEN);

it("throws error if model does not exist", () => {
expect(
hf.fillMask({
model: "this-model-does-not-exist-123",
model: "this-model/does-not-exist-123",
inputs: "[MASK] world!",
})
).rejects.toThrowError("Not Found: Model not found");
).rejects.toThrowError("Model this-model/does-not-exist-123 does not exist");
});

it("fillMask", async () => {
Expand Down Expand Up @@ -647,7 +648,7 @@ describe.concurrent("HfInference", () => {
});

it("endpoint - makes request to specified endpoint", async () => {
const ep = hf.endpoint("https://api-inference.huggingface.co/models/openai-community/gpt2");
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/openai-community/gpt2");
const { generated_text } = await ep.textGeneration({
inputs: "one plus two equals",
});
Expand Down Expand Up @@ -685,7 +686,7 @@ describe.concurrent("HfInference", () => {
expect(out).toContain("2");
});

it("chatCompletionStream modelId Fail - OpenAI Specs", async () => {
it.skip("chatCompletionStream modelId Fail - OpenAI Specs", async () => {
expect(
hf
.chatCompletionStream({
Expand All @@ -702,7 +703,7 @@ describe.concurrent("HfInference", () => {
});

it("chatCompletion - OpenAI Specs", async () => {
const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2");
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/mistralai/Mistral-7B-Instruct-v0.2");
const res = await ep.chatCompletion({
model: "tgi",
messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }],
Expand All @@ -716,7 +717,7 @@ describe.concurrent("HfInference", () => {
}
});
it("chatCompletionStream - OpenAI Specs", async () => {
const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2");
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/mistralai/Mistral-7B-Instruct-v0.2");
const stream = ep.chatCompletionStream({
model: "tgi",
messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }],
Expand Down
Loading

0 comments on commit 658e1b9

Please sign in to comment.