Skip to content

Commit

Permalink
Revert the LLM model for Kubernetes GMC (#675)
Browse files (browse the repository at this point in the history)
* revert the LLM model to meta-llama/CodeLlama-7b-hf
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
  • Loading branch information
zhlsunshine committed Aug 30, 2024
1 parent d487093 commit f5f1e32
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion CodeGen/kubernetes/codegen_xeon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ spec:
internalService:
serviceName: tgi-service
config:
MODEL_ID: HuggingFaceH4/mistral-7b-grok
MODEL_ID: meta-llama/CodeLlama-7b-hf
endpoint: /generate
isDownstreamService: true
2 changes: 1 addition & 1 deletion CodeGen/tests/test_gmc_on_gaudi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ function validate_codegen() {
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
exit_code=$?
if [ $exit_code -ne 0 ]; then
echo "chatqna failed, please check the logs in ${LOG_PATH}!"
Expand Down
2 changes: 1 addition & 1 deletion CodeGen/tests/test_gmc_on_xeon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ function validate_codegen() {
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
exit_code=$?
if [ $exit_code -ne 0 ]; then
echo "chatqna failed, please check the logs in ${LOG_PATH}!"
Expand Down

0 comments on commit f5f1e32

Please sign in to comment.