Merge branch 'mindsdb:staging' into staging

clickzetta · Jan 15, 2024 · 74edf5b · 74edf5b
2 parents 236bddb + b62f485
commit 74edf5b
Show file tree

Hide file tree

Showing 12 changed files with 129 additions and 35 deletions.
diff --git a/.github/workflows/docker_build.yml → .github/workflows/deploy-dev.yml b/.github/workflows/docker_build.yml → .github/workflows/deploy-dev.yml
@@ -1,4 +1,4 @@
-name: MindsDB Docker Build
+name: Deploy to dev
 
 on:
   pull_request:
@@ -64,6 +64,6 @@ jobs:
           owner: mindsdb
           repo: INTERNAL-mindsdb-build-deploy-to-kubernetes
           github_token: ${{ secrets.REPO_DISPATCH_PAT_TOKEN }}
-          workflow_file_name: dev-deploy.yml
+          workflow_file_name: deploy-dev.yml
           ref: master
           client_payload: '{"image-tag-prefix": "${{ env.CI_SHA }}", "deploy-env": "${{matrix.deploy-env}}"}'
diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml
@@ -0,0 +1,41 @@
+name: Deploy to staging
+
+on:
+  push:
+    branches:
+      - stable  # Only pushes to the 'stable' branch start this workflow
+
+jobs:
+  build:
+    # Build and push our docker images as defined in docker/docker-bake.hcl
+    runs-on: [self-hosted, dev]
+    steps:
+    - uses: actions/checkout@v2
+    # Get clean environment variables via https://github.com/marketplace/actions/github-environment-variables-action
+    - uses: FranzDiebold/github-env-vars-action@v2  # presumably the source of env.CI_SHA used below - confirm
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+    - name: Login to Amazon ECR
+      uses: aws-actions/amazon-ecr-login@v1
+    - name: Build and push
+      shell: bash
+      run: |
+        docker buildx create --name=remote-buildkit-agent --driver=remote --use tcp://remote-buildkit-agent.infrastructure.svc.cluster.local:80 || true # Create the builder (might already exist)
+        VERSION=${{ env.CI_SHA }} docker buildx bake --push --progress plain -f docker/docker-bake.hcl
+
+  trigger_deploy:
+    # Dispatch the deploy workflow in the private repo with deploy-env set to staging
+    runs-on: [self-hosted, dev]
+    needs: [build]  # Starts only after the 'build' job has completed successfully
+    environment:
+      name: staging
+    steps:
+      - uses: FranzDiebold/github-env-vars-action@v2
+      - uses: convictional/trigger-workflow-and-wait@v1.6.5
+        with:
+          owner: mindsdb
+          repo: INTERNAL-mindsdb-build-deploy-to-kubernetes
+          github_token: ${{ secrets.REPO_DISPATCH_PAT_TOKEN }}
+          workflow_file_name: deploy-dev.yml  # NOTE(review): dev workflow file reused for staging; presumably the env is picked via deploy-env below - confirm
+          ref: master
+          client_payload: '{"image-tag-prefix": "${{ env.CI_SHA }}", "deploy-env": "staging"}'  # Same CI_SHA the build job passed as VERSION
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -45,8 +45,9 @@ jobs:
         python setup.py sdist
         twine upload dist/*
 
-  deploy_to_dockerhub:
+  docker_build:
       # Build our docker images based on our bake file
+      # This will tag with the release version tag and push to both dockerhub and ECR
       runs-on: [self-hosted, dev]
       needs: check-version
       if: github.actor != 'mindsdbadmin'
@@ -61,8 +62,28 @@ jobs:
           with:
             username: ${{ secrets.DOCKER_USERNAME }}
             password: ${{ secrets.DOCKER_PASSWORD }}
+        - name: Login to Amazon ECR
+          uses: aws-actions/amazon-ecr-login@v1
         - name: Build and push
           shell: bash
           run: |
             docker buildx create --name=remote-buildkit-agent --driver=remote --use tcp://remote-buildkit-agent.infrastructure.svc.cluster.local:80 || true # Create the builder (might already exist)
-            VERSION=${{ env.CI_REF_NAME }} REGISTRY=mindsdb docker buildx bake --push --progress plain -f docker/docker-bake.hcl
+            VERSION=${{ env.CI_REF_NAME }} PUSH_TO_DOCKERHUB=true docker buildx bake --push --progress plain -f docker/docker-bake.hcl
+
+  trigger_deploy:
+    # Trigger private repo to deploy to prod env
+    runs-on: [self-hosted, dev]
+    needs: docker_build
+    if: github.actor != 'mindsdbadmin'
+    environment:
+      name: prod
+    steps:
+      - uses: FranzDiebold/github-env-vars-action@v2
+      - uses: convictional/trigger-workflow-and-wait@v1.6.5
+        with:
+          owner: mindsdb
+          repo: INTERNAL-mindsdb-build-deploy-to-kubernetes
+          github_token: ${{ secrets.REPO_DISPATCH_PAT_TOKEN }}
+          workflow_file_name: deploy-prod.yml
+          ref: master
+          client_payload: '{"image-tag-prefix": "${{ env.CI_REF_NAME }}", "deploy-env": "prod"}'
diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl
@@ -1,18 +1,33 @@
+# The default targets to be built if none are specified
 group "default" {
   targets = ["bare", "devel", "cloud", "lightwood", "huggingface"]
 }
 
-
-variable "REGISTRY" {
-  default = "454861456664.dkr.ecr.us-east-2.amazonaws.com"
+variable "PUSH_TO_DOCKERHUB" {
+  default = false
 }
 variable "IMAGE" {
   default = "mindsdb"
 }
+# This is a semver for releases but otherwise is a github sha
 variable "VERSION" {
   default = "unknown"
 }
 
+# Build the full list of image tags for one bake target name.
+# e.g. get_tags("cloud") yields "<ECR>/${IMAGE}:${VERSION}-cloud" plus "<ECR>/${IMAGE}:cloud",
+# the latter acting as a 'latest' tag for that flavour; get_tags("") yields ":${VERSION}" and ":latest".
+# If PUSH_TO_DOCKERHUB is true the same two tags are also produced under "mindsdb/";
+# otherwise those two list entries are empty strings.
+function "get_tags" {
+  params = [target]
+  result = [
+    "454861456664.dkr.ecr.us-east-2.amazonaws.com/${IMAGE}:${VERSION}${notequal(target, "") ? "-" : ""}${target}",
+    "454861456664.dkr.ecr.us-east-2.amazonaws.com/${IMAGE}:${notequal(target, "") ? target : "latest"}",
+    PUSH_TO_DOCKERHUB ? "mindsdb/${IMAGE}:${VERSION}${notequal(target, "") ? "-" : ""}${target}" : "",
+    PUSH_TO_DOCKERHUB ? "mindsdb/${IMAGE}:${notequal(target, "") ? target : "latest"}" : ""
+  ]
+} 
 
 # This is effectively the base image for all of our images.
 # We define it separately so we can use it as a base and only build it once.
@@ -21,50 +36,51 @@ target "builder" {
   target = "build"
   platforms = ["linux/amd64", "linux/arm64"]
 }
+
 # Common traits of every image that we use to reduce duplication below.
 target "_common" {
   dockerfile = "docker/mindsdb.Dockerfile" # If you change this, also change it in target:builder
   contexts = {
-    builder = "target:builder" # Use a target to only perform base build steps once
+    build = "target:builder" # Use a target to only perform base build steps once
   }
   platforms = ["linux/amd64", "linux/arm64"]
 }
 
 
 
-### IMAGES ###
+### OUTPUT IMAGES ###
 
 target "bare" {
   inherits = ["_common"]
-  tags = ["${REGISTRY}/${IMAGE}:${VERSION}", "${REGISTRY}/${IMAGE}:latest"]
+  tags = get_tags("")
 }
 
 target "devel" {
   inherits = ["_common"]
-  tags = ["${REGISTRY}/${IMAGE}:${VERSION}-dev", "${REGISTRY}/${IMAGE}:dev"]
+  tags = get_tags("dev")
   target = "dev"
 }
 
 target "cloud" {
   inherits = ["_common"]
+  tags = get_tags("cloud")
   args = {
-    EXTRAS = ".[lightwood,huggingface,statsforecast-extra,neuralforecast-extra,timegpt,surrealdb,youtube,ignite,gmail,pgvector]"
+    EXTRAS = ".[lightwood,huggingface,statsforecast-extra,neuralforecast-extra,timegpt,surrealdb,youtube,ignite,gmail,pgvector] darts datasetsforecast"
   }
-  tags = ["${REGISTRY}/${IMAGE}:${VERSION}-cloud", "${REGISTRY}/${IMAGE}:cloud"]
 }
 
 target "lightwood" {
   inherits = ["_common"]
+  tags = get_tags("lightwood")
   args = {
     EXTRAS = ".[lightwood]"
   }
-  tags = ["${REGISTRY}/${IMAGE}:${VERSION}-lightwood", "${REGISTRY}/${IMAGE}:lightwood"]
 }
 
 target "huggingface" {
   inherits = ["_common"]
+  tags = get_tags("huggingface")
   args = {
     EXTRAS = ".[huggingface]"
   }
-  tags = ["${REGISTRY}/${IMAGE}:${VERSION}-huggingface", "${REGISTRY}/${IMAGE}:huggingface"]
 }
diff --git a/docs/agents/chatbot.mdx b/docs/agents/chatbot.mdx
@@ -44,9 +44,9 @@ Here is how to interact with chatbots using MindsDB SQL:
     * `enable_dms` is the initially supported mode of talking to a chatbot. A chatbot responds to direct messages.
     * `is_running` indicates whether or not to start the chatbot upon creation.
 
-    <Tip>
-    Currently, the recommended chat app is Slack. MS Teams will be fully supported soon.
-    </Tip>
+    <Note>
+    If you want to use Slack in the [`CREATE CHATBOT`](/agents/chatbot) syntax, use [this method of connecting Slack to MindsDB](/integrations/app-integrations/slack#method-1-chatbot-responds-in-direct-messages-to-a-slack-app).
+    </Note>
 
 * Deleting a chatbot:
 

diff --git a/docs/finetune/anyscale.mdx b/docs/finetune/anyscale.mdx
@@ -2,6 +2,10 @@
 title: Mistral7B Model Fine-Tuning
 ---
 
+<Tip>
+Follow [this blog post](https://mindsdb.com/blog/fine-tuning-an-ai-model-in-mindsdb-using-anyscale-endpoints) for a comprehensive tutorial on how to fine-tune a Mistral 7B model.
+</Tip>
+
 <p align="center">
   <img src="https://docs.google.com/drawings/d/e/2PACX-1vTtA4vBzgbVQS9laQf6i5cYQpvP0-trZ5nYt6WJZHw-Iv_TssIWJxlLQnmgiT0SNQadyybSSsA3FH1J/pub?w=955&h=460" />
 </p>

diff --git a/docs/integrations/app-integrations/slack.mdx b/docs/integrations/app-integrations/slack.mdx
@@ -26,6 +26,10 @@ Please note that `app_token` is an optional parameter. Without providing it, you
 
 One way to connect Slack is to use both bot and app tokens. By following the instructions below, you'll set up the Slack app and be able to message this Slack app directly to chat with the bot.
 
+<Note>
+If you want to use Slack in the [`CREATE CHATBOT`](/agents/chatbot) syntax, use this method of connecting Slack to MindsDB.
+</Note>
+
 Here is how to set up a Slack app and generate both a Slack bot token and a Slack app token:
 
   1. Follow [this link](https://api.slack.com/apps) and sign in with your Slack account.

diff --git a/docs/sql/api/describe.mdx b/docs/sql/api/describe.mdx
@@ -17,6 +17,12 @@ Here is how to retrieve general information on the model:
 DESCRIBE model_name;
 ```
 
+Or:
+
+```sql
+DESCRIBE MODEL model_name;
+```
+
 This command is similar to the below command:
 
 ```sql
@@ -34,7 +40,7 @@ One difference between these two commands is that `DESCRIBE` outputs an addition
 MindsDB uses the Lightwood engine by default. Let's see how to describe such models.
 
 ```sql
-DESCRIBE home_rentals_model;
+DESCRIBE [MODEL] home_rentals_model;
 ```
 
 On execution we get:
@@ -53,7 +59,7 @@ The `tables` output column lists all available options to describe a model.
 <Tab title='DESCRIBE info'>
 
 ```sql
-DESCRIBE home_rentals_model.info;
+DESCRIBE [MODEL] home_rentals_model.info;
 ```
 
 The above command returns the following output columns:
@@ -89,7 +95,7 @@ The above command returns the following output columns:
 <Tab title='DESCRIBE features'>
 
 ```sql
-DESCRIBE home_rentals_model.features;
+DESCRIBE [MODEL] home_rentals_model.features;
 ```
 
 The above command returns the following output columns:
@@ -125,7 +131,7 @@ The above command returns the following output columns:
 <Tab title='DESCRIBE model'>
 
 ```sql
-DESCRIBE home_rentals_model.model;
+DESCRIBE [MODEL] home_rentals_model.model;
 ```
 
 The above command returns the following output columns:
@@ -165,7 +171,7 @@ The above command returns the following output columns:
 <Tab title='DESCRIBE jsonai'>
 
 ```sql
-DESCRIBE home_rentals_model.jsonai;
+DESCRIBE [MODEL] home_rentals_model.jsonai;
 ```
 
 The above command returns the following output column:
@@ -193,7 +199,7 @@ The above command returns the following output column:
 MindsDB offers NLP models that utilize either Hugging Face or OpenAI engines. Let's see how to describe such models.
 
 ```sql
-DESCRIBE sentiment_classifier;
+DESCRIBE [MODEL] sentiment_classifier;
 ```
 
 On execution we get:
@@ -212,7 +218,7 @@ The `tables` output column lists all available options to describe a model.
 <Tab title='DESCRIBE args'>
 
 ```sql
-DESCRIBE sentiment_classifier.args;
+DESCRIBE [MODEL] sentiment_classifier.args;
 ```
 
 The above command returns the following output columns:
@@ -240,7 +246,7 @@ The above command returns the following output columns:
 <Tab title='DESCRIBE metadata'>
 
 ```sql
-DESCRIBE sentiment_classifier.metadata;
+DESCRIBE [MODEL] sentiment_classifier.metadata;
 ```
 
 The above command returns the following output columns:
@@ -272,7 +278,7 @@ The above command returns the following output columns:
 MindsDB integrates Nixtla engines, such as StatsForecast, NeuralForecast, and HierarchicalForecast. Let's see how to describe models based on Nixtla engines.
 
 ```sql
-DESCRIBE quarterly_expenditure_forecaster;
+DESCRIBE [MODEL] quarterly_expenditure_forecaster;
 ```
 
 On execution we get:
@@ -291,7 +297,7 @@ The `tables` output column lists all available options to describe a model.
 <Tab title='DESCRIBE info'>
 
 ```sql
-DESCRIBE quarterly_expenditure_forecaster.info;
+DESCRIBE [MODEL] quarterly_expenditure_forecaster.info;
 ```
 
 The above command returns the following output columns:
@@ -323,7 +329,7 @@ The above command returns the following output columns:
 <Tab title='DESCRIBE features'>
 
 ```sql
-DESCRIBE quarterly_expenditure_forecaster.features;
+DESCRIBE [MODEL] quarterly_expenditure_forecaster.features;
 ```
 
 The above command returns the following output columns:
@@ -355,7 +361,7 @@ The above command returns the following output columns:
 <Tab title='DESCRIBE model'>
 
 ```sql
-DESCRIBE quarterly_expenditure_forecaster.model;
+DESCRIBE [MODEL] quarterly_expenditure_forecaster.model;
 ```
 
 The above command returns the following output columns:
@@ -395,13 +401,13 @@ The above command returns the following output columns:
 Models that utilize LangChain or are brought to MindsDB via MLflow can be described as follows:
 
 ```sql
-DESCRIBE other_model;
+DESCRIBE [MODEL] other_model;
 ```
 
 The above command returns `["info"]` in its first output column.
 
 ```sql
-DESCRIBE other_model.info;
+DESCRIBE [MODEL] other_model.info;
 ```
 
 The above command lists basic model information.

diff --git a/docs/sql/api/finetune.mdx b/docs/sql/api/finetune.mdx
@@ -18,7 +18,7 @@ Imagine you have a model that was trained with a certain dataset. Now there is m
 Here is the syntax:
 
 ```sql
-FINETUNE project_name.model_name
+FINETUNE [MODEL] project_name.model_name
 FROM integration_name
     (SELECT column_name, ... FROM table_name)
 [USING

diff --git a/docs/sql/api/retrain.mdx b/docs/sql/api/retrain.mdx
@@ -7,14 +7,14 @@ sidebarTitle: Retrain a Model
 
 The `RETRAIN` statement is used to retrain the already trained predictors with the new data. The predictor is updated to leverage the new data in optimizing its predictive capabilities.
 
-Retraining takes at least as much time as the training process of the predictor did because now the dataset used to retrain has new or updated data.
+Retraining takes at least as much time as the training process of the predictor did because now the dataset used to retrain has new or updated data in addition to the *old* data.
 
 ## Syntax
 
 Here is the syntax:
 
 ```sql
-RETRAIN project_name.predictor_name
+RETRAIN [MODEL] project_name.predictor_name
 [FROM integration_name
     (SELECT column_name, ... FROM table_name)
 PREDICT target_name

diff --git a/mindsdb/integrations/handlers/vertex_handler/requirements.txt b/mindsdb/integrations/handlers/vertex_handler/requirements.txt
@@ -0,0 +1 @@
+google-cloud-aiplatform>=1.35.0