Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
isthaison authored Dec 6, 2024
2 parents e3d2b7f + f54a8d7 commit 2975bdd
Show file tree
Hide file tree
Showing 40 changed files with 592 additions and 411 deletions.
114 changes: 114 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
name: release

on:
schedule:
- cron: '0 13 * * *' # This schedule runs every 13:00:00Z(21:00:00+08:00)
# The "create tags" trigger is specifically focused on the creation of new tags, while the "push tags" trigger is activated when tags are pushed, including both new tag creations and updates to existing tags.
create:
tags:
- "v*.*.*" # normal release
- "nightly" # the only one mutable tag

# https://docs.github.com/en/actions/using-jobs/using-concurrency
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
release:
runs-on: [ "self-hosted", "overseas" ]
steps:
- name: Ensure workspace ownership
run: echo "chown -R $USER $GITHUB_WORKSPACE" && sudo chown -R $USER $GITHUB_WORKSPACE

# https://github.com/actions/checkout/blob/v3/README.md
- name: Check out code
uses: actions/checkout@v4
with:
token: ${{ secrets.MY_GITHUB_TOKEN }} # Use the secret as an environment variable

- name: Prepare release body
run: |
if [[ $GITHUB_EVENT_NAME == 'create' ]]; then
RELEASE_TAG=${GITHUB_REF#refs/tags/}
if [[ $RELEASE_TAG == 'nightly' ]]; then
PRERELEASE=true
else
PRERELEASE=false
fi
echo "Workflow triggered by create tag: $RELEASE_TAG"
else
RELEASE_TAG=nightly
PRERELEASE=true
echo "Workflow triggered by schedule"
fi
echo "RELEASE_TAG=$RELEASE_TAG" >> $GITHUB_ENV
echo "PRERELEASE=$PRERELEASE" >> $GITHUB_ENV
RELEASE_DATETIME=$(date --rfc-3339=seconds)
echo Release $RELEASE_TAG created from $GITHUB_SHA at $RELEASE_DATETIME > release_body.md
- name: Move the existing mutable tag
# https://github.com/softprops/action-gh-release/issues/171
run: |
if [[ $GITHUB_EVENT_NAME == 'schedule' ]]; then
# Determine if a given tag exists and matches a specific Git commit.
# actions/checkout@v4 fetch-tags doesn't work when triggered by schedule
git fetch --tags
if [ "$(git rev-parse -q --verify "refs/tags/$RELEASE_TAG")" = "$GITHUB_SHA" ]; then
echo "mutable tag $RELEASE_TAG exists and matches $GITHUB_SHA"
else
git tag -f $RELEASE_TAG $GITHUB_SHA
git push -f origin $RELEASE_TAG:refs/tags/$RELEASE_TAG
echo "created/moved mutable tag $RELEASE_TAG to $GITHUB_SHA"
fi
fi
- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

# https://github.com/marketplace/actions/docker-login
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: infiniflow
password: ${{ secrets.DOCKERHUB_TOKEN }}

# https://github.com/marketplace/actions/build-and-push-docker-images
- name: Build and push full image
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: infiniflow/ragflow:${{ env.RELEASE_TAG }}
file: Dockerfile
platforms: linux/amd64

# https://github.com/marketplace/actions/build-and-push-docker-images
- name: Build and push slim image
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: infiniflow/ragflow:${{ env.RELEASE_TAG }}-slim
file: Dockerfile
build-args: LIGHTEN=1
platforms: linux/amd64

- name: Build ragflow-sdk
if: startsWith(github.ref, 'refs/tags/v')
run: |
apt install -y pipx && \
pipx install poetry && \
cd sdk/python && \
poetry build
- name: Publish package distributions to PyPI
if: startsWith(github.ref, 'refs/tags/v')
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: dist/
password: ${{ secrets.PYPI_API_TOKEN }}
verbose: true
3 changes: 1 addition & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,8 @@ jobs:
- name: Build ragflow:dev-slim
run: |
RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data ${RUNNER_WORKSPACE_PREFIX}/libssl*.deb ${RUNNER_WORKSPACE_PREFIX}/tika-server*.jar* ${RUNNER_WORKSPACE_PREFIX}/chrome* ${RUNNER_WORKSPACE_PREFIX}/cl100k_base.tiktoken .
sudo docker pull ubuntu:22.04
sudo docker build --progress=plain -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
sudo docker build --progress=plain --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim .
- name: Build ragflow:dev
run: |
Expand Down
147 changes: 68 additions & 79 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,57 @@ FROM ubuntu:22.04 AS base
USER root
SHELL ["/bin/bash", "-c"]

ENV LIGHTEN=0
ARG LIGHTEN=0
ENV LIGHTEN=${LIGHTEN}

WORKDIR /ragflow

RUN rm -f /etc/apt/apt.conf.d/docker-clean \
&& echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache

RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
apt update && apt-get --no-install-recommends install -y ca-certificates
# Copy models downloaded via download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
tar --exclude='.*' -cf - \
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
/huggingface.co/InfiniFlow/deepdoc \
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
if [ "$LIGHTEN" == "0" ]; then \
(tar -cf - \
/huggingface.co/BAAI/bge-large-zh-v1.5 \
/huggingface.co/BAAI/bge-reranker-v2-m3 \
/huggingface.co/maidalun1020/bce-embedding-base_v1 \
/huggingface.co/maidalun1020/bce-reranker-base_v1 \
| tar -xf - --strip-components=2 -C /root/.ragflow) \
fi

# Setup apt mirror site
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
cp -r /deps/nltk_data /root/ && \
cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \
cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4

ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"

# Setup apt
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list && \
rm -f /etc/apt/apt.conf.d/docker-clean && \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
apt update && apt --no-install-recommends install -y ca-certificates && \
rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
# cv2 requires libGL.so.1
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git \
&& rm -rf /var/lib/apt/lists/*

RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
&& pipx install poetry \
&& /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git nginx libgl1 vim less && \
rm -rf /var/lib/apt/lists/*

# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
--mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
if [ "$(uname -m)" = "x86_64" ]; then \
dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
elif [ "$(uname -m)" = "aarch64" ]; then \
dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
fi
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \
pipx install poetry && \
pipx runpip poetry config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pipx runpip poetry config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \
/root/.local/bin/poetry self add poetry-plugin-pypi-mirror

ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
ENV PATH=/root/.local/bin:$PATH
Expand All @@ -45,14 +65,34 @@ ENV POETRY_REQUESTS_TIMEOUT=15
ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/

# nodejs 12.22 on Ubuntu 22.04 is too old
RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt purge -y nodejs npm && \
apt autoremove && \
apt update && \
apt install -y nodejs cargo && \
rm -rf /var/lib/apt/lists/*

# Add dependencies of selenium
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
unzip /chrome-linux64.zip && \
mv chrome-linux64 /opt/chrome && \
ln -s /opt/chrome/chrome /usr/local/bin/
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
mv chromedriver /usr/local/bin/ && \
rm -f /usr/bin/google-chrome

# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
if [ "$(uname -m)" = "x86_64" ]; then \
dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
elif [ "$(uname -m)" = "aarch64" ]; then \
dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
fi


# builder stage
FROM base AS builder
USER root
Expand All @@ -62,7 +102,7 @@ WORKDIR /ragflow
# install dependencies from poetry.lock file
COPY pyproject.toml poetry.toml poetry.lock ./

RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
RUN --mount=type=cache,id=ragflow_poetry,target=/root/.cache/pypoetry,sharing=locked \
if [ "$LIGHTEN" == "1" ]; then \
poetry install --no-root; \
else \
Expand All @@ -71,20 +111,12 @@ RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sh

COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
cd web && npm install --force && npm run build

COPY .git /ragflow/.git

RUN current_commit=$(git rev-parse --short HEAD); \
last_tag=$(git describe --tags --abbrev=0); \
commit_count=$(git rev-list --count "$last_tag..HEAD"); \
version_info=""; \
if [ "$commit_count" -eq 0 ]; then \
version_info=$last_tag; \
else \
version_info="$current_commit($last_tag~$commit_count)"; \
fi; \
RUN version_info=$(git describe --tags --match=v* --dirty); \
if [ "$LIGHTEN" == "1" ]; then \
version_info="$version_info slim"; \
else \
Expand All @@ -104,49 +136,6 @@ ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install python packages' dependencies
# cv2 requires libGL.so.1
RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
rm -rf /var/lib/apt/lists/*

# Copy models downloaded via download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
tar --exclude='.*' -cf - \
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
/huggingface.co/InfiniFlow/deepdoc \
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
tar -cf - \
/huggingface.co/BAAI/bge-large-zh-v1.5 \
/huggingface.co/BAAI/bge-reranker-v2-m3 \
/huggingface.co/maidalun1020/bce-embedding-base_v1 \
/huggingface.co/maidalun1020/bce-reranker-base_v1 \
| tar -xf - --strip-components=2 -C /root/.ragflow

# Copy nltk data downloaded via download_deps.py
COPY nltk_data /root/nltk_data

# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
COPY tika-server-standard-3.0.0.jar /ragflow/tika-server-standard.jar
COPY tika-server-standard-3.0.0.jar.md5 /ragflow/tika-server-standard.jar.md5
ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar"

# Copy cl100k_base
COPY cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4

# Add dependencies of selenium
RUN --mount=type=bind,source=chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
unzip /chrome-linux64.zip && \
mv chrome-linux64 /opt/chrome && \
ln -s /opt/chrome/chrome /usr/local/bin/
RUN --mount=type=bind,source=chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
mv chromedriver /usr/local/bin/ && \
rm -f /usr/bin/google-chrome

ENV PYTHONPATH=/ragflow/

COPY web web
Expand Down
10 changes: 10 additions & 0 deletions Dockerfile.deps
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# This builds an image that contains the resources needed by Dockerfile
#
FROM ubuntu:22.04

# Copy resources downloaded via download_deps.py
COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 libssl*.deb /

COPY nltk_data /nltk_data

COPY huggingface.co /huggingface.co
Loading

0 comments on commit 2975bdd

Please sign in to comment.