Skip to content

Commit

Permalink
Initial DataChain Commit
Browse files Browse the repository at this point in the history
This adds the DataChain code to this repository and credits the top ten additional contributors from the source iterative/dvcx repository as co-authors.

---------

Co-authored-by: Ronan Lamy <ronan.lamy@gmail.com>
Co-authored-by: Ivan Longin <ivan.longin1@gmail.com>
Co-authored-by: skshetry <18718008+skshetry@users.noreply.github.com>
Co-authored-by: Jon Burdo <jon@jonburdo.com>
Co-authored-by: Dmitry Petrov <dmitry@iterative.ai>
Co-authored-by: Domas Monkus <domas@iterative.ai>
Co-authored-by: Vladimir Rudnykh <dreadatour@gmail.com>
Co-authored-by: Dave Berenbaum <dave.berenbaum@gmail.com>
Co-authored-by: Matt Seddon <37993418+mattseddon@users.noreply.github.com>
Co-authored-by: Ivan Shcheklein <shcheklein@gmail.com>
  • Loading branch information
11 people committed Jul 10, 2024
1 parent 676d83e commit fb7fdda
Show file tree
Hide file tree
Showing 252 changed files with 46,645 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .cruft.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"template": "https://github.com/iterative/py-template",
"commit": "867297aa15a0deaf5302edd01a2bc7ab87039627",
"checkout": null,
"context": {
"cookiecutter": {
"project_name": "datachain",
"package_name": "datachain",
"friendly_name": "DataChain",
"author": "Dmitry Petrov",
"email": "support@dvc.org",
"github_user": "iterative",
"version": "0.0.0",
"copyright_year": "2022",
"license": "Apache-2.0",
"docs": true,
"short_description": "Wrangle unstructured AI data at scale",
"development_status": "Development Status :: 2 - Pre-Alpha",
"_template": "https://github.com/iterative/py-template"
}
},
"directory": null
}
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# For every path: let Git auto-detect text files and normalize their line endings to LF.
* text=auto eol=lf
27 changes: 27 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# GitHub issue form for bug reports; issues created from it get the `bug` label.
name: 🐛 Bug Report
description: Report a bug to help us improve
labels: bug

body:
  # Required free-text summary of the problem.
  - type: textarea
    id: description
    attributes:
      label: Description
      # Previously an empty key (parsed as null), which gave reporters no guidance.
      description: A clear and concise description of the bug, including steps to reproduce it.
    validations:
      required: true

  # Optional environment details; `render` formats the submitted text as a code block.
  - type: textarea
    id: version
    attributes:
      label: Version Info
      description: |
        Please run the following command and copy the output below:
        ```bash
        datachain -V; python -V
        ```
      render: Text
    validations:
      required: false
4 changes: 4 additions & 0 deletions .github/ISSUE_TEMPLATE/empty_issue.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
name: Empty Issue
about: A minimal template for ordinary issues or sub-tasks
---
12 changes: 12 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# GitHub issue form for feature requests; issues created from it get the `enhancement` label.
name: 💡 Feature Request
description: Suggest a new feature or share ideas
labels: enhancement

body:
  # Single required free-text field describing the request.
  - type: textarea
    id: description
    attributes:
      label: Description
      # Previously an empty key (parsed as null), which gave requesters no guidance.
      description: Describe the feature or idea and the problem it would solve.
    validations:
      required: true
16 changes: 16 additions & 0 deletions .github/codecov.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Codecov settings: one project-level status check plus the `datachain` flag.
coverage:
  status:
    project:
      default:
        # "auto" compares coverage against the previous base commit.
        target: auto
        # Permitted drop relative to that base commit (here 10%);
        # tune according to how flaky the tests are.
        threshold: '10%'
        # Keep the status check non-blocking.
        informational: true

flags:
  # Coverage uploaded with the `datachain` flag is scoped to the package sources.
  datachain:
    paths: [src/datachain]
16 changes: 16 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Dependabot: weekly update checks for pip packages and GitHub Actions,
# both rooted at the repository top level and labeled "maintenance".
version: 2

updates:
  - package-ecosystem: 'pip'
    directory: '/'
    schedule:
      interval: 'weekly'
    labels: ['maintenance']

  - package-ecosystem: 'github-actions'
    directory: '/'
    schedule:
      interval: 'weekly'
    labels: ['maintenance']
34 changes: 34 additions & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Runs the `bench` nox session daily at 00:00, on manual dispatch,
# and on pull requests that carry the `run-benchmarks` label.
name: Benchmarks

on:
  schedule:
    - cron: "0 0 * * *"
  pull_request:
    types:
      - opened
      - reopened
      - labeled
      - synchronize
  workflow_dispatch: {}

env:
  FORCE_COLOR: '1'

jobs:
  build:
    # Non-PR triggers always run; PR runs require the `run-benchmarks` label.
    if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks') }}
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"
          cache: pip

      - name: Upgrade nox and uv
        run: |
          python -m pip install --upgrade 'nox[uv]'
          nox --version
          uv --version

      - name: Run benchmarks
        run: nox -s bench
40 changes: 40 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Builds the package when a release is published or on manual dispatch;
# only the release event goes on to upload the build to PyPI.
name: Release

on:
  release:
    types:
      - published
  workflow_dispatch:

env:
  FORCE_COLOR: '1'

jobs:
  release:
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      contents: read
      # NOTE(review): presumably needed for PyPI trusted publishing (OIDC),
      # since no API token is passed to the publish step - confirm.
      id-token: write
    steps:
      - name: Check out the repository
        uses: actions/checkout@v4
        with:
          # 0 fetches the complete history rather than a shallow clone.
          fetch-depth: 0

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"

      - name: Upgrade nox and uv
        run: |
          python -m pip install --upgrade 'nox[uv]'
          nox --version
          uv --version

      - name: Build package
        run: nox -s build

      # Manual dispatches stop after the build; only published releases upload.
      - name: Upload package
        if: github.event_name == 'release'
        uses: pypa/gh-action-pypi-publish@release/v1
127 changes: 127 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# CI for pushes to main, pull requests and manual dispatch:
# a lint job plus a test matrix across operating systems and Python versions.
name: Tests

on:
  push:
    branches: [main]
  pull_request:
  workflow_dispatch:

env:
  FORCE_COLOR: "1"

# One active run per workflow and PR branch. Events without a head_ref
# (e.g. pushes) fall back to the unique run id, so they get their own group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:

      - name: Check out the repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python 3.9
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
          cache: 'pip'

      - name: Upgrade nox and uv
        run: |
          python -m pip install --upgrade 'nox[uv]'
          nox --version
          uv --version

      # The cache key changes with the OS, the Python location and pyproject.toml.
      - name: Cache mypy
        uses: actions/cache@v4
        with:
          path: .mypy_cache
          key: mypy-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}

      # The cache key changes with the Python location and the pre-commit config.
      - name: Cache pre-commit hooks
        uses: actions/cache@v4
        with:
          path: ~/.cache/pre-commit
          key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}

      - name: Lint code
        run: nox -s lint

  tests:
    timeout-minutes: 25
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # Linux runs every supported Python version; macOS and Windows are
        # added via `include` for the oldest and newest versions only.
        os: [ubuntu-latest-8-cores]
        pyv: ['3.9', '3.10', '3.11', '3.12']
        include:
          - os: macos-latest
            pyv: '3.9'
          - os: macos-latest
            pyv: '3.12'
          - os: windows-latest-8-cores
            pyv: '3.9'
          - os: windows-latest-8-cores
            pyv: '3.12'

    steps:

      # https://github.com/iterative/pytest-servers/pull/122
      # https://github.com/abiosoft/colima/issues/468
      # https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
      # colima v0.5.6 seems to run more stable than the latest - that has occasional network failures (ports are not open)
      # see: https://github.com/abiosoft/colima/issues/962
      #
      # The release asset is picked with `uname -m` so the binary matches the
      # runner architecture (x86_64 or arm64) instead of assuming x86_64.
      # `--fail` makes curl exit non-zero on an HTTP error rather than saving
      # the error page as the colima executable.
      - name: Use colima as default docker host on MacOS
        if: runner.os == 'macOS'
        run: |
          brew install docker lima || true # avoid non-zero exit code if brew link fails
          sudo curl --fail -L -o /usr/local/bin/colima "https://github.com/abiosoft/colima/releases/download/v0.5.6/colima-Darwin-$(uname -m)"
          sudo chmod +x /usr/local/bin/colima
          colima start
          sudo ln -vsf "${HOME}"/.colima/default/docker.sock /var/run/docker.sock
        env:
          HOMEBREW_NO_AUTO_UPDATE: true
          HOMEBREW_NO_INSTALL_CLEANUP: true
          HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: true
          HOMEBREW_NO_INSTALL_UPGRADE: true

      - name: Check out the repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python ${{ matrix.pyv }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.pyv }}
          cache: 'pip'

      - name: Upgrade nox and uv
        run: |
          python -m pip install --upgrade 'nox[uv]'
          nox --version
          uv --version

      # Exported through GITHUB_ENV so the later test step sees the variable.
      - name: Skip flaky azure, gs remotes if unavailable on macos
        if: runner.os == 'macOS'
        run: echo 'DATACHAIN_TEST_SKIP_MISSING_REMOTES=azure,gs' >> "$GITHUB_ENV"

      - name: Run tests
        run: nox -s tests-${{ matrix.pyv }}

      - name: Upload coverage report
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: coverage.xml
          flags: datachain

      - name: Build package
        run: nox -s build

      - name: Build docs
        run: nox -s docs
19 changes: 19 additions & 0 deletions .github/workflows/update-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Runs the iterative/py-template action against this repository,
# once a day and on manual dispatch.
name: Update template

on:
  schedule:
    # every day at 01:05
    - cron: "5 1 * * *"

  workflow_dispatch:

jobs:
  update:
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repository
        uses: actions/checkout@v4

      - name: Update template
        uses: iterative/py-template@main
        with:
          # Use the dedicated secret when it is set, else the default workflow token.
          token: ${{ secrets.UPDATE_TEMPLATE_TOKEN || secrets.GITHUB_TOKEN }}
Loading

0 comments on commit fb7fdda

Please sign in to comment.