Skip to content

Commit

Permalink
Initial DataChain Commit (#1)
Browse files Browse the repository at this point in the history
This adds the DataChain code to this repository and credits the top ten additional contributors from the source iterative/dvcx repository as co-authors.

---------

Co-authored-by: Ronan Lamy <ronan.lamy@gmail.com>
Co-authored-by: Ivan Longin <ivan.longin1@gmail.com>
Co-authored-by: skshetry <18718008+skshetry@users.noreply.github.com>
Co-authored-by: Jon Burdo <jon@jonburdo.com>
Co-authored-by: Dmitry Petrov <dmitry@iterative.ai>
Co-authored-by: Domas Monkus <domas@iterative.ai>
Co-authored-by: Vladimir Rudnykh <dreadatour@gmail.com>
Co-authored-by: Dave Berenbaum <dave.berenbaum@gmail.com>
Co-authored-by: Matt Seddon <37993418+mattseddon@users.noreply.github.com>
Co-authored-by: Ivan Shcheklein <shcheklein@gmail.com>
  • Loading branch information
11 people authored Jul 10, 2024
1 parent 676d83e commit 317c955
Show file tree
Hide file tree
Showing 252 changed files with 46,750 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .cruft.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"template": "https://github.com/iterative/py-template",
"commit": "867297aa15a0deaf5302edd01a2bc7ab87039627",
"checkout": null,
"context": {
"cookiecutter": {
"project_name": "datachain",
"package_name": "datachain",
"friendly_name": "DataChain",
"author": "Dmitry Petrov",
"email": "support@dvc.org",
"github_user": "iterative",
"version": "0.0.0",
"copyright_year": "2022",
"license": "Apache-2.0",
"docs": true,
"short_description": "Wrangle unstructured AI data at scale",
"development_status": "Development Status :: 2 - Pre-Alpha",
"_template": "https://github.com/iterative/py-template"
}
},
"directory": null
}
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Every path: let Git auto-detect text files and use LF line endings for them.
* text=auto eol=lf
27 changes: 27 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# GitHub issue form for bug reports; new issues get the `bug` label.
name: 🐛 Bug Report
description: Report a bug to help us improve
labels: bug

body:
  # Free-form account of the problem; this is the only required field.
  - type: textarea
    id: description
    attributes:
      label: Description
      # Explicit guidance text instead of a bare (null) value.
      description: >-
        A clear and concise description of the bug, including the steps to
        reproduce it and the behavior you expected.
    validations:
      required: true

  # Optional version details; `render` formats the answer as a code block.
  - type: textarea
    id: version
    attributes:
      label: Version Info
      description: |
        Please run the following command and copy the output below:
        ```bash
        datachain -V; python -V
        ```
      render: Text
    validations:
      required: false
4 changes: 4 additions & 0 deletions .github/ISSUE_TEMPLATE/empty_issue.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
name: Empty Issue
about: A minimal template for ordinary issues or sub-tasks
---
12 changes: 12 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# GitHub issue form for feature requests; new issues get the `enhancement` label.
name: 💡 Feature Request
description: Suggest a new feature or share ideas
labels: enhancement

body:
  # Single required free-form field describing the request.
  - type: textarea
    id: description
    attributes:
      label: Description
      # Explicit guidance text instead of a bare (null) value.
      description: >-
        A clear and concise description of the feature or idea, and the
        problem it would solve.
    validations:
      required: true
16 changes: 16 additions & 0 deletions .github/codecov.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Codecov settings: project-level status check plus the `datachain` flag.
coverage:
  status:
    project:
      default:
        # "auto" measures against the coverage of the previous base commit
        target: auto
        # tolerated drop relative to that base commit (10%); tune this to
        # match how flaky the test suite is
        threshold: 10%
        # report the status without blocking
        informational: true

# Coverage uploaded under the `datachain` flag is scoped to the package sources.
flags:
  datachain:
    paths:
      - src/datachain
16 changes: 16 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Dependabot: weekly update checks for pip and GitHub Actions dependencies,
# both rooted at the repository top level and labeled `maintenance`.
version: 2

updates:
  - package-ecosystem: 'pip'
    directory: '/'
    schedule:
      interval: 'weekly'
    labels:
      - 'maintenance'

  - package-ecosystem: 'github-actions'
    directory: '/'
    schedule:
      interval: 'weekly'
    labels:
      - 'maintenance'
34 changes: 34 additions & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Runs the `bench` nox session daily (cron 00:00), on manual dispatch, and on
# pull requests that carry the `run-benchmarks` label.
name: Benchmarks

on:
  schedule:
    - cron: "0 0 * * *"
  pull_request:
    types:
      - opened
      - reopened
      - labeled
      - synchronize
  workflow_dispatch: {}

env:
  FORCE_COLOR: "1"

jobs:
  build:
    # Pull-request events only run when the PR has the `run-benchmarks` label;
    # every other trigger always runs.
    if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks') }}
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # quoted so the version is not read as the float 3.1
          python-version: "3.10"
          cache: "pip"

      - name: Upgrade nox and uv
        run: |
          python -m pip install --upgrade 'nox[uv]'
          nox --version
          uv --version

      - name: Run benchmarks
        run: nox -s bench
40 changes: 40 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Builds the package on every run; uploads it only for published releases.
name: Release

on:
  release:
    types:
      - published
  workflow_dispatch:

env:
  FORCE_COLOR: "1"

jobs:
  release:
    environment: pypi
    # No credentials are passed to the upload step below.
    # NOTE(review): `id-token: write` is presumably there for PyPI trusted
    # publishing (OIDC) - confirm against the PyPI project settings.
    permissions:
      contents: read
      id-token: write
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repository
        uses: actions/checkout@v4
        with:
          # full history rather than a shallow clone
          fetch-depth: 0

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # quoted so the version is not read as the float 3.1
          python-version: "3.10"

      - name: Upgrade nox and uv
        run: |
          python -m pip install --upgrade 'nox[uv]'
          nox --version
          uv --version

      - name: Build package
        run: nox -s build

      # Manual dispatches stop after the build; only release events upload.
      - name: Upload package
        if: github.event_name == 'release'
        uses: pypa/gh-action-pypi-publish@release/v1
127 changes: 127 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# CI: lint once on Linux, then run the test suite across the OS/Python matrix.
name: Tests

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

env:
  FORCE_COLOR: "1"

# One active run per workflow and PR head branch (or per run id when there is
# no head ref); a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python 3.9
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"
          cache: "pip"

      - name: Upgrade nox and uv
        run: |
          python -m pip install --upgrade 'nox[uv]'
          nox --version
          uv --version

      # Cache key changes with the OS, the Python install and pyproject.toml.
      - name: Cache mypy
        uses: actions/cache@v4
        with:
          path: .mypy_cache
          key: mypy-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}

      # Cache key changes with the Python install and the pre-commit config.
      - name: Cache pre-commit hooks
        uses: actions/cache@v4
        with:
          path: ~/.cache/pre-commit
          key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}

      - name: Lint code
        run: nox -s lint

  tests:
    timeout-minutes: 25
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      # Every listed Python version on Linux; macOS and Windows only get the
      # oldest and newest versions via `include`.
      matrix:
        os:
          - ubuntu-latest-8-cores
        pyv:
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"
        include:
          - os: macos-latest
            pyv: "3.9"
          - os: macos-latest
            pyv: "3.12"
          - os: windows-latest-8-cores
            pyv: "3.9"
          - os: windows-latest-8-cores
            pyv: "3.12"

    steps:
      # Background for the colima setup below:
      #   https://github.com/iterative/pytest-servers/pull/122
      #   https://github.com/abiosoft/colima/issues/468
      #   https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
      # colima is pinned to v0.5.6 because it seems more stable than the latest
      # release, which has occasional network failures (ports are not open);
      # see https://github.com/abiosoft/colima/issues/962
      # NOTE(review): the download is the x86_64 asset - confirm it matches the
      # architecture of the `macos-latest` runner image.
      - name: Use colima as default docker host on MacOS
        if: runner.os == 'macOS'
        env:
          HOMEBREW_NO_AUTO_UPDATE: "true"
          HOMEBREW_NO_INSTALL_CLEANUP: "true"
          HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "true"
          HOMEBREW_NO_INSTALL_UPGRADE: "true"
        run: |
          brew install docker lima || true # avoid non-zero exit code if brew link fails
          sudo curl -L -o /usr/local/bin/colima https://github.com/abiosoft/colima/releases/download/v0.5.6/colima-Darwin-x86_64
          sudo chmod +x /usr/local/bin/colima
          colima start
          sudo ln -vsf "${HOME}"/.colima/default/docker.sock /var/run/docker.sock

      - name: Check out the repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python ${{ matrix.pyv }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.pyv }}
          cache: "pip"

      - name: Upgrade nox and uv
        run: |
          python -m pip install --upgrade 'nox[uv]'
          nox --version
          uv --version

      # Exported through GITHUB_ENV so the later steps of this job see it.
      - name: Skip flaky azure, gs remotes if unavailable on macos
        if: runner.os == 'macOS'
        run: echo 'DATACHAIN_TEST_SKIP_MISSING_REMOTES=azure,gs' >> "$GITHUB_ENV"

      - name: Run tests
        run: nox -s tests-${{ matrix.pyv }}

      - name: Upload coverage report
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: coverage.xml
          flags: datachain

      - name: Build package
        run: nox -s build

      - name: Build docs
        run: nox -s docs
19 changes: 19 additions & 0 deletions .github/workflows/update-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Runs the iterative/py-template action against this repository on a daily
# schedule or on manual dispatch.
name: Update template

on:
  schedule:
    # every day at 01:05
    - cron: "5 1 * * *"
  workflow_dispatch:

jobs:
  update:
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repository
        uses: actions/checkout@v4

      - name: Update template
        uses: iterative/py-template@main
        with:
          # use the dedicated secret when set, otherwise the default workflow token
          token: ${{ secrets.UPDATE_TEMPLATE_TOKEN || secrets.GITHUB_TOKEN }}
Loading

0 comments on commit 317c955

Please sign in to comment.