feat(examples): use QuantizedModelForCausalLM #327

	name: Linux CUDA tests

	on:
	push:
	branches:
	- main
	paths:
	- "optimum/quanto/**"
	- "test/**"
	- "pyproject.toml"
	pull_request:
	types: [assigned, opened, synchronize, reopened]
	paths:
	- "optimum/quanto/**"
	- "test/**"
	- "pyproject.toml"

	jobs:
	check-commits:
	uses: ./.github/workflows/check-commits.yml
	python-quality:
	uses: ./.github/workflows/python-quality.yml
	test-ubuntu-cuda:
	needs: [check-commits, python-quality]
	runs-on:
	group: aws-g5-4xlarge-plus
	strategy:
	fail-fast: false
	matrix:
	cuda-version: ["11.8", "12.1", "12.4"]
	container:
	image: pytorch/pytorch:2.4.0-cuda${{ matrix.cuda-version }}-cudnn9-devel
	options: --gpus 0

	steps:
	- uses: actions/checkout@v2
	- name: Check CUDA installation
	run: \|
	nvcc -V

	- name: Build and install quanto
	run: \|
	pip install --upgrade pip
	pip install -e .[dev]

	- name: Run base tests
	run: \|
	python -m pytest test --ignore=test/models --ignore=test/cli

	- name: Run models tests
	run: \|
	pip install accelerate transformers diffusers
	python -m pytest test/models

	- name: Run CLI tests
	run: \|
	pip install optimum
	python -m pytest test/cli

Provide feedback