Skip to content

Commit

Permalink
ci: updated ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Dec 15, 2024
1 parent 24e391c commit 4c59bde
Show file tree
Hide file tree
Showing 25 changed files with 59 additions and 202 deletions.
4 changes: 3 additions & 1 deletion docs/tutorials/filter_corpus_using_quality.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@
"from datasets import load_dataset\n",
"\n",
"# stream in the dataset\n",
"dataset = load_dataset(\"mc4\", \"en\", streaming=True, split=\"train\", trust_remote_code=True)\n",
"dataset = load_dataset(\n",
" \"mc4\", \"en\", streaming=True, split=\"train\", trust_remote_code=True\n",
")\n",
"\n",
"# download the first 1 000\n",
"dataset = dataset.take(1000)\n",
Expand Down
13 changes: 7 additions & 6 deletions docs/tutorials/sklearn_integration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
],
"source": [
"from textdescriptives.utils import load_sms_data\n",
"\n",
"df = load_sms_data()\n",
"df.head()"
]
Expand All @@ -152,7 +153,7 @@
"# to textdescriptives.extract_metrics\n",
"descriptive_stats_extractor = TextDescriptivesFeaturizer(\n",
" lang=\"en\", metrics=[\"descriptive_stats\"]\n",
" )"
")"
]
},
{
Expand Down Expand Up @@ -184,7 +185,7 @@
"from sklearn.pipeline import Pipeline\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.model_selection import train_test_split \n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn import set_config\n",
"\n",
Expand All @@ -197,10 +198,9 @@
" (\n",
" \"featurizer\",\n",
" ColumnTransformer(\n",
" [(\"text_processing\", descriptive_stats_extractor, \"message\")]\n",
" ,\n",
" # removes the `text_processing__` prefix from feature names\n",
" verbose_feature_names_out=False, \n",
" [(\"text_processing\", descriptive_stats_extractor, \"message\")],\n",
" # removes the `text_processing__` prefix from feature names\n",
" verbose_feature_names_out=False,\n",
" ),\n",
" ),\n",
" (\"imputer\", SimpleImputer(strategy=\"median\")),\n",
Expand Down Expand Up @@ -366,6 +366,7 @@
],
"source": [
"import pandas as pd\n",
"\n",
"# extract feature importances\n",
"feature_importance_mapping = list(\n",
" zip(\n",
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ keywords = [

dependencies = [
"spacy[lookups]>=3.6.0",
"numpy>=1.20.0,<2.0.0", # due to https://stackoverflow.com/questions/78634235/numpy-dtype-size-changed-may-indicate-binary-incompatibility-expected-96-from
"numpy>=1.20.0,<2.0.0", # due to https://stackoverflow.com/questions/78634235/numpy-dtype-size-changed-may-indicate-binary-incompatibility-expected-96-from
"pandas>=1.0.0",
"pyphen>=0.11.0",
"ftfy>=6.0.3",
Expand All @@ -49,7 +49,7 @@ repository = "https://github.com/HLasse/textdescriptives"
documentation = "https://hlasse.github.io/TextDescriptives/"

[project.optional-dependencies]
style = ["ruff==0.1.15"]
style = ["ruff==0.8.3"]
tests = ["pytest>=7.1.3", "pytest-cov>=3.0.0", "pytest-xdist"]
docs = [
"pydantic==2.1",
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/about.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" About textdescriptives, version number is specified in the setup.cfg
"""About textdescriptives, version number is specified in the setup.cfg
file."""

# if python >= 3.8, use importlib.metadata otherwise use pkg_resources
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/dependency_distance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of statistics related to dependency distance."""
"""Calculation of statistics related to dependency distance."""

from typing import Callable

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/descriptive_stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of descriptive statistics."""
"""Calculation of descriptive statistics."""

from typing import Callable, Dict, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/information_theory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculate the entropy and perplexity of a corpus."""
"""Calculate the entropy and perplexity of a corpus."""

from typing import Callable, Dict, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/pos_proportions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of statistics that require a pos-tagger in the pipeline."""
"""Calculation of statistics that require a pos-tagger in the pipeline."""

from typing import Callable, Counter, List, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/quality.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Component for calculating quality metrics."""
"""Component for calculating quality metrics."""

from collections import Counter, defaultdict
from typing import Callable, Dict, List, Mapping, Optional, Tuple, Union
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/quality_data_classes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Data classes used for the quality component."""
"""Data classes used for the quality component."""

from typing import Any, Dict, Optional, Tuple, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/readability.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of various readability metrics."""
"""Calculation of various readability metrics."""

from typing import Callable, Dict

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Utility functions for calculating various text descriptives."""
"""Utility functions for calculating various text descriptives."""

from typing import Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/extractors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Extract metrics as Pandas DataFrame."""
"""Extract metrics as Pandas DataFrame."""

from typing import Any, Dict, Iterable, List, Optional, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/load_components.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Adds all components to a spaCy pipeline."""
"""Adds all components to a spaCy pipeline."""

from spacy.language import Language
from spacy.tokens import Doc
Expand Down
3 changes: 2 additions & 1 deletion tests/books.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" These books and several tests are borrowed from
"""These books and several tests are borrowed from
https://github.com/mholtzscher/spacy_readability."""

from __future__ import annotations

oliver_twist = """Among other public buildings in a certain town, which for many reasons
Expand Down
1 change: 1 addition & 0 deletions tests/test_coherence.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pytest
import spacy

import textdescriptives as td # noqa: F401


Expand Down
1 change: 1 addition & 0 deletions tests/test_dependency_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import pytest
import spacy

import textdescriptives as td # noqa: F401

from .books import flatland, oliver_twist, secret_garden
Expand Down
1 change: 1 addition & 0 deletions tests/test_descriptive_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import ftfy
import pytest
from spacy.lang.en import English

from textdescriptives.components import DescriptiveStatistics # noqa: F401

from .books import flatland, oliver_twist, secret_garden
Expand Down
1 change: 1 addition & 0 deletions tests/test_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
import spacy

import textdescriptives as td
from textdescriptives.utils import _create_spacy_pipeline, _download_spacy_model

Expand Down
1 change: 1 addition & 0 deletions tests/test_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pytest
import spacy

import textdescriptives as td
from textdescriptives.components.information_theory import (
entropy_getter,
Expand Down
1 change: 1 addition & 0 deletions tests/test_load_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
import spacy

import textdescriptives as td # noqa: F401


Expand Down
3 changes: 2 additions & 1 deletion tests/test_pos_proportions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import pytest
import spacy
import textdescriptives as td # noqa: F401
from spacy.tokens import Doc

import textdescriptives as td # noqa: F401


@pytest.fixture(scope="function")
def nlp():
Expand Down
4 changes: 3 additions & 1 deletion tests/test_quality.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
""" Tests for the quality module."""
"""Tests for the quality module."""

from __future__ import annotations

import pytest
import spacy

import textdescriptives as td
from textdescriptives.components.quality import (
alpha_ratio,
Expand Down
1 change: 1 addition & 0 deletions tests/test_readability.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest
import spacy
from spacy.lang.en import English

from textdescriptives.utils import _remove_textdescriptives_extensions # noqa: F401

from .books import (
Expand Down
Loading

0 comments on commit 4c59bde

Please sign in to comment.