Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Towards a 1.0.0 release #908

Merged
merged 32 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
2a7941f
jettison built-in measures in favour of StatisticalMeasures.jl
ablaom May 25, 2023
1a4e5e1
remove loss functions
ablaom May 25, 2023
b3d8102
harmless commit
ablaom May 25, 2023
c452543
add v0.22 staging branch to ci
ablaom May 25, 2023
4c54571
add 0.22 staging branch to ci
ablaom May 25, 2023
b1bfab1
remove LossFunctions import
ablaom May 25, 2023
da710ad
cleanup default measures, incl. a bug fix; change regressor default;
ablaom May 25, 2023
2692ca9
rename two files + whitespace changes + docstring addition
ablaom May 26, 2023
1dde0d9
oop forgotten commit
ablaom May 26, 2023
a3fca52
add per_observation flag to evaluate/evaluate! for performance boost
ablaom May 26, 2023
91a6f54
simplify evaluate!/evaluate docstrings
ablaom May 26, 2023
e2d15dd
make StatisticalMeasures a [weakdep]; add DefaultMeasuresExt.jl
ablaom May 26, 2023
7ae73a5
doc string tweak
ablaom May 26, 2023
588e777
doc string typo
ablaom May 28, 2023
c6dddce
Merge branch 'dev' into for-a-0-point-22-release
ablaom Sep 6, 2023
e5dfa91
Merge branch 'for-a-0-point-22-release' into measures
ablaom Sep 6, 2023
0549150
update readme
ablaom Sep 21, 2023
0ee7f41
Merge pull request #909 from JuliaAI/measures
ablaom Sep 21, 2023
a394358
Merge branch 'dev' into for-a-0-point-22-release
ablaom Sep 21, 2023
ffc69a7
Remove deprecated code supporting @from_network and @pipeline
ablaom Sep 22, 2023
8788e41
rm forgotten include statement
ablaom Sep 22, 2023
fa955a5
Merge pull request #932 from JuliaAI/remove-deprecated-stuff
ablaom Sep 22, 2023
1678e5c
remove `target` as alias for `transformer` to close #856
ablaom Sep 22, 2023
72d211f
remove a redundant restriction against "target" in pipelines
ablaom Sep 22, 2023
444087e
rm residual reference to "target" as kwarg
ablaom Sep 22, 2023
e61f43a
Merge pull request #933 from JuliaAI/transformed-target-alias-remove
ablaom Sep 22, 2023
99d72f2
remove stray code masking some tests!!
ablaom Sep 24, 2023
a24df7f
fix and adapt previously masked tests
ablaom Sep 24, 2023
5ed9492
rm obsolete private method
ablaom Sep 24, 2023
9d12b2c
test output of `pretty`
ablaom Sep 24, 2023
63b7d1b
Merge pull request #934 from JuliaAI/improve-coverage
ablaom Sep 25, 2023
7ed7d17
rm redundant code
ablaom Sep 25, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
branches:
- master
- dev
- for-a-0-point-21-release
- for-a-0-point-22-release
- next-breaking-release
push:
branches:
Expand Down
19 changes: 16 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,37 +13,49 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
LearnAPI = "92ad9a40-7767-427a-9ee6-6e577f1266cb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541"
StatisticalMeasuresBase = "c062fc1d-0d66-479b-b6ac-8b44719de4cc"
StatisticalTraits = "64bff920-2084-43da-a3e6-9bb72801c0c9"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[weakdeps]
StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541"

[extensions]
DefaultMeasuresExt = "StatisticalMeasures"

[compat]
CategoricalArrays = "0.9, 0.10"
CategoricalDistributions = "0.1"
ComputationalResources = "0.3"
DelimitedFiles = "1"
Distributions = "0.25.3"
InvertedIndices = "1"
LossFunctions = "0.11"
LearnAPI = "0.1"
MLJModelInterface = "1.7"
Missings = "0.4, 1"
OrderedCollections = "1.1"
Parameters = "0.12"
PrettyTables = "1, 2"
ProgressMeter = "1.7.1"
Reexport = "1.2"
ScientificTypes = "3"
StatisticalMeasures = "0.1.1"
StatisticalMeasuresBase = "0.1.1"
StatisticalTraits = "3.2"
StatsBase = "0.32, 0.33, 0.34"
Tables = "0.2, 1.0"
Expand All @@ -57,8 +69,9 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"

[targets]
test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "Test", "TypedTables"]
test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "StatisticalMeasures", "Test", "TypedTables"]
11 changes: 5 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,16 @@ repository provides core functionality for MLJ, including:

- basic utilities for **manipulating datasets** and for **synthesizing datasets** (src/data)

- a [small interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1) for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and `Holdout` (src/resampling.jl)
- a [small
interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1)
for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and
`Holdout` (src/resampling.jl). Actual performance evaluation measures (aka metrics), which previously
were provided by MLJBase.jl, now live in [StatisticalMeasures.jl](https://juliaai.github.io/StatisticalMeasures.jl/dev/).

- methods for **performance evaluation**, based on those resampling strategies (src/resampling.jl)

- **one-dimensional hyperparameter range types**, constructors and
associated methods, for use with
[MLJTuning](https://github.com/JuliaAI/MLJTuning.jl) (src/hyperparam)

- a [small
interface](https://alan-turing-institute.github.io/MLJ.jl/dev/performance_measures/#Traits-and-custom-measures-1)
for **performance measures** (losses and scores), implementation of about 60 such measures, including integration of the
[LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl)
library (src/measures). To be migrated into separate package in the near future.

15 changes: 15 additions & 0 deletions ext/DefaultMeasuresExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module DefaultMeasuresExt

using MLJBase
import MLJBase:default_measure, ProbabilisticDetector, DeterministicDetector
using StatisticalMeasures
using StatisticalMeasures.ScientificTypesBase

# Fallback measures, chosen by dispatch on the abstract type of the first
# argument (`Deterministic`, `Probabilistic`, or one of the detector types) and on
# the scientific type passed as the second argument (presumably the scitype of
# the target — confirm against the caller in MLJBase).

# Deterministic predictors
function default_measure(::Deterministic, ::Type{<:Union{Continuous,Count}})
    return l2
end

function default_measure(::Deterministic, ::Type{<:Finite})
    return misclassification_rate
end

# Probabilistic predictors: `log_loss` for every supported scientific type
function default_measure(::Probabilistic, ::Type{<:Union{Finite,Count}})
    return log_loss
end

function default_measure(::Probabilistic, ::Type{<:Continuous})
    return log_loss
end

# Detectors, defined only for a two-class ordered factor
function default_measure(::ProbabilisticDetector, ::Type{<:OrderedFactor{2}})
    return area_under_curve
end

function default_measure(::DeterministicDetector, ::Type{<:OrderedFactor{2}})
    return balanced_accuracy
end

Check warning on line 13 in ext/DefaultMeasuresExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/DefaultMeasuresExt.jl#L12-L13

Added lines #L12 - L13 were not covered by tests

end # module
105 changes: 17 additions & 88 deletions src/MLJBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module MLJBase
# ===================================================================
# IMPORTS

using Reexport
import Base: ==, precision, getindex, setindex!
import Base.+, Base.*, Base./

Expand All @@ -16,7 +17,7 @@ for trait in StatisticalTraits.TRAITS
eval(:(import StatisticalTraits.$trait))
end

import Base.instances # considered a trait for measures
import LearnAPI
import StatisticalTraits.snakecase
import StatisticalTraits.info

Expand Down Expand Up @@ -47,7 +48,7 @@ end
###################
# Hack Block ends #
###################

import MLJModelInterface: ProbabilisticDetector, DeterministicDetector
import MLJModelInterface: fit, update, update_data, transform,
inverse_transform, fitted_params, predict, predict_mode,
predict_mean, predict_median, predict_joint,
Expand Down Expand Up @@ -78,8 +79,6 @@ using ProgressMeter
import .Threads

# Operations & extensions
import LossFunctions
import LossFunctions.Traits
import StatsBase
import StatsBase: fit!, mode, countmap
import Missings: levels
Expand All @@ -89,6 +88,9 @@ using CategoricalDistributions
import Distributions: pdf, logpdf, sampler
const Dist = Distributions

# Measures
import StatisticalMeasuresBase

# from Standard Library:
using Statistics, LinearAlgebra, Random, InteractiveUtils

Expand Down Expand Up @@ -128,57 +130,6 @@ const CatArrMissing{T,N} = ArrMissing{CategoricalValue{T},N}
const MMI = MLJModelInterface
const FI = MLJModelInterface.FullInterface

const MARGIN_LOSSES = [
:DWDMarginLoss,
:ExpLoss,
:L1HingeLoss,
:L2HingeLoss,
:L2MarginLoss,
:LogitMarginLoss,
:ModifiedHuberLoss,
:PerceptronLoss,
:SigmoidLoss,
:SmoothedL1HingeLoss,
:ZeroOneLoss
]

const DISTANCE_LOSSES = [
:HuberLoss,
:L1EpsilonInsLoss,
:L2EpsilonInsLoss,
:LPDistLoss,
:LogitDistLoss,
:PeriodicLoss,
:QuantileLoss
]

const WITH_PARAMETERS = [
:DWDMarginLoss,
:SmoothedL1HingeLoss,
:HuberLoss,
:L1EpsilonInsLoss,
:L2EpsilonInsLoss,
:LPDistLoss,
:QuantileLoss,
]

const MEASURE_TYPE_ALIASES = [
:FPR, :FNR, :TPR, :TNR,
:FDR, :PPV, :NPV, :Recall, :Specificity,
:MFPR, :MFNR, :MTPR, :MTNR,
:MFDR, :MPPV, :MNPV, :MulticlassRecall, :MulticlassSpecificity,
:MCR,
:MCC,
:BAC, :BACC,
:RMS, :RMSPV, :RMSL, :RMSLP, :RMSP,
:MAV, :MAE, :MAPE,
:RSQ, :LogCosh,
:CrossEntropy,
:AUC
]

const LOSS_FUNCTIONS = vcat(MARGIN_LOSSES, DISTANCE_LOSSES)

# ===================================================================
# Computational Resource
# default_resource allows to switch the mode of parallelization
Expand All @@ -199,19 +150,13 @@ include("models.jl")
include("sources.jl")
include("machines.jl")

include("composition/deprecated_abstract_types.jl")
include("composition/learning_networks/nodes.jl")
include("composition/learning_networks/inspection.jl")
include("composition/learning_networks/signatures.jl")
include("composition/learning_networks/deprecated_machines.jl")
include("composition/learning_networks/replace.jl")

include("composition/models/deprecated_pipelines.jl")
include("composition/models/deprecated_methods.jl")
include("composition/models/network_composite_types.jl")
include("composition/models/network_composite.jl")
include("composition/models/deprecated_from_network.jl")
include("composition/models/inspection.jl")
include("composition/models/pipelines.jl")
include("composition/models/transformed_target_model.jl")

Expand All @@ -225,21 +170,14 @@ include("data/data.jl")
include("data/datasets.jl")
include("data/datasets_synthetic.jl")

include("measures/measures.jl")
include("measures/measure_search.jl")
include("measures/doc_strings.jl")
include("default_measures.jl")

include("composition/models/stacking.jl")

# function on the right-hand side is defined in src/measures/meta_utilities.jl:
const MEASURE_TYPES_ALIASES_AND_INSTANCES = measures_for_export()

const EXTENDED_ABSTRACT_MODEL_TYPES = vcat(
MLJBase.MLJModelInterface.ABSTRACT_MODEL_SUBTYPES,
MLJBase.NETWORK_COMPOSITE_TYPES, # src/composition/models/network_composite_types.jl
MLJBase.COMPOSITE_TYPES, # src/composition/abstract_types.jl
MLJBase.SURROGATE_TYPES, # src/composition/abstract_types.jl
[:MLJType, :Model, :NetworkComposite, :Surrogate, :Composite],
[:MLJType, :Model, :NetworkComposite],
)

# ===================================================================
Expand Down Expand Up @@ -337,8 +275,8 @@ export machine, Machine, fit!, report, fit_only!, default_scitype_check_level,
# datasets_synthetic.jl
export make_blobs, make_moons, make_circles, make_regression

# composition (surrogates and composites are exported in composition):
export machines, sources, @from_network, @pipeline, Stack,
# composition
export machines, sources, Stack,
glb, @tuple, node, @node, sources, origins, return!,
nrows_at_source, machine, rebind!, nodes, freeze!, thaw!,
Node, AbstractNode, Pipeline,
Expand All @@ -357,28 +295,19 @@ export ResamplingStrategy, Holdout, CV, StratifiedCV, TimeSeriesCV,
# -------------------------------------------------------------------
# exports from MLJBase specific to measures

# measure names:
for m in MEASURE_TYPES_ALIASES_AND_INSTANCES
:(export $m) |> eval
end

# measures/registry.jl:
export measures, metadata_measure

# measure/measures.jl (excluding traits):
export aggregate, default_measure, value, skipinvalid

# measures/probabilistic:
export roc_curve, roc

# measures/finite.jl (averaging modes for multiclass scores)
export no_avg, macro_avg, micro_avg

export default_measure

# -------------------------------------------------------------------
# re-export from Random, StatsBase, Statistics, Distributions,
# OrderedCollections, CategoricalArrays, InvertedIndices:
export pdf, sampler, mode, median, mean, shuffle!, categorical, shuffle,
levels, levels!, std, Not, support, logpdf, LittleDict

# Compatibility shim for julia < 1.9, where `Base.get_extension` is not defined
# and package extensions are therefore not loaded automatically. In that case
# the extension source file is included directly, making `DefaultMeasuresExt` a
# submodule of this module, and the names exported by StatisticalMeasures
# (reached through that submodule, which does `using StatisticalMeasures`) are
# re-exported from here.
# NOTE(review): this presumably relies on StatisticalMeasures being installed as
# an ordinary dependency on those Julia versions — confirm against Project.toml.
if !isdefined(Base, :get_extension)
include(joinpath("..","ext", "DefaultMeasuresExt.jl"))
@reexport using .DefaultMeasuresExt.StatisticalMeasures
end

end # module
40 changes: 0 additions & 40 deletions src/composition/deprecated_abstract_types.jl

This file was deleted.

Loading
Loading