Commit ea41d68

Try #1264:
bors[bot] authored Jul 9, 2020
2 parents f8001cf + 67c0608
Showing 22 changed files with 554 additions and 466 deletions.
28 changes: 14 additions & 14 deletions Manifest.toml
@@ -45,16 +45,16 @@ uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
version = "1.0.2"

[[ChainRules]]
deps = ["ChainRulesCore", "LinearAlgebra", "Reexport", "Requires", "Statistics"]
git-tree-sha1 = "76cd719cb7ab57bd2687dcb3b186c4f99820a79d"
deps = ["ChainRulesCore", "LinearAlgebra", "Random", "Reexport", "Requires", "Statistics"]
git-tree-sha1 = "cb6638807e41b771337cf3dbb7b67c5be16e2018"
uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2"
version = "0.6.5"
version = "0.7.6"

[[ChainRulesCore]]
deps = ["MuladdMacro"]
git-tree-sha1 = "c384e0e4fe6bfeb6bec0d41f71cc5e391cd110ba"
git-tree-sha1 = "bd1e56ac59d408c63ea26ab23aab78f827f13511"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "0.8.1"
version = "0.9.1"

[[CodecZlib]]
deps = ["TranscodingStreams", "Zlib_jll"]
@@ -64,9 +64,9 @@ version = "0.7.0"

[[ColorTypes]]
deps = ["FixedPointNumbers", "Random"]
git-tree-sha1 = "cd19496d8943326b752d1712afd6ab79c7514d28"
git-tree-sha1 = "6e7aa35d0294f647bb9c985ccc34d4f5d371a533"
uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
version = "0.10.5"
version = "0.10.6"

[[Colors]]
deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Reexport"]
@@ -128,9 +128,9 @@ version = "0.1.1"

[[FillArrays]]
deps = ["LinearAlgebra", "Random", "SparseArrays"]
git-tree-sha1 = "bf726ba7ce99e00d10bf63c031285fb9ab3676ae"
git-tree-sha1 = "4783bbbeade37f2a8bd82af6c112510fde78e532"
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
version = "0.8.11"
version = "0.8.12"

[[FixedPointNumbers]]
git-tree-sha1 = "8fb797c37a3b7ced4327a05ac4ca0dd6a4f1ba92"
@@ -255,9 +255,9 @@ uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.3+3"

[[OrderedCollections]]
git-tree-sha1 = "12ce190210d278e12644bcadf5b21cbdcf225cd3"
git-tree-sha1 = "293b70ac1780f9584c89268a6e2a560d938a7065"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.2.0"
version = "1.3.0"

[[Pkg]]
deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
@@ -368,10 +368,10 @@ uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
version = "1.2.11+14"

[[Zygote]]
deps = ["AbstractFFTs", "ArrayLayouts", "ChainRules", "FillArrays", "ForwardDiff", "Future", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "Random", "Requires", "Statistics", "ZygoteRules"]
git-tree-sha1 = "6fdbecad94c572d8b8cc0dcd3b1e82011232d44d"
deps = ["AbstractFFTs", "ArrayLayouts", "ChainRules", "DiffRules", "FillArrays", "ForwardDiff", "Future", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"]
git-tree-sha1 = "295e68106dd7bb327f8aedbab79ca1b6bd80d9b3"
uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
version = "0.5.1"
version = "0.5.2"

[[ZygoteRules]]
deps = ["MacroTools"]
3 changes: 2 additions & 1 deletion NEWS.md
@@ -7,6 +7,7 @@
* Add [Adaptive Pooling](https://github.com/FluxML/Flux.jl/pull/1239) in Flux layers.
* Change to `DataLoader`'s [constructor](https://github.com/FluxML/Flux.jl/pull/1152)
* Uniform loss [interface](https://github.com/FluxML/Flux.jl/pull/1150)
* Loss functions now live in the `Flux.Losses` [module](https://github.com/FluxML/Flux.jl/pull/1264)
* Optimistic ADAM (OADAM) optimizer for [adversarial training](https://github.com/FluxML/Flux.jl/pull/1246).
* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
* Added option to set `bias` to [Flux.Zeros](https://github.com/FluxML/Flux.jl/pull/873) to exclude `bias` from being trained.
@@ -32,7 +33,7 @@ See GitHub's releases.
- This means Flux now does not depend on using a specialised `TrackedArray` type, and can be used with normal Array implementations directly.
- Tracker compatibility is maintained in most common cases, but Zygote will be the preferred AD backend for Flux from now on.
* The CUDNN wrappers have been [moved from Flux into CuArrays](https://github.com/FluxML/Flux.jl/pull/874), to allow for better support of the CUDA backend, improve the user experience, and make Flux leaner.
* `*crossentropy` functions now [work as expected with CuArrays](https://github.com/FluxML/Flux.jl/pull/926). [PR for bce_loss](https://github.com/FluxML/Flux.jl/pull/940).
* `*crossentropy` functions now [work as expected with CuArrays](https://github.com/FluxML/Flux.jl/pull/926). [PR for binarycrossentropy](https://github.com/FluxML/Flux.jl/pull/940).
* Added [clearer docs](https://github.com/FluxML/Flux.jl/pull/904) around training and the Optimiser interface.
* [Layer initialisations](https://github.com/FluxML/Flux.jl/pull/937) have been improved with a clearer API on how to extend it for other purposes.
* [Better messaging around CUDA availability](https://github.com/FluxML/Flux.jl/pull/924), with hooks to initialize the GPU as default where possible.
33 changes: 17 additions & 16 deletions docs/src/models/losses.md
@@ -1,6 +1,7 @@
## Loss Functions
# Loss Functions

Flux provides a large number of common loss functions used for training machine learning models.
They are grouped together in the `Flux.Losses` module.

Loss functions for supervised learning typically expect as inputs a target `y`, and a prediction `ŷ`.
In Flux's convention, the order of the arguments is the following
@@ -20,21 +20,21 @@ loss(ŷ, y, agg=x->mean(w .* x)) # weighted mean
loss(ŷ, y, agg=identity) # no aggregation.
```

### Losses Reference
## Losses Reference

```@docs
Flux.mae
Flux.mse
Flux.msle
Flux.huber_loss
Flux.crossentropy
Flux.logitcrossentropy
Flux.bce_loss
Flux.logitbce_loss
Flux.kldivergence
Flux.poisson_loss
Flux.hinge_loss
Flux.squared_hinge_loss
Flux.dice_coeff_loss
Flux.tversky_loss
Flux.Losses.mae
Flux.Losses.mse
Flux.Losses.msle
Flux.Losses.huber_loss
Flux.Losses.crossentropy
Flux.Losses.logitcrossentropy
Flux.Losses.binarycrossentropy
Flux.Losses.logitbinarycrossentropy
Flux.Losses.kldivergence
Flux.Losses.poisson_loss
Flux.Losses.hinge_loss
Flux.Losses.squared_hinge_loss
Flux.Losses.dice_coeff_loss
Flux.Losses.tversky_loss
```
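
As a quick illustration of the convention the rewritten reference documents, a minimal sketch of the new call pattern: the `agg` keyword and the `Flux.Losses` namespace come from the doc text in this diff, while the concrete input values are made up.

```julia
using Flux
using Flux.Losses: mse

ŷ = [0.9, 0.2, 0.1]   # prediction
y = [1.0, 0.0, 0.0]   # target

mse(ŷ, y)                  # default aggregation: mean over all elements
mse(ŷ, y, agg = sum)       # summed loss
mse(ŷ, y, agg = identity)  # no aggregation: elementwise losses
```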
2 changes: 1 addition & 1 deletion docs/src/models/regularisation.md
@@ -8,7 +8,7 @@ For example, say we have a simple regression.

```julia
using Flux
using Flux: logitcrossentropy
using Flux.Losses: logitcrossentropy
m = Dense(10, 5)
loss(x, y) = logitcrossentropy(m(x), y)
```
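
The regularisation page builds on this snippet by adding a parameter penalty to the data-fitting loss; a sketch of that pattern under the new import, where the `penalty` helper and the one-hot target are illustrative additions rather than part of this diff.

```julia
using Flux
using Flux.Losses: logitcrossentropy
using LinearAlgebra: norm

m = Dense(10, 5)

# L2-style penalty over all trainable parameters of the model
penalty() = sum(norm, Flux.params(m))
loss(x, y) = logitcrossentropy(m(x), y) + penalty()

loss(rand(10), Flux.onehot(3, 1:5))
```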
2 changes: 1 addition & 1 deletion docs/src/training/optimisers.md
@@ -116,7 +116,7 @@ w = randn(10, 10)
w1 = randn(10,10)
ps = Params([w, w1])

loss(x) = Flux.mse(w * x, w1 * x)
loss(x) = Flux.Losses.mse(w * x, w1 * x)

loss(rand(10)) # around 9

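
The optimisers page goes on to differentiate this loss and apply an update; a minimal sketch of that step, in which the `Descent(0.1)` optimiser and its learning rate are illustrative choices.

```julia
using Flux

w  = randn(10, 10)
w1 = randn(10, 10)
ps = Flux.Params([w, w1])

loss(x) = Flux.Losses.mse(w * x, w1 * x)

x  = rand(10)
gs = Flux.gradient(() -> loss(x), ps)   # gradients for every array in ps

opt = Descent(0.1)                       # plain gradient descent
Flux.Optimise.update!(opt, ps, gs)       # update w and w1 in place
```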
10 changes: 7 additions & 3 deletions docs/src/training/training.md
@@ -17,22 +17,25 @@ There are plenty of examples in the [model zoo](https://github.com/FluxML/model-

## Loss Functions

The objective function must return a number representing how far the model is from its target – the *loss* of the model. The `loss` function that we defined in [basics](../models/basics.md) will work as an objective. We can also define an objective in terms of some model:
The objective function must return a number representing how far the model is from its target – the *loss* of the model. The `loss` function that we defined in [basics](../models/basics.md) will work as an objective.
In addition to custom losses, a model can be trained in conjunction with
the commonly used losses that are grouped under the `Flux.Losses` module.
We can also define an objective in terms of some model:

```julia
m = Chain(
Dense(784, 32, σ),
Dense(32, 10), softmax)

loss(x, y) = Flux.mse(m(x), y)
loss(x, y) = Flux.Losses.mse(m(x), y)
ps = Flux.params(m)

# later
Flux.train!(loss, ps, data, opt)
```

The objective will almost always be defined in terms of some *cost function* that measures the distance of the prediction `m(x)` from the target `y`. Flux has several of these built in, like `mse` for mean squared error or `crossentropy` for cross entropy loss, but you can calculate it however you want.
For a list of all built-in loss functions, check out the [layer reference](../models/layers.md).
For a list of all built-in loss functions, check out the [losses reference](../models/losses.md).

At first glance it may seem strange that the model that we want to train is not part of the input arguments of `Flux.train!` too. However the target of the optimizer is not the model itself, but the objective function that represents the departure between modelled and observed data. In other words, the model is implicitly defined in the objective function, and there is no need to give it explicitly. Passing the objective function instead of the model and a cost function separately provides more flexibility, and the possibility of optimizing the calculations.

@@ -157,6 +160,7 @@ function my_custom_train!(loss, ps, data, opt)
end
end
```

You could simplify this further, for example by hard-coding in the loss function.

Another possibility is to use [`Zygote.pullback`](https://fluxml.ai/Zygote.jl/dev/adjoints/#Pullbacks-1)
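
Picking up the `Zygote.pullback` mention just above: it returns the loss value together with a pullback for the gradients. A hedged sketch of how the custom loop shown earlier might use it, assuming Zygote is directly available in the environment (it is Flux's AD backend); the logging line is an illustrative addition.

```julia
using Flux
using Zygote

function my_custom_train!(loss, ps, data, opt)
  for d in data
    # pullback evaluates the loss and returns a closure that maps a
    # sensitivity (here 1) to gradients with respect to ps
    train_loss, back = Zygote.pullback(() -> loss(d...), ps)
    gs = back(one(train_loss))
    Flux.Optimise.update!(opt, ps, gs)
    @info "batch loss" train_loss
  end
end
```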
4 changes: 3 additions & 1 deletion src/Flux.jl
@@ -35,14 +35,16 @@ include("onehot.jl")
include("functor.jl")

include("layers/stateless.jl")
include("layers/losses.jl")
include("layers/basic.jl")
include("layers/conv.jl")
include("layers/recurrent.jl")
include("layers/normalise.jl")

include("data/Data.jl")

include("losses/Losses.jl")
using .Losses # TODO: stop importing Losses in Flux's namespace in v0.12

include("deprecations.jl")

include("cuda/cuda.jl")
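
The `using .Losses` line keeps the new module's functions reachable from Flux's top-level namespace until v0.12, as the TODO comment says; a small sketch of the two call styles that should both work during the transition — expected behaviour inferred from the re-export, not something this diff tests.

```julia
using Flux

x, y = rand(4), rand(4)

# new, namespaced call introduced by this PR
Flux.Losses.mse(x, y)

# old-style call, still expected to work thanks to `using .Losses`
Flux.mse(x, y)
```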
23 changes: 17 additions & 6 deletions src/deprecations.jl
@@ -1,7 +1,18 @@
# v0.11 deprecations
@deprecate poisson poisson_loss
@deprecate hinge hinge_loss
@deprecate squared_hinge squared_hinge_loss
@deprecate binarycrossentropy(ŷ, y) bce_loss(ŷ, y, agg=identity)
@deprecate logitbinarycrossentropy(ŷ, y) logitbce_loss(ŷ, y, agg=identity)
@deprecate normalise(x) normalise(x, dims=1)
@deprecate poisson poisson_loss false
@deprecate hinge hinge_loss false
@deprecate squared_hinge squared_hinge_loss false
@deprecate normalise(x) normalise(x, dims=1) false

@deprecate binarycrossentropy(ŷ, y) Losses.binarycrossentropy(ŷ, y, agg=identity) false
@deprecate logitbinarycrossentropy(ŷ, y) Losses.logitbinarycrossentropy(ŷ, y, agg=identity) false

function Broadcast.broadcasted(::typeof(binarycrossentropy), ŷ, y)
@warn "binarycrossentropy.(ŷ, y) is deprecated, use Losses.binarycrossentropy(ŷ, y, agg=identity) instead"
Losses.binarycrossentropy(ŷ, y, agg=identity)
end

function Broadcast.broadcasted(::typeof(logitbinarycrossentropy), ŷ, y)
@warn "logitbinarycrossentropy.(ŷ, y) is deprecated, use Losses.logitbinarycrossentropy(ŷ, y, agg=identity) instead"
Losses.logitbinarycrossentropy(ŷ, y, agg=identity)
end
Empty file removed src/layers/losses.jl
Empty file.
