Skip to content

Kernel functions for kernel density estimation and kernel regression #993

Open
@Beliavsky

Description

@Beliavsky

Kernel functions that are symmetric and integrate to 1, such as the Gaussian, are used in kernel density estimation, kernel regression, and other statistical algorithms (see https://en.wikipedia.org/wiki/Kernel_(statistics)). The code below implements the kernel functions from that Wikipedia article. They could be added to stdlib. An argument for not adding them to stdlib is that they are simple to code oneself, but if stdlib does add nonparametric statistical methods, the kernel functions should be defined in one place. In some cases the kernel functions are derivatives of known neural network activation functions, which have already been added to stdlib.

module kind_mod
! Single point of definition for the working real kind used by the other modules.
implicit none
private
! dp: real kind offering at least 15 decimal digits of precision and a
! decimal exponent range of at least 307.
integer, parameter, public :: dp = selected_real_kind(p=15, r=307)
end module kind_mod

module constants_mod
! Mathematical constants in working precision (kind dp from kind_mod).
!
! Every literal is written with 24 decimals, more than kind dp can hold, so
! that each constant is rounded to the nearest representable value.
! In particular sqrt_two used to be truncated to 16 significant digits
! (1.414213562373095), which rounds to the double just below the one
! nearest to sqrt(2).
!
! Default accessibility is left unchanged so that dp remains available to
! code that obtains it through this module.
use kind_mod, only: dp
implicit none
real(kind=dp), parameter, public :: &
pi             = 3.141592653589793238462643_dp, & ! pi
pi_over_2      = pi/2.0_dp                    , & ! pi/2
pi_over_4      = pi/4.0_dp                    , & ! pi/4
two_over_pi    = 2.0_dp/pi                    , & ! 2/pi
pi_reciprocal  = 0.318309886183790671537767_dp, & ! 1/pi
pi_squared     = 9.869604401089358618834491_dp, & ! pi**2
pi_square_root = 1.772453850905516027298167_dp, & ! sqrt(pi)
one_over_sqrt_two_pi = 0.398942280401432677939946_dp, & ! 1/sqrt(2*pi)
pi_ln          = 1.144729885849400174143427_dp, & ! ln(pi)
log_two_pi     = 1.837877066409345483560659_dp, & ! ln(2*pi)
pi_log10       = 0.497149872694133854351268_dp, & ! log10(pi)
sqrt_2_over_pi = 0.797884560802865355879892_dp, & ! sqrt(2/pi)
sqrt_pi_over_2 = 1.253314137315500251207883_dp, & ! sqrt(pi/2)
e              = 2.718281828459045235360287_dp, & ! Euler's number
e_reciprocal   = 0.367879441171442321595523_dp, & ! 1/e
e_squared      = 7.389056098930650227230427_dp, & ! e**2
e_log10        = 0.434294481903251827651129_dp, & ! log10(e)
sqrt_two       = 1.414213562373095048801689_dp    ! sqrt(2)
end module constants_mod

module kernels_mod
! Kernel (weight) functions for kernel density estimation and kernel regression.
! Formulas follow the table in https://en.wikipedia.org/wiki/Kernel_(statistics),
! plus an "exponential" (Laplace) kernel. Every kernel is symmetric in x and
! normalized to integrate to 1.
use kind_mod     , only: dp
use constants_mod, only: one_over_sqrt_two_pi,pi_over_2,pi_over_4,sqrt_two,two_over_pi
implicit none
private
public :: weight
contains
elemental function weight(x,kernel) result(y)
! Evaluate the kernel named by `kernel` at the point x.
!
! x      : argument of the kernel (typically (point - observation)/bandwidth)
! kernel : kernel name, lower case; one of
!          "uniform", "triangular", "epanechnikov", "quartic", "triweight",
!          "tricube", "cosine"                 (zero for abs(x) >= 1)
!          "gaussian", "logistic", "sigmoid", "exponential", "silverman"
!                                              (defined for all x)
! y      : kernel value; -huge(x) is returned as a sentinel when the name is
!          not recognized, since an elemental function cannot report an error.
real(kind=dp)    , intent(in) :: x
character (len=*), intent(in) :: kernel
real(kind=dp)                 :: y
! exact normalizing constant of the tricube kernel (a truncated decimal
! literal would limit the result to about 11 significant digits)
real(kind=dp), parameter :: tricube_norm = 70.0_dp/81.0_dp
real(kind=dp) :: ax ! abs(x); all kernels are even functions of x
ax = abs(x)
! kernels with compact support vanish outside (-1,1)
select case (kernel)
   case ("uniform","triangular","epanechnikov","quartic","triweight","tricube","cosine")
      if (ax >= 1.0_dp) then
         y = 0.0_dp
         return
      end if
end select
select case (kernel)
   case ("uniform")
      y = 0.5_dp
   case ("triangular")
      y = 1.0_dp - ax
   case ("epanechnikov")
      y = 0.75_dp*(1.0_dp - x**2)
   case ("quartic") ! also known as biweight
      y = 0.9375_dp*(1.0_dp - x**2)**2
   case ("triweight")
      y = 1.09375_dp*(1.0_dp - x**2)**3
   case ("tricube")
      y = tricube_norm*(1.0_dp - ax**3)**3
   case ("gaussian")
      y = one_over_sqrt_two_pi*exp(-0.5_dp*x**2)
   case ("cosine")
      y = pi_over_4*cos(pi_over_2*x)
   case ("logistic")
      y = 1.0_dp/(exp(x) + 2.0_dp + exp(-x))
   case ("sigmoid")
      y = two_over_pi/(exp(x) + exp(-x))
   case ("exponential")
      ! Laplace density; the factor 0.5 makes it integrate to 1
      y = 0.5_dp*exp(-ax)
   case ("silverman")
      ! sin, not cos: with cos the function integrates to 0 and is not a density
      y = 0.5_dp*exp(-ax/sqrt_two)*sin(ax/sqrt_two + pi_over_4)
   case default
      y = -huge(x) ! unrecognized kernel name
end select
end function weight
end module kernels_mod

Metadata

Metadata

Assignees

No one assigned

    Labels

    idea: Proposition of an idea and opening an issue to discuss it; topic: mathematics: linear algebra, sparse matrices, special functions, FFT, random numbers, statistics, ...

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions