(ns knn.distance)
(use '[knn.op :as op])
(use '[knn.norm :as norm])

Util Function to be used to compute Bray Curtis Distance Function between two vectors

(defn- bray-curtis-distance-denominator
  [first-vector second-vector]
  (reduce + (map #(Math/abs (+ %1 %2)) first-vector second-vector)))

Manhattan Distance between two vectors, called l_1 distance as well

Formula: sum(abs(x - y))

(defn manhattan-distance
  "Manhattan Distance between two vectors, called l_1 distance as well\n
  Formula: sum(abs(x - y))"
  [first-vector second-vector]
  (reduce + (map #(Math/abs (- %1 %2)) first-vector second-vector)))

Euclidean distance between two vectors

Formula: sqrt(sum((x - y) .^ 2))

(defn euclidean-distance
  "Euclidean distance between two vectors\n
  Formula: sqrt(sum((x - y) .^ 2))"
  [first-vector second-vector]
  (Math/sqrt (reduce + (map #(Math/pow (- %1 %2) 2) first-vector second-vector))))

Squared Euclidean Distance between two vectors

Formula: sum((x - y) .^ 2)

(defn squared-euclidean-distance
  "Squared Euclidean Distance between two vectors\n
  Formula: sum((x - y) .^ 2)"
  [first-vector second-vector]
  (* (euclidean-distance first-vector second-vector) (euclidean-distance first-vector second-vector)))

Minkowski Distance, known as l-p distance Most Generic form of l-p distances, covers l_1(manhattan) and l_2(euclidean distances) as well

Formula: sum(abs(x - y).^p) ^ (1/p)

(defn minkowski-distance
  "Minkowski Distance, known as l-p distance
  Most Generic form of l-p distances, covers l_1(manhattan)
  and l_2(euclidean distances) as well\n
  Formula: sum(abs(x - y).^p) ^ (1/p)"
  [first-vector second-vector p]
  (Math/pow (reduce + (map #(Math/pow (Math/abs (- %1 %2)) p) first-vector second-vector)) (/ p)))

Weighted Minkowski Distance, with different weights on the difference between observations

Formula: sum(abs(x - y).^p .* w) ^ (1/p)

(defn weighted-minkowski-distance
  "Weighted Minkowski Distance, with different weights
  on the difference between observations\n
  Formula: sum(abs(x - y).^p .* w) ^ (1/p)"
  [first-vector second-vector weight-vector p]
  (Math/pow
   (reduce + (map #(* (Math/pow (Math/abs (- %1 %2)) p) %3) first-vector second-vector weight-vector))
   (/ p)))

Chebyshev Distance It is a form of l_p distance when p -> /Infinity

Formula: max(abs(x - y))

(defn chebyshev-distance
  "Chebyshev Distance
  It is a form of l_p distance when p -> /Infinity\n
  Formula: max(abs(x - y))"
  [first-vector second-vector]
  (apply max (map #(Math/abs (- %1 %2)) first-vector second-vector)))

Hamming Distance between two vectors

Formula: sum(x .!= y)

(defn hamming-distance
  "Hamming Distance between two vectors\n
  Formula: sum(x .!= y)"
  [first-vector second-vector]
  (count
   (filter true? (map #(not= %1 %2) first-vector second-vector))))

Bray Curtis Distance Function

(defn bray-curtis-distance
  [first-vector second-vector]
  (/ (manhattan-distance first-vector second-vector) (bray-curtis-distance-denominator first-vector second-vector)))

Canberra Distance

(defn canberra-distance
  [first-vector second-vector]
  (reduce + (map #(/ (Math/abs (- %1 %2)) (+ (Math/abs %1) (Math/abs %2))) first-vector second-vector)))

Cosine distance between two vectors

Formula: 1 - dot(x, y) / (norm(x) * norm(y))

(defn cosine-distance
  "Cosine distance between two vectors\n
  Formula: 1 - dot(x, y) / (norm(x) * norm(y))"
  [first-vector second-vector]
  (- 1 (/ (op/dot first-vector second-vector) (* (norm/euclidean-norm first-vector) (norm/euclidean-norm second-vector) ))))

Correlation distance between two vectors

Formula: cosine-distance(x - mean(x), y - mean(y))

(defn correlation-distance
  "Correlation distance between two vectors\n
  Formula: cosine-distance(x - mean(x), y - mean(y))"
  [first-vector second-vector]
  (cosine-distance (op/scalar-subtract first-vector (op/mean first-vector))
                   (op/scalar-subtract second-vector (op/mean second-vector))))

Chi Squared Distance between two vectors

Formula: sum((x - y).^2 / (x + y))

(defn chi-squared-distance
  "Chi Squared Distance between two vectors\n
  Formula: sum((x - y).^2 / (x + y))"
  [first-vector second-vector]
  (reduce + (map #(/ (* (- %1 %2) (- %1 %2) ) (+ %1 %2)) first-vector second-vector)))

Kullback-Leibler Divergence of two vectors with a function that is mapped to the ratio

(defn- kl-div
  [first-vector second-vector func]
  (reduce + (op/mult first-vector (map func (op/div first-vector second-vector)))))

Kullback-Leibler Divergence with a normal Log function

Formula: sum(p .* log(p ./ q))

(defn kl-divergence
  "Kullback-Leibler Divergence with a normal Log function\n
  Formula: sum(p .* log(p ./ q))"
  [first-vector second-vector]
  (kl-div first-vector second-vector #(Math/log %)))

JS Divergence of two vectors

Formula: KL(x, m) / 2 + KL(y, m) / 2 where m = (x + y) / 2

(defn js-divergence
  "JS Divergence of two vectors\n
  Formula: KL(x, m) / 2 + KL(y, m) / 2 where
  m = (x + y) / 2"
  [first-vector second-vector]
  (let [m (op/scalar-div (op/add first-vector second-vector) 2)]
    (+ (kl-divergence first-vector m) (kl-divergence second-vector m))))

Span-norm distance between two vectors

Formula: max(x - y) - min(x - y)

(defn span-norm-distance
  "Span-norm distance between two vectors\n
  Formula: max(x - y) - min(x - y)"
  [first-vector second-vector]
  (- (apply max (op/subtract first-vector second-vector)) (apply min (op/subtract first-vector second-vector))))

Bhattacharyya Distance between two vectors

Formula: -log(sum(sqrt(x .* y) / sqrt(sum(x) * sum(y)))

(defn bhattacharyya-distance
  "Bhattacharyya Distance between two vectors\n
  Formula: -log(sum(sqrt(x .* y) / sqrt(sum(x) * sum(y)))"
  [first-vector second-vector]
  (- (Math/log
      (reduce + (map #(/ (Math/sqrt (* %1 %2)) (Math/pow (* (reduce + first-vector)
                                                             (reduce + second-vector)) 0.5)) first-vector second-vector)))))
(defn hellinger-distance
  [first-vector second-vector]
  (Math/pow (- 1.0 (reduce + (map #(/ (Math/sqrt (* %1 %2)) (Math/pow (* (reduce + first-vector)
                                                             (reduce + second-vector)) 0.5)) first-vector second-vector))) 0.5))