IGNITE-8907: [ML] Using vectors in featureExtractor
[ignite.git] / modules / ml / src / main / java / org / apache / ignite / ml / preprocessing / normalization / NormalizationPreprocessor.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.ignite.ml.preprocessing.normalization;
19
20 import org.apache.ignite.ml.math.Vector;
21 import org.apache.ignite.ml.math.functions.Functions;
22 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
23 import org.apache.ignite.ml.math.functions.IgniteDoubleFunction;
24
25 /**
26 * Preprocessing function that makes normalization.
27 *
28 * Normalization is the process of scaling individual samples to have unit norm.
29 * This process can be useful if you plan to use a quadratic form such as the dot-product or any other kernel
30 * to quantify the similarity of any pair of samples.
31 *
32 * @param <K> Type of a key in {@code upstream} data.
33 * @param <V> Type of a value in {@code upstream} data.
34 */
35 public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> {
36 /** */
37 private static final long serialVersionUID = 6873438115778921295L;
38
39 /** Normalization in L^p space. Must be greater than 0. Default value is 2. */
40 private int p = 2;
41
42 /** Base preprocessor. */
43 private final IgniteBiFunction<K, V, Vector> basePreprocessor;
44
45 /**
46 * Constructs a new instance of Normalization preprocessor.
47 *
48 * @param p Degree of L^p space value.
49 * @param basePreprocessor Base preprocessor.
50 */
51 public NormalizationPreprocessor(int p, IgniteBiFunction<K, V, Vector> basePreprocessor) {
52 this.p = p;
53 this.basePreprocessor = basePreprocessor;
54 }
55
56 /**
57 * Applies this preprocessor.
58 *
59 * @param k Key.
60 * @param v Value.
61 * @return Preprocessed row.
62 */
63 @Override public Vector apply(K k, V v) {
64 Vector res = basePreprocessor.apply(k, v);
65
66 double pNorm = Math.pow(foldMap(res, Functions.PLUS, Functions.pow(p), 0d), 1.0 / p);
67
68 for (int i = 0; i < res.size(); i++)
69 res.set(i, res.get(i) / pNorm);
70
71 return res;
72 }
73
74 /**
75 * Folds given array into a single value.
76 * @param vec The given array.
77 * @param foldFun Folding function that takes two parameters: accumulator and the current value.
78 * @param mapFun Mapping function that is called on each vector element before its passed to the accumulator (as its
79 * second parameter).
80 * @param zero Zero value for fold operation.
81 * @return Folded value of this vector.
82 */
83 private double foldMap(Vector vec, IgniteBiFunction<Double,Double,Double> foldFun, IgniteDoubleFunction<Double> mapFun, double zero) {
84 for (int i = 0; i< vec.size(); i++)
85 zero = foldFun.apply(zero, mapFun.apply(vec.get(i)));
86
87 return zero;
88 }
89
90 /** Gets the degree of L^p space parameter value. */
91 public double p() {
92 return p;
93 }
94 }