Skip to content

Commit

Permalink
Add NonParametricMachine class (#5055)
Browse files Browse the repository at this point in the history
* add nonparametric machine
* fix notebooks
  • Loading branch information
LiuYuHui authored and gf712 committed Jun 17, 2020
1 parent 41b6247 commit c8b46a1
Show file tree
Hide file tree
Showing 20 changed files with 120 additions and 85 deletions.
16 changes: 7 additions & 9 deletions doc/ipython-notebooks/classification/Classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -535,22 +535,20 @@
"number_of_neighbors = 10\n",
"\n",
"distances_linear = sg.create_distance('EuclideanDistance')\n",
"distances_linear.init(shogun_feats_linear, shogun_feats_linear)\n",
"knn_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_linear, labels=shogun_labels_linear)\n",
"knn_linear.train()\n",
"knn_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_linear)\n",
"knn_linear.train(shogun_feats_linear, shogun_labels_linear)\n",
"classifiers_linear.append(knn_linear)\n",
"classifiers_names.append(\"Nearest Neighbors\")\n",
"fadings.append(False)\n",
"\n",
"plt.figure(figsize=(15,5))\n",
"plt.subplot(121)\n",
"plt.title(\"Nearest Neighbors - Linear Features\")\n",
"plot_model(plt,knn_linear,feats_linear,labels_linear,fading=False)\n",
"plot_model(plt,knn_linear,feats_linear,labels_linear,fading=True)\n",
"\n",
"distances_non_linear = sg.create_distance('EuclideanDistance')\n",
"distances_non_linear.init(shogun_feats_non_linear, shogun_feats_non_linear)\n",
"knn_non_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_non_linear, labels=shogun_labels_non_linear)\n",
"knn_non_linear.train()\n",
"knn_non_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_non_linear)\n",
"knn_non_linear.train(shogun_feats_non_linear, shogun_labels_non_linear)\n",
"classifiers_non_linear.append(knn_non_linear)\n",
"\n",
"plt.subplot(122)\n",
Expand Down Expand Up @@ -804,8 +802,8 @@
"plot_binary_data(plt,feats_non_linear, labels_non_linear)\n",
"\n",
"for i in range(0,10):\n",
" plt.subplot(2,11,13+i)\n",
" plot_model(plt,classifiers_non_linear[i],feats_non_linear,labels_non_linear,fading=fadings[i])"
" plt.subplot(2,11,13+i)\n",
" plot_model(plt,classifiers_non_linear[i],feats_non_linear,labels_non_linear,fading=fadings[i])"
]
},
{
Expand Down
9 changes: 4 additions & 5 deletions doc/ipython-notebooks/multiclass/KNN.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -286,19 +286,18 @@
" labels.add_subset(idx_train)\n",
"\n",
" dist = sg.create_distance('EuclideanDistance')\n",
" dist.init(feats, feats)\n",
" knn = sg.create_machine(\"KNN\", k=k, distance=dist, labels=labels)\n",
" knn = sg.create_machine(\"KNN\", k=k, distance=dist)\n",
" #knn.set_store_model_features(True)\n",
" #FIXME: causes SEGFAULT\n",
" if use_cover_tree:\n",
" continue\n",
" # knn.put('knn_solver', \"KNN_COVER_TREE\")\n",
" else:\n",
" knn.put('knn_solver', \"KNN_BRUTE\")\n",
" knn.train()\n",
" knn.train(feats, labels)\n",
"\n",
" evaluator = sg.create_evaluation(\"MulticlassAccuracy\")\n",
" pred = knn.apply()\n",
" pred = knn.apply(feats)\n",
" acc_train[i, j] = evaluator.evaluate(pred, labels)\n",
"\n",
" feats.remove_subset()\n",
Expand Down Expand Up @@ -490,7 +489,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.6.9"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions examples/meta/src/evaluation/clustering.sg.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ RealMatrix centers = kmeans.get_real_matrix("cluster_centers")
Labels labels_centroids = create_labels(f_labels_centroids)
Features fea_centroids = create_features(centers)
Distance d2 = create_distance("EuclideanDistance", lhs=fea_centroids, rhs=fea_centroids)
Machine knn = create_machine("KNN", k=1, distance=d2, labels=labels_centroids)
knn.train()
Machine knn = create_machine("KNN", k=1, distance=d2)
knn.train(fea_centroids, labels_centroids)
Labels gnd_hat = knn.apply(features_train)
#![assign_labels]

Expand Down
4 changes: 2 additions & 2 deletions examples/meta/src/multiclass/k_nearest_neighbours.sg.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Labels labels_test = create_labels(f_labels_test)
#![create_features]

#![choose_distance]
Distance d = create_distance("EuclideanDistance", lhs=features_train, rhs=features_train)
Distance d = create_distance("EuclideanDistance")
#![choose_distance]

#![create_instance]
Expand All @@ -20,7 +20,7 @@ Machine knn = create_machine("KNN", k=k, distance=d, labels=labels_train)
#![create_instance]

#![train_and_apply]
knn.train()
knn.train(features_train, labels_train)
MulticlassLabels labels_predict = knn.apply_multiclass(features_test)
#![train_and_apply]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ Distance lmnn_distance = lmnn.get_distance()
#![train_metric]

#![train_and_apply]
Machine knn = create_machine("KNN", k=k, distance=lmnn_distance, labels=labels_train)
knn.train()
Machine knn = create_machine("KNN", k=k, distance=lmnn_distance)
knn.train(features_train, labels_train)
MulticlassLabels labels_predict = knn.apply_multiclass(features_test)
#![train_and_apply]

Expand Down
4 changes: 2 additions & 2 deletions examples/undocumented/python/evaluation_clustering_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def assign_labels(data, centroids, ncenters):
fea_centroids = sg.create_features(centroids)
distance = sg.create_distance('EuclideanDistance')
distance.init(fea_centroids, fea_centroids)
knn = sg.create_machine("KNN", k=1, distance=distance, labels=labels)
knn.train()
knn = sg.create_machine("KNN", k=1, distance=distance)
knn.train(fea_centroids, labels)
return knn.apply(data)

def evaluation_clustering_simple (n_data=100, sqrt_num_blobs=4, distance=5):
Expand Down
3 changes: 3 additions & 0 deletions src/interfaces/swig/Classifier.i
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@

/* Include Class Headers to make them visible from within the target language */
%include <shogun/machine/Machine.h>
%include <shogun/machine/NonParametricMachine.h>
%include <shogun/machine/IterativeMachine.h>
%include <shogun/machine/FeatureDispatchCRTP.h>
%include <shogun/machine/KernelMachine.h>
%include <shogun/machine/LinearMachine.h>
%include <shogun/classifier/svm/SVM.h>
Expand Down
2 changes: 2 additions & 0 deletions src/interfaces/swig/Clustering.i
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ SHARED_RANDOM_INTERFACE(shogun::DistanceMachine)
%shared_ptr(shogun::GMM)

/* Include Class Headers to make them visible from within the target language */
%include <shogun/machine/Machine.h>
%include <shogun/machine/NonParametricMachine.h>
%include <shogun/machine/DistanceMachine.h>
RANDOM_INTERFACE(DistanceMachine)
%include <shogun/clustering/GMM.h>
1 change: 1 addition & 0 deletions src/interfaces/swig/Machine.i
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ SHARED_RANDOM_INTERFACE(shogun::Machine)
%shared_ptr(shogun::LinearMachine)
%shared_ptr(shogun::DistanceMachine)
%shared_ptr(shogun::IterativeMachine<LinearMachine>)
%shared_ptr(shogun::NonParametricMachine)
4 changes: 2 additions & 2 deletions src/shogun/clustering/GMM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,8 +774,8 @@ SGMatrix<float64_t> GMM::alpha_init(SGMatrix<float64_t> init_means)
SGVector<float64_t> label_num(init_means.num_cols);
linalg::range_fill(label_num);

auto knn=std::make_shared<KNN>(1, std::make_shared<EuclideanDistance>(), std::make_shared<MulticlassLabels>(label_num));
knn->train(std::make_shared<DenseFeatures<float64_t>>(init_means));
auto knn=std::make_shared<KNN>(1, std::make_shared<EuclideanDistance>());
knn->train(std::make_shared<DenseFeatures<float64_t>>(init_means), std::make_shared<MulticlassLabels>(label_num));
auto init_labels = knn->apply(features)->as<MulticlassLabels>();

SGMatrix<float64_t> alpha(num_vectors, index_t(m_components.size()));
Expand Down
1 change: 1 addition & 0 deletions src/shogun/clustering/KMeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ void KMeans::Lloyd_KMeans(SGMatrix<float64_t> centers, int32_t num_centers)

bool KMeans::train_machine(std::shared_ptr<Features> data)
{
m_features = data;
initialize_training(data);
Lloyd_KMeans(cluster_centers, k);
compute_cluster_variances();
Expand Down
21 changes: 6 additions & 15 deletions src/shogun/machine/DistanceMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
using namespace shogun;

DistanceMachine::DistanceMachine()
: Machine()
: NonParametricMachine()
{
init();
}
Expand Down Expand Up @@ -99,6 +99,7 @@ void DistanceMachine::distances_rhs(SGVector<float64_t>& result, index_t idx_b1,

std::shared_ptr<MulticlassLabels> DistanceMachine::apply_multiclass(std::shared_ptr<Features> data)
{

if (data)
{
/* set distance features to given ones and apply to all */
Expand All @@ -118,30 +119,20 @@ std::shared_ptr<MulticlassLabels> DistanceMachine::apply_multiclass(std::shared_
return apply_multiclass(all);
}
return NULL;

}

float64_t DistanceMachine::apply_one(int32_t num)
{
/* number of clusters */
auto lhs=distance->get_lhs();
const auto& lhs=distance->get_lhs();
int32_t num_clusters=lhs->get_num_vectors();

/* (multiple threads) calculate distances to all cluster centers */
SGVector<float64_t> dists(num_clusters);
distances_lhs(dists, 0, num_clusters-1, num);

/* find cluster index with smallest distance */
float64_t result=dists.vector[0];
index_t best_index=0;
for (index_t i=1; i<num_clusters; ++i)
{
if (dists[i]<result)
{
result=dists[i];
best_index=i;
}
}

const auto result_iter = std::min_element(dists.begin(), dists.end());
index_t best_index = std::distance(dists.begin(), result_iter);
/* implicit cast */
return best_index;
}
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/machine/DistanceMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include <shogun/lib/common.h>
#include <shogun/machine/Machine.h>

#include <shogun/machine/NonParametricMachine.h>

namespace shogun
{
Expand All @@ -24,7 +24,7 @@ namespace shogun
*
* A distance machine is based on an a-priori chosen distance.
*/
class DistanceMachine : public Machine
class DistanceMachine : public NonParametricMachine
{
public:
/** default constructor */
Expand Down Expand Up @@ -81,7 +81,7 @@ class DistanceMachine : public Machine
* @param data (test)data to be classified
* @return classified labels
*/
virtual std::shared_ptr<MulticlassLabels> apply_multiclass(std::shared_ptr<Features> data=NULL);
virtual std::shared_ptr<MulticlassLabels> apply_multiclass(std::shared_ptr<Features> data);

/** Apply machine to one example.
* Cluster index with smallest distance to to be classified element is
Expand Down
5 changes: 5 additions & 0 deletions src/shogun/machine/Machine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ bool Machine::train(std::shared_ptr<Features> data)
return result;
}

/**
 * Train the machine on the given features and labels.
 *
 * The labels are handed to set_labels() first; training itself is then
 * delegated to the single-argument train() overload.
 *
 * @param data training data
 * @param lab training labels
 * @return whether training was successful
 */
bool Machine::train(
	const std::shared_ptr<Features>& data,
	const std::shared_ptr<Labels>& lab)
{
	set_labels(lab);

	const bool trained = train(data);
	return trained;
}

void Machine::set_labels(std::shared_ptr<Labels> lab)
{
if (lab != NULL)
Expand Down
9 changes: 9 additions & 0 deletions src/shogun/machine/Machine.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ class Machine : public StoppableSGObject
*/
virtual bool train(std::shared_ptr<Features> data=NULL);

/** train machine
*
* @param data training data
* @param lab training labels
*
* @return whether training was successful
*/
virtual bool train(const std::shared_ptr<Features>& data, const std::shared_ptr<Labels>& lab);

/** apply machine to data
* if data is not specified apply to the current features
*
Expand Down
44 changes: 44 additions & 0 deletions src/shogun/machine/NonParametricMachine.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Yuhui Liu
*/

#ifndef NONPARAMETRICMACHINE_H_
#define NONPARAMETRICMACHINE_H_

#include <shogun/machine/Machine.h>

namespace shogun
{

	/** @brief Machine that keeps a handle to its training features.
	 *
	 * The class adds a single protected member, m_features, on top of
	 * Machine and registers it as the read-only parameter
	 * "features_train". Derived classes are expected to assign the member
	 * themselves.
	 */
	class NonParametricMachine : public Machine
	{
	public:
		/** default constructor
		 *
		 * Registers m_features as the read-only parameter
		 * "features_train".
		 */
		NonParametricMachine() : Machine()
		{
			// TODO: when all refactor is done, m_labels should be removed
			// from the Machine class and registered here instead:
			// SG_ADD(
			//     &m_labels, "labels", "labels used in train machine algorithm",
			//     ParameterProperties::READONLY);
			SG_ADD(
			    &m_features, "features_train",
			    "Training features of nonparametric model",
			    ParameterProperties::READONLY);
		}

		/** destructor */
		virtual ~NonParametricMachine() = default;

		/** @return name of the object, "NonParametricMachine" */
		const char* get_name() const override
		{
			return "NonParametricMachine";
		}

	protected:
		/** training features, exposed as parameter "features_train" */
		std::shared_ptr<Features> m_features;

		// TODO: when all refactor is done, we should use this m_labels
		// std::shared_ptr<Labels> m_labels;
	};
} // namespace shogun
#endif // NONPARAMETRICMACHINE_H_
3 changes: 2 additions & 1 deletion src/shogun/metric/LMNNImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ SGMatrix<index_t> LMNNImpl::find_target_nn(const std::shared_ptr<DenseFeatures<f
auto features_slice = std::make_shared<DenseFeatures<float64_t>>(slice_mat);
auto labels_slice = std::make_shared<MulticlassLabels>(labels_vec);

auto knn = std::make_shared<KNN>(k+1, std::make_shared<EuclideanDistance>(features_slice, features_slice), labels_slice);
auto knn = std::make_shared<KNN>(k+1, std::make_shared<EuclideanDistance>());
knn->train(features_slice, labels_slice);
SGMatrix<int32_t> target_slice = knn->nearest_neighbors();
// sanity check
ASSERT(target_slice.num_rows==k+1 && target_slice.num_cols==slice_size)
Expand Down
Loading

0 comments on commit c8b46a1

Please sign in to comment.