Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NonParametricMachine class #5055

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions doc/ipython-notebooks/classification/Classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -444,22 +444,20 @@
"number_of_neighbors = 10\n",
"\n",
"distances_linear = sg.create_distance('EuclideanDistance')\n",
"distances_linear.init(shogun_feats_linear, shogun_feats_linear)\n",
"knn_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_linear, labels=shogun_labels_linear)\n",
"knn_linear.train()\n",
"knn_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_linear)\n",
"knn_linear.train(shogun_feats_linear, shogun_labels_linear)\n",
"classifiers_linear.append(knn_linear)\n",
"classifiers_names.append(\"Nearest Neighbors\")\n",
"fadings.append(False)\n",
"\n",
"plt.figure(figsize=(15,5))\n",
"plt.subplot(121)\n",
"plt.title(\"Nearest Neighbors - Linear Features\")\n",
"plot_model(plt,knn_linear,feats_linear,labels_linear,fading=False)\n",
"plot_model(plt,knn_linear,feats_linear,labels_linear,fading=True)\n",
"\n",
"distances_non_linear = sg.create_distance('EuclideanDistance')\n",
"distances_non_linear.init(shogun_feats_non_linear, shogun_feats_non_linear)\n",
"knn_non_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_non_linear, labels=shogun_labels_non_linear)\n",
"knn_non_linear.train()\n",
"knn_non_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_non_linear)\n",
"knn_non_linear.train(shogun_feats_non_linear, shogun_labels_non_linear)\n",
"classifiers_non_linear.append(knn_non_linear)\n",
"\n",
"plt.subplot(122)\n",
Expand Down Expand Up @@ -663,9 +661,16 @@
"plot_binary_data(plt,feats_non_linear, labels_non_linear)\n",
"\n",
"for i in range(0,10):\n",
" plt.subplot(2,11,13+i)\n",
" plot_model(plt,classifiers_non_linear[i],feats_non_linear,labels_non_linear,fading=fadings[i])"
" plt.subplot(2,11,13+i)\n",
" plot_model(plt,classifiers_non_linear[i],feats_non_linear,labels_non_linear,fading=fadings[i])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -684,7 +689,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.6.9"
}
},
"nbformat": 4,
Expand Down
9 changes: 4 additions & 5 deletions doc/ipython-notebooks/multiclass/KNN.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -286,19 +286,18 @@
" labels.add_subset(idx_train)\n",
"\n",
" dist = sg.create_distance('EuclideanDistance')\n",
" dist.init(feats, feats)\n",
" knn = sg.create_machine(\"KNN\", k=k, distance=dist, labels=labels)\n",
" knn = sg.create_machine(\"KNN\", k=k, distance=dist)\n",
" #knn.set_store_model_features(True)\n",
" #FIXME: causes SEGFAULT\n",
" if use_cover_tree:\n",
" continue\n",
" # knn.put('knn_solver', \"KNN_COVER_TREE\")\n",
" else:\n",
" knn.put('knn_solver', \"KNN_BRUTE\")\n",
" knn.train()\n",
" knn.train(feats, labels)\n",
"\n",
" evaluator = sg.create_evaluation(\"MulticlassAccuracy\")\n",
" pred = knn.apply()\n",
" pred = knn.apply(feats)\n",
" acc_train[i, j] = evaluator.evaluate(pred, labels)\n",
"\n",
" feats.remove_subset()\n",
Expand Down Expand Up @@ -490,7 +489,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.6.9"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions examples/meta/src/evaluation/clustering.sg.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ RealMatrix centers = kmeans.get_real_matrix("cluster_centers")
Labels labels_centroids = create_labels(f_labels_centroids)
Features fea_centroids = create_features(centers)
Distance d2 = create_distance("EuclideanDistance", lhs=fea_centroids, rhs=fea_centroids)
Machine knn = create_machine("KNN", k=1, distance=d2, labels=labels_centroids)
knn.train()
Machine knn = create_machine("KNN", k=1, distance=d2)
knn.train(fea_centroids, labels_centroids)
Labels gnd_hat = knn.apply(features_train)
#![assign_labels]

Expand Down
6 changes: 3 additions & 3 deletions examples/meta/src/multiclass/k_nearest_neighbours.sg.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ MulticlassLabels labels_test(f_labels_test)
#![create_features]

#![choose_distance]
Distance d = create_distance("EuclideanDistance", lhs=features_train, rhs=features_train)
Distance d = create_distance("EuclideanDistance")
#![choose_distance]

#![create_instance]
int k = 3
KNN knn(k, d, labels_train)
KNN knn(k, d)
#![create_instance]

#![train_and_apply]
knn.train()
knn.train(features_train, labels_train)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this API is much better

MulticlassLabels labels_predict = knn.apply_multiclass(features_test)
#![train_and_apply]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ Distance lmnn_distance = lmnn.get_distance()
#![train_metric]

#![train_and_apply]
KNN knn(k, lmnn_distance,labels_train)
knn.train()
KNN knn(k, lmnn_distance)
knn.train(features_train, labels_train)
MulticlassLabels labels_predict = knn.apply_multiclass(features_test)
#![train_and_apply]

Expand Down
4 changes: 2 additions & 2 deletions examples/undocumented/python/evaluation_clustering_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def assign_labels(data, centroids, ncenters):
fea_centroids = sg.create_features(centroids)
distance = sg.create_distance('EuclideanDistance')
distance.init(fea_centroids, fea_centroids)
knn = sg.create_machine("KNN", k=1, distance=distance, labels=labels)
knn.train()
knn = sg.create_machine("KNN", k=1, distance=distance)
knn.train(fea_centroids, labels)
return knn.apply(data)

def evaluation_clustering_simple (n_data=100, sqrt_num_blobs=4, distance=5):
Expand Down
4 changes: 2 additions & 2 deletions examples/undocumented/python/metric_lmnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def metric_lmnn(train_fname=traindat,test_fname=testdat,label_train_fname=label_
lmnn_distance=lmnn.get_distance()

# perform classification with KNN
knn=KNN(k,lmnn_distance,labels)
knn.train()
knn=KNN(k,lmnn_distance)
knn.train(feats_train, labels)
output=knn.apply(feats_test).get_labels()

return lmnn,output
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/swig/Classifier.i
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@

/* Include Class Headers to make them visible from within the target language */
%include <shogun/machine/Machine.h>
%include <shogun/machine/NonParametricMachine.h>
%include <shogun/machine/IterativeMachine.h>
%include <shogun/machine/FeatureDispatchCRTP.h>
%include <shogun/machine/KernelMachine.h>
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/swig/Clustering.i
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ SHARED_RANDOM_INTERFACE(shogun::DistanceMachine)

/* Include Class Headers to make them visible from within the target language */
%include <shogun/machine/Machine.h>
%include <shogun/machine/NonParametricMachine.h>
%include <shogun/machine/DistanceMachine.h>
RANDOM_INTERFACE(DistanceMachine)
%include <shogun/clustering/KMeansBase.h>
Expand Down
2 changes: 1 addition & 1 deletion src/interfaces/swig/Machine.i
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ SHARED_RANDOM_INTERFACE(shogun::Machine)
%shared_ptr(shogun::LinearMachine)
%shared_ptr(shogun::DistanceMachine)
%shared_ptr(shogun::IterativeMachine<LinearMachine>)

%shared_ptr(shogun::NonParametricMachine)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You also need to include %include <shogun/machine/NonParametricMachine.h>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After this

4 changes: 2 additions & 2 deletions src/shogun/clustering/GMM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,8 +774,8 @@ SGMatrix<float64_t> GMM::alpha_init(SGMatrix<float64_t> init_means)
SGVector<float64_t> label_num(init_means.num_cols);
linalg::range_fill(label_num);

auto knn=std::make_shared<KNN>(1, std::make_shared<EuclideanDistance>(), std::make_shared<MulticlassLabels>(label_num));
knn->train(std::make_shared<DenseFeatures<float64_t>>(init_means));
auto knn=std::make_shared<KNN>(1, std::make_shared<EuclideanDistance>());
knn->train(std::make_shared<DenseFeatures<float64_t>>(init_means), std::make_shared<MulticlassLabels>(label_num));
auto init_labels = knn->apply(features)->as<MulticlassLabels>();

SGMatrix<float64_t> alpha(num_vectors, index_t(m_components.size()));
Expand Down
1 change: 1 addition & 0 deletions src/shogun/clustering/KMeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ void KMeans::Lloyd_KMeans(SGMatrix<float64_t> centers, int32_t num_centers)

bool KMeans::train_machine(std::shared_ptr<Features> data)
{
m_features = data;
initialize_training(data);
Lloyd_KMeans(cluster_centers, k);
compute_cluster_variances();
Expand Down
21 changes: 6 additions & 15 deletions src/shogun/machine/DistanceMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
using namespace shogun;

/** Default constructor.
 *
 * DistanceMachine derives from NonParametricMachine, so that is the base
 * that must be initialised here; the remaining member setup is done by
 * init().
 */
DistanceMachine::DistanceMachine()
	: NonParametricMachine()
{
	init();
}
Expand Down Expand Up @@ -99,6 +99,7 @@ void DistanceMachine::distances_rhs(SGVector<float64_t>& result, index_t idx_b1,

std::shared_ptr<MulticlassLabels> DistanceMachine::apply_multiclass(std::shared_ptr<Features> data)
{

if (data)
{
/* set distance features to given ones and apply to all */
Expand All @@ -118,30 +119,20 @@ std::shared_ptr<MulticlassLabels> DistanceMachine::apply_multiclass(std::shared_
return apply_multiclass(all);
}
return NULL;

}

/** Apply the machine to a single example.
 *
 * Computes the distances between example @p num and every vector on the
 * left-hand side of the distance and returns the index of the closest one.
 *
 * @param num index of the example to classify
 * @return index of the lhs vector with the smallest distance, converted to
 * float64_t
 */
float64_t DistanceMachine::apply_one(int32_t num)
{
	/* number of clusters */
	const auto& lhs=distance->get_lhs();
	const int32_t num_clusters=lhs->get_num_vectors();

	/* (multiple threads) calculate distances to all cluster centers */
	SGVector<float64_t> dists(num_clusters);
	distances_lhs(dists, 0, num_clusters-1, num);

	/* find cluster index with smallest distance; on ties the first
	 * (lowest-index) minimum is returned, like the former manual loop */
	const auto result_iter = std::min_element(dists.begin(), dists.end());

	/* std::distance yields a ptrdiff_t; make the narrowing explicit */
	const auto best_index =
		static_cast<index_t>(std::distance(dists.begin(), result_iter));

	/* implicit conversion of the index to float64_t */
	return best_index;
}
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/machine/DistanceMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include <shogun/lib/common.h>
#include <shogun/machine/Machine.h>

#include <shogun/machine/NonParametricMachine.h>

namespace shogun
{
Expand All @@ -24,7 +24,7 @@ namespace shogun
*
* A distance machine is based on an a-priori chosen distance.
*/
class DistanceMachine : public Machine
class DistanceMachine : public NonParametricMachine
{
public:
/** default constructor */
Expand Down Expand Up @@ -81,7 +81,7 @@ class DistanceMachine : public Machine
* @param data (test)data to be classified
* @return classified labels
*/
virtual std::shared_ptr<MulticlassLabels> apply_multiclass(std::shared_ptr<Features> data=NULL);
virtual std::shared_ptr<MulticlassLabels> apply_multiclass(std::shared_ptr<Features> data);

/** Apply machine to one example.
* Cluster index with smallest distance to the to-be-classified element is
Expand Down
5 changes: 5 additions & 0 deletions src/shogun/machine/Machine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ bool Machine::train(std::shared_ptr<Features> data)
return result;
}

bool Machine::train(const std::shared_ptr<Features>& data, const std::shared_ptr<Labels>& lab){
set_labels(lab);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think like this is fine for now!

return train(data);
}

void Machine::set_labels(std::shared_ptr<Labels> lab)
{
if (lab != NULL)
Expand Down
9 changes: 9 additions & 0 deletions src/shogun/machine/Machine.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ class Machine : public StoppableSGObject
*/
virtual bool train(std::shared_ptr<Features> data=NULL);

/** train machine on the given data with the given labels
 *
 * Stores the labels via set_labels() and then calls train(data).
 *
 * @param data training data
 * @param lab training labels
 *
 * @return whether training was successful
 */
virtual bool train(const std::shared_ptr<Features>& data, const std::shared_ptr<Labels>& lab);

/** apply machine to data
* if data is not specified apply to the current features
*
Expand Down
44 changes: 44 additions & 0 deletions src/shogun/machine/NonParametricMachine.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Yuhui Liu
*/

#ifndef NONPARAMETRICMACHINE_H_
#define NONPARAMETRICMACHINE_H_

#include <shogun/machine/Machine.h>

namespace shogun
{

	/** @brief Base class for non-parametric machines.
	 *
	 * Holds the training features of the model in m_features and registers
	 * them as the read-only parameter "features_train".
	 */
	class NonParametricMachine : public Machine
	{
	public:
		/** default constructor; registers the training features parameter */
		NonParametricMachine() : Machine()
		{
			// TODO: when all refactor is done, m_labels should be removed
			// from Machine class
			// SG_ADD(
			//    &m_labels, "labels", "labels used in train machine algorithm",
			//    ParameterProperties::READONLY);
			SG_ADD(
				&m_features, "features_train",
				"Training features of nonparametric model",
				ParameterProperties::READONLY);
		}

		/** destructor */
		virtual ~NonParametricMachine() = default;

		/** @return name of the class */
		const char* get_name() const override
		{
			return "NonParametricMachine";
		}

	protected:
		/** training features of the nonparametric model */
		std::shared_ptr<Features> m_features;

		// TODO
		// when all refactor is done, we should use this m_labels
		// std::shared_ptr<Labels> m_labels;
	};
} // namespace shogun
#endif
3 changes: 2 additions & 1 deletion src/shogun/metric/LMNNImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ SGMatrix<index_t> LMNNImpl::find_target_nn(const std::shared_ptr<DenseFeatures<f
auto features_slice = std::make_shared<DenseFeatures<float64_t>>(slice_mat);
auto labels_slice = std::make_shared<MulticlassLabels>(labels_vec);

auto knn = std::make_shared<KNN>(k+1, std::make_shared<EuclideanDistance>(features_slice, features_slice), labels_slice);
auto knn = std::make_shared<KNN>(k+1, std::make_shared<EuclideanDistance>());
knn->train(features_slice, labels_slice);
SGMatrix<int32_t> target_slice = knn->nearest_neighbors();
// sanity check
ASSERT(target_slice.num_rows==k+1 && target_slice.num_cols==slice_size)
Expand Down
Loading