Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mldb 2095 boosting and stump #785

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 35 additions & 26 deletions ml/jml/classifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -353,9 +353,6 @@ optimize(const std::vector<Feature> & features)
result.from_features = features;
result.to_features = all_features();

if (!optimization_supported())
return result;

map<Feature, int> & feature_map = result.feature_to_optimized_index;
for (unsigned i = 0; i < result.to_features.size(); ++i) {
feature_map[result.to_features[i]] = i;
Expand Down Expand Up @@ -402,6 +399,13 @@ optimize(const std::vector<Feature> & features)

result.initialized = true;

// TODO - what to do here if optimize_impl returns
// false. Currently, we expect the returned Optimization_Info
// to be initialized anyway so that we can call the optimized
// predict methods. I suggest we change the signature of
// optimize_impl to void so that we don't allow it to fail
// and we keep the predict_is_optimized call to decide if
// we should default to the non-optimized predict.
optimize_impl(result);

return result;
Expand All @@ -411,8 +415,9 @@ Optimization_Info
Classifier_Impl::
optimize(const Feature_Set & feature_set)
{
if (!optimization_supported())
return Optimization_Info();
// verification that optimization is
// supported is delegated to the other
// optimize overload

// Extract the list of features, and continue
vector<Feature> features;
Expand All @@ -429,13 +434,6 @@ optimize(const Feature_Set & feature_set)
return optimize(features);
}

bool
Classifier_Impl::
optimization_supported() const
{
return false;
}

bool
Classifier_Impl::
predict_is_optimized() const
Expand All @@ -449,11 +447,12 @@ predict(const Feature_Set & features,
const Optimization_Info & info,
PredictionContext * context) const
{
if (!predict_is_optimized() || !info) return predict(features, context);
if (!predict_is_optimized()) return predict(features, context);

ExcAssert(info);
float fv[info.features_out()];
info.apply(features, fv);

return optimized_predict_impl(fv, info, context);
}

Expand All @@ -463,6 +462,7 @@ predict(const std::vector<float> & features,
const Optimization_Info & info,
PredictionContext * context) const
{
ExcAssert(info);
float fv[info.features_out()];
info.apply(features, fv);

Expand All @@ -475,6 +475,7 @@ predict(const float * features,
const Optimization_Info & info,
PredictionContext * context) const
{
ExcAssert(info);
float fv[info.features_out()];
info.apply(features, fv);

Expand All @@ -488,8 +489,11 @@ predict(int label,
const Optimization_Info & info,
PredictionContext * context) const
{
if (!predict_is_optimized() || !info) return predict(label, features, context);

if (!predict_is_optimized() || !info)
return predict(label, features, context);

ExcAssert(info);
float fv[info.features_out()];

info.apply(features, fv);
Expand All @@ -504,10 +508,10 @@ predict(int label,
const Optimization_Info & info,
PredictionContext * context) const
{
if (!predict_is_optimized() || !info) {

if (!predict_is_optimized()) {
ExcAssert(info);
// Convert to standard feature set, then call classical predict
Dense_Feature_Set fset(make_unowned_sp(info.to_features),
Dense_Feature_Set fset(make_unowned_sp(info.from_features),
&features[0]);

return predict(label, fset);
Expand All @@ -527,10 +531,10 @@ predict(int label,
const Optimization_Info & info,
PredictionContext * context) const
{
if (!predict_is_optimized() || !info) {

if (!predict_is_optimized()) {
ExcAssert(info);
// Convert to standard feature set, then call classical predict
Dense_Feature_Set fset(make_unowned_sp(info.to_features),
Dense_Feature_Set fset(make_unowned_sp(info.from_features),
features);

return predict(label, fset, context);
Expand Down Expand Up @@ -558,9 +562,10 @@ optimized_predict_impl(const float * features,
{
// If the classifier implemented optimized predict, then this would have
// been overridden.

ExcAssert(info);

// Convert to standard feature set, then call classical predict
Dense_Feature_Set fset(make_unowned_sp(info.to_features),
Dense_Feature_Set fset(make_unowned_sp(info.from_features),
features);

return predict(fset, context);
Expand All @@ -574,6 +579,7 @@ optimized_predict_impl(const float * features,
double weight,
PredictionContext * context) const
{
ExcAssert(info);
Label_Dist result = optimized_predict_impl(features, info, context);
for (unsigned i = 0; i < result.size(); ++i) {
accum[i] += weight * result[i];
Expand All @@ -589,9 +595,10 @@ optimized_predict_impl(int label,
{
// If the classifier implemented optimized predict, then this would have
// been overridden.

ExcAssert(info);

// Convert to standard feature set, then call classical predict
Dense_Feature_Set fset(make_unowned_sp(info.to_features),
Dense_Feature_Set fset(make_unowned_sp(info.from_features),
features);

return predict(label, fset, context);
Expand Down Expand Up @@ -637,7 +644,9 @@ struct Accuracy_Job_Info {

//cerr << "x = " << x << " w = " << w << endl;

distribution<float> result = classifier.predict(data[x], opt_info);
distribution<float> result = opt_info ?
classifier.predict(data[x], opt_info) :
classifier.predict(data[x]);

if (regression_problem) {
float correct = data[x][classifier.predicted()];
Expand Down
14 changes: 8 additions & 6 deletions ml/jml/classifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,19 +324,21 @@ class Classifier_Impl {
virtual Optimization_Info optimize(const std::vector<Feature> & features);
virtual Optimization_Info optimize(const Feature_Set & features);

/** Is optimization supported by the classifier? */
virtual bool optimization_supported() const;

/** Is predict optimized? Default returns false; those classifiers which
a) support optimized predict and b) have had optimize_predict() called
/** Is predict optimized? Default returns false; those classifiers which
a) support optimized predict and b) have had optimize() called
will override to return true in this case.
*/
virtual bool predict_is_optimized() const;


/** Methods to call for the optimized predict. Will check if
predict_is_optimized() and if true, will call the optimized methods.
Otherwise, they fall back to the non-optimized versions. */
Otherwise, they fall back to the non-optimized versions.

It is safe to call these methods with the optimization info
returned by the optimize methods even if optimization is not
supported by the classifier.
*/
virtual Label_Dist predict(const Feature_Set & features,
const Optimization_Info & info,
PredictionContext * context = 0) const;
Expand Down
7 changes: 0 additions & 7 deletions ml/jml/committee.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,6 @@ predict(const Feature_Set & features,
return result;
}

bool
Committee::
optimization_supported() const
{
return true;
}

bool
Committee::
predict_is_optimized() const
Expand Down
4 changes: 0 additions & 4 deletions ml/jml/committee.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,6 @@ class Committee : public Classifier_Impl {
predict(const Feature_Set & features,
PredictionContext * context = 0) const;


/** Is optimization supported by the classifier? */
virtual bool optimization_supported() const;

/** Is predict optimized? Default returns false; those classifiers which
a) support optimized predict and b) have had optimize_predict() called
will override to return true in this case.
Expand Down
7 changes: 0 additions & 7 deletions ml/jml/decision_tree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,6 @@ predict(const Feature_Set & features,
return results;
}

bool
Decision_Tree::
optimization_supported() const
{
return true;
}

bool
Decision_Tree::
predict_is_optimized() const
Expand Down
5 changes: 1 addition & 4 deletions ml/jml/decision_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,7 @@ class Decision_Tree : public Classifier_Impl {
predict(const Feature_Set & features,
PredictionContext * context = 0) const;

/** Is optimization supported by the classifier? */
virtual bool optimization_supported() const;

/** Is predict optimized? Default returns false; those classifiers which
/** Is predict optimized? Default returns false; those classifiers which
a) support optimized predict and b) have had optimize_predict() called
will override to return true in this case.
*/
Expand Down
7 changes: 0 additions & 7 deletions ml/jml/decoded_classifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,6 @@ predict(int label, const Feature_Set & features,
return predict(features)[label];
}

bool
Decoded_Classifier::
optimization_supported() const
{
return classifier_.impl->optimization_supported();
}

bool
Decoded_Classifier::
predict_is_optimized() const
Expand Down
3 changes: 0 additions & 3 deletions ml/jml/decoded_classifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,6 @@ class Decoded_Classifier : public Classifier_Impl {

using Classifier_Impl::predict;

/** Is optimization supported by the classifier? */
virtual bool optimization_supported() const;

/** Is predict optimized? Default returns false; those classifiers which
a) support optimized predict and b) have had optimize_predict() called
will override to return true in this case.
Expand Down
20 changes: 20 additions & 0 deletions ml/jml/dense_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1244,5 +1244,25 @@ Register_Factory<Feature_Space, Dense_Feature_Space>
DFS_REG("DENSE_FEATURE_SPACE");


void apply_permutation_in_place(float * vec,
                                const std::vector<std::size_t>& p) {
    // Reorder the raw float array `vec` (length p.size()) in place so that
    // afterwards vec[i] holds the element previously at position p[i].
    // Walks each cycle of the permutation once, flagging visited slots so
    // every element is moved exactly once: O(n) time, O(n) flag storage.
    std::vector<bool> visited(p.size(), false);
    for (std::size_t start = 0; start < p.size(); ++start) {
        if (visited[start])
            continue;
        visited[start] = true;
        // Chase the cycle that begins at `start`.
        std::size_t hole = start;
        for (std::size_t next = p[start]; next != start; next = p[next]) {
            std::swap(vec[hole], vec[next]);
            visited[next] = true;
            hole = next;
        }
    }
}


} // namespace ML

67 changes: 59 additions & 8 deletions ml/jml/dense_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,33 +367,84 @@ class Dense_Training_Data : public Training_Data {
array.
*/


template <typename T, typename Compare>
std::vector<std::size_t> sort_permutation(const std::vector<T>& vec,
                                          Compare& compare) {
    // Compute the index permutation that sorts `vec` under `compare`:
    // the returned perm is such that vec[perm[0]], vec[perm[1]], ... is
    // ordered by `compare`.  `vec` itself is left untouched.
    std::vector<std::size_t> perm(vec.size());
    for (std::size_t k = 0; k < perm.size(); ++k)
        perm[k] = k;
    // Compare indices by the elements they refer to.
    auto index_less = [&vec, &compare](std::size_t a, std::size_t b) {
        return compare(vec[a], vec[b]);
    };
    std::sort(perm.begin(), perm.end(), index_less);
    return perm;
}

template <typename T>
void apply_permutation_in_place(std::vector<T>& vec,
                                const std::vector<std::size_t>& p) {
    // Reorder `vec` in place so that afterwards vec[i] holds the element
    // previously at position p[i] (i.e. applies the permutation produced
    // by sort_permutation).  Each cycle of the permutation is walked once;
    // visited slots are flagged so no element moves twice.
    std::vector<bool> visited(vec.size(), false);
    for (std::size_t start = 0; start < vec.size(); ++start) {
        if (visited[start])
            continue;
        visited[start] = true;
        // Chase the cycle that begins at `start`.
        std::size_t hole = start;
        for (std::size_t next = p[start]; next != start; next = p[next]) {
            std::swap(vec[hole], vec[next]);
            visited[next] = true;
            hole = next;
        }
    }
}

void apply_permutation_in_place(float * vec,
const std::vector<std::size_t>& p);

class Dense_Feature_Set : public Feature_Set {
public:
Dense_Feature_Set(std::shared_ptr<const std::vector<Feature> > features,
const float * values)
: features(features), values(values)
Dense_Feature_Set(std::shared_ptr<const std::vector<Feature> > features_,
const float * values_) :
is_sorted(false)
{
values = new float[features_->size()]();
for (int i = 0; i < features_->size(); i++)
values[i] = *(values_ + i);
features = *features_;
sort();
}

virtual ~Dense_Feature_Set() {}

virtual std::tuple<const Feature *, const float *, int, int, size_t>
get_data(bool need_sorted = false) const
{
//if (need_sorted && !is_sorted) sort();

return std::make_tuple
(&(*features)[0], values, sizeof(Feature), sizeof(float),
features->size());
(&features[0], values, sizeof(Feature), sizeof(float),
features.size());
}

virtual void sort()
{
auto compare = [](Feature const& a, Feature const& b){
return a < b;
};

auto perm = sort_permutation(features, compare);

apply_permutation_in_place(features, perm);
apply_permutation_in_place(values, perm);

is_sorted = true;
}

std::shared_ptr<const std::vector<Feature> > features;
const float * values;
std::vector<Feature> features;
float *values;
bool is_sorted;

virtual Dense_Feature_Set * make_copy() const;
};


} // namespace ML
Loading