From ace10ab15d30d6d567d9f2e960fba24e587c7611 Mon Sep 17 00:00:00 2001 From: Andrew DalPino Date: Thu, 6 Aug 2020 18:19:23 -0500 Subject: [PATCH] Update to Rubix ML 0.1.0 --- README.md | 28 ++++++++++++++++++++++++++-- composer.json | 3 +-- predict.php | 8 +++----- train.php | 11 +++++------ 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 8c90949..4646ea9 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ Since Gradient Boost implements the [Verbose](https://docs.rubixml.com/en/latest ```php use Rubix\ML\Other\Loggers\Screen; -$estimator->setLogger(new Screen('housing')); +$estimator->setLogger(new Screen()); ``` ### Training @@ -110,7 +110,7 @@ $estimator->train($dataset); ``` ### Validation Score and Loss -During training, the learner will record the validation score and the training loss at each iteration or *epoch*. The validation score is calculated using the default [RMSE](https://docs.rubixml.com/en/latest/cross-validation/metrics/rmse.html) metric on a hold out portion of the training set. Contrariwise, the training loss is the value of the cost function (in this case the L2 or *quadratic* loss) computed over the training data. We can visualize the training progress by plotting these metrics. To export the scores and losses you can call the additional `scores()` and `steps()` methods on the learner instance. +During training, the learner will record the validation score and the training loss at each iteration or *epoch*. The validation score is calculated using the default [RMSE](https://docs.rubixml.com/en/latest/cross-validation/metrics/rmse.html) metric on a hold out portion of the training set. Contrariwise, the training loss is the value of the cost function (in this case the L2 or *quadratic* loss) computed over the training data. We can visualize the training progress by plotting these metrics. 
To output the scores and losses you can call the additional `scores()` and `steps()` methods on the learner instance. ```php $scores = $estimator->scores(); @@ -118,6 +118,19 @@ $scores = $estimator->scores(); $losses = $estimator->steps(); ``` +Then we can export the data to a CSV file using an [Unlabeled](https://docs.rubixml.com/en/latest/datasets/unlabeled.html) dataset object. The `array_transpose()` function takes a 2-dimensional array and changes the rows to columns and vice versa. + +```php +use Rubix\ML\Datasets\Unlabeled; +use function Rubix\ML\array_transpose; + +Unlabeled::build(array_transpose([$scores, $losses])) + ->toCSV(['scores', 'losses']) + ->write('progress.csv'); + +``` + + Here is an example of what the validation score and training loss look like when plotted. You can plot the values yourself by importing the `progress.csv` file into your favorite plotting software. ![R Squared Score](https://raw.githubusercontent.com/RubixML/Housing/master/docs/images/validation-score.svg?sanitize=true) @@ -176,6 +189,17 @@ To obtain the predictions from the model, call the `predict()` method with the d $predictions = $estimator->predict($dataset); ``` +Then we'll use another [Unlabeled](https://docs.rubixml.com/en/latest/datasets/unlabeled.html) dataset to write the IDs and predictions to a CSV file that we'll submit to the competition. + +```php +use Rubix\ML\Datasets\Unlabeled; +use function Rubix\ML\array_transpose; + +Unlabeled::build(array_transpose([$ids, $predictions])) + ->toCSV(['Id', 'SalePrice']) + ->write('predictions.csv'); +``` + Now run the prediction script by calling it from the command line. 
```sh $ php predict.php diff --git a/composer.json b/composer.json index 4d3a43b..4081ea1 100644 --- a/composer.json +++ b/composer.json @@ -20,8 +20,7 @@ ], "require": { "php": ">=7.2", - "league/csv": "^9.5", - "rubix/ml": "^0.1.0-rc2" + "rubix/ml": "^0.1.0" }, "suggest": { "ext-tensor": "For faster training and inference" diff --git a/predict.php b/predict.php index 70f88b7..29d0e33 100644 --- a/predict.php +++ b/predict.php @@ -7,7 +7,6 @@ use Rubix\ML\Transformers\NumericStringConverter; use Rubix\ML\PersistentModel; use Rubix\ML\Persisters\Filesystem; -use League\Csv\Writer; use function Rubix\ML\array_transpose; @@ -28,9 +27,8 @@ $predictions = $estimator->predict($dataset); -$writer = Writer::createFromPath('predictions.csv', 'w+'); - -$writer->insertOne(['Id', 'SalePrice']); -$writer->insertAll(array_transpose([$ids, $predictions])); +Unlabeled::build(array_transpose([$ids, $predictions])) + ->toCSV(['Id', 'SalePrice']) + ->write('predictions.csv'); echo 'Predictions saved to predictions.csv' . PHP_EOL; \ No newline at end of file diff --git a/train.php b/train.php index 7bef275..db0589a 100644 --- a/train.php +++ b/train.php @@ -12,7 +12,7 @@ use Rubix\ML\Regressors\RegressionTree; use Rubix\ML\Persisters\Filesystem; use Rubix\ML\Other\Loggers\Screen; -use League\Csv\Writer; +use Rubix\ML\Datasets\Unlabeled; use function Rubix\ML\array_transpose; @@ -50,7 +50,7 @@ new Filesystem('housing.model', true) ); -$estimator->setLogger(new Screen('housing')); +$estimator->setLogger(new Screen()); echo 'Training ...' . PHP_EOL; @@ -59,10 +59,9 @@ $scores = $estimator->scores(); $losses = $estimator->steps(); -$writer = Writer::createFromPath('progress.csv', 'w+'); - -$writer->insertOne(['score', 'loss']); -$writer->insertAll(array_transpose([$scores, $losses])); +Unlabeled::build(array_transpose([$scores, $losses])) + ->toCSV(['scores', 'losses']) + ->write('progress.csv'); echo 'Progress saved to progress.csv' . PHP_EOL;