Source code for pmlearn.gaussian_process.tests.test_gpr

"""Testing for Gaussian process regression """

# Authors: Daniel Emaasit <daniel.emaasit@gmail.com>
#
# License: BSD 3 clause

import pytest
import numpy.testing as npt
import pandas.testing as pdt
import shutil
import tempfile

import numpy as np
import pymc3 as pm
from pymc3 import summary
from sklearn.gaussian_process import \
    GaussianProcessRegressor as skGaussianProcessRegressor
from sklearn.model_selection import train_test_split


from pmlearn.exceptions import NotFittedError
from pmlearn.gaussian_process import (GaussianProcessRegressor,
                                      StudentsTProcessRegressor,
                                      SparseGaussianProcessRegressor)


[docs]class TestGaussianProcessRegressor(object):

[docs]    def setup_method(self):
        self.num_pred = 1
        self.num_training_samples = 300

        self.length_scale = 1.0
        self.signal_variance = 0.1
        self.noise_variance = 0.1

        X = np.linspace(start=0, stop=10,
                        num=self.num_training_samples)[:, None]

        cov_func = self.signal_variance ** 2 * pm.gp.cov.ExpQuad(
            1, self.length_scale)
        mean_func = pm.gp.mean.Zero()

        f_true = np.random.multivariate_normal(
            mean_func(X).eval(),
            cov_func(X).eval() + 1e-8 * np.eye(self.num_training_samples),
            1).flatten()
        y = f_true + \
            self.noise_variance * np.random.randn(self.num_training_samples)

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.3)

        self.advi_gpr = GaussianProcessRegressor()
        
        self.test_dir = tempfile.mkdtemp()

[docs]    def teardown_method(self):
        """Tear down
        """
        shutil.rmtree(self.test_dir)


[docs]class TestGaussianProcessRegressorFit(TestGaussianProcessRegressor):
[docs]    def test_advi_fit_returns_correct_model(self):
        # This print statement ensures PyMC3 output won't overwrite
        # the test name
        print('')
        self.advi_gpr.fit(self.X_train, self.y_train,
                          inference_args={"n": 25000})

        npt.assert_equal(self.num_pred, self.advi_gpr.num_pred)
        npt.assert_almost_equal(
            self.signal_variance,
            self.advi_gpr.summary['mean']['signal_variance__0'],
            0)
        npt.assert_almost_equal(
            self.length_scale,
            self.advi_gpr.summary['mean']['length_scale__0_0'],
            0)
        npt.assert_almost_equal(
            self.noise_variance,
            self.advi_gpr.summary['mean']['noise_variance__0'],
            0)


[docs]class TestGaussianProcessRegressorPredict(TestGaussianProcessRegressor):
[docs]    def test_predict_returns_predictions(self):
        print('')
        self.advi_gpr.fit(self.X_train, self.y_train,
                          inference_args={"n": 25000})
        preds = self.advi_gpr.predict(self.X_test)
        npt.assert_equal(self.y_test.shape, preds.shape)

[docs]    def test_predict_returns_mean_predictions_and_std(self):
        print('')
        self.advi_gpr.fit(self.X_train, self.y_train,
                          inference_args={"n": 25000})
        preds, stds = self.advi_gpr.predict(self.X_test, return_std=True)
        npt.assert_equal(self.y_test.shape, preds.shape)
        npt.assert_equal(self.y_test.shape, stds.shape)

[docs]    def test_predict_raises_error_if_not_fit(self):
        print('')
        with pytest.raises(NotFittedError):
            advi_gpr = GaussianProcessRegressor()
            advi_gpr.predict(self.X_train)


[docs]class TestGaussianProcessRegressorScore(TestGaussianProcessRegressor):
[docs]    def test_score_matches_sklearn_performance(self):
        print('')
        sk_gpr = skGaussianProcessRegressor()
        sk_gpr.fit(self.X_train, self.y_train)
        sk_gpr_score = sk_gpr.score(self.X_test, self.y_test)

        self.advi_gpr.fit(self.X_train, self.y_train,
                          inference_args={"n": 25000})
        advi_gpr_score = self.advi_gpr.score(self.X_test, self.y_test)

        npt.assert_almost_equal(sk_gpr_score, advi_gpr_score, 1)


[docs]class TestGaussianProcessRegressorSaveAndLoad(TestGaussianProcessRegressor):
[docs]    def test_save_and_load_work_correctly(self):
        print('')
        self.advi_gpr.fit(self.X_train, self.y_train,
                          inference_args={"n": 25000})
        score1 = self.advi_gpr.score(self.X_test, self.y_test)
        self.advi_gpr.save(self.test_dir)

        gpr2 = GaussianProcessRegressor()
        gpr2.load(self.test_dir)

        npt.assert_equal(self.advi_gpr.inference_type, gpr2.inference_type)
        npt.assert_equal(self.advi_gpr.num_pred, gpr2.num_pred)
        npt.assert_equal(self.advi_gpr.num_training_samples,
                         gpr2.num_training_samples)
        pdt.assert_frame_equal(summary(self.advi_gpr.trace),
                               summary(gpr2.trace))

        score2 = gpr2.score(self.X_test, self.y_test)
        npt.assert_almost_equal(score1, score2, 0)


[docs]class TestStudentsTProcessRegressor(object):

[docs]    def setup_method(self):
        self.num_pred = 1
        self.num_training_samples = 500

        self.length_scale = 1.0
        self.signal_variance = 0.1
        self.noise_variance = 0.1
        self.degrees_of_freedom = 1.0

        X = np.linspace(start=0, stop=10,
                        num=self.num_training_samples)[:, None]

        cov_func = self.signal_variance ** 2 * pm.gp.cov.ExpQuad(
            1, self.length_scale)
        mean_func = pm.gp.mean.Zero()

        f_true = np.random.multivariate_normal(
            mean_func(X).eval(),
            cov_func(X).eval() + 1e-8 * np.eye(self.num_training_samples),
            1).flatten()
        y = f_true + \
            self.noise_variance * \
            np.random.standard_t(self.degrees_of_freedom,
                                 size=self.num_training_samples)

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.3)

        self.advi_stpr = StudentsTProcessRegressor()

        self.test_dir = tempfile.mkdtemp()

[docs]    def tearDown(self):
        shutil.rmtree(self.test_dir)


[docs]class TestStudentsTProcessRegressorFit(TestStudentsTProcessRegressor):
[docs]    def test_advi_fit_returns_correct_model(self):
        # This print statement ensures PyMC3 output won't overwrite
        # the test name
        print('')
        self.advi_stpr.fit(self.X_train, self.y_train,
                           inference_args={"n": 25000})

        npt.assert_equal(self.num_pred, self.advi_stpr.num_pred)
        npt.assert_almost_equal(
            self.signal_variance,
            self.advi_stpr.summary['mean']['signal_variance__0'],
            0)
        npt.assert_almost_equal(
            self.length_scale,
            self.advi_stpr.summary['mean']['length_scale__0_0'],
            0)
        npt.assert_almost_equal(
            self.noise_variance,
            self.advi_stpr.summary['mean']['noise_variance__0'],
            0)


[docs]class TestStudentsTProcessRegressorPredict(TestStudentsTProcessRegressor):
[docs]    def test_predict_returns_predictions(self):
        print('')
        self.advi_stpr.fit(self.X_train, self.y_train,
                           inference_args={"n": 25000})
        preds = self.advi_stpr.predict(self.X_test)
        npt.assert_equal(self.y_test.shape, preds.shape)

[docs]    def test_predict_returns_mean_predictions_and_std(self):
        print('')
        self.advi_stpr.fit(self.X_train, self.y_train,
                           inference_args={"n": 25000})
        preds, stds = self.advi_stpr.predict(self.X_test, return_std=True)
        npt.assert_equal(self.y_test.shape, preds.shape)
        npt.assert_equal(self.y_test.shape, stds.shape)

[docs]    def test_predict_raises_error_if_not_fit(self):
        print('')
        with pytest.raises(NotFittedError):
            advi_stpr = StudentsTProcessRegressor()
            advi_stpr.predict(self.X_train)


[docs]class TestStudentsTProcessRegressorScore(TestStudentsTProcessRegressor):
[docs]    def test_score_matches_sklearn_performance(self):
        print('')
        sk_gpr = skGaussianProcessRegressor()
        sk_gpr.fit(self.X_train, self.y_train)
        sk_gpr_score = sk_gpr.score(self.X_test, self.y_test)

        self.advi_stpr.fit(self.X_train, self.y_train,
                           inference_args={"n": 25000})
        advi_stpr_score = self.advi_stpr.score(self.X_test, self.y_test)

        npt.assert_almost_equal(sk_gpr_score, advi_stpr_score, 0)


[docs]class TestStudentsTProcessRegressorSaveAndLoad(TestStudentsTProcessRegressor):
[docs]    def test_save_and_load_work_correctly(self):
        print('')
        self.advi_stpr.fit(self.X_train, self.y_train,
                           inference_args={"n": 25000})
        score1 = self.advi_stpr.score(self.X_test, self.y_test)
        self.advi_stpr.save(self.test_dir)

        stpr2 = StudentsTProcessRegressor()
        stpr2.load(self.test_dir)

        npt.assert_equal(self.advi_stpr.inference_type, stpr2.inference_type)
        npt.assert_equal(self.advi_stpr.num_pred, stpr2.num_pred)
        npt.assert_equal(self.advi_stpr.num_training_samples,
                         stpr2.num_training_samples)
        pdt.assert_frame_equal(summary(self.advi_stpr.trace),
                               summary(stpr2.trace))

        score2 = stpr2.score(self.X_test, self.y_test)
        npt.assert_almost_equal(score1, score2, 0)


[docs]class TestSparseGaussianProcessRegressor(object):

[docs]    def setup_method(self):
        self.num_pred = 1
        self.num_training_samples = 1000

        self.length_scale = 1.0
        self.signal_variance = 0.1
        self.noise_variance = 0.1

        X = np.linspace(start=0, stop=10,
                        num=self.num_training_samples)[:, None]

        cov_func = self.signal_variance ** 2 * pm.gp.cov.ExpQuad(
            1, self.length_scale)
        mean_func = pm.gp.mean.Zero()

        f_true = np.random.multivariate_normal(
            mean_func(X).eval(),
            cov_func(X).eval() + 1e-8 * np.eye(self.num_training_samples),
            1).flatten()
        y = f_true + \
            self.noise_variance * np.random.randn(self.num_training_samples)

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.3)

        self.advi_sgpr = SparseGaussianProcessRegressor()

        self.test_dir = tempfile.mkdtemp()

[docs]    def teardown_method(self):
        """Tear down
        """
        shutil.rmtree(self.test_dir)


[docs]class TestSparseGaussianProcessRegressorFit(TestSparseGaussianProcessRegressor):
[docs]    def test_advi_fit_returns_correct_model(self):
        # This print statement ensures PyMC3 output won't overwrite
        # the test name
        print('')
        self.advi_sgpr.fit(self.X_train, self.y_train)

        npt.assert_equal(self.num_pred, self.advi_sgpr.num_pred)
        npt.assert_almost_equal(
            self.signal_variance,
            self.advi_sgpr.summary['mean']['signal_variance__0'],
            0)
        npt.assert_almost_equal(
            self.length_scale,
            self.advi_sgpr.summary['mean']['length_scale__0_0'],
            0)
        npt.assert_almost_equal(
            self.noise_variance,
            self.advi_sgpr.summary['mean']['noise_variance__0'],
            0)


[docs]class TestSparseGaussianProcessRegressorPredict(
    TestSparseGaussianProcessRegressor):

[docs]    def test_predict_returns_predictions(self):
        print('')
        self.advi_sgpr.fit(self.X_train, self.y_train,
                           inference_args={"n": 25000})
        preds = self.advi_sgpr.predict(self.X_test)
        npt.assert_equal(self.y_test.shape, preds.shape)

[docs]    def test_predict_returns_mean_predictions_and_std(self):
        print('')
        self.advi_sgpr.fit(self.X_train, self.y_train,
                           inference_args={"n": 25000})
        preds, stds = self.advi_sgpr.predict(self.X_test, return_std=True)
        npt.assert_equal(self.y_test.shape, preds.shape)
        npt.assert_equal(self.y_test.shape, stds.shape)

[docs]    def test_predict_raises_error_if_not_fit(self):
        print('')
        with pytest.raises(NotFittedError):
            advi_sgpr = SparseGaussianProcessRegressor()
            advi_sgpr.predict(self.X_train)


[docs]class TestSparseGaussianProcessRegressorScore(
    TestSparseGaussianProcessRegressor):

[docs]    def test_score_matches_sklearn_performance(self):
        print('')
        sk_gpr = skGaussianProcessRegressor()
        sk_gpr.fit(self.X_train, self.y_train)
        sk_gpr_score = sk_gpr.score(self.X_test, self.y_test)

        self.advi_sgpr.fit(self.X_train, self.y_train)
        advi_sgpr_score = self.advi_sgpr.score(self.X_test, self.y_test)

        npt.assert_almost_equal(sk_gpr_score, advi_sgpr_score, 0)


[docs]class TestSparseGaussianProcessRegressorSaveAndLoad(
    TestSparseGaussianProcessRegressor):

[docs]    def test_save_and_load_work_correctly(self):
        print('')
        self.advi_sgpr.fit(self.X_train, self.y_train)
        score1 = self.advi_sgpr.score(self.X_test, self.y_test)
        self.advi_sgpr.save(self.test_dir)

        sgpr2 = SparseGaussianProcessRegressor()
        sgpr2.load(self.test_dir)

        npt.assert_equal(self.advi_sgpr.inference_type, sgpr2.inference_type)
        npt.assert_equal(self.advi_sgpr.num_pred, sgpr2.num_pred)
        npt.assert_equal(self.advi_sgpr.num_training_samples,
                         sgpr2.num_training_samples)
        pdt.assert_frame_equal(summary(self.advi_sgpr.trace),
                               summary(sgpr2.trace))

        score2 = sgpr2.score(self.X_test, self.y_test)
        npt.assert_almost_equal(score1, score2, 0)