Source code for pmlearn.linear_model.logistic

"""
Logistic regression models.
"""

# Authors: Nicole Carlson <parsing-science@gmail.com>
#
# License: BSD 3 clause

import numpy as np
import pymc3 as pm
import theano
import theano.tensor as tt

from .base import BayesianModel
from .base import BayesianLinearClassifierMixin


class LogisticRegression(BayesianModel, BayesianLinearClassifierMixin):
    """Bayesian Logistic Regression built using PyMC3."""

    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.num_cats = None

    def create_model(self):
        """Creates and returns the PyMC3 model.

        Note: The size of the shared variables must match the size of the
        training data. Otherwise, setting the shared variables later will
        raise an error. See http://docs.pymc.io/advanced_theano.html

        Returns
        -------
        the PyMC3 model
        """
        model_input = theano.shared(
            np.zeros([self.num_training_samples, self.num_pred]))

        model_output = theano.shared(
            np.zeros(self.num_training_samples, dtype='int'))

        model_cats = theano.shared(
            np.zeros(self.num_training_samples, dtype='int'))

        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output,
            'model_cats': model_cats
        }

        model = pm.Model()

        with model:
            alpha = pm.Normal('alpha', mu=0, sd=100, shape=(self.num_cats,))
            betas = pm.Normal('beta', mu=0, sd=100,
                              shape=(self.num_cats, self.num_pred))

            c = model_cats

            linear_function = alpha[c] + tt.sum(betas[c] * model_input, 1)

            p = pm.invlogit(linear_function)

            y = pm.Bernoulli('y', p, observed=model_output)

        return model
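
    # A minimal sketch, not part of the library, of how the shared variables
    # above would be repointed at real data before inference, per the note in
    # the docstring. `X`, `y`, and `cats` are hypothetical arrays whose first
    # dimension must equal num_training_samples; set_value() is the standard
    # Theano API for updating a shared variable in place:
    #
    #     self.shared_vars['model_input'].set_value(X)
    #     self.shared_vars['model_output'].set_value(y.astype('int'))
    #     self.shared_vars['model_cats'].set_value(cats.astype('int'))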

    def save(self, file_prefix):
        params = {
            'inference_type': self.inference_type,
            'num_cats': self.num_cats,
            'num_pred': self.num_pred,
            'num_training_samples': self.num_training_samples
        }

        super(LogisticRegression, self).save(file_prefix, params)

    def load(self, file_prefix):
        params = super(LogisticRegression, self).load(
            file_prefix, load_custom_params=True)

        self.inference_type = params['inference_type']
        self.num_cats = params['num_cats']
        self.num_pred = params['num_pred']
        self.num_training_samples = params['num_training_samples']
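
# A hedged usage sketch for LogisticRegression. It assumes the scikit-learn
# style fit/predict API supplied by BayesianLinearClassifierMixin, which is
# defined elsewhere; X_train, y_train, cats_train, etc. are hypothetical
# arrays, not defined in this module:
#
#     lr = LogisticRegression()
#     lr.fit(X_train, y_train, cats_train)
#     y_pred = lr.predict(X_test, cats_test)
#     lr.save('pickle_jar/lr')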


class HierarchicalLogisticRegression(BayesianModel,
                                     BayesianLinearClassifierMixin):
    """Custom Hierarchical Logistic Regression built using PyMC3."""

    def __init__(self):
        super(HierarchicalLogisticRegression, self).__init__()
        self.num_cats = None

    def create_model(self):
        """Creates and returns the PyMC3 model.

        Note: The size of the shared variables must match the size of the
        training data. Otherwise, setting the shared variables later will
        raise an error. See http://docs.pymc.io/advanced_theano.html

        Returns
        -------
        the PyMC3 model
        """
        model_input = theano.shared(
            np.zeros([self.num_training_samples, self.num_pred]))

        model_output = theano.shared(
            np.zeros(self.num_training_samples, dtype='int'))

        model_cats = theano.shared(
            np.zeros(self.num_training_samples, dtype='int'))

        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output,
            'model_cats': model_cats
        }

        model = pm.Model()

        with model:
            mu_alpha = pm.Normal('mu_alpha', mu=0, sd=100)
            sigma_alpha = pm.HalfNormal('sigma_alpha', sd=100)

            mu_beta = pm.Normal('mu_beta', mu=0, sd=100)
            sigma_beta = pm.HalfNormal('sigma_beta', sd=100)

            alpha = pm.Normal('alpha', mu=mu_alpha, sd=sigma_alpha,
                              shape=(self.num_cats,))
            betas = pm.Normal('beta', mu=mu_beta, sd=sigma_beta,
                              shape=(self.num_cats, self.num_pred))

            c = model_cats

            linear_function = alpha[c] + tt.sum(betas[c] * model_input, 1)

            p = pm.invlogit(linear_function)

            y = pm.Bernoulli('y', p, observed=model_output)

        return model
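
    # For reference, the generative model built above (read directly from the
    # code) is, for observation i with category c_i and predictors x_i:
    #
    #     mu_alpha ~ Normal(0, 100)      sigma_alpha ~ HalfNormal(100)
    #     mu_beta  ~ Normal(0, 100)      sigma_beta  ~ HalfNormal(100)
    #     alpha_c  ~ Normal(mu_alpha, sigma_alpha)  (one intercept per category)
    #     beta_c   ~ Normal(mu_beta, sigma_beta)    (one weight vector per category)
    #     y_i      ~ Bernoulli(invlogit(alpha_{c_i} + beta_{c_i} . x_i))
    #
    # The shared hyperpriors partially pool the per-category parameters,
    # whereas LogisticRegression above draws each category's alpha and beta
    # independently from fixed Normal(0, 100) priors.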

    def save(self, file_prefix):
        params = {
            'inference_type': self.inference_type,
            'num_cats': self.num_cats,
            'num_pred': self.num_pred,
            'num_training_samples': self.num_training_samples
        }

        super(HierarchicalLogisticRegression, self).save(file_prefix, params)

    def load(self, file_prefix):
        params = super(HierarchicalLogisticRegression, self).load(
            file_prefix, load_custom_params=True)

        self.inference_type = params['inference_type']
        self.num_cats = params['num_cats']
        self.num_pred = params['num_pred']
        self.num_training_samples = params['num_training_samples']
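
# A hedged usage sketch for the hierarchical model, under the same assumed
# mixin API as above (predict_proba is assumed to exist on the mixin; the
# arrays are hypothetical):
#
#     hlr = HierarchicalLogisticRegression()
#     hlr.fit(X_train, y_train, cats_train)
#     probs = hlr.predict_proba(X_test, cats_test)
#     hlr.save('pickle_jar/hlr')
#
#     hlr2 = HierarchicalLogisticRegression()
#     hlr2.load('pickle_jar/hlr')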