Source code for pmlearn.linear_model.logistic

"""
Logistic regression models.
"""

# Authors: Nicole Carlson <parsing-science@gmail.com>
#
# License: BSD 3 clause

import numpy as np
import pymc3 as pm
import theano
import theano.tensor as tt

from .base import BayesianModel
from .base import BayesianLinearClassifierMixin


class LogisticRegression(BayesianModel, BayesianLinearClassifierMixin):
    """Bayesian Logistic Regression built using PyMC3."""

    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.num_cats = None

    def create_model(self):
        """Creates and returns the PyMC3 model.

        Note: The size of the shared variables must match the size of the
        training data. Otherwise, setting the shared variables later will
        raise an error. See http://docs.pymc.io/advanced_theano.html

        Returns
        -------
        the PyMC3 model
        """
        model_input = theano.shared(
            np.zeros([self.num_training_samples, self.num_pred]))

        model_output = theano.shared(
            np.zeros(self.num_training_samples, dtype='int'))

        model_cats = theano.shared(
            np.zeros(self.num_training_samples, dtype='int'))

        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output,
            'model_cats': model_cats
        }

        model = pm.Model()

        with model:
            alpha = pm.Normal('alpha', mu=0, sd=100, shape=(self.num_cats,))
            betas = pm.Normal('beta', mu=0, sd=100,
                              shape=(self.num_cats, self.num_pred))

            c = model_cats

            linear_function = alpha[c] + tt.sum(betas[c] * model_input, 1)

            p = pm.invlogit(linear_function)

            y = pm.Bernoulli('y', p, observed=model_output)

        return model
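
    # A minimal sketch, not part of the library, of how the shared variables
    # above would be repointed at real data before inference, per the note in
    # the docstring. `X`, `y`, and `cats` are hypothetical arrays whose first
    # dimension must equal num_training_samples; set_value() is the standard
    # Theano API for updating a shared variable in place:
    #
    #     self.shared_vars['model_input'].set_value(X)
    #     self.shared_vars['model_output'].set_value(y.astype('int'))
    #     self.shared_vars['model_cats'].set_value(cats.astype('int'))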

    def save(self, file_prefix):
        params = {
            'inference_type': self.inference_type,
            'num_cats': self.num_cats,
            'num_pred': self.num_pred,
            'num_training_samples': self.num_training_samples
        }

        super(LogisticRegression, self).save(file_prefix, params)

    def load(self, file_prefix):
        params = super(LogisticRegression, self).load(
            file_prefix, load_custom_params=True)

        self.inference_type = params['inference_type']
        self.num_cats = params['num_cats']
        self.num_pred = params['num_pred']
        self.num_training_samples = params['num_training_samples']
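
# A hedged usage sketch for LogisticRegression. It assumes the scikit-learn
# style fit/predict API supplied by BayesianLinearClassifierMixin, which is
# defined elsewhere; X_train, y_train, cats_train, etc. are hypothetical
# arrays, not defined in this module:
#
#     lr = LogisticRegression()
#     lr.fit(X_train, y_train, cats_train)
#     y_pred = lr.predict(X_test, cats_test)
#     lr.save('pickle_jar/lr')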


class HierarchicalLogisticRegression(BayesianModel,
                                     BayesianLinearClassifierMixin):
    """Custom Hierarchical Logistic Regression built using PyMC3."""

    def __init__(self):
        super(HierarchicalLogisticRegression, self).__init__()
        self.num_cats = None

    def create_model(self):
        """Creates and returns the PyMC3 model.

        Note: The size of the shared variables must match the size of the
        training data. Otherwise, setting the shared variables later will
        raise an error. See http://docs.pymc.io/advanced_theano.html

        Returns
        -------
        the PyMC3 model
        """
        model_input = theano.shared(
            np.zeros([self.num_training_samples, self.num_pred]))

        model_output = theano.shared(
            np.zeros(self.num_training_samples, dtype='int'))

        model_cats = theano.shared(
            np.zeros(self.num_training_samples, dtype='int'))

        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output,
            'model_cats': model_cats
        }

        model = pm.Model()

        with model:
            mu_alpha = pm.Normal('mu_alpha', mu=0, sd=100)
            sigma_alpha = pm.HalfNormal('sigma_alpha', sd=100)

            mu_beta = pm.Normal('mu_beta', mu=0, sd=100)
            sigma_beta = pm.HalfNormal('sigma_beta', sd=100)

            alpha = pm.Normal('alpha', mu=mu_alpha, sd=sigma_alpha,
                              shape=(self.num_cats,))
            betas = pm.Normal('beta', mu=mu_beta, sd=sigma_beta,
                              shape=(self.num_cats, self.num_pred))

            c = model_cats

            linear_function = alpha[c] + tt.sum(betas[c] * model_input, 1)

            p = pm.invlogit(linear_function)

            y = pm.Bernoulli('y', p, observed=model_output)

        return model
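
    # For reference, the generative model built above (read directly from the
    # code) is, for observation i with category c_i and predictors x_i:
    #
    #     mu_alpha ~ Normal(0, 100)      sigma_alpha ~ HalfNormal(100)
    #     mu_beta  ~ Normal(0, 100)      sigma_beta  ~ HalfNormal(100)
    #     alpha_c  ~ Normal(mu_alpha, sigma_alpha)  (one intercept per category)
    #     beta_c   ~ Normal(mu_beta, sigma_beta)    (one weight vector per category)
    #     y_i      ~ Bernoulli(invlogit(alpha_{c_i} + beta_{c_i} . x_i))
    #
    # The shared hyperpriors partially pool the per-category parameters,
    # whereas LogisticRegression above draws each category's alpha and beta
    # independently from fixed Normal(0, 100) priors.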

    def save(self, file_prefix):
        params = {
            'inference_type': self.inference_type,
            'num_cats': self.num_cats,
            'num_pred': self.num_pred,
            'num_training_samples': self.num_training_samples
        }

        super(HierarchicalLogisticRegression, self).save(file_prefix, params)

    def load(self, file_prefix):
        params = super(HierarchicalLogisticRegression, self).load(
            file_prefix, load_custom_params=True)

        self.inference_type = params['inference_type']
        self.num_cats = params['num_cats']
        self.num_pred = params['num_pred']
        self.num_training_samples = params['num_training_samples']
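
# A hedged usage sketch for the hierarchical model, under the same assumed
# mixin API as above (predict_proba is assumed to exist on the mixin; the
# arrays are hypothetical):
#
#     hlr = HierarchicalLogisticRegression()
#     hlr.fit(X_train, y_train, cats_train)
#     probs = hlr.predict_proba(X_test, cats_test)
#     hlr.save('pickle_jar/hlr')
#
#     hlr2 = HierarchicalLogisticRegression()
#     hlr2.load('pickle_jar/hlr')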