How to estimate general covariance of a gaussian mixture

up vote
1
down vote

favorite

I am trying to estimate the covariance of a 3 patches Gaussian mixture model with PyMC3. The mean and covariance are totally unknown and the weights are [1,1,1]. For the mean estimate, one can use tt.stack([vx,vy]) to build the appropriate quantity. But for the covariance, I want to use [sigma_x, sigma_y, rho] as the random variable. I try to use tt.stack to build the corresponding covariance tensor with the code

import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt

k = 3 # number of patch
ndata = 500 # number of data

centers = np.array([[-10,-10], [0,0], [10,10]])
sigmax = [1,1,1]
sigmay = [1,1,1]
ro = [0,0,0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0,1,2], size=ndata, replace=True)

data = 
for i in v:
 data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

# plt.hist(data)
data = np.array(data)
# sns.distplot(data[:,1])
ax = plt.gca()
ax.scatter(data[:,0],data[:,1])

# setup model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9

with pm.Model() as model:
 # cluster sizes
 p = tt.constant(np.array([1.,1.,1.]))

 # cluster centers
 vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
 vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
 Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
 Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
 ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3) 
 category = pm.Categorical('category',
 p=p,
 shape=ndata)

 means = tt.stack([vx, vy], axis=-1)
 m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
 m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

 cov = tt.stack([m1, m2], axis=-1)

 points = pm.MvNormal('obs',
 mu=means[category],
 cov=cov[category],
 observed=data)

# sampling
with model:
 step1 = pm.Metropolis()
 step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
 tr = pm.sample(100)

# plot results
pm.plots.traceplot(tr, ['vx']);
plt.show()

but it raises the error:

Traceback (most recent call last):
 File "/Users/huox/Downloads/t1108.py", line 51, in <module>
 observed=data)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
 dist = cls.dist(*args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
 dist.__init__(*args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
 lower=lower, *args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
 raise ValueError('cov must be two dimensional.')
 ValueError: cov must be two dimensional.

How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?

edited Nov 9 at 16:55

merv

24.3k671108

asked Nov 8 at 20:12

sam

add a comment |

up vote
1
down vote

favorite

import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt

k = 3 # number of patch
ndata = 500 # number of data

centers = np.array([[-10,-10], [0,0], [10,10]])
sigmax = [1,1,1]
sigmay = [1,1,1]
ro = [0,0,0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0,1,2], size=ndata, replace=True)

data = 
for i in v:
 data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

# plt.hist(data)
data = np.array(data)
# sns.distplot(data[:,1])
ax = plt.gca()
ax.scatter(data[:,0],data[:,1])

# setup model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9

with pm.Model() as model:
 # cluster sizes
 p = tt.constant(np.array([1.,1.,1.]))

 # cluster centers
 vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
 vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
 Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
 Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
 ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3) 
 category = pm.Categorical('category',
 p=p,
 shape=ndata)

 means = tt.stack([vx, vy], axis=-1)
 m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
 m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

 cov = tt.stack([m1, m2], axis=-1)

 points = pm.MvNormal('obs',
 mu=means[category],
 cov=cov[category],
 observed=data)

# sampling
with model:
 step1 = pm.Metropolis()
 step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
 tr = pm.sample(100)

# plot results
pm.plots.traceplot(tr, ['vx']);
plt.show()

but it raises the error:

Traceback (most recent call last):
 File "/Users/huox/Downloads/t1108.py", line 51, in <module>
 observed=data)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
 dist = cls.dist(*args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
 dist.__init__(*args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
 lower=lower, *args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
 raise ValueError('cov must be two dimensional.')
 ValueError: cov must be two dimensional.

How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?

edited Nov 9 at 16:55

merv

24.3k671108

asked Nov 8 at 20:12

sam

add a comment |

up vote
1
down vote

favorite

import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt

k = 3 # number of patch
ndata = 500 # number of data

centers = np.array([[-10,-10], [0,0], [10,10]])
sigmax = [1,1,1]
sigmay = [1,1,1]
ro = [0,0,0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0,1,2], size=ndata, replace=True)

data = 
for i in v:
 data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

# plt.hist(data)
data = np.array(data)
# sns.distplot(data[:,1])
ax = plt.gca()
ax.scatter(data[:,0],data[:,1])

# setup model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9

with pm.Model() as model:
 # cluster sizes
 p = tt.constant(np.array([1.,1.,1.]))

 # cluster centers
 vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
 vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
 Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
 Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
 ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3) 
 category = pm.Categorical('category',
 p=p,
 shape=ndata)

 means = tt.stack([vx, vy], axis=-1)
 m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
 m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

 cov = tt.stack([m1, m2], axis=-1)

 points = pm.MvNormal('obs',
 mu=means[category],
 cov=cov[category],
 observed=data)

# sampling
with model:
 step1 = pm.Metropolis()
 step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
 tr = pm.sample(100)

# plot results
pm.plots.traceplot(tr, ['vx']);
plt.show()

but it raises the error:

Traceback (most recent call last):
 File "/Users/huox/Downloads/t1108.py", line 51, in <module>
 observed=data)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
 dist = cls.dist(*args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
 dist.__init__(*args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
 lower=lower, *args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
 raise ValueError('cov must be two dimensional.')
 ValueError: cov must be two dimensional.

How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?

edited Nov 9 at 16:55

merv

24.3k671108

asked Nov 8 at 20:12

sam

import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt

k = 3 # number of patch
ndata = 500 # number of data

centers = np.array([[-10,-10], [0,0], [10,10]])
sigmax = [1,1,1]
sigmay = [1,1,1]
ro = [0,0,0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0,1,2], size=ndata, replace=True)

data = 
for i in v:
 data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

# plt.hist(data)
data = np.array(data)
# sns.distplot(data[:,1])
ax = plt.gca()
ax.scatter(data[:,0],data[:,1])

# setup model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9

with pm.Model() as model:
 # cluster sizes
 p = tt.constant(np.array([1.,1.,1.]))

 # cluster centers
 vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
 vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
 Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
 Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
 ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3) 
 category = pm.Categorical('category',
 p=p,
 shape=ndata)

 means = tt.stack([vx, vy], axis=-1)
 m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
 m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

 cov = tt.stack([m1, m2], axis=-1)

 points = pm.MvNormal('obs',
 mu=means[category],
 cov=cov[category],
 observed=data)

# sampling
with model:
 step1 = pm.Metropolis()
 step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
 tr = pm.sample(100)

# plot results
pm.plots.traceplot(tr, ['vx']);
plt.show()

but it raises the error:

Traceback (most recent call last):
 File "/Users/huox/Downloads/t1108.py", line 51, in <module>
 observed=data)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
 dist = cls.dist(*args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
 dist.__init__(*args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
 lower=lower, *args, **kwargs)
 File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
 raise ValueError('cov must be two dimensional.')
 ValueError: cov must be two dimensional.

How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?

python bayesian hierarchical-bayesian

edited Nov 9 at 16:55

merv

24.3k671108

asked Nov 8 at 20:12

sam

edited Nov 9 at 16:55

merv

24.3k671108

asked Nov 8 at 20:12

sam

edited Nov 9 at 16:55

merv

24.3k671108

edited Nov 9 at 16:55

merv

24.3k671108

edited Nov 9 at 16:55

merv

24.3k671108

asked Nov 8 at 20:12

sam

asked Nov 8 at 20:12

sam

asked Nov 8 at 20:12

sam

add a comment |

active

oldest

votes

Your Answer

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "1"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader:
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
,
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53215435%2fhow-to-estimate-general-covariance-of-a-gaussian-mixture%23new-answer', 'question_page');

);

Post as a guest

Name

Required, but never shown

active

oldest

votes

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference, If you need detailed information, please check here

搜尋此網誌

Pfthb