How to estimate general covariance of a Gaussian mixture
I am trying to estimate the covariances of a 3-component ("patch") Gaussian mixture model with PyMC3. The means and covariances are completely unknown and the weights are [1, 1, 1]. For the means, one can use tt.stack([vx, vy]) to build the appropriate quantity. For the covariances, I want to use [sigma_x, sigma_y, rho] as the random variables and build the corresponding covariance tensor with tt.stack, as in the following code:
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt

k = 3        # number of patches (mixture components)
ndata = 500  # number of data points

# simulate data from a 3-component mixture
centers = np.array([[-10, -10], [0, 0], [10, 10]])
sigmax = [1, 1, 1]
sigmay = [1, 1, 1]
ro = [0, 0, 0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],
                                  [ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0, 1, 2], size=ndata, replace=True)
data = []
for i in v:
    data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))
data = np.array(data)

ax = plt.gca()
ax.scatter(data[:, 0], data[:, 1])

# set up the model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9

with pm.Model() as model:
    # cluster weights (unnormalized)
    p = tt.constant(np.array([1., 1., 1.]))
    # cluster centers
    vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
    vy = pm.Uniform('vy', lower=vy_min, upper=vy_max, shape=3)
    # per-cluster standard deviations and correlation
    Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
    Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
    ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3)
    category = pm.Categorical('category', p=p, shape=ndata)
    means = tt.stack([vx, vy], axis=-1)
    # attempt to build a per-observation covariance tensor
    m1 = [Dx1[category]*Dx1[category], ro1[category]*Dx1[category]*Dy1[category]]
    m2 = [ro1[category]*Dx1[category]*Dy1[category], Dy1[category]*Dy1[category]]
    cov = tt.stack([m1, m2], axis=-1)
    points = pm.MvNormal('obs',
                         mu=means[category],
                         cov=cov[category],
                         observed=data)

# sampling
with model:
    step1 = pm.Metropolis()
    step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
    tr = pm.sample(100)

# plot results
pm.plots.traceplot(tr, ['vx'])
plt.show()
but it raises the error:
Traceback (most recent call last):
File "/Users/huox/Downloads/t1108.py", line 51, in <module>
observed=data)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
dist = cls.dist(*args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
dist.__init__(*args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
lower=lower, *args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
raise ValueError('cov must be two dimensional.')
ValueError: cov must be two dimensional.
How can I fix this? And are there other, more efficient ways to estimate the covariance matrices?
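One workaround I am considering (an untested sketch, assuming the installed PyMC3 version's pm.Mixture accepts a list of multivariate component distributions) is to marginalize out category entirely and build a separate 2x2 covariance matrix for each component from (sigma_x, sigma_y, rho), so that each MvNormal only ever receives a two-dimensional cov:

# Untested sketch: marginalize the categorical assignment with pm.Mixture
# and give each of the k components an explicit 2x2 covariance matrix,
# so MvNormal never sees anything other than a two-dimensional cov.
with pm.Model() as marginal_model:
    w = pm.Dirichlet('w', a=np.ones(k))  # mixture weights instead of hard assignments
    vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=k)
    vy = pm.Uniform('vy', lower=vy_min, upper=vy_max, shape=k)
    Dx = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=k)
    Dy = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=k)
    ro = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=k)

    comp_dists = []
    for i in range(k):
        mu_i = tt.stack([vx[i], vy[i]])
        # 2x2 covariance built from (sigma_x, sigma_y, rho) of component i
        cov_i = tt.stack([tt.stack([Dx[i]**2,          ro[i]*Dx[i]*Dy[i]]),
                          tt.stack([ro[i]*Dx[i]*Dy[i], Dy[i]**2])])
        comp_dists.append(pm.MvNormal.dist(mu=mu_i, cov=cov_i))

    obs = pm.Mixture('obs', w=w, comp_dists=comp_dists, observed=data)
    trace = pm.sample(1000, tune=1000)

Whether this would also be more efficient probably depends on the sampler: with the discrete category marginalized out, all remaining parameters are continuous, so NUTS could be used instead of Metropolis plus ElemwiseCategorical.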
python bayesian hierarchical-bayesian
asked Nov 8 at 20:12 by sam, edited Nov 9 at 16:55 by merv