How to estimate general covariance of a gaussian mixture









up vote
1
down vote

favorite












I am trying to estimate the covariance of a 3 patches Gaussian mixture model with PyMC3. The mean and covariance are totally unknown and the weights are [1,1,1]. For the mean estimate, one can use tt.stack([vx,vy]) to build the appropriate quantity. But for the covariance, I want to use [sigma_x, sigma_y, rho] as the random variable. I try to use tt.stack to build the corresponding covariance tensor with the code



import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt

k = 3 # number of patch
ndata = 500 # number of data

centers = np.array([[-10,-10], [0,0], [10,10]])
sigmax = [1,1,1]
sigmay = [1,1,1]
ro = [0,0,0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0,1,2], size=ndata, replace=True)

data =
for i in v:
data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

# plt.hist(data)
data = np.array(data)
# sns.distplot(data[:,1])
ax = plt.gca()
ax.scatter(data[:,0],data[:,1])

# setup model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9

with pm.Model() as model:
# cluster sizes
p = tt.constant(np.array([1.,1.,1.]))

# cluster centers
vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3)
category = pm.Categorical('category',
p=p,
shape=ndata)

means = tt.stack([vx, vy], axis=-1)
m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

cov = tt.stack([m1, m2], axis=-1)

points = pm.MvNormal('obs',
mu=means[category],
cov=cov[category],
observed=data)

# sampling
with model:
step1 = pm.Metropolis()
step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
tr = pm.sample(100)

# plot results
pm.plots.traceplot(tr, ['vx']);
plt.show()


but it raises the error:



Traceback (most recent call last):
File "/Users/huox/Downloads/t1108.py", line 51, in <module>
observed=data)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
dist = cls.dist(*args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
dist.__init__(*args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
lower=lower, *args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
raise ValueError('cov must be two dimensional.')
ValueError: cov must be two dimensional.


How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?










share|improve this question



























    up vote
    1
    down vote

    favorite












    I am trying to estimate the covariance of a 3 patches Gaussian mixture model with PyMC3. The mean and covariance are totally unknown and the weights are [1,1,1]. For the mean estimate, one can use tt.stack([vx,vy]) to build the appropriate quantity. But for the covariance, I want to use [sigma_x, sigma_y, rho] as the random variable. I try to use tt.stack to build the corresponding covariance tensor with the code



    import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
    import pymc3 as pm, theano.tensor as tt

    k = 3 # number of patch
    ndata = 500 # number of data

    centers = np.array([[-10,-10], [0,0], [10,10]])
    sigmax = [1,1,1]
    sigmay = [1,1,1]
    ro = [0,0,0]
    cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
    v = np.random.choice([0,1,2], size=ndata, replace=True)

    data =
    for i in v:
    data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

    # plt.hist(data)
    data = np.array(data)
    # sns.distplot(data[:,1])
    ax = plt.gca()
    ax.scatter(data[:,0],data[:,1])

    # setup model
    vx_min, vx_max = -20, 20
    vy_min, vy_max = -20, 20
    Dx_min, Dx_max = 0.2, 3.0
    Dy_min, Dy_max = 0.2, 3.0
    ro_min, ro_max = 0.0, 0.9

    with pm.Model() as model:
    # cluster sizes
    p = tt.constant(np.array([1.,1.,1.]))

    # cluster centers
    vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
    vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
    Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
    Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
    ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3)
    category = pm.Categorical('category',
    p=p,
    shape=ndata)

    means = tt.stack([vx, vy], axis=-1)
    m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
    m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

    cov = tt.stack([m1, m2], axis=-1)

    points = pm.MvNormal('obs',
    mu=means[category],
    cov=cov[category],
    observed=data)

    # sampling
    with model:
    step1 = pm.Metropolis()
    step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
    tr = pm.sample(100)

    # plot results
    pm.plots.traceplot(tr, ['vx']);
    plt.show()


    but it raises the error:



    Traceback (most recent call last):
    File "/Users/huox/Downloads/t1108.py", line 51, in <module>
    observed=data)
    File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
    dist = cls.dist(*args, **kwargs)
    File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
    dist.__init__(*args, **kwargs)
    File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
    lower=lower, *args, **kwargs)
    File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
    raise ValueError('cov must be two dimensional.')
    ValueError: cov must be two dimensional.


    How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?










    share|improve this question

























      up vote
      1
      down vote

      favorite









      up vote
      1
      down vote

      favorite











      I am trying to estimate the covariance of a 3 patches Gaussian mixture model with PyMC3. The mean and covariance are totally unknown and the weights are [1,1,1]. For the mean estimate, one can use tt.stack([vx,vy]) to build the appropriate quantity. But for the covariance, I want to use [sigma_x, sigma_y, rho] as the random variable. I try to use tt.stack to build the corresponding covariance tensor with the code



      import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
      import pymc3 as pm, theano.tensor as tt

      k = 3 # number of patch
      ndata = 500 # number of data

      centers = np.array([[-10,-10], [0,0], [10,10]])
      sigmax = [1,1,1]
      sigmay = [1,1,1]
      ro = [0,0,0]
      cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
      v = np.random.choice([0,1,2], size=ndata, replace=True)

      data =
      for i in v:
      data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

      # plt.hist(data)
      data = np.array(data)
      # sns.distplot(data[:,1])
      ax = plt.gca()
      ax.scatter(data[:,0],data[:,1])

      # setup model
      vx_min, vx_max = -20, 20
      vy_min, vy_max = -20, 20
      Dx_min, Dx_max = 0.2, 3.0
      Dy_min, Dy_max = 0.2, 3.0
      ro_min, ro_max = 0.0, 0.9

      with pm.Model() as model:
      # cluster sizes
      p = tt.constant(np.array([1.,1.,1.]))

      # cluster centers
      vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
      vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
      Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
      Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
      ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3)
      category = pm.Categorical('category',
      p=p,
      shape=ndata)

      means = tt.stack([vx, vy], axis=-1)
      m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
      m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

      cov = tt.stack([m1, m2], axis=-1)

      points = pm.MvNormal('obs',
      mu=means[category],
      cov=cov[category],
      observed=data)

      # sampling
      with model:
      step1 = pm.Metropolis()
      step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
      tr = pm.sample(100)

      # plot results
      pm.plots.traceplot(tr, ['vx']);
      plt.show()


      but it raises the error:



      Traceback (most recent call last):
      File "/Users/huox/Downloads/t1108.py", line 51, in <module>
      observed=data)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
      dist = cls.dist(*args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
      dist.__init__(*args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
      lower=lower, *args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
      raise ValueError('cov must be two dimensional.')
      ValueError: cov must be two dimensional.


      How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?










      share|improve this question















      I am trying to estimate the covariance of a 3 patches Gaussian mixture model with PyMC3. The mean and covariance are totally unknown and the weights are [1,1,1]. For the mean estimate, one can use tt.stack([vx,vy]) to build the appropriate quantity. But for the covariance, I want to use [sigma_x, sigma_y, rho] as the random variable. I try to use tt.stack to build the corresponding covariance tensor with the code



      import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
      import pymc3 as pm, theano.tensor as tt

      k = 3 # number of patch
      ndata = 500 # number of data

      centers = np.array([[-10,-10], [0,0], [10,10]])
      sigmax = [1,1,1]
      sigmay = [1,1,1]
      ro = [0,0,0]
      cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
      v = np.random.choice([0,1,2], size=ndata, replace=True)

      data =
      for i in v:
      data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

      # plt.hist(data)
      data = np.array(data)
      # sns.distplot(data[:,1])
      ax = plt.gca()
      ax.scatter(data[:,0],data[:,1])

      # setup model
      vx_min, vx_max = -20, 20
      vy_min, vy_max = -20, 20
      Dx_min, Dx_max = 0.2, 3.0
      Dy_min, Dy_max = 0.2, 3.0
      ro_min, ro_max = 0.0, 0.9

      with pm.Model() as model:
      # cluster sizes
      p = tt.constant(np.array([1.,1.,1.]))

      # cluster centers
      vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
      vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
      Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
      Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
      ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3)
      category = pm.Categorical('category',
      p=p,
      shape=ndata)

      means = tt.stack([vx, vy], axis=-1)
      m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
      m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

      cov = tt.stack([m1, m2], axis=-1)

      points = pm.MvNormal('obs',
      mu=means[category],
      cov=cov[category],
      observed=data)

      # sampling
      with model:
      step1 = pm.Metropolis()
      step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
      tr = pm.sample(100)

      # plot results
      pm.plots.traceplot(tr, ['vx']);
      plt.show()


      but it raises the error:



      Traceback (most recent call last):
      File "/Users/huox/Downloads/t1108.py", line 51, in <module>
      observed=data)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
      dist = cls.dist(*args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
      dist.__init__(*args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
      lower=lower, *args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
      raise ValueError('cov must be two dimensional.')
      ValueError: cov must be two dimensional.


      How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?







      python bayesian hierarchical-bayesian






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited Nov 9 at 16:55









      merv

      24.3k671108




      24.3k671108










      asked Nov 8 at 20:12









      sam

      61




      61



























          active

          oldest

          votes











          Your Answer






          StackExchange.ifUsing("editor", function ()
          StackExchange.using("externalEditor", function ()
          StackExchange.using("snippets", function ()
          StackExchange.snippets.init();
          );
          );
          , "code-snippets");

          StackExchange.ready(function()
          var channelOptions =
          tags: "".split(" "),
          id: "1"
          ;
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function()
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled)
          StackExchange.using("snippets", function()
          createEditor();
          );

          else
          createEditor();

          );

          function createEditor()
          StackExchange.prepareEditor(
          heartbeatType: 'answer',
          convertImagesToLinks: true,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: 10,
          bindNavPrevention: true,
          postfix: "",
          imageUploader:
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          ,
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          );



          );













           

          draft saved


          draft discarded


















          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53215435%2fhow-to-estimate-general-covariance-of-a-gaussian-mixture%23new-answer', 'question_page');

          );

          Post as a guest















          Required, but never shown






























          active

          oldest

          votes













          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes















           

          draft saved


          draft discarded















































           


          draft saved


          draft discarded














          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53215435%2fhow-to-estimate-general-covariance-of-a-gaussian-mixture%23new-answer', 'question_page');

          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          Use pre created SQLite database for Android project in kotlin

          Darth Vader #20

          Ondo