Monte Carlo sampling

Problem 1

Suppose we want to sample uniformly at random from the triangle with vertices (0,0), (0,1), and (1,0).

First sampling algorithm: draw θ ~ U(0, π/2) and U ~ U(0,1), then set X = U cos(θ) and Y = U sin(θ). The sample (X, Y) is retained if it lies inside the triangle, i.e. if X + Y < 1.

mc.ss <- 3000
postscript("mcplot_1.ps")
plot(c(0,1), c(0,1), type = "n")
for(i in 1:mc.ss) {
  theta <- runif(1) * pi/2
  u <- runif(1)
  x <- u * cos(theta)
  y <- u * sin(theta)
  if((x + y) < 1) points(x, y)
}
dev.off()
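To see why this scheme is biased, note that drawing (θ, U) uniformly corresponds, via the Jacobian of the polar map, to a density proportional to 1/r in Cartesian coordinates, so points pile up near the origin. A quick numerical check of this (a sketch, not part of the original code) looks at the radii of the accepted points: under the uniform distribution on the triangle, small radii should be rare, whereas here they are not.

# Sketch: histogram of radii of accepted points. Under uniformity on the
# triangle the radius density grows roughly linearly near 0; here it is
# roughly flat, confirming the bias toward (0,0).
theta <- runif(mc.ss) * pi/2
u <- runif(mc.ss)
x <- u * cos(theta)
y <- u * sin(theta)
keep <- (x + y) < 1
hist(sqrt(x[keep]^2 + y[keep]^2), nclass = 30, prob = TRUE)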

[Figure 1: Sample of size 3000 obtained using the first sampling algorithm]

The plot shows that the procedure does not sample from the correct distribution, since the sample concentrates near the vertex (0,0).

Alternatively, we can use the following second sampling algorithm: X ~ U(0,1), then Y ~ U(0, 1-X).

postscript("mcplot_2.ps")
plot(c(0,1), c(0,1), type = "n")
for(i in 1:mc.ss) {
  x <- runif(1)
  y <- runif(1) * (1 - x)
  points(x, y)
}
dev.off()

This one doesn't work either: the right vertex (1,0) is clearly favored by the sample, because this scheme gives X a uniform marginal instead of the correct marginal f(x) = 2(1-x).
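A short check of this explanation (a sketch, not in the original notes): compare the histogram of the X draws to the marginal density 2(1-x) that a uniform sample from the triangle would have.

# Sketch: the X coordinate drawn by the second algorithm is uniform on
# (0,1), but the correct marginal under the uniform triangle is 2*(1 - x).
x <- runif(mc.ss)
hist(x, nclass = 30, prob = TRUE)                          # roughly flat
curve(2 * (1 - x), from = 0, to = 1, add = TRUE, lty = 3)  # correct marginal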

[Figure 2: Sample of size 3000 obtained using the second sampling algorithm]

The third alternative is to sample uniformly in the unit square and retain only the points that fall in the triangle.

postscript("mcplot_3.ps")
plot(c(0,1), c(0,1), type = "n")
for(i in 1:mc.ss) {
  x <- runif(1)
  y <- runif(1)
  if((x + y) < 1) points(x, y)
}
dev.off()

This is correct but rather inefficient: since the triangle has area 1/2, about half of the draws are discarded.
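The acceptance rate is easy to confirm empirically (a sketch, not in the original notes):

# Sketch: fraction of uniform draws in the unit square that land in the
# triangle; should be close to the triangle's area, 0.5.
x <- runif(mc.ss)
y <- runif(mc.ss)
mean(x + y < 1)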

[Figure 3: Sample of size 3000 obtained using the third sampling algorithm]

For a fourth sampling algorithm, one can start by computing the marginal density of X, which is f(x) = 2(1-x) for 0 <= x <= 1. This suggests the algorithm: X ~ f(·), then Y ~ U(0, 1-X). To sample X by inversion, note that the CDF is F(x) = 1 - (1-x)^2, so setting F(X) = U with U ~ U(0,1) gives X = 1 - sqrt(1-U).

postscript("mcplot_4.ps")
plot(c(0,1), c(0,1), type = "n")
for(i in 1:mc.ss) {
  u <- runif(1)
  x <- 1 - sqrt(1 - u)      # inverse-CDF draw from f(x) = 2(1-x)
  y <- runif(1) * (1 - x)
  points(x, y)
}
dev.off()

Of the four, the fourth sampling algorithm is both correct and efficient: every draw is kept.
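The loop vectorizes naturally in R; a minimal sketch (not in the original notes):

# Sketch: the same sampler without a loop.
u <- runif(mc.ss)
x <- 1 - sqrt(1 - u)
y <- runif(mc.ss) * (1 - x)
plot(x, y, pch = ".")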

[Figure 4: Sample of size 3000 obtained using the fourth sampling algorithm]

Importance sampling

Suppose we want to compute the mean of the following mixture of normal distributions:

f(x) = 0.4 n(x | 0, 1) + 0.6 n(x | 3, 1) = 0.4 exp(-x^2/2)/sqrt(2π) + 0.6 exp(-(x-3)^2/2)/sqrt(2π).

The true value is µ = 0.4·0 + 0.6·3 = 1.8. The (self-normalized) importance sampling estimate is

µ̂ = Σ_{i=1}^m w_i y_i / Σ_{i=1}^m w_i,

where y_1, ..., y_m are draws from the importance density g and w_i = f(y_i)/g(y_i) are the importance weights. In the first scenario we take the importance density to be n(x | 0, 1.5). We examine the Monte Carlo error in terms of bias and variance, using m = 1000 and replicating the simulation experiment 500 times to obtain the Monte Carlo error.

m <- 1000
rep <- 500
target <- function(x) {
  0.4 * exp(-x^2/2)/sqrt(2*pi) + 0.6 * exp(-(x-3)^2/2)/sqrt(2*pi)
}
proposal <- function(x) dnorm(x, 0, sqrt(1.5))

w <- c()
y <- c()
hat.mu <- c()
for(j in 1:rep) {
  for(i in 1:m) {
    y[i] <- rnorm(1, 0, sqrt(1.5))
    w[i] <- target(y[i]) / proposal(y[i])
  }
  hat.mu[j] <- sum(w*y) / sum(w)
}

> mean(hat.mu)
[1]
> sqrt(var(hat.mu))
[1]

For a second scenario we take the proposal to be t(x | 1), the Student t distribution with one degree of freedom.

proposal.t <- function(x) dt(x, 1)

hat.t.mu <- c()
for(j in 1:rep) {
  for(i in 1:m) {
    y[i] <- rt(1, 1)
    w[i] <- target(y[i]) / proposal.t(y[i])
  }
  hat.t.mu[j] <- sum(w*y) / sum(w)
}

> mean(hat.t.mu)

[1] 1.798
> sqrt(var(hat.t.mu))
[1]

par(mfrow = c(2,2))
a <- seq(-10, 10, 0.1)
b1 <- dnorm(a, 0, sqrt(1.5))
b2 <- 0.4*dnorm(a) + 0.6*dnorm(a, 3, 1)
plot(a, b1, type = "l")
lines(a, b2, lty = 3)
legend(locator(1), c("target"), lty = 3, cex = 0.8)    # click on the plot to place the legend
legend(locator(1), c("proposal"), lty = 1, cex = 0.8)
hist(hat.mu, nclass = 20, prob = TRUE)
b1.t <- dt(a, 1)
plot(a, b1.t, type = "l")
lines(a, b2, lty = 3)
legend(locator(1), c("target"), lty = 3, cex = 0.8)
legend(locator(1), c("proposal"), lty = 1, cex = 0.8)
hist(hat.t.mu, nclass = 30, prob = TRUE)

[Figure: target and proposal densities (left) and histograms of hat.mu and hat.t.mu (right), for the normal and t proposals]
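Incidentally, the inner loops above can be avoided, since target(), proposal(), and proposal.t() are all built from vectorized operations; a sketch for one replicate (not in the original notes):

# Sketch: one replicate of the importance sampler without the inner loop.
y <- rnorm(m, 0, sqrt(1.5))
w <- target(y) / proposal(y)
sum(w*y) / sum(w)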

Alternatively, we can use the unnormalized estimator

µ̃ = Σ_{i=1}^m w_i y_i / m.

m <- 1000
rep <- 500
w <- c()
y <- c()
hat.mu2 <- c()
for(j in 1:rep) {
  for(i in 1:m) {
    y[i] <- rnorm(1, 0, sqrt(1.5))
    w[i] <- target(y[i]) / proposal(y[i])
  }
  hat.mu2[j] <- sum(w*y) / m
}

> mean(hat.mu2)
[1]
> sqrt(var(hat.mu2))
[1]

Same approach, but using t(x | 1) as the proposal.

hat.t.mu2 <- c()
for(j in 1:rep) {
  for(i in 1:m) {
    y[i] <- rt(1, 1)
    w[i] <- target(y[i]) / proposal.t(y[i])
  }
  hat.t.mu2[j] <- sum(w*y) / m   # unnormalized estimator, divided by m as above
}

> mean(hat.t.mu2)
[1]
> sqrt(var(hat.t.mu2))

[1]

postscript("importance_plot2.ps")
par(mfrow = c(2,2))
a <- seq(-10, 10, 0.1)
b1 <- dnorm(a, 0, sqrt(1.5))
b2 <- 0.4*dnorm(a) + 0.6*dnorm(a, 3, 1)
plot(a, b1, type = "l")
lines(a, b2, lty = 3)
hist(hat.mu2, nclass = 30, prob = TRUE)
b1.t <- dt(a, 1)
plot(a, b1.t, type = "l")
lines(a, b2, lty = 3)
hist(hat.t.mu2, nclass = 30, prob = TRUE)
dev.off()

[Figure: target and proposal densities with histograms of hat.mu2 (top) and hat.t.mu2 (bottom)]
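A standard way to compare the two proposals, not used in the original notes, is the effective sample size implied by the normalized importance weights; a heavier-tailed proposal such as the t(1) tends to give more even weights for this bimodal target.

# Sketch: effective sample size of an importance sample with weights w.
ess <- function(w) {
  wn <- w / sum(w)
  1 / sum(wn^2)     # equals m when all weights are equal
}
ess(w)              # w holds the weights from the last replicate above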

Markov Chain Monte Carlo

We consider the bivariate distribution

π(x1, x2) ∝ exp( -(1/2) (A x1^2 x2^2 + x1^2 + x2^2 - 2 B x1 x2 - 2 C1 x1 - 2 C2 x2) ),

with A = 8, B = 0, C1 = 4, C2 = 4. The full conditional distributions are easily determined to be normal (incidentally, this is an example where the conditionals are Gaussian but the joint distribution is not Gaussian):

π(x1 | x2) = N( (B x2 + C1)/(A x2^2 + 1), 1/(A x2^2 + 1) )
π(x2 | x1) = N( (B x1 + C2)/(A x1^2 + 1), 1/(A x1^2 + 1) )

So we can start by implementing a Gibbs sampling algorithm.

Gibbs Sampling

fun <- function(x1 = 1, x2 = 1, a = 1, b = 1, c1 = 1, c2 = 1) {
  exp(-0.5 * (a*x1^2*x2^2 + x1^2 + x2^2 - 2*b*x1*x2 - 2*c1*x1 - 2*c2*x2))
}

A <- 8; B <- 0; C1 <- 4; C2 <- 4
x <- seq(-10, 10, 0.2)
y <- seq(-10, 10, 0.2)
par(mfrow = c(1,1))
eval <- 150
gelmeng <- array(0, c(eval, eval))
co1 <- ppoints(eval)*10 - 2
co2 <- ppoints(eval)*10 - 2
for(i in 1:eval) {
  for(j in 1:eval) {
    gelmeng[i,j] <- fun(co1[i], co2[j], A, B, C1, C2)
  }
}
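Since fun() is built from vectorized operations, the double loop can also be written with outer() (a sketch, not in the original notes):

# Sketch: same grid of unnormalized density values, without explicit loops.
gelmeng <- outer(co1, co2, function(u, v) fun(u, v, A, B, C1, C2))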

dens <- gelmeng / max(gelmeng)
contour(co1, co2, t(dens), levels = c(0,0.05,0.1,1,2,4,8,10)/100,
        main = "Gibbs Sampling")

nsim <- 2000; nchains <- 2
# define the matrices used to store the samples
x.sim <- matrix(0, nrow = nchains, ncol = nsim)
y.sim <- matrix(0, nrow = nchains, ncol = nsim)
# initialize the chains
x.sim[,1] <- rnorm(nchains, mean = 1, sd = 1)
y.sim[,1] <- rnorm(nchains, mean = 1, sd = 1)
for(j in 2:nsim) {
  for(i in 1:nchains) {
    # rnorm takes the standard deviation, so the conditional variance
    # 1/(A x^2 + 1) enters through sqrt()
    x.sim[i,j] <- rnorm(1, (B*y.sim[i,j-1] + C1)/(A*y.sim[i,j-1]^2 + 1),
                        sqrt(1/(A*y.sim[i,j-1]^2 + 1)))
    y.sim[i,j] <- rnorm(1, (B*x.sim[i,j] + C2)/(A*x.sim[i,j]^2 + 1),
                        sqrt(1/(A*x.sim[i,j]^2 + 1)))
    points(x.sim[i,j], y.sim[i,j], pch = i)
  }
}

Alternatively, we can try to run a few Metropolis-Hastings samplers with various proposals.

Metropolis-Hastings Sampling: Independent Metropolis

We use as a proposal a mixture of two normals,

q(x, y) = 0.5 n(x, y; (0,4), diag(0.1, 2)) + 0.5 n(x, y; (4,0), diag(2, 0.1)).

library(mvtnorm)
logdproposal <- function(x = 1, y = 1) {
  rez <- 0.5 * dmvnorm(c(x,y), c(0,4), diag(c(0.1,2))) +
         0.5 * dmvnorm(c(x,y), c(4,0), diag(c(2,0.1)))
  log(rez)
}

[Figure: contour plot of the target with the Gibbs sampling draws overlaid ("Gibbs Sampling")]

sample.proposal <- function(u = 0.1) {
  if(u <= 0.5) rez <- rmvnorm(1, c(0,4), diag(c(0.1,2)))
  if(u >  0.5) rez <- rmvnorm(1, c(4,0), diag(c(2,0.1)))
  rez[1,]
}

logtarget <- function(x1 = 1, x2 = 1, a = 1, b = 1, c1 = 1, c2 = 1) {
  -0.5 * (a*x1^2*x2^2 + x1^2 + x2^2 - 2*b*x1*x2 - 2*c1*x1 - 2*c2*x2)
}

nsim <- 1000; nchains <- 2
contour(co1, co2, t(dens), levels = c(0,0.05,0.1,1,2,4,8,10)/100,
        main = "Independent Metropolis")
# define the matrices used to store the samples
x.sim <- matrix(0, nrow = nchains, ncol = nsim)

y.sim <- matrix(0, nrow = nchains, ncol = nsim)
# initialize the chains
x.sim[,1] <- rnorm(nchains, mean = 1, sd = 5)
y.sim[,1] <- rnorm(nchains, mean = 1, sd = 5)
count <- 0
for(j in 2:nsim) {
  for(i in 1:nchains) {
    w <- runif(1)
    z.prop <- sample.proposal(w)
    ratio <- logtarget(z.prop[1], z.prop[2], A, B, C1, C2) -
             logtarget(x.sim[i,j-1], y.sim[i,j-1], A, B, C1, C2) +
             logdproposal(x.sim[i,j-1], y.sim[i,j-1]) -
             logdproposal(z.prop[1], z.prop[2])
    ratio <- exp(ratio)
    v <- runif(1)
    if(v <= ratio) {
      x.sim[i,j] <- z.prop[1]
      y.sim[i,j] <- z.prop[2]
      count <- count + 1
    }
    if(v > ratio) {
      x.sim[i,j] <- x.sim[i,j-1]
      y.sim[i,j] <- y.sim[i,j-1]
    }
    points(x.sim[i,j], y.sim[i,j], pch = i)
  }
}
print(count/(nsim*nchains))   # overall acceptance rate
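For an independence sampler, the acceptance rate printed above is a rough measure of how closely the proposal matches the target. A related check (a sketch, not in the original notes) reads the rejection fraction off the stored sample path:

# Sketch: fraction of iterations at which chain 1 did not move; a rejected
# proposal repeats the previous state, so repeated values mark rejections.
mean(diff(x.sim[1, ]) == 0)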

[Figure: contour plot of the target with the independent Metropolis draws overlaid ("Independent Metropolis")]

Random Walk Metropolis

In general, we don't know the contours of the target, so we cannot construct a good independent proposal; in that case we can use a random walk Metropolis algorithm in which the proposal is q(· | (x,y)) = n(·; (x,y), σ^2 I_2). The choice of σ^2 is crucial for obtaining an efficient sampling algorithm. Because this proposal is symmetric, the proposal densities cancel in the acceptance ratio; dproposal below is defined for completeness but is not needed in the ratio.

library(mvtnorm)
dproposal <- function(x = 1, y = 1, x.old = 2, y.old = 2) {
  dmvnorm(c(x,y), c(x.old,y.old), diag(c(0.5,0.5)), log = TRUE)
}

sample.proposal <- function(x.old = 1, y.old = 1) {
  rez <- rmvnorm(1, c(x.old,y.old), diag(c(0.5,0.5)))
  rez[1,]
}

nsim <- 1000; nchains <- 2
contour(co1, co2, t(dens), levels = c(0,0.05,0.1,1,2,4,8,10)/100,
        main = "Random Walk Metropolis")
# define the matrices used to store the samples
x.sim <- matrix(0, nrow = nchains, ncol = nsim)
y.sim <- matrix(0, nrow = nchains, ncol = nsim)
# initialize the chains
x.sim[,1] <- rnorm(nchains, mean = 1, sd = 1)
y.sim[,1] <- rnorm(nchains, mean = 1, sd = 1)
count <- 0
for(j in 2:nsim) {
  for(i in 1:nchains) {
    z.prop <- sample.proposal(x.sim[i,j-1], y.sim[i,j-1])
    # symmetric proposal: the q terms cancel, leaving only the target ratio
    ratio <- logtarget(z.prop[1], z.prop[2], A, B, C1, C2) -
             logtarget(x.sim[i,j-1], y.sim[i,j-1], A, B, C1, C2)
    ratio <- exp(ratio)
    v <- runif(1)
    if(v <= ratio) {
      x.sim[i,j] <- z.prop[1]
      y.sim[i,j] <- z.prop[2]
      count <- count + 1
    }
    if(v > ratio) {
      x.sim[i,j] <- x.sim[i,j-1]
      y.sim[i,j] <- y.sim[i,j-1]
    }
    points(x.sim[i,j], y.sim[i,j], pch = i)
  }
}

[Figure: contour plot of the target with the random walk Metropolis draws overlaid ("Random Walk Metropolis")]

print(count/(nsim*nchains))   # overall acceptance rate
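Whichever sampler is used, it is worth inspecting the chains before trusting the draws; a minimal trace-plot sketch (not in the original notes):

# Sketch: trace plots of the two chains for each coordinate. Well-mixing
# chains should overlap and move freely between the two modes of the target.
par(mfrow = c(2,1))
plot(x.sim[1,], type = "l", ylab = "x"); lines(x.sim[2,], lty = 3)
plot(y.sim[1,], type = "l", ylab = "y"); lines(y.sim[2,], lty = 3)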
