library("reshape") # read data fd964="ftp://ftp.ncdc.noaa.gov/pub/data/cirs/drd964x.tmpst.txt" fw=c(3,1,2,4,7,7,7,7,7,7,7,7,7,7,7,7) d964col=c("code","div","element","year","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec") d964=read.fwf(fd964,fw) colnames(d964)=d964col # get lower 48 / CONUS # station id = 110 (see state.README) # unique(d964[d964[,1]==110,c(1:3)]) conus=d964[d964[,1]==110,c(4:16)] conus[conus==-99.90] <- NA # TRUE FALSE upper tercile for (i in 2:13) { conus[,i] = conus[,i]>sort(conus[,i])[round(2*length(sort(conus[,i]))/3)] } # transpose months from columns to rows conus_ym=melt(conus, id=c("year")) # convert to numeric values # merge year and months into y.m value conus_ym[,2] = conus_ym[,1]+match(conus_ym[,2],month.abb)/12 - 1/24 # resort into time order conus_ym = conus_ym[order(conus_ym[,2]),] conus_ym # how often does 1 follow 1? n = nrow(conus_ym) numInitialTercile = sum(conus_ym[,3]>0,na.rm=T) numFollowTercile = sum((conus_ym[1:(n-1),3]+conus_ym[2:n,3])>1,na.rm=T) adj_prob = numFollowTercile/numInitialTercile adj_prob # [1] 0.4444444 # how often does a month appear in the upper tercile numInitialTercile/sum(!is.na(conus_ym[,3])) # [1] 0.3319149 final_prob = 1/( (1/3)*(adj_prob)^12 ) final_prob # [1] 50502.34 inferred_prob = 1/( (1/3)*(0.4000)^12 ) inferred_prob # [1] 178813.9 # ============================== # repeat, but remove 2011 and 2012 from analysis # ============================== conus=d964[d964[,1]==110,c(4:16)] conus[conus==-99.90] <- NA conus = conus[1:116,] # TRUE FALSE upper tercile for (i in 2:13) { conus[,i] = conus[,i]>sort(conus[,i])[round(2*length(sort(conus[,i]))/3)] } # transpose months from columns to rows conus_ym=melt(conus, id=c("year")) # convert to numeric values # merge year and months into y.m value conus_ym[,2] = conus_ym[,1]+match(conus_ym[,2],month.abb)/12 - 1/24 # resort into time order conus_ym = conus_ym[order(conus_ym[,2]),] conus_ym # how often does 1 follow 1? n = nrow(conus_ym) numInitialTercile = sum(conus_ym[,3]>0,na.rm=T) numFollowTercile = sum((conus_ym[1:(n-1),3]+conus_ym[2:n,3])>1,na.rm=T) adj_prob = numFollowTercile/numInitialTercile adj_prob #[1] 0.4368308 # how often does a month appear in the upper tercile numInitialTercile/sum(!is.na(conus_ym[,3])) #[1] 0.3354885 final_prob = 1/( (1/3)*(adj_prob)^12 ) final_prob #[1] 62138.65 # how often does 0 follow 0? n = nrow(conus_ym) numInitialTercile = sum(conus_ym[,3]==0,na.rm=T) numFollowTercile = sum((conus_ym[1:(n-1),3]+conus_ym[2:n,3])==0,na.rm=T) adj_prob = numFollowTercile/numInitialTercile adj_prob #[1] 0.7145946 #============ # count off #============ conus=d964[d964[,1]==110,c(4:16)] conus[conus==-99.90] <- NA conus = conus[1:116,] # TRUE FALSE upper tercile for (i in 2:13) { conus[,i] = conus[,i]>sort(conus[,i])[round(2*length(sort(conus[,i]))/3)] } # transpose months from columns to rows conus_ym=melt(conus, id=c("year")) # convert to numeric values # merge year and months into y.m value conus_ym[,2] = conus_ym[,1]+match(conus_ym[,2],month.abb)/12 - 1/24 # resort into time order conus_ym = conus_ym[order(conus_ym[,2]),] conus_ym = conus_ym[!is.na(conus_ym[,3]),] conus_ym # tmp1 has overlapping bins n=nrow(conus_ym) tmp1 = rep(NA,(n-13)) for (i in 1:(n-13)) { tmp1[i] = sum(conus_ym[i:(i+12),3],na.rm=T) } mean(tmp1) # [1] 4.614173 all data # [1] 4.35388 truncate at 2010 # tmp2 has distinct bins n=as.integer(nrow(conus_ym)/13) tmp2 = rep(NA,n) for (i in 1:n) { tmp2[i] = sum(conus_ym[i:(i+12),3],na.rm=T) } mean(tmp2) # [1] 2.803738 truncate at 2010 # calculate prob for number of high months n = (0,13) idx = 13 rawbins = 0:idx poi_m = 0:idx poi_we = 0:idx poi_calc = 0:idx lm = mean(tmp1) lc = sum(rsums*rprods) lwe=5.2 for (i in 0:idx) { rawbins[i+1] = sum(tmp==i) poi_m[i+1] = ((lm^i)/factorial(i))*exp(1)^(-lm) poi_we[i+1] = ((lwe^i)/factorial(i))*exp(1)^(-lwe) poi_calc[i+1] = ((lc^i)/factorial(i))*exp(1)^(-lc) } rawbins = rawbins/sum(rawbins) # need to source thirteenMonths-pdf.R m = t(cbind(bins,rawbins)) barplot(m,beside=T,names.arg=0:13) plot(x=0:13,y=bins, col="red",xlab="",ylab="",pch=20) lines(x=0:13,y=bins, col="red",lwd=1.5)s points(x=0:13,y=rawbins,pch=20) lines(x=0:13,y=rawbins,lwd=1.5) points(x=0:13,y=poi_we,col="green",pch=20) lines(x=0:13,y=poi_we,col="green",lwd=1.5) points(x=0:13,y=poi_calc,col="blue",pch=20) lines(x=0:13,y=poi_calc,col="blue",lwd=1.5) title(main="PDFs for CONUS 13 Month Series",sub="Months in Upper Tercile") legend("topright", c("Calculated","Measured","Poisson (calculated)","Poisson (Eschenbach)"), pch=20,lty=1, col=c("red","black","blue","green"),bty="n") sum((poi_calc-rawbins)^2) # [1] 0.003268577 sum((poi_we-rawbins)^2) # [1] 0.006581847 1/poi_we[14] # [1] 555.3518 1/poi_calc[14] # [1] 2328.640 1/bins[14] # [1] 62197.53