R script: distribution.R

$$\newcommand{\esp}[1]{\mathbb{E}\left(#1\right)} \newcommand{\var}[1]{\mbox{Var}\left(#1\right)} \newcommand{\deriv}[1]{\dot{#1}(t)} \newcommand{\prob}[1]{ \mathbb{P}\!(#1)} \newcommand{\eqdef}{\mathop{=}\limits^{\mathrm{def}}} \newcommand{\by}{\boldsymbol{y}} \newcommand{\bc}{\boldsymbol{c}} \newcommand{\bpsi}{\boldsymbol{\psi}} \def\pmacro{\texttt{p}} \def\like{{\cal L}} \def\llike{{\cal LL}} \def\logit{{\rm logit}} \def\probit{{\rm probit}} \def\one{{\rm 1\!I}} \def\iid{\mathop{\sim}_{\rm i.i.d.}} \def\simh0{\mathop{\sim}_{H_0}} \def\df{\texttt{df}} \def\res{e} \def\xomega{x} \newcommand{\argmin}[1]{{\rm arg}\min_{#1}} \newcommand{\argmax}[1]{{\rm arg}\max_{#1}} \newcommand{\Rset}{\mbox{\mathbb{R}}} \def\param{\theta} \def\setparam{\Theta} \def\xnew{x_{\rm new}} \def\fnew{f_{\rm new}} \def\ynew{y_{\rm new}} \def\nnew{n_{\rm new}} \def\enew{e_{\rm new}} \def\Xnew{X_{\rm new}} \def\hfnew{\widehat{\fnew}} \def\degree{m} \def\nbeta{d} \newcommand{\limite}[1]{\mathop{\longrightarrow}\limits_{#1}} \def\ka{k{\scriptstyle a}} \def\ska{k{\scriptscriptstyle a}} \def\kel{k{\scriptstyle e}} \def\skel{k{\scriptscriptstyle e}} \def\cl{C{\small l}} \def\Tlag{T\hspace{-0.1em}{\scriptstyle lag}} \def\sTlag{T\hspace{-0.07em}{\scriptscriptstyle lag}} \def\Tk{T\hspace{-0.1em}{\scriptstyle k0}} \def\sTk{T\hspace{-0.07em}{\scriptscriptstyle k0}} \def\thalf{t{\scriptstyle 1/2}} \newcommand{\Dphi}[1]{\partial_\pphi #1} \def\asigma{a} \def\pphi{\psi} \newcommand{\stheta}{{\theta^\star}} \newcommand{\htheta}{{\widehat{\theta}}}$$

# 1 Normal and transformations of normal distributions

Normal, lognormal, logitnormal and probit normal distributions are available in the Mlxtran library of distributions:

normal.dist = inlineModel("
[LONGITUDINAL]
DEFINITION:
y1 = {distribution=normal, mean=2, sd=0.3}
y2 = {distribution=lognormal, mean=2, sd=0.3}
y3 = {distribution=logitnormal, mean=2, sd=0.3}
y4 = {distribution=probitnormal, mean=2, sd=0.3}
")

Here, mean=2 and sd=0.3 are the mean and standard deviation of the normal distributions: \begin{align*} y_1 & \sim {\cal N}(2, 0.3) \\ \log(y_2) & \sim {\cal N}(2, 0.3) \\ \logit(y_3) = \log(y_3/(1-y_3)) & \sim {\cal N}(2, 0.3) \\ \probit(y_4) = \Phi^{-1}(y_4) & \sim {\cal N}(2, 0.3) \end{align*}

N <- 10000
y <- list(name=c('y1','y2','y3','y4'),time=(1:N))
normal.res <- simulx(model = normal.dist, output=y)

update_geom_defaults("bar",   list(fill = "blue"))
pl1 <- ggplot() + geom_histogram(data=normal.res$y1,aes(y1), bins=30) pl2 <- ggplot() + geom_histogram(data=normal.res$y2,aes(y2), bins=30)
pl3 <- ggplot() + geom_histogram(data=normal.res$y3,aes(y3), bins=30) pl4 <- ggplot() + geom_histogram(data=normal.res$y4,aes(y4), bins=30)

library(gridExtra)
grid.arrange(pl1, pl2, pl3, pl4)

z1 <- normal.res$y1$y1
print(c(mean(z1), sd(z1)))
## [1] 2.0035821 0.3003813
z2 <- log(normal.res$y2$y2)
print(c(mean(z2), sd(z2)))
## [1] 2.0001217 0.3007154
z3 <- log(normal.res$y3$y3/(1-normal.res$y3$y3))
print(c(mean(z3), sd(z3)))
## [1] 2.0004113 0.3001045
z4 <- qnorm(normal.res$y4$y4)
print(c(mean(z4), sd(z4)))
## [1] 1.9964198 0.2994488

# 2 Continuous probability distributions

Several continuous and discrete distributions are also available. The names and the parametrizations of these distributions are those used in R:

continuous.dist = inlineModel("
[LONGITUDINAL]
DEFINITION:
y5 = {distribution=uniform, min=-2.4, max=13}
y6 = {distribution=exponential, rate=0.7}
y7 = {distribution=gamma, shape=2, scale=0.5}
y8 = {distribution=weibull, shape=2, scale=1.2}
y9 = {distribution=extremeValue, location=-10, scale=1.8}
y10 = {distribution=chiSquared, df=2}
y11 = {distribution=cauchy, location=0, scale=1}
y12 = {distribution=fisherF, df1=40, df2=20}
y13 = {distribution=studentT, df=10}
")

y <- list(name=c('y5','y6','y7','y8','y9','y10','y11','y12','y13'),time=(1:N))
continuous.res <- simulx(model = continuous.dist, output=y)

pl5  <- ggplot() + geom_histogram(data=continuous.res$y5,aes(y5), bins=30) pl6 <- ggplot() + geom_histogram(data=continuous.res$y6,aes(y6), bins=30)
pl7  <- ggplot() + geom_histogram(data=continuous.res$y7,aes(y7), bins=30) pl8 <- ggplot() + geom_histogram(data=continuous.res$y8,aes(y8), bins=30)
pl9  <- ggplot() + geom_histogram(data=continuous.res$y9,aes(y9), bins=30) pl10 <- ggplot() + geom_histogram(data=continuous.res$y10,aes(y10), bins=30)
pl11 <- ggplot() + geom_histogram(data=continuous.res$y11,aes(log(abs(y11))), bins=30) pl12 <- ggplot() + geom_histogram(data=continuous.res$y12,aes(y12), bins=30)
pl13 <- ggplot() + geom_histogram(data=continuous.res$y13,aes(y13), bins=30) grid.arrange(pl5, pl6, pl7, pl8, pl9, pl10, pl11, pl12, pl13) # 3 Discrete probability distributions discrete.dist = inlineModel(" [LONGITUDINAL] DEFINITION: y14 = {distribution=bernoulli, prob=0.3} y15 = {distribution=discreteUniform, min=-4, max=2} y16 = {distribution=binomial, size=30, prob=0.7} y17 = {distribution=geometric, prob=0.2} y18 = {distribution=negativeBinomial, size=3, prob=0.3} y19 = {distribution=poisson, lambda=2} ") y <- list(name=c('y14','y15','y16','y17','y18','y19'),time=(1:N)) discrete.res <- simulx(model = discrete.dist, output=y) pl14 <- ggplot() + geom_bar(data=discrete.res$y14,aes(y14))
pl15 <- ggplot() + geom_bar(data=discrete.res$y15,aes(y15)) pl16 <- ggplot() + geom_bar(data=discrete.res$y16,aes(y16))
pl17 <- ggplot() + geom_bar(data=discrete.res$y17,aes(y17)) pl18 <- ggplot() + geom_bar(data=discrete.res$y18,aes(y18))
pl19 <- ggplot() + geom_bar(data=discrete.res\$y19,aes(y19))
grid.arrange(pl14, pl15, pl16, pl17, pl18, pl19)