!set slib_header_stat=\
/* probas.gp */\
\
/*\
  general TODO:\
  - argument 'n' for random generation and precomputations\
  - domain check\
  - doc\
\
  list of proba distr:\
  normal\
  gamma\
  chi2\
  uniform\
  binom\
  zgauss\
  exp\
  poisson\
  student\
  fisher\
  geom\
  negbin\
*/\
\
/* preliminary GP code for stat functions */\
\
/* normal mean m std s ; assume s>0 */\
\
boxmuller() =\
{\
  my(x1,x2,w);\
  until(w && w < 1,\
    x1 = 2*random(1.) - 1;\
    x2 = 2*random(1.) - 1;\
    w = x1^2+x2^2;\
  );\
  w = sqrt(-2*log(w)/w);\
  [x1 * w, x2 * w];\
};\
\
statnormalrandom(m,s,n=0) =\
{\
  if(!n,\
    m + boxmuller()[1]*s\
  ,\\else\
    my(v = vector(n),bm);\
    for(i=1,n,\
      if(i%2,\
        bm = boxmuller();\
        v[i] = m + bm[1]*s\
      ,\\else\
        v[i] = m + bm[2]*s\
      );\
    );\
    v\
  );\
};\
\
normalpdf0(x) = exp(-sqr(x)/2)/sqrt(2*Pi);\
\
statnormalpdf(m,s,x) = normalpdf0((x-m)/s)/s;\
\
normalcdf0(x) =\
{\
  if(x<0.01,\
    erfc(-x/sqrt(2))/2\
  ,\\else\
    1 - normalcdf0(-x)\
  );\
};\
\
statnormalcdf(m,s,x) = normalcdf0((x-m)/s);\
\
normalinv0(p) =\
{\
  /* do we really need this disjonction? avoids only 1 evaluation of cdf */\
  if (p>1/2,\
    solve(z=0,oo,normalcdf0(z)-p);\
  ,\\else\
    solve(z=-oo,0,normalcdf0(z)-p);\
  );\
};\
\
statnormalinv(m,s,p) = m + s*normalinv0(p);\
\
gammarandom_weibull(c,d) =\
{\
  my(z=0, e=0, x=0);\
  while(z+e<d+x,\
    z=-log(random(1.));\
    e=-log(random(1.));\
    x=z^c;\
  );\
  x\
};\
\
gammarandom_best(d,c) =\
{\
  my(u1,u2,w,x,y);\
  while(1,\
    u1=random(1.);\
    u2=random(1.);\
    w=u1*(1-u1);\
    y=sqrt(c/w)*(u1-1/2);\
    x=d+y;\
    if(x<=0,next);\
    z=64*sqr(w)*w*sqr(u2);\
    if(z<1-2*sqr(y)/x || log(z)<2*(d*log(x/d)-y),\
      return(x)\
    );\
  );\
};\
\
\\TODO ajouter le paramtre b dans gamma\
\\comme https://gnu-octave.github.io/statistics/gamrnd.html\
statgammarandom(a,n=0) = \
{\
  if(a<1,\
    my(c=1/a, d=a^(a^(a/(1-a))*(1-a)));\
    if(!n,\
      gammarandom_weibull(c,d)\
    ,\\else\
      vector(n,i, gammarandom_weibull(c,d))\
    );\
  ,a>1,\
    my(d=a-1, c=3*a-3/4);\
    if(!n,\
      gammarandom_best(d,c);\
    ,\\else\
      vector(n,i,gammarandom_best(d,c));\
    );\
  ,\\else a==1\
    if(!n,statexprandom(1),vector(n,i,statexprandom(1)));\
  );\
};\
\
statgammapdf(a,x,g=gamma(a)) = if(x<=0, 0, x^(a-1)*exp(-x)/g);\
\
statgammacdf(a,x,g=gamma(a)) = if(x<=0, 0, 1 - incgam(a,x,g)/g);\
\
statgammainv(a,p,g=gamma(a)) = solve(x=0,oo, statgammacdf(a,x,g)-p);\
\
statchi2random(df,n=0) = 2*statgammarandom(df/2,n);\
\
statchi2pdf(df,x,g=gamma(df/2)) = statgammapdf(df/2,x/2,g)/2;\
\
statchi2cdf(df,x,g=gamma(df/2)) = statgammacdf(df/2,x/2,g);\
\
statchi2inv(df,p,g=gamma(df/2)) = 2*statgammainv(df/2,p,g);\
\
binomrandom_sumbern(nb,ps) =\
{\
  my(v=0);\
  for(i=1,nb,v+=random(1.)<ps);\
  v\
};\
\
binomrandom_inv(nb,ps) =\
{\
  statbinominv(nb,ps,random(1.))\
  \\TODO for large number of samples, could precompute cdf table + dichotomy\
  \\implement as generic discrete distribution\
};\
\
\\TODO vector n + precomputations\
statbinomrandom(nb,ps) =\
{\
  if(ps<0.1 || ps>0.9, \\TODO tune threshold\
    binomrandom_inv(nb,ps)\
  ,\\else\
    binomrandom_sumbern(nb,ps)\
  );\
};\
\
/* warning: wrt counting measure on Z, not Lebesgue measure on R */\
statbinompdf(nb,ps,x) =\
{\
  my(k=round(x));\
  if(k!=x, return(0)); \\or error?\
  binomial(nb,k)*ps^k*(1-ps)^(nb-k)\
};\
\
statbinomcdf(nb,ps,x) =\
{\
  my(k,s=0,u,a,j);\
  a = ps/(1-ps);\
  if(x<0, return(0));\
  if(x>=nb, return(1));\
  k = floor(x);\
  if(k<nb/2,\
    u = (1-ps)^nb;\
    s = u;\
    for(i=1,k,\
      u *= a*(nb-i+1)/i;\
      s += u;\
    );\
  ,\\else: k>=nb/2\
    u = ps^nb;\
    s = u;\
    for(i=k+1,nb-1,\
      u *= (nb-i+k+1)/a/(i-k);\
      s += u;\
    );\
    s = 1-s;\
  );\
  s;\
};\
\
\\TODO allow to factor out precomputations\
statbinominv(nb,ps,p) =\
{\
  my(s,u,j,a,b,v);\
  a = ps/(1-ps);\
  \\b = normalcdf0(sqrt(nb)*(1/2-ps)/sqrt(ps*(1-ps)));\
  b = 1/2;\
  if(p<b,\
    v = statbinompdf(nb,ps,0);\
    u = v;\
    j = 0;\
    while(u<p,\
      j++;\
      v *= a*(nb-j+1)/j;\
      u += v\
    );\
    j;\
  ,\\else p>=b\
    u=1;\
    j=nb;\
    v=ps^nb;\
    while(u>=p,\
      u -=v;\
      v*=1/a*j/(nb-j+1);\
      j--;\
    );\
    j+1;\
  );\
  \\*/\
};\
\
rhoz(s,c,x) = exp(-Pi*((x-c)/s)^2);\
\
zgaussrandom_reject(s,c) =\
{\
  if(s<3.8, error("This algorithm is only correct for s>=3.8"));\
  my(a = c-3.8*s,b = c+3.8*s, x);\
  while(1,\
    x = random([a,b]);\
    if(random(1.)<rhoz(s,c,x), return(x));\
  );\
};\
\
zgaussrandom_roundreject(s,c) =\
{\
  my(Z0,Z1,Z,rho0,rho1,pr1,pr2,pr3,r,x,k);\
  c = 1-c;\
  Z0 = s/2 * erfc(sqrt(Pi)*c/s);\
  Z1 = s/2 * erfc(sqrt(Pi)*(1-c)/s);\
  rho0 = rhoz(s,0,c);\
  rho1 = rhoz(s,0,c-1);\
  Z = Z0 + Z1 + rho0 + rho1;\
  pr1 = rho0/Z;\
  pr2 = (rho0+rho1)/Z;\
  pr3 = (rho0+rho1+Z0)/Z;\
  while(1,\
    r = random(1.);\
    if(r<pr1, \\proba rho0/Z\
      return(1);\
    ,r<pr2, \\proba rho1/Z\
      return(0);\
    ,r<pr3, \\proba Z0/Z\
      until(x>c, x = statnormalrandom(0,s/sqrt(2*Pi)));\
      k = ceil(x-c); \\y = k+c\
      if(random(1.) < rhoz(s,0,k+c)/rhoz(s,0,x), return(k+1))\
    ,\\else: proba Z1/Z\
      until(x<c-1, x = statnormalrandom(0,s/sqrt(2*Pi)));\
      k = floor(x-c); \\y = k+c\
      if(random(1.) < rhoz(s,0,k+c)/rhoz(s,0,x), return(k+1))\
    );\
  );\
};\
\
zgaussmass0(s,c,M) =\
{\
  sum(n=-M,0,rhoz(s,c,n)) +\
  sum(n=-M,-1,rhoz(s,c,-n));\
};\
\
zgaussmass(s,c) = \\TODO use theta()\
{\
  zgaussmass0(s,c-round(c),round(10*(1+s)))\
};\
\
\
\\s<1.\
zgaussrandom_small(s,c) =\
{\
  my(M=zgaussmass0(s,c,6),r=M*random(1.),cdf=0);\
  for(k=-6,5, \\TODO version with two halves\
    cdf += rhoz(s,c,k);\
    if(r<cdf, return(k));\
  );\
  6\
};\
\
statzgaussrandom(s,c) =\
{\
  my(k = floor(c));\
  if(k, return(statzgaussrandom(s,c-k)+k));\
  if(s<0.7, \\TODO tune threshold\
    zgaussrandom_small(s,c)\
  ,s<3.8, \\reject only valid for s>=3.8\
    zgaussrandom_roundreject(s,c)\
  ,\\else\
    zgaussrandom_reject(s,c)\
  );\
};\
\
/* warning: wrt counting measure on Z, not Lebesgue measure on R */\
statzgausspdf(s,c,x) =\
{\
  rhoz(s,c,x)/zgaussmass(s,c);\
};\
\
statzgausscdf(s,c,x) =\
{\
  my(M = round(10*(1+s)),k);\
  k = round(c);\
  c -= k;\
  x -= k;\
  x = floor(x);\
  (sum(n=-M,min(x,0),rhoz(s,c,n)) +\
  sum(n=-min(x,M),-1,rhoz(s,c,-n))\
  )\
  / zgaussmass(s,c);\
};\
\
statzgaussinv(s,c,p) =\
{\
  my(M = round(10*(1+s)),k,x,ps);\
  k = round(c);\
  c -= k;\
\
  if(p==0, return(-oo));\
  if(p==1, return(+oo));\
\
  if(p<=1/2,\
    p *= zgaussmass(s,c);\
    x = -M-1;\
    ps = 0;\
    while(ps<p,\
      x++;\
      ps += rhoz(s,c,x);\
    );\
  ,\\else p>1/2\
    p = (1-p)*zgaussmass(s,c);\
    x = M+1;\
    ps = 0;\
    while(ps<p,\
      x--;\
      ps += rhoz(s,c,x);\
    );\
  );\
\
  x+k\
};\
\
\\TODO discrete gaussian of arbitrary lattices\
\
\\mu = 1/lambda\
statexprandom(mu) = -mu*log(random(1.));\
\
statexppdf(mu,x) = if(x<0,0,exp(-x/mu)/mu);\
\
statexpcdf(mu,x) = if(x<0,0,1-exp(-x/mu));\
\
statexpinv(mu,p) = -log(1-p)*mu;\
\
statpoissrandom(lambda) =\
{\
  my(s,j,i,a);\
  a = exp(-lambda);\
  s = 1;\
  j = 0;\
  while (s>a,\
    s *= random(1.);\
    j = j+1\
  );\
  j-1\
};\
\
/* warning: wrt counting measure on Z, not Lebesgue measure on R */\
\\ lambda^k*exp(-lambda)/k! is the exp evaluation really better??\
statpoisspdf(lambda,x) = my(k=round(x)); if(k<0 || k!=x,0,exp(k*log(lambda)-lambda-lngamma(k+1)));\
\
statpoisscdf(lambda,x) =\
{\
  my(k = floor(x), g);\
  if(k<0,\
    0\
  ,k<10, \\TODO tune threshold\
    sum(j=0,k,lambda^j/j!)/exp(lambda)\
  ,k<=2*lambda, \\TODO tune threshold\
    g = k!;\
    incgam(k+1,lambda,g)/g\
  ,\\else k>2*lambda\
    1-incgamc(k+1,lambda)/k!\
  );\
};\
\
\\TODO check octave implem https://github.com/gnu-octave/statistics/tree/main/inst/dist_fun/poissinv.m\
statpoissinv(lambda,p) =\
{\
  my(x);\
  if(p==0,\
    -1\
  ,p==1,\
    oo\
  ,\\else\
    my(s=0,k=-1);\
    p *= exp(lambda);\
    while(s<p,\
      k++;\
      s += lambda^k/k!;\
    );\
    k\
  );\
};\
\
\\TODO tests beta and incbeta\
beta(a,b) = gamma(a)*gamma(b)/gamma(a+b);\
incbeta(a,b,x) = x^a/a * hypergeom([a,1-b],[a+1],x); \\https://www.fungrim.org/topic/Beta_function/\
regincbeta(a,b,x) = incbeta(a,b,x) / beta(a,b);\
\
\\ Student's t distribution, df>0 (usually a positive integer)\
\\ set student(df=oo) = normal distribution?\
studentfactor(df) = gamma((df+1)/2)/(sqrt(Pi*df)*gamma(df/2));\
\\can be simplified when df is an integer (TODO)\
\
statstudentrandom(df) =\
{\
  my(z,v);\
  z = statnormalrandom(0,1);\
  v = statchi2random(df);\
  z/sqrt(v/df)\
};\
\
statstudentpdf(df,x) = studentfactor(df) * (1+x^2/df)^(-(df+1)/2);\
\
statstudentcdf(df,x) = \
{\
  if(!x,\
    1/2\
  ,x>0,\
    1 - regincbeta(df/2,1/2,df/(x^2+df))/2\
  ,\\else x<0\
    regincbeta(df/2,1/2,df/(x^2+df))/2\
  );\
};\
\\can be simplified for small integer values of df\
\
statstudentinv(df,p) =\
{\
  if (p>1/2,\
    solve(z=0,oo,statstudentcdf(df,z)-p);\
  ,\\else\
    solve(z=-oo,0,statstudentcdf(df,z)-p);\
  );\
};\
\
\\TODO test Fisher\
\
\\x>0  if d1<2\
\\x>=0 if d1>=2\
statfisherpdf(d1,d2,x) = ((d1/d2)^(d1/2)/beta(d1/2,d2/2)) * x^(d1/2-1)*(1+x*d1/d2)^(-(d1+d2)/2);\
\
statfishercdf(d1,d2,x) = if(x==0,0,regincbeta(d1/2,d2/2,d1*x/(d1*x+d2)));\
\
statfisherinv(d1,d2,p) = solve(x=0,oo,statfishercdf(d1,d2,x)-p);\
\
statfisherrandom(d1,d2) = d2/d1 * statchi2random(d1)/statchi2random(d2);\
\
\
\\TODO test geom\
\
statgeompdf(ps,x) = \
{\
  my(k=round(x));\
  if(k!=x, return(0));\
  ps*(1-ps)^k\
};\
\
statgeomcdf(ps,x) =\
{\
  if(x<0, return(0));\
  1-(1-ps)^(floor(x)+1)\
};\
\
statgeominv(ps,p) = \
{\
  if(p<=0, return(0));\
  if(p>=1, return(oo));\
  floor(1+log(1-p)/log(1-ps))-1\
};\
\
\\TODO FIXME when ps is very small (use higher precision?)\
statgeomrandom(ps) = ceil(log(random(1.))/log(1-ps))-1;\
\
\
\\TODO test negbin\
\
statnegbinpdf(r,ps,x) =\
{\
  my(k=round(x));\
  if(k!=x, return(0));\
  binomial(k+r-1,k)*(1-ps)^k*ps^r\
};\
\
\\TODO sometimes use the formula with regincbeta?\
statnegbincdf(r,ps,x) = sum(k=0,x,statnegbinpdf(r,ps,k));\
\
\\TODO optimise?\
statnegbininv(r,ps,p) =\
{\
  my(k=-1,psum=0);\
  if(p<=0, return(0));\
  while(psum<p,\
    k++;\
    psum += statnegbinpdf(r,ps,k);\
  );\
  k\
};\
\
statnegbinrandom(r,ps) = statpoissrandom((1-ps)/ps * statgammarandom(r));\
\
\
\
/* empirical distribution (CDF only), useful for plotting with data =\
   sorted list of samples + gen_search */\
statempiricalcdf(data,x) =\
{\
  my(i=1,j=#data,k);\
  if(x<data[1], return(0));\
  if(x>data[#data], return(1));\
  \\data[i] <= x < data[j]\
  while(j>i+1,\
    k = (i+j)\2;\
    if(data[k]<=x, i=k, j=k);\
  );\
  i/#data\
};\
/* TODO empirical inv cdf? = data[1+round(p*(n-1))] */\
\
\
\
\\elementary stats\
\
statmoment(L,p) = sum(i=1,#L,L[i]^p)/#L;\
statabsmoment(L,p) = sum(i=1,#L,abs(L[i])^p)/#L;\
\
\\TODO statmedian quantiles etc (median of median algorithm)\
\\ for all quantiles, use statempiricalcdf\
\
statmean(L) = statmoment(L,1);\
\
statcenter(L) = my(m=statmean(L)); vector(#L,i,L[i]-m);\
\
\\assume mean==0\
dev0(L) = sqrt(statmoment(L,2));\
\
statdev(L) = dev0(statcenter(L));\
\
statnormalise(L) = L = statcenter(L); L/dev0(L);\
\
statcorrelation(L1,L2) =\
{\
  L1 = statnormalise(L1);\
  L2 = statnormalise(L2);\
  statmean(vector(#L1,i,L1[i]*L2[i]));\
};\
\
statrankcorrelation(L1,L2) =\
{\
  statcorrelation(vecsort(L1,,1),vecsort(L2,,1))\
};\
\
\\MAD = mean absolute deviation\
statmad(L) = statmean(abs(statcenter(L)));\
\
\\TODO meansquare?\
\\TODO skewness\
\\TODO kurtosis\
\\TODO cumulants?\
\\TODO histogram???\
\
\
\
\
\\other random sampling functions\
\
\\uniform random prime ideal of norm in [2,N-1] with residue degree <= f\
nfrandomprime(nf,N,f=0) =\
{\
  my(n,p,f,dec);\
  n = poldegree(nf.pol);\
  if(!f, f=n);\
  while(1,\
    p = randomprime(N);\
    f = min(f,logint(N-1,p));\
    dec = idealprimedec(nf,p,f);\
    if(random(n)<#dec, \\rejection step: accept p with proba #dec/n\
      return(dec[1+random(#dec)]);\
    );\
  );\
};\
\
/* random HNF */\
\
randomhnfp(n,p) =\
{\
  my(k=1,M);\
  while(random(p)==0, k=(k%n)+1);\
  M = matid(n);\
  M[k,k] = p;\
  for(j=k+1,n,M[k,j]=random(p));\
  M\
};\
\
randomhnfppow(n,p,k) =\
{\
  my(M=1,c=1,c2);\
  for(j=1,k,\
    M *= randomhnfp(n,p);\
    c2 = content(M);\
    if(c2!=1 && random((p^n-1)\(p-1))!=0,\
      return(randomhnfppow(n,p,k)); \\reject\
    );\
  );\
  c*mathnf(M)\
};\
\
randomhnffa(n,fa) =\
{\
  my(M=1);\
  for(i=1,#fa~,\
    M *= randomhnfppow(n,fa[i,1],fa[i,2]);\
  );\
  mathnf(M)\
};\
\
\\TODO test this\
randomhnf(n,d) = randomhnffa(n,factor(d));\
\
\
\\ML version. Only approximately uniform.\
rperm(n)=\
{\
my(x=vector(n,i,random(1.)));\
vecsort(x,,1);\
}\
\\slower. But would it also be slower in C? Should be O(n) vs O(n*log(n))!\
\\TODO check\
randomperm(n) =\
{\
  my(g=vectorsmall(n,i,i),j,tmp);\
  for(i=1,n-1,\
    j = random([i,n]);\
    tmp = g[i];\
    g[i] = g[j];\
    g[j] = tmp;\
  );\
  g\
};\
\
\\uniform distribution on unit sphere in R^d\
randominsphere(d) =\
{\
  my(v);\
  v = statnormalrandom(0,1,d);\
  v~/sqrt(norml2(v))\
};\
\
\\uniform distribution on unit ball in R^d\
randominball(d) =\
{\
  random(1.)^(1/d)*randominsphere(d)\
};\
\
\\TODO tests\
randomgamma0(N,B=100) =\
{\
  my(M,H);\
  M = matrix(2,2,i,j,if(i>j, random(-4*B\N)*N, random(-4*B)));\
  while(gcd(M[2,2],N)!=1, M[2,2] = random(-4*B));\
  [H,U] = mathnf(M,1);\
  if(matdet(U)<0, U[,1]*=-1);\
  if(matdet(U)!=1, error("not in SL2Z!"));\\check\
  if(U[2,1]%N!=0, error("not in gamma0(N)!"));\\check\
  U\
};\
\
