The goal of fashr is to implement the functional adaptive shrinkage through empirical Bayes.
You can install the development version of fashr from GitHub with:
# install.packages("pak")
pak::pak("AgueroZZ/fash_software")
This is a basic example, which tries to identify non-linear functions in a set of datasets. We use order = 2
to apply the second order IWP, which has a base model S0 being the space of linear functions.
library(fashr)
# Simulate five datasets
set.seed(123)
datasets <- list()
for (i in 1:20) {
n <- 50
t <- seq(0, 5, length.out = n)
sd <- sample(c(2, 1), size = n, replace = TRUE)
u <- runif(1); if (u < 0.5) {f <- function(t) 3*cos(t)} else {f <- function(t) (t)}
y <- f(t) + rnorm(n, sd = 0.5)
datasets[[i]] <- data.frame(t = t, y = y, sd = sd)
}
# Fit the model
fash_fit <- fash(Y = "y", smooth_var = "t", S = "sd", data = datasets,
order = 2, likelihood = "gaussian", verbose = TRUE)
#> Starting data setup...
#> Completed data setup in 0.00 seconds.
#> Starting likelihood computation...
#> | | | 0% | |==== | 5% | |======= | 10% | |========== | 15% | |============== | 20% | |================== | 25% | |===================== | 30% | |======================== | 35% | |============================ | 40% | |================================ | 45% | |=================================== | 50% | |====================================== | 55% | |========================================== | 60% | |============================================== | 65% | |================================================= | 70% | |==================================================== | 75% | |======================================================== | 80% | |============================================================ | 85% | |=============================================================== | 90% | |================================================================== | 95% | |======================================================================| 100%
#> Completed likelihood computation in 2.98 seconds.
#> Starting empirical Bayes estimation...
#> Completed empirical Bayes estimation in 0.00 seconds.
#> fash object created successfully.
fash_fit
#> Fitted fash Object
#> -------------------
#> Number of datasets: 20
#> Likelihood: gaussian
#> Number of PSD grid values: 10 (initial), 2 (non-trivial)
#> Order of Integrated Wiener Process (IWP): 2
Take a look at the structure plot ordered by local false discovery rate:
plot(fash_fit, ordering = "lfdr")
Obtain the posterior summary of the function for the first dataset:
fitted <- predict(fash_fit, index = 1)
str(fitted)
#> 'data.frame': 50 obs. of 5 variables:
#> $ x : num 0 0.102 0.204 0.306 0.408 ...
#> $ mean : num 3.41 3.19 2.97 2.75 2.53 ...
#> $ median: num 3.42 3.19 2.98 2.75 2.53 ...
#> $ lower : num 1.9 1.9 1.87 1.78 1.66 ...
#> $ upper : num 4.91 4.46 4.05 3.69 3.37 ...
Obtain the posterior samples of the function:
fitted_samps <- predict(fash_fit, index = 1, only.samples = TRUE, M = 30)
str(fitted_samps)
#> num [1:50, 1:30] 3.35 3.12 2.96 2.78 2.56 ...
plot(datasets[[1]]$t, datasets[[1]]$y, type = "p", col = "black", ylab = "y", xlab = "t")
lines(fitted$x, fitted$mean, col = "red")
polygon(c(fitted$x, rev(fitted$x)), c(fitted$lower, rev(fitted$upper)), col = rgb(1, 0, 0, 0.2), border = NA)
matlines(fitted$x, fitted_samps[, 1:5], col = "blue", lty = 2, lwd = 0.5)
Compute the FDR, and highlight the datasets with FDR < 0.1:
fdr_result <- fdr_control(fash_fit, alpha = 0.1, plot = TRUE)
#> 5 datasets are significant at alpha level 0.10. Total datasets tested: 20.
str(fdr_result)
#> List of 1
#> $ fdr_results:'data.frame': 20 obs. of 2 variables:
#> ..$ index: int [1:20] 4 13 17 12 1 15 9 3 20 14 ...
#> ..$ FDR : num [1:20] 3.18e-16 3.88e-16 6.96e-12 2.86e-11 4.14e-09 ...