We will model the time evolution of an algal sample taken in the Adriatic Sea (Cavallini, 1993). Time (\(x\)) is expressed in days and biomass (\(y\)), which is a measure of growth, is measured in mm2 (what is actually measured is the surface covered by biomass in a microscopic sample). The data seem to follow a logistic curve: \[y_i = \frac{\theta_0}{1 + \exp (-\theta_1(x_i - \theta_2))}\]

Descriptive analysis

algal <- data.frame(x = c(11, 15, 18, 23, 26, 31, 39, 44, 54, 64, 74), 
                    y = c(.0048, .0105, .0207, .0619, .3370, 
                          .7400, 1.7, 2.45, 3.5, 4.5, 5.09))
kable(algal, row.names = T)
x y
1 11 0.0048
2 15 0.0105
3 18 0.0207
4 23 0.0619
5 26 0.3370
6 31 0.7400
7 39 1.7000
8 44 2.4500
9 54 3.5000
10 64 4.5000
11 74 5.0900
require(ggplot2)
p <- ggplot(data = algal, aes(x, y)) + geom_point()
p

Perform a nonlinear regression with the grid-search method

In R, we can perform the grid search and find the starting value manually.

# setup grid
grid <- expand.grid(theta0 = seq(0, 10, 2), 
                    theta1 = seq(0, .5, .1), 
                    theta2 = seq(40, 60, 5))
kable(head(grid))
theta0 theta1 theta2
0 0 40
2 0 40
4 0 40
6 0 40
8 0 40
10 0 40

We specify the nonlinear model and compute the sum of squares of error manually.

nlmodel <- function(x, theta) {
  theta[1] / (1 + exp(-theta[2] * (x - theta[3])))
}

sse <- apply(grid, 1, function(theta) {
  sum((algal$y - nlmodel(algal$x, theta))^2)
})
kable(head(data.frame(grid, sse)))  # only the head of the table is shown
theta0 theta1 theta2 sse
0 0 40 67.96616
2 0 40 42.13636
4 0 40 38.30656
6 0 40 56.47676
8 0 40 96.64696
10 0 40 158.81716

Final starting values:

grid[which.min(sse), ]
##    theta0 theta1 theta2
## 82      6    0.1     50

Fit the nonlinear model

nls_fit <- nls(y ~ nlmodel(x, theta), data = algal, trace = T,
               start = list(theta = c(6, .1, 50)))
## 0.6350226 :   6.0  0.1 50.0
## 0.2857354 :   5.0141669  0.1157241 45.8625826
## 0.2384547 :   5.0887283  0.1221786 45.7619359
## 0.2381781 :   5.0956237  0.1211468 45.7748836
## 0.2381669 :   5.0947305  0.1213394 45.7743463
## 0.2381665 :   5.0949589  0.1213007 45.7748138
## 0.2381664 :   5.0949235  0.1213077 45.7747829
## 0.2381664 :   5.0949315  0.1213063 45.7747980
## 0.2381664 :   5.0949302  0.1213066 45.7747966
nls_fit
## Nonlinear regression model
##   model: y ~ nlmodel(x, theta)
##    data: algal
##  theta1  theta2  theta3 
##  5.0949  0.1213 45.7748 
##  residual sum-of-squares: 0.2382
## 
## Number of iterations to convergence: 8 
## Achieved convergence tolerance: 1.925e-06
sm <- summary(nls_fit, correlation = T)
sm$coefficients
##          Estimate Std. Error  t value     Pr(>|t|)
## theta1  5.0949302 0.19766239 25.77592 5.505407e-09
## theta2  0.1213066 0.01159284 10.46393 6.044273e-06
## theta3 45.7747966 1.17097569 39.09116 2.015699e-10
sm$correlation
##            theta1     theta2     theta3
## theta1  1.0000000 -0.6865027  0.8166978
## theta2 -0.6865027  1.0000000 -0.6528791
## theta3  0.8166978 -0.6528791  1.0000000

Confidence intervals for parameter estimates

# the output shows theta 1-3, it's actually theta 0-2
mapply(function(est, se) est + qt(c(.025, .975), df.residual(nls_fit)) * se,
       sm$coefficients[, 1], sm$coefficients[, 2], SIMPLIFY = F)
## $theta1
## [1] 4.63912 5.55074
## 
## $theta2
## [1] 0.09457346 0.14803972
## 
## $theta3
## [1] 43.07452 48.47507

Confidence and prediction interval for \(Y\)

y0 <- fitted(nls_fit)
cf <- cf <- nls_fit$m$gradient() 
se <- apply(t(cf), 2, function(f0) {
  sm$sigma * {t(f0) %*% sm$cov.unscaled %*% f0}^(.5)
})

ll <- fitted(nls_fit) + qt(.025, df.residual(nls_fit)) * se
ul <- fitted(nls_fit) + qt(.975, df.residual(nls_fit)) * se
ci <- data.frame(algal, ll, ul)

# prediction intervals are similar
se <- apply(t(cf), 2, function(f0) {
  sm$sigma * {1 + t(f0) %*% sm$cov.unscaled %*% f0}^(.5)
})

ll <- fitted(nls_fit) + qt(.025, df.residual(nls_fit)) * se
ul <- fitted(nls_fit) + qt(.975, df.residual(nls_fit)) * se
pi <- data.frame(algal, ll, ul)

p + stat_function(fun = nlmodel, args = list(coef(nls_fit))) +
  geom_smooth(aes(ymin = ll, ymax = ul), data = ci, stat="identity") + 
  geom_smooth(aes(ymin = ll, ymax = ul), data = pi, stat="identity")