Using parametric splines for piece-wise polynomial curves
and Fourier series for seasonal effects

Data set simulates data from Statistics Canada NPHS from 1994 to 2011. Participants were surveyed every 2 years for up to 7 occasions.

Some participants happened to give birth during the study but since data was collected every two years there was little data on individual longitudinal sleep patterns before and after birth.

However, using mixed models with a parametric model for sleep behaviour before and after birth, it’s possible to ‘stitch’ trajectories together to get a picture of individual predicted sleep trajectories.

library(spida2)
library(nlme)

    
    Attaching package: 'nlme'

    The following object is masked from 'package:spida2':
    
        getData

library(latticeExtra)

    Loading required package: lattice

Hypothetical perinatal ‘birth effect’ on maternal sleep relative to days before and after birth.

birth_effect <- function(d, plus = 1, minus = 1) {
  ifelse(d < -180, 0,
         ifelse(d < 0, plus * (.5 -.5* cos(pi*(d+180)/180)),
                ifelse(d < 180, - minus * (.5 -.5* cos(pi*(d+180)/180)), 0)
         )
  )
}
# test
seq(-365,365) %>% plot(., birth_effect(.), type = 'l')

Generate a data set

Note that many women in the NPHS gave birth more than once. Here there is only one birth recorded per person.

# sample(100000, 1)
{
  set.seed(4728)
  Nid <- 1000    # number of subjects
  Nobs <- 7      # observations per subject
  
  expand.grid(id = 1:Nid, obs = 1:Nobs) %>%  # basic skeleton for data set 
    within(
      {
        
        # date id registered
        reg_date <- sample(Nobs * 365, Nid, replace = TRUE)[id]  # generating one value per id
        
        # dates id observed (approx every 2 years)
        date <- reg_date + obs*2*365 + sample(365, length(id), replace = TRUE)  # generating one value per observation
        
        birth_date <- reg_date + sample(365*14, Nid, replace = TRUE)[id]       # date giving birth
        
        ..plus <- runif(Nid)[id]      # extra sleep pre birth
        ..minus <- runif(Nid)[id]     # less sleep after birth
        ..birth_effect <- birth_effect( date - birth_date, ..plus, ..minus)
        
        ..seasonal <- .5 * cos(2*pi*(date-30)/365)
        ..sd_between <- 1
        ..sd_within <- .5
        
        sleep <- 8 + ..sd_between * rnorm(Nid)[id] + ..sd_within * rnorm(id) +
          ..birth_effect + ..seasonal
        
        ..plus <- ..minus <- ..birth_effect <- ..seasonal <- ..sd_between <- ..sd_within <- NULL
        
      }
    ) %>% 
    sortdf(~id/date)-> dd
}
head(dd)

         id obs    sleep birth_date date reg_date
    1     1   1 7.088871       5298 1197      288
    1001  1   2 7.336316       5298 1891      288
    2001  1   3 7.909879       5298 2655      288
    3001  1   4 7.489981       5298 3441      288
    4001  1   5 7.087731       5298 4084      288
    5001  1   6 6.920117       5298 4893      288

xqplot(dd)

xyplot(sleep ~ date, dd, groups = id, type = 'l')

xyplot(sleep ~ I(date-birth_date), dd, groups = id, type = 'l')

Note: one observation every two years on each person

Between-person and within-person variation in sleep

fit <- lme(sleep ~ 1, dd, random = ~1 |id)
summary(fit)

    Linear mixed-effects model fit by REML
      Data: dd 
           AIC      BIC    logLik
      16048.53 16069.09 -8021.267
    
    Random effects:
     Formula: ~1 | id
            (Intercept)  Residual
    StdDev:   0.9974714 0.6156731
    
    Fixed effects:  sleep ~ 1 
                   Value  Std.Error   DF  t-value p-value
    (Intercept) 7.987087 0.03238981 6000 246.5926       0
    
    Standardized Within-Group Residuals:
            Min          Q1         Med          Q3         Max 
    -3.85320384 -0.64160961  0.00633589  0.63204103  3.68567783 
    
    Number of Observations: 7000
    Number of Groups: 1000

define a parametric spline using years as unit to avoid large numbers

sp <- function(y) {
  gsp(y, knots = c(-.5,0,.5), degree = c(0,1,1,0), c(0, -1, 0))
}

seq(-2,2,.1) %>% matplot(., sp(.), type ='b')

sp(seq(-2,2,.1))

            D1(0) C(0).0 C(0).1
    f(-2)    -0.5      0    0.0
    f(-1.9)  -0.5      0    0.0
    f(-1.8)  -0.5      0    0.0
    f(-1.7)  -0.5      0    0.0
    f(-1.6)  -0.5      0    0.0
    f(-1.5)  -0.5      0    0.0
    f(-1.4)  -0.5      0    0.0
    f(-1.3)  -0.5      0    0.0
    f(-1.2)  -0.5      0    0.0
    f(-1.1)  -0.5      0    0.0
    f(-1)    -0.5      0    0.0
    f(-0.9)  -0.5      0    0.0
    f(-0.8)  -0.5      0    0.0
    f(-0.7)  -0.5      0    0.0
    f(-0.6)  -0.5      0    0.0
    f(-0.5)  -0.5      0    0.0
    f(-0.4)  -0.4      0    0.0
    f(-0.3)  -0.3      0    0.0
    f(-0.2)  -0.2      0    0.0
    f(-0.1)  -0.1      0    0.0
    f(0)      0.0      0    0.0
    f(0.1)    0.1      1    0.1
    f(0.2)    0.2      1    0.2
    f(0.3)    0.3      1    0.3
    f(0.4)    0.4      1    0.4
    f(0.5)    0.5      1    0.5
    f(0.6)    0.5      1    0.5
    f(0.7)    0.5      1    0.5
    f(0.8)    0.5      1    0.5
    f(0.9)    0.5      1    0.5
    f(1)      0.5      1    0.5
    f(1.1)    0.5      1    0.5
    f(1.2)    0.5      1    0.5
    f(1.3)    0.5      1    0.5
    f(1.4)    0.5      1    0.5
    f(1.5)    0.5      1    0.5
    f(1.6)    0.5      1    0.5
    f(1.7)    0.5      1    0.5
    f(1.8)    0.5      1    0.5
    f(1.9)    0.5      1    0.5
    f(2)      0.5      1    0.5
    attr(,"spline.attr")
    attr(,"spline.attr")$knots
    [1] -0.5  0.0  0.5
    
    attr(,"spline.attr")$degree
    [1] 0 1 1 0
    
    attr(,"spline.attr")$smoothness
    [1]  0 -1  0
    
    attr(,"spline.attr")$lin
    NULL
    
    attr(,"spline.attr")$intercept
    [1] 0
    
    attr(,"spline.attr")$signif
    [1] 3
    
    attr(,"class")
    [1] "gsp"

Use years as time units

dd <- within(dd,
           {
             datey <- date /365
             birthy <- birth_date / 365
           })



fit <- lme(sleep ~ sp(datey - birthy) , dd, random = ~ 1 | id)
summary(fit)

    Linear mixed-effects model fit by REML
      Data: dd 
           AIC      BIC    logLik
      15985.45 16026.57 -7986.724
    
    Random effects:
     Formula: ~1 | id
            (Intercept)  Residual
    StdDev:   0.9970469 0.6118624
    
    Fixed effects:  sleep ~ sp(datey - birthy) 
                                 Value  Std.Error   DF   t-value p-value
    (Intercept)               8.465813 0.08105925 5997 104.43982  0.0000
    sp(datey - birthy)D1(0)   0.987491 0.15450552 5997   6.39130  0.0000
    sp(datey - birthy)C(0).0 -0.985077 0.11077282 5997  -8.89277  0.0000
    sp(datey - birthy)C(0).1  0.040963 0.22128064 5997   0.18512  0.8531
     Correlation: 
                             (Intr) s(-b)D s(-b)C(0).0
    sp(datey - birthy)D1(0)   0.907                   
    sp(datey - birthy)C(0).0 -0.637 -0.682            
    sp(datey - birthy)C(0).1 -0.617 -0.677 -0.063     
    
    Standardized Within-Group Residuals:
             Min           Q1          Med           Q3          Max 
    -3.877379606 -0.646902798  0.002500854  0.631484893  3.660662143 
    
    Number of Observations: 7000
    Number of Groups: 1000

Create a prediction data frame to show model prediction

pred <- data.frame(datey = seq(-2,2,.01), birthy = 0)
pred$fit <- predict(fit, newdata = pred, level = 0)

xyplot(fit ~ I(12*datey), pred, type = 'l', lwd = 2,
       xlim = c(-12,12),
       ylab = "Mean maternal hours of sleep",
       xlab = "Time pre or post birth (in months)") +
  layer_(panel.grid(h=-1,v=-1))

To add error bounds, since ‘predict’ won’t provide them for ‘lme’ models

ww <- as.data.frame(wald(fit, pred = pred))

plotbands <- function(ww,...) {
xyplot(coef ~ I(12*datey), ww, type = 'l', lwd = 2,
       xlim = c(-12,12),
       ...,
       lower = ww$L2,             # added for panel.fit
       upper = ww$U2,             # added for panel.fit
       subscripts = T,            # added for panel.fit
       ylab = "Mean maternal hours of sleep with 95% confidence bands",
       xlab = "Time pre or post birth (in months)") +
    layer_(panel.grid(h = -1, v = -1)) +
    layer(panel.fit(..., alpha = .2))
}
plotbands(ww)

plotbands(ww, ylim = c(7,9))

Try a different spline

sp2 <- function(y) gsp(y, c(-1,-.5, 0, .5, 1), c(0,2,3,3,2,0), c(1,1,-1,1,1))

fit2 <- lme(sleep ~ sp2(datey - birthy) , dd, random = ~ 1 | id)
summary(fit2)

    Linear mixed-effects model fit by REML
      Data: dd 
          AIC      BIC    logLik
      15967.9 16036.42 -7973.948
    
    Random effects:
     Formula: ~1 | id
            (Intercept)  Residual
    StdDev:   0.9971542 0.6118803
    
    Fixed effects:  sleep ~ sp2(datey - birthy) 
                                  Value Std.Error   DF  t-value p-value
    (Intercept)                 8.55861   0.15489 5993 55.25753  0.0000
    sp2(datey - birthy)D1(0)    1.75659   2.02286 5993  0.86837  0.3852
    sp2(datey - birthy)D2(0)    1.05499  14.57452 5993  0.07239  0.9423
    sp2(datey - birthy)D3(0)   -8.98711  43.99968 5993 -0.20425  0.8382
    sp2(datey - birthy)C(0).0  -1.01700   0.21748 5993 -4.67626  0.0000
    sp2(datey - birthy)C(0).1  -2.64154   2.94118 5993 -0.89812  0.3692
    sp2(datey - birthy)C(0).2  17.97831  21.12807 5993  0.85092  0.3948
    sp2(datey - birthy)C(0).3 -60.95171  63.63423 5993 -0.95784  0.3382
     Correlation: 
                              (Intr) s2(-b)D1 s2(-b)D2 s2(-b)D3 s2(-b)C(0).0
    sp2(datey - birthy)D1(0)   0.840                                        
    sp2(datey - birthy)D2(0)   0.726  0.975                                 
    sp2(datey - birthy)D3(0)   0.663  0.945    0.994                        
    sp2(datey - birthy)C(0).0 -0.685 -0.599   -0.518   -0.474               
    sp2(datey - birthy)C(0).1 -0.577 -0.688   -0.671   -0.650   -0.031      
    sp2(datey - birthy)C(0).2 -0.502 -0.672   -0.689   -0.685    0.742      
    sp2(datey - birthy)C(0).3 -0.457 -0.654   -0.688   -0.692   -0.024      
                              s2(-b)C(0).1 s2(-b)C(0).2
    sp2(datey - birthy)D1(0)                           
    sp2(datey - birthy)D2(0)                           
    sp2(datey - birthy)D3(0)                           
    sp2(datey - birthy)C(0).0                          
    sp2(datey - birthy)C(0).1                          
    sp2(datey - birthy)C(0).2 -0.050                   
    sp2(datey - birthy)C(0).3  0.946       -0.046      
    
    Standardized Within-Group Residuals:
             Min           Q1          Med           Q3          Max 
    -3.877143934 -0.645090676  0.001593104  0.629206506  3.666263366 
    
    Number of Observations: 7000
    Number of Groups: 1000

pred$fit2 <- predict(fit2, newdata = pred, level = 0)
with(pred, plot(datey, fit2, type = 'l'))

ww <- as.data.frame(wald(fit2, pred = pred))
plotbands(ww)

plotbands(ww, ylim = seq(7.2,9,.2))

fit and fit2 have different FE models so we must refit

We can compare these models with AIC or BIC but the p-value should not be interpreted since the neither model is nested in the other

anova(update(fit, method = "ML"), update(fit2, method = "ML"))

                                Model df      AIC      BIC    logLik   Test
    update(fit, method = "ML")      1  6 15970.56 16011.68 -7979.281       
    update(fit2, method = "ML")     2 10 15975.12 16043.65 -7977.558 1 vs 2
                                 L.Ratio p-value
    update(fit, method = "ML")                  
    update(fit2, method = "ML") 3.446274  0.4861

Results: AIC favours the smaller model

The positions of knots can be estimated by trial and error and could be estimated more formally using non-linear models, which we might take up later.

Adding seasonal effects with sin/cos pair harmonics

Sin <- function(x) cbind(sin(x), cos(x))
#
fit3 <- lme(sleep ~ sp2(datey - birthy) + Sin(2*pi*datey) , dd, random = ~ 1 | id)

    Error in lme.formula(sleep ~ sp2(datey - birthy) + Sin(2 * pi * datey), : nlminb problem, convergence error code = 1
      message = false convergence (8)

We can force lme to return an object:

fit3 <- lme(sleep ~ sp2(datey - birthy) + Sin(2*pi*datey) , dd, random = ~ 1 | id,
            control = list(returnObject = TRUE))

    Warning in lme.formula(sleep ~ sp2(datey - birthy) + Sin(2 * pi * datey), : nlminb problem, convergence error code = 1
      message = false convergence (8)

but it’s generally better to try an alternative optimizer

fit3o <- lme(sleep ~ sp2(datey - birthy) + Sin(2*pi*datey) , dd, random = ~ 1 | id,
            control = list(opt = 'optim', msVerbose = T, verbose = T, returnObject = T))

    initial  value 27836.131999 
    final  value 27836.131999 
    converged

with the same result:

car::compareCoefs(fit3,fit3o)

    Calls:
    1: lme.formula(fixed = sleep ~ sp2(datey - birthy) + Sin(2 * pi * datey), 
      data = dd, random = ~1 | id, control = list(returnObject = TRUE))
    2: lme.formula(fixed = sleep ~ sp2(datey - birthy) + Sin(2 * pi * datey), 
      data = dd, random = ~1 | id, control = list(opt = "optim", msVerbose = T, 
      verbose = T, returnObject = T))
    
                              Model 1 Model 2
    (Intercept)                 8.500   8.500
    SE                          0.129   0.129
                                             
    sp2(datey - birthy)D1(0)     1.10    1.10
    SE                           1.67    1.67
                                             
    sp2(datey - birthy)D2(0)    -0.43   -0.43
    SE                          12.04   12.04
                                             
    sp2(datey - birthy)D3(0)    -7.76   -7.76
    SE                          36.34   36.34
                                             
    sp2(datey - birthy)C(0).0   -1.06   -1.06
    SE                           0.18    0.18
                                             
    sp2(datey - birthy)C(0).1  -0.163  -0.163
    SE                          2.431   2.431
                                             
    sp2(datey - birthy)C(0).2    7.62    7.62
    SE                          17.46   17.46
                                             
    sp2(datey - birthy)C(0).3   -29.7   -29.7
    SE                           52.6    52.6
                                             
    Sin(2 * pi * datey)1      0.25487 0.25487
    SE                        0.00925 0.00925
                                             
    Sin(2 * pi * datey)2       0.4213  0.4213
    SE                         0.0092  0.0092

Also using ‘ML’ can give convergence:

fit3 <- lme(sleep ~ sp2(datey - birthy) + Sin(2*pi*datey) , dd, random = ~ 1 | id, method = 'ML')

Compare estimated models

summary(fit3)

    Linear mixed-effects model fit by maximum likelihood
      Data: dd 
           AIC     BIC    logLik
      13645.35 13727.6 -6810.676
    
    Random effects:
     Formula: ~1 | id
            (Intercept) Residual
    StdDev:   0.9933905 0.504401
    
    Fixed effects:  sleep ~ sp2(datey - birthy) + Sin(2 * pi * datey) 
                                   Value Std.Error   DF  t-value p-value
    (Intercept)                 8.500148   0.12914 5991 65.82083  0.0000
    sp2(datey - birthy)D1(0)    1.095116   1.67092 5991  0.65540  0.5122
    sp2(datey - birthy)D2(0)   -0.429970  12.03848 5991 -0.03572  0.9715
    sp2(datey - birthy)D3(0)   -7.757804  36.34333 5991 -0.21346  0.8310
    sp2(datey - birthy)C(0).0  -1.062551   0.17968 5991 -5.91358  0.0000
    sp2(datey - birthy)C(0).1  -0.162555   2.43072 5991 -0.06688  0.9467
    sp2(datey - birthy)C(0).2   7.623029  17.46120 5991  0.43657  0.6624
    sp2(datey - birthy)C(0).3 -29.658111  52.58728 5991 -0.56398  0.5728
    Sin(2 * pi * datey)1        0.254869   0.00925 5991 27.54351  0.0000
    Sin(2 * pi * datey)2        0.421299   0.00920 5991 45.79646  0.0000
     Correlation: 
                              (Intr) s2(-b)D1 s2(-b)D2 s2(-b)D3 s2(-b)C(0).0
    sp2(datey - birthy)D1(0)   0.832                                        
    sp2(datey - birthy)D2(0)   0.719  0.975                                 
    sp2(datey - birthy)D3(0)   0.657  0.945    0.994                        
    sp2(datey - birthy)C(0).0 -0.679 -0.599   -0.518   -0.474               
    sp2(datey - birthy)C(0).1 -0.571 -0.688   -0.671   -0.650   -0.032      
    sp2(datey - birthy)C(0).2 -0.497 -0.672   -0.689   -0.685    0.742      
    sp2(datey - birthy)C(0).3 -0.453 -0.653   -0.687   -0.691   -0.025      
    Sin(2 * pi * datey)1      -0.005 -0.006   -0.003   -0.001    0.016      
    Sin(2 * pi * datey)2      -0.007 -0.005   -0.001    0.001   -0.015      
                              s2(-b)C(0).1 s2(-b)C(0).2 s2(-b)C(0).3 S(2*p*d)1
    sp2(datey - birthy)D1(0)                                                  
    sp2(datey - birthy)D2(0)                                                  
    sp2(datey - birthy)D3(0)                                                  
    sp2(datey - birthy)C(0).0                                                 
    sp2(datey - birthy)C(0).1                                                 
    sp2(datey - birthy)C(0).2 -0.051                                          
    sp2(datey - birthy)C(0).3  0.946       -0.047                             
    Sin(2 * pi * datey)1      -0.013        0.021       -0.020                
    Sin(2 * pi * datey)2       0.030       -0.026        0.025        0.007   
    
    Standardized Within-Group Residuals:
             Min           Q1          Med           Q3          Max 
    -3.743602439 -0.633286792 -0.003526948  0.642603214  3.533283785 
    
    Number of Observations: 7000
    Number of Groups: 1000

summary(fit3o)

    Linear mixed-effects model fit by REML
      Data: dd 
          AIC      BIC    logLik
      13655.9 13738.12 -6815.948
    
    Random effects:
     Formula: ~1 | id
            (Intercept) Residual
    StdDev:   0.9939113 0.504777
    
    Fixed effects:  sleep ~ sp2(datey - birthy) + Sin(2 * pi * datey) 
                                   Value Std.Error   DF  t-value p-value
    (Intercept)                 8.500148   0.12914 5991 65.81977  0.0000
    sp2(datey - birthy)D1(0)    1.095123   1.67097 5991  0.65538  0.5122
    sp2(datey - birthy)D2(0)   -0.429922  12.03884 5991 -0.03571  0.9715
    sp2(datey - birthy)D3(0)   -7.757657  36.34439 5991 -0.21345  0.8310
    sp2(datey - birthy)C(0).0  -1.062551   0.17969 5991 -5.91340  0.0000
    sp2(datey - birthy)C(0).1  -0.162579   2.43079 5991 -0.06688  0.9467
    sp2(datey - birthy)C(0).2   7.623092  17.46172 5991  0.43656  0.6624
    sp2(datey - birthy)C(0).3 -29.658545  52.58882 5991 -0.56397  0.5728
    Sin(2 * pi * datey)1        0.254869   0.00925 5991 27.54273  0.0000
    Sin(2 * pi * datey)2        0.421299   0.00920 5991 45.79514  0.0000
     Correlation: 
                              (Intr) s2(-b)D1 s2(-b)D2 s2(-b)D3 s2(-b)C(0).0
    sp2(datey - birthy)D1(0)   0.832                                        
    sp2(datey - birthy)D2(0)   0.719  0.975                                 
    sp2(datey - birthy)D3(0)   0.657  0.945    0.994                        
    sp2(datey - birthy)C(0).0 -0.679 -0.599   -0.518   -0.474               
    sp2(datey - birthy)C(0).1 -0.571 -0.688   -0.671   -0.650   -0.032      
    sp2(datey - birthy)C(0).2 -0.497 -0.672   -0.689   -0.685    0.742      
    sp2(datey - birthy)C(0).3 -0.453 -0.653   -0.687   -0.691   -0.025      
    Sin(2 * pi * datey)1      -0.005 -0.006   -0.003   -0.001    0.016      
    Sin(2 * pi * datey)2      -0.007 -0.005   -0.001    0.001   -0.015      
                              s2(-b)C(0).1 s2(-b)C(0).2 s2(-b)C(0).3 S(2*p*d)1
    sp2(datey - birthy)D1(0)                                                  
    sp2(datey - birthy)D2(0)                                                  
    sp2(datey - birthy)D3(0)                                                  
    sp2(datey - birthy)C(0).0                                                 
    sp2(datey - birthy)C(0).1                                                 
    sp2(datey - birthy)C(0).2 -0.051                                          
    sp2(datey - birthy)C(0).3  0.946       -0.047                             
    Sin(2 * pi * datey)1      -0.013        0.021       -0.020                
    Sin(2 * pi * datey)2       0.030       -0.026        0.025        0.007   
    
    Standardized Within-Group Residuals:
             Min           Q1          Med           Q3          Max 
    -3.740828341 -0.632796684 -0.003509994  0.642116499  3.530681040 
    
    Number of Observations: 7000
    Number of Groups: 1000

getG(fit3)

    Random effects variance covariance matrix
                (Intercept)
    (Intercept)     0.98682
      Standard Deviations: 0.99339

getG(fit3o)

    Random effects variance covariance matrix
                (Intercept)
    (Intercept)     0.98786
      Standard Deviations: 0.99391

getR(fit3)

    id 1 
    Conditional variance covariance matrix
            1       2       3       4       5       6       7
    1 0.25442 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
    2 0.00000 0.25442 0.00000 0.00000 0.00000 0.00000 0.00000
    3 0.00000 0.00000 0.25442 0.00000 0.00000 0.00000 0.00000
    4 0.00000 0.00000 0.00000 0.25442 0.00000 0.00000 0.00000
    5 0.00000 0.00000 0.00000 0.00000 0.25442 0.00000 0.00000
    6 0.00000 0.00000 0.00000 0.00000 0.00000 0.25442 0.00000
    7 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.25442
      Standard Deviations: 0.5044 0.5044 0.5044 0.5044 0.5044 0.5044 0.5044

getR(fit3o)

    id 1 
    Conditional variance covariance matrix
           1      2      3      4      5      6      7
    1 0.2548 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
    2 0.0000 0.2548 0.0000 0.0000 0.0000 0.0000 0.0000
    3 0.0000 0.0000 0.2548 0.0000 0.0000 0.0000 0.0000
    4 0.0000 0.0000 0.0000 0.2548 0.0000 0.0000 0.0000
    5 0.0000 0.0000 0.0000 0.0000 0.2548 0.0000 0.0000
    6 0.0000 0.0000 0.0000 0.0000 0.0000 0.2548 0.0000
    7 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.2548
      Standard Deviations: 0.50478 0.50478 0.50478 0.50478 0.50478 0.50478 0.50478

Estimating seasonal pattern:

preds <- data.frame(datey = seq(0,2,.01))
preds$birthy <- preds$datey - 2     # to move birth out of the way

preds$fit3 <- predict(fit3, newdata = preds, level = 0)

with(preds, plot(datey, fit3, type = 'l'))

ww <- as.data.frame(wald(fit3o, pred = preds))

xyplot(coef ~ datey, ww, type = 'l',
       lower = ww$L2,
       upper = ww$U2,
       subscripts = TRUE) +
  layer(panel.fit(...))

ww <- as.data.frame(wald(fit3o, pred = pred))

xyplot(coef ~ datey, ww, type = 'l',
       lower = ww$L2,
       upper = ww$U2,
       subscripts = TRUE) +
  layer(panel.fit(...))

Combines seasonal and birth effects showing predicted patterns for a birth on January 1.

To isolate seasonal and birth effects we would need to reparameterize the model to allow unlinking the variable used for calendar date from the variable used for time pre/post birth.

This is left as an exercise. (Challenge: medium)

Solution:

Create a separate variable for days relative to birth

head(dd)

         id obs    sleep birth_date date reg_date   birthy     datey
    1     1   1 7.088871       5298 1197      288 14.51507  3.279452
    1001  1   2 7.336316       5298 1891      288 14.51507  5.180822
    2001  1   3 7.909879       5298 2655      288 14.51507  7.273973
    3001  1   4 7.489981       5298 3441      288 14.51507  9.427397
    4001  1   5 7.087731       5298 4084      288 14.51507 11.189041
    5001  1   6 6.920117       5298 4893      288 14.51507 13.405479

dd$post_birthy <- with(dd, datey - birthy)

fit3b <- lme(sleep ~ sp2(post_birthy) + Sin(2*pi*datey) , dd, random = ~ 1 | id, method = 'ML')
predb <- expand.grid(post_birthy = seq(-1,1,.01), datey = 0)
ww <- as.data.frame( wald(fit3b, pred = predb) )
head(ww)

          coef         se       U2       L2 p-value  t-value   DF post_birthy datey
    1 8.396669 0.03482825 8.466326 8.327013       0 241.0879 5991       -1.00     0
    2 8.396703 0.03482662 8.466356 8.327050       0 241.1002 5991       -0.99     0
    3 8.396805 0.03482180 8.466449 8.327162       0 241.1365 5991       -0.98     0
    4 8.396976 0.03481399 8.466604 8.327348       0 241.1955 5991       -0.97     0
    5 8.397214 0.03480350 8.466821 8.327607       0 241.2749 5991       -0.96     0
    6 8.397520 0.03479081 8.467102 8.327939       0 241.3718 5991       -0.95     0
      L.(Intercept) L.sp2(post_birthy)D1(0) L.sp2(post_birthy)D2(0)
    1    1.00000000             -0.75000000              0.25000000
    2    1.00000000             -0.74990000              0.24995000
    3    1.00000000             -0.74960000              0.24980000
    4    1.00000000             -0.74910000              0.24955000
    5    1.00000000             -0.74840000              0.24920000
    6    1.00000000             -0.74750000              0.24875000
      L.sp2(post_birthy)D3(0) L.sp2(post_birthy)C(0).0 L.sp2(post_birthy)C(0).1
    1             -0.05208333               0.00000000               0.00000000
    2             -0.05207083               0.00000000               0.00000000
    3             -0.05203333               0.00000000               0.00000000
    4             -0.05197083               0.00000000               0.00000000
    5             -0.05188333               0.00000000               0.00000000
    6             -0.05177083               0.00000000               0.00000000
      L.sp2(post_birthy)C(0).2 L.sp2(post_birthy)C(0).3 L.Sin(2 * pi * datey)1
    1               0.00000000               0.00000000             0.00000000
    2               0.00000000               0.00000000             0.00000000
    3               0.00000000               0.00000000             0.00000000
    4               0.00000000               0.00000000             0.00000000
    5               0.00000000               0.00000000             0.00000000
    6               0.00000000               0.00000000             0.00000000
      L.Sin(2 * pi * datey)2
    1             1.00000000
    2             1.00000000
    3             1.00000000
    4             1.00000000
    5             1.00000000
    6             1.00000000

xyplot(coef ~ post_birthy, ww, type = 'l',
       lower = ww$L2,
       upper = ww$U2,
       subscripts = TRUE) +
  layer(panel.fit(...))

Fitting higher harmonics

fit4 <- lme(sleep ~ sp2(datey - birthy) + 
              Sin(1 * 2 * pi * datey) + 
              Sin(2 * 2 * pi * datey) +
              Sin(3 * 2 * pi * datey)
              , dd, random = ~ 1 | id)
summary(fit4)

    Linear mixed-effects model fit by REML
      Data: dd 
           AIC      BIC    logLik
      13690.76 13800.38 -6829.378
    
    Random effects:
     Formula: ~1 | id
            (Intercept)  Residual
    StdDev:   0.9938603 0.5048109
    
    Fixed effects:  sleep ~ sp2(datey - birthy) + Sin(1 * 2 * pi * datey) + Sin(2 *      2 * pi * datey) + Sin(3 * 2 * pi * datey) 
                                   Value Std.Error   DF  t-value p-value
    (Intercept)                 8.497015   0.12920 5987 65.76594  0.0000
    sp2(datey - birthy)D1(0)    1.022949   1.67225 5987  0.61172  0.5407
    sp2(datey - birthy)D2(0)   -0.993524  12.04851 5987 -0.08246  0.9343
    sp2(datey - birthy)D3(0)   -9.476648  36.37328 5987 -0.26054  0.7945
    sp2(datey - birthy)C(0).0  -1.061064   0.17975 5987 -5.90301  0.0000
    sp2(datey - birthy)C(0).1  -0.067145   2.43268 5987 -0.02760  0.9780
    sp2(datey - birthy)C(0).2   8.057741  17.47070 5987  0.46121  0.6447
    sp2(datey - birthy)C(0).3 -27.625040  52.62973 5987 -0.52489  0.5997
    Sin(1 * 2 * pi * datey)1    0.254924   0.00926 5987 27.54186  0.0000
    Sin(1 * 2 * pi * datey)2    0.421517   0.00921 5987 45.78260  0.0000
    Sin(2 * 2 * pi * datey)1   -0.010929   0.00932 5987 -1.17309  0.2408
    Sin(2 * 2 * pi * datey)2   -0.001708   0.00913 5987 -0.18709  0.8516
    Sin(3 * 2 * pi * datey)1   -0.009974   0.00922 5987 -1.08218  0.2792
    Sin(3 * 2 * pi * datey)2   -0.007817   0.00918 5987 -0.85177  0.3944
     Correlation: 
                              (Intr) s2(-b)D1 s2(-b)D2 s2(-b)D3 s2(-b)C(0).0
    sp2(datey - birthy)D1(0)   0.832                                        
    sp2(datey - birthy)D2(0)   0.719  0.975                                 
    sp2(datey - birthy)D3(0)   0.657  0.945    0.994                        
    sp2(datey - birthy)C(0).0 -0.679 -0.599   -0.518   -0.474               
    sp2(datey - birthy)C(0).1 -0.571 -0.688   -0.671   -0.650   -0.031      
    sp2(datey - birthy)C(0).2 -0.497 -0.672   -0.689   -0.685    0.742      
    sp2(datey - birthy)C(0).3 -0.453 -0.654   -0.687   -0.691   -0.025      
    Sin(1 * 2 * pi * datey)1  -0.006 -0.006   -0.003   -0.001    0.016      
    Sin(1 * 2 * pi * datey)2  -0.007 -0.006   -0.002    0.000   -0.015      
    Sin(2 * 2 * pi * datey)1   0.019  0.029    0.030    0.030   -0.007      
    Sin(2 * 2 * pi * datey)2   0.012  0.015    0.013    0.012   -0.022      
    Sin(3 * 2 * pi * datey)1  -0.011 -0.007   -0.006   -0.006    0.006      
    Sin(3 * 2 * pi * datey)2   0.014  0.017    0.020    0.020   -0.003      
                              s2(-b)C(0).1 s2(-b)C(0).2 s2(-b)C(0).3 S(1*2*p*d)1
    sp2(datey - birthy)D1(0)                                                    
    sp2(datey - birthy)D2(0)                                                    
    sp2(datey - birthy)D3(0)                                                    
    sp2(datey - birthy)C(0).0                                                   
    sp2(datey - birthy)C(0).1                                                   
    sp2(datey - birthy)C(0).2 -0.051                                            
    sp2(datey - birthy)C(0).3  0.946       -0.047                               
    Sin(1 * 2 * pi * datey)1  -0.013        0.021       -0.019                  
    Sin(1 * 2 * pi * datey)2   0.031       -0.026        0.026        0.008     
    Sin(2 * 2 * pi * datey)1  -0.030       -0.014       -0.026       -0.004     
    Sin(2 * 2 * pi * datey)2   0.005       -0.026        0.010        0.013     
    Sin(3 * 2 * pi * datey)1   0.011       -0.001        0.009        0.006     
    Sin(3 * 2 * pi * datey)2  -0.021       -0.004       -0.024       -0.012     
                              S(1*2*p*d)2 S(2*2*p*d)1 S(2*2*p*d)2 S(3*2*p*d)1
    sp2(datey - birthy)D1(0)                                                 
    sp2(datey - birthy)D2(0)                                                 
    sp2(datey - birthy)D3(0)                                                 
    sp2(datey - birthy)C(0).0                                                
    sp2(datey - birthy)C(0).1                                                
    sp2(datey - birthy)C(0).2                                                
    sp2(datey - birthy)C(0).3                                                
    Sin(1 * 2 * pi * datey)1                                                 
    Sin(1 * 2 * pi * datey)2                                                 
    Sin(2 * 2 * pi * datey)1  -0.015                                         
    Sin(2 * 2 * pi * datey)2   0.017      -0.001                             
    Sin(3 * 2 * pi * datey)1   0.012      -0.003      -0.017                 
    Sin(3 * 2 * pi * datey)2  -0.028       0.020       0.013      -0.006     
    
    Standardized Within-Group Residuals:
             Min           Q1          Med           Q3          Max 
    -3.749540694 -0.631159258 -0.004632081  0.641790295  3.523068562 
    
    Number of Observations: 7000
    Number of Groups: 1000

We can test higher harmonics with a Wald test or with a LR test

wald(fit4, 'Sin\\(3')

            numDF denDF   F-value p-value
    Sin\\(3     2  5987 0.9537905 0.38534
                             Estimate  Std.Error DF   t-value   p-value Lower 0.95
    Sin(3 * 2 * pi * datey)1 -0.009974 0.009216  5987 -1.082177 0.27922 -0.028041 
    Sin(3 * 2 * pi * datey)2 -0.007817 0.009177  5987 -0.851774 0.39437 -0.025807 
                             Upper 0.95
    Sin(3 * 2 * pi * datey)1 0.008094  
    Sin(3 * 2 * pi * datey)2 0.010173

wald(fit4, 'Sin\\([23]')

               numDF denDF   F-value p-value
    Sin\\([23]     4  5987 0.8226071 0.51051
                             Estimate  Std.Error DF   t-value   p-value Lower 0.95
    Sin(2 * 2 * pi * datey)1 -0.010929 0.009316  5987 -1.173091 0.24081 -0.029193 
    Sin(2 * 2 * pi * datey)2 -0.001708 0.009129  5987 -0.187089 0.85160 -0.019605 
    Sin(3 * 2 * pi * datey)1 -0.009974 0.009216  5987 -1.082177 0.27922 -0.028041 
    Sin(3 * 2 * pi * datey)2 -0.007817 0.009177  5987 -0.851774 0.39437 -0.025807 
                             Upper 0.95
    Sin(2 * 2 * pi * datey)1 0.007335  
    Sin(2 * 2 * pi * datey)2 0.016189  
    Sin(3 * 2 * pi * datey)1 0.008094  
    Sin(3 * 2 * pi * datey)2 0.010173

wald(fit4, 'Sin\\([123]')

                numDF denDF  F-value p-value
    Sin\\([123]     6  5987 473.4823 <.00001
                             Estimate  Std.Error DF   t-value   p-value Lower 0.95
    Sin(1 * 2 * pi * datey)1  0.254924 0.009256  5987 27.541861 <.00001  0.236779 
    Sin(1 * 2 * pi * datey)2  0.421517 0.009207  5987 45.782595 <.00001  0.403468 
    Sin(2 * 2 * pi * datey)1 -0.010929 0.009316  5987 -1.173091 0.24081 -0.029193 
    Sin(2 * 2 * pi * datey)2 -0.001708 0.009129  5987 -0.187089 0.85160 -0.019605 
    Sin(3 * 2 * pi * datey)1 -0.009974 0.009216  5987 -1.082177 0.27922 -0.028041 
    Sin(3 * 2 * pi * datey)2 -0.007817 0.009177  5987 -0.851774 0.39437 -0.025807 
                             Upper 0.95
    Sin(1 * 2 * pi * datey)1 0.273069  
    Sin(1 * 2 * pi * datey)2 0.439566  
    Sin(2 * 2 * pi * datey)1 0.007335  
    Sin(2 * 2 * pi * datey)2 0.016189  
    Sin(3 * 2 * pi * datey)1 0.008094  
    Sin(3 * 2 * pi * datey)2 0.010173

anova(update(fit3, method = 'ML'), update(fit4, method = "ML"))

                                Model df      AIC      BIC    logLik   Test L.Ratio
    update(fit3, method = "ML")     1 12 13645.35 13727.59 -6810.676               
    update(fit4, method = "ML")     2 16 13650.06 13759.71 -6809.027 1 vs 2 3.29655
                                p-value
    update(fit3, method = "ML")        
    update(fit4, method = "ML")  0.5095

Note how close the corresponding p-values are from the two tests

1 Hand-rolled parametric splines

Many parametric splines are easily make ‘by hand’.

As long as a spline adds polynomial degrees monotonically, either from the left to the right, or from the right to the left, it can be easily specified with ‘plus’ functions:

plus <- function(x) x * (x > 0)

For example, a piece-wise linear function with knots at 0 and at 1 could be specified as:

y ~ x + plus(x) + plus(x - 1)

head(mtcars)

                       mpg cyl disp  hp drat    wt  qsec vs am gear carb
    Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
    Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
    Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
    Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
    Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
    Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

xyplot(mpg ~ wt, mtcars)

plus <- function(x) x * (x>0)
fit <- lm(mpg ~ disp + wt + plus(wt - 3) + plus(wt -4), mtcars)
summary(fit)

    
    Call:
    lm(formula = mpg ~ disp + wt + plus(wt - 3) + plus(wt - 4), data = mtcars)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -3.0400 -1.6780 -0.7629  0.8295  5.3393 
    
    Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
    (Intercept)  42.97993    3.25485  13.205  2.7e-13 ***
    disp         -0.01890    0.00834  -2.266 0.031701 *  
    wt           -6.56004    1.46556  -4.476 0.000124 ***
    plus(wt - 3)  5.05495    3.04939   1.658 0.108955    
    plus(wt - 4)  0.52160    3.12072   0.167 0.868506    
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 2.525 on 27 degrees of freedom
    Multiple R-squared:  0.8471,    Adjusted R-squared:  0.8245 
    F-statistic:  37.4 on 4 and 27 DF,  p-value: 1.213e-10

wald(fit, 'plus')  # individual components not sig. but overall sig.

         numDF denDF  F-value p-value
    plus     2    27 5.843264 0.00779
                 Estimate Std.Error DF t-value  p-value Lower 0.95 Upper 0.95
    plus(wt - 3) 5.054949 3.049385  27 1.657695 0.10895 -1.201872  11.311771 
    plus(wt - 4) 0.521595 3.120718  27 0.167139 0.86851 -5.881589   6.924778

Exercise: interpret the coefficients of this model

L <- rbind(
  "Slope wt<3" =  c(0,0,1,0,0),
  "Slope 3<wt<4" = c(0,0,1,1,0),
  "Slope 4<wt" = c(0,0,1,1,1),
  "Change at 3" = c(0,0,0,1,0),
  "Change at 4" = c(0,0,0,0,1)
)

wald(fit, L)

      numDF denDF  F-value p-value
    1     3    27 7.579704 0.00078
                 Estimate  Std.Error DF t-value   p-value Lower 0.95 Upper 0.95
    Slope wt<3   -6.560040 1.465563  27 -4.476122 0.00012 -9.567127  -3.552953 
    Slope 3<wt<4 -1.505090 2.449412  27 -0.614470 0.54405 -6.530868   3.520687 
    Slope 4<wt   -0.983496 1.549031  27 -0.634910 0.53083 -4.161845   2.194854 
    Change at 3   5.054949 3.049385  27  1.657695 0.10895 -1.201872  11.311771 
    Change at 4   0.521595 3.120718  27  0.167139 0.86851 -5.881589   6.924778

The previous spline is equivalent to the following ‘gsp’ spline:

sp1 <- function(x) gsp(x, c(3,4), c(1,1,1), c(0,0))

fits <- lm(mpg ~ disp + sp1(wt), mtcars)
summary(fits)

    
    Call:
    lm(formula = mpg ~ disp + sp1(wt), data = mtcars)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -3.0400 -1.6780 -0.7629  0.8295  5.3393 
    
    Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
    (Intercept)   42.97993    3.25485  13.205  2.7e-13 ***
    disp          -0.01890    0.00834  -2.266 0.031701 *  
    sp1(wt)D1(0)  -6.56004    1.46556  -4.476 0.000124 ***
    sp1(wt)C(3).1  5.05495    3.04939   1.658 0.108955    
    sp1(wt)C(4).1  0.52160    3.12072   0.167 0.868506    
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 2.525 on 27 degrees of freedom
    Multiple R-squared:  0.8471,    Adjusted R-squared:  0.8245 
    F-statistic:  37.4 on 4 and 27 DF,  p-value: 1.213e-10

pred <- with(mtcars, pred.grid(wt= seq(1,6,.01), disp = seq(100,400,100)))
pred$fits <- predict(fits, newdata = pred)        # no levels argument since this is 'monolevel'
xyplot(fits ~ wt, pred, groups = disp, type = 'l', auto.key = T)

car::compareCoefs(fit, fits)

    Calls:
    1: lm(formula = mpg ~ disp + wt + plus(wt - 3) + plus(wt - 4), data = 
      mtcars)
    2: lm(formula = mpg ~ disp + sp1(wt), data = mtcars)
    
                   Model 1  Model 2
    (Intercept)      42.98    42.98
    SE                3.25     3.25
                                   
    disp          -0.01890 -0.01890
    SE             0.00834  0.00834
                                   
    wt               -6.56         
    SE                1.47         
                                   
    plus(wt - 3)      5.05         
    SE                3.05         
                                   
    plus(wt - 4)     0.522         
    SE               3.121         
                                   
    sp1(wt)D1(0)              -6.56
    SE                         1.47
                                   
    sp1(wt)C(3).1              5.05
    SE                         3.05
                                   
    sp1(wt)C(4).1             0.522
    SE                        3.121

Fitting a different spline

sp2 <- function(x) gsp(x, 4, c(2,1), 1)
fitq <- lm(mpg ~ disp + sp2(wt), mtcars)
summary(fitq)

    
    Call:
    lm(formula = mpg ~ disp + sp2(wt), data = mtcars)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -2.8311 -1.5000 -0.9477  0.9182  5.6663 
    
    Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
    (Intercept)   50.495411   4.738387  10.657 2.32e-11 ***
    disp          -0.018532   0.007768  -2.386 0.024050 *  
    sp2(wt)D1(0) -13.677911   3.067820  -4.459 0.000122 ***
    sp2(wt)D2(0)   3.218240   0.905567   3.554 0.001370 ** 
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 2.464 on 28 degrees of freedom
    Multiple R-squared:  0.849, Adjusted R-squared:  0.8329 
    F-statistic: 52.49 on 3 and 28 DF,  p-value: 1.284e-11

pred$fitq <- predict(fitq, newdata = pred)
xyplot(fitq ~ wt, pred, groups = disp, type = 'l', auto.key = T)

AIC(fits, fitq)

         df      AIC
    fits  6 156.6588
    fitq  5 154.2543

For a more realistic plot that reflects the relationship among predictors, use the original data but might fill in values for continuity

mtcars$fitq <- predict(fitq)
xyplot(disp ~ wt, mtcars)

xyplot(fitq ~ wt, mtcars, groups = disp, type = 'l', auto.key = T)

library(p3d)

    Loading required package: rgl

    
    Attaching package: 'p3d'

    The following objects are masked from 'package:spida2':
    
        cell, center, ConjComp, dell, disp, ell, ell.conj, ellbox, ellplus,
        ellpt, ellptc, ellpts, ellptsc, elltan, elltanc, na.include, uv

Init3d()
Plot3d(mpg ~ wt + disp, mtcars)

    Use left mouse to rotate, middle mouse (or scroll) to zoom, right mouse to change perspective

rglwidget()

Fit3d(fitq)
rglwidget()

Fit3d(fits, col = 'pink')
rglwidget()

Using parametric splines and Fourier series for seasonal effects

2024-03-18

1 Hand-rolled parametric splines