THIS VERSION: 5.12.2024

Rig count data.

Weekly data 1.3.2017 to 28.3.2024

Explorative data analysis:

Number of observations is 365. The mean and variance of the data are: 6.3945205 and 5.7944754.

plot(data1$AlaskaL, type="l")

forecast::Acf(data1$AlaskaL)

forecast::Pacf(data1$AlaskaL)

Are there quarterly seasonal effects?

Mean counts for quarters 1 to 4:

meanq1 <- mean(data1$AlaskaL[data1$Q1 == 1])
meanq1
## [1] 7.988764
meanq2 <- mean(data1$AlaskaL[data1$Q2 == 1])
meanq2
## [1] 6.076923
meanq3 <- mean(data1$AlaskaL[data1$Q3 == 1])
meanq3
## [1] 5.608696
meanq4 <- mean(data1$AlaskaL[data1$Q4 == 1])
meanq4
## [1] 5.956989

Specification 1:

I start with the specification used so far, i.e. with a linear trend and the Q1 dummy.

# preparations for regression analysis
X1 <- data1$AlaskaL
n1 <- length(X1)

# generate trend variables and quarterly dummies
trend_1 <- (1:n1 - n1/2) / n1
const_1 <- rep(1, n1)

q1_1 <- data1$Q1

Fitting a GP(2) model with trend and Q1-Dummy (log-link)

xreg_1_tq1 <- cbind(const_1, trend_1, q1_1 )


fit_1_gp2tq1 <- cocoReg(order=2, type="GP", data=X1, xreg=xreg_1_tq1 , julia=T)
## Starting Julia ...
#fit_1_gp2tq1 <- cocoReg(order=2, type="GP", data=X1, xreg=xreg_1_tq1, link_function = "log" , julia=T)
summary(fit_1_gp2tq1)
## Coefficients:
##           Estimate   Std. Error          t
## alpha1      0.0515       0.0077     6.7128
## alpha2      0.0190       0.0063     3.0179
## alpha3      0.8758       0.0171    51.3371
## eta         0.1972       0.0458     4.3111
## const_1    -1.7628       0.1759   -10.0223
## trend_1    -2.1621       0.4951    -4.3673
## q1_1        0.7095       0.2823     2.5135
## 
## Type: GP 
## Order: 2 
## 
## Log-likelihood: -455.7017

Evaluation of the fitted model:

boot <- cocoBoot(fit_1_gp2tq1, rep.Bootstrap=500, conf.alpha = 0.01, julia=TRUE)
plot(boot)

pit <- cocoPit(fit_1_gp2tq1,  julia=TRUE)
plot(pit)

residuals <- cocoResid(fit_1_gp2tq1, val.num = 1e-10)
mean(residuals$pe.resid)
## [1] 0.05874158
var(residuals$pe.resid)
## [1] 0.8029447
forecast::Acf(residuals$pe.resid)

plot(residuals$pe.resid,type="l")

The mean and variance of the Pearson residuals are: 0.0587416 and 0.8029447.

Note, that the confidence level in the parametric bootstrap exercise was set to \(99\%\) in order to include the SACF of the data.

We concluded then, that despite the complex dynamics, this model is doing pretty ok.

Moving on to a one-step-ahead forecasting exercise:

k <- 1

trendf <- ((n1+1):(n1+k) - n1/2) / n1 #adjusted to a vector
q1f   <- 0 
constf <- 1

xcastk <- cbind(constf, trendf, q1f) #adjusted to create a matrix


fc_1_gp2 <- predict(fit_1_gp2tq1,k=1, max=20, simulate_one_step_ahead = F, xcast=xcastk, julia=T)


#k=1
fc_1_gp2[[1]]$mode
## [1] 13
fc_1_gp2[[1]]$median
## [1] 13
plot(fc_1_gp2[[1]])

Specification 2:

Include a Dummy for quarter 3 to the above specification:

q3_1 <- data1$Q3

xreg_1_tq1q3 <- cbind(const_1, trend_1, q1_1, q3_1 )


fit_1_gp2tq1q3 <- cocoReg(order=2, type="GP", data=X1, xreg=xreg_1_tq1q3 , julia=T)

summary(fit_1_gp2tq1q3)
## Coefficients:
##           Estimate   Std. Error         t
## alpha1      0.0516       0.0077    6.7187
## alpha2      0.0192       0.0063    3.0329
## alpha3      0.8754       0.0171   51.2380
## eta         0.1968       0.0458    4.2955
## const_1    -1.7017       0.1990   -8.5514
## trend_1    -2.1716       0.4958   -4.3799
## q1_1        0.6474       0.2975    2.1761
## q3_1       -0.1962       0.3158   -0.6213
## 
## Type: GP 
## Order: 2 
## 
## Log-likelihood: -455.505

No real improvement. Do not pursue any further.

Specification 3:

I now try to include some intervention terms in the spirit of Liboschek et al JSS paper for the tscount package.

Data preparation:

#intervention dummies

intv1_0 <- rep(0,158)
intv1_1 <- rep(1,80)
intv1_2 <- rep(0,127)
intv_corona <- c(intv1_0,intv1_1,intv1_2)
# a Covid19 Dummy covering the time span from mid April 2020 to end of Sept. 2021

intv2_1 <- rep(0, 238)  
intv2_2 <- rep(1,127)
intv2 <- c(intv2_1,intv2_2) 
# a Covid19 recovery dummy starting from Oktober 2021 up to the end of the data set

xreg_1_tq1_i = cbind(const_1,q1_1,intv_corona, intv2)

fit_1_gp2tq1_i <- cocoReg(order=2, type="GP", data=X1, xreg=xreg_1_tq1_i , julia=T, link_function = "log")

summary(fit_1_gp2tq1_i)
## Coefficients:
##               Estimate   Std. Error         t
## alpha1          0.0516       0.0080    6.4211
## alpha2          0.0157       0.0067    2.3356
## alpha3          0.8716       0.0185   47.1573
## eta             0.1954       0.0461    4.2342
## const_1        -0.8680       0.2160   -4.0186
## q1_1            0.5802       0.2752    2.1082
## intv_corona    -1.3603       0.3739   -3.6385
## intv2          -1.5686       0.3656   -4.2899
## 
## Type: GP 
## Order: 2 
## 
## Log-likelihood: -451.5651

The usual diagnostic plots for this specification (note however, that the confidence level for the parametric bootstrap is 99%)

bootrci <- cocoBoot(fit_1_gp2tq1_i, conf.alpha = 0.01, julia=TRUE)
plot(bootrci)

pitrci <- cocoPit(fit_1_gp2tq1_i, julia=TRUE)
plot(pitrci)

residualsrci <- cocoResid(fit_1_gp2tq1_i, val.num = 1e-10)
mean(residualsrci$pe.resid)
## [1] 0.07594969
var(residualsrci$pe.resid)
## [1] 0.7723266
acf_resrci <- forecast::Acf(residualsrci$pe.resid, lag = 25)

intv_coronaf <- 0
intv2f <- 1

xcastki <- cbind(constf, q1f, intv_coronaf, intv2f) #adjusted to create a matrix


fc_1_gp2_i <- predict(fit_1_gp2tq1_i,k=1, simulate_one_step_ahead = F, xcast=xcastki, julia=T)


#k=1
fc_1_gp2_i[[1]]$mode
## [1] 13
fc_1_gp2_i[[1]]$median
## [1] 13
plot(fc_1_gp2_i[[1]])

Specification 4:

What about bringing back the trend term…

Data preparation:

trend_1 <- (1:n1 - n1/2) / n1
#intervention dummies

intv1_0 <- rep(0,158)
intv1_1 <- rep(1,80)
intv1_2 <- rep(0,127)
intv_corona <- c(intv1_0,intv1_1,intv1_2)
# a Covid19 Dummy covering the time span from mid April 2020 to end of Sept. 2021

intv2_1 <- rep(0, 238)  
intv2_2 <- rep(1,127)
intv2 <- c(intv2_1,intv2_2) 
# a Covid19 recovery dummy starting from Oktober 2021 up to the end of the data set

xreg_1_tq1_it = cbind(const_1,q1_1,intv_corona, trend_1)

fit_1_gp2tq1_it <- cocoReg(order=2, type="GP", data=X1, xreg=xreg_1_tq1_it , julia=T, link_function = "log")

summary(fit_1_gp2tq1_it)
## Coefficients:
##               Estimate   Std. Error         t
## alpha1          0.0497       0.0074    6.6845
## alpha2          0.0176       0.0062    2.8191
## alpha3          0.8785       0.0168   52.2651
## eta             0.1945       0.0467    4.1635
## const_1        -1.6018       0.1971   -8.1250
## q1_1            0.6673       0.2852    2.3401
## intv_corona    -0.5984       0.3719   -1.6090
## trend_1        -1.9307       0.4997   -3.8642
## 
## Type: GP 
## Order: 2 
## 
## Log-likelihood: -454.2904

The usual diagnostic plots for this specification

bootrcit <- cocoBoot(fit_1_gp2tq1_it, conf.alpha = 0.01, julia=TRUE)
plot(bootrcit)

pitrcit <- cocoPit(fit_1_gp2tq1_it, julia=TRUE)
plot(pitrcit)

residualsrcit <- cocoResid(fit_1_gp2tq1_it, val.num = 1e-10)
mean(residualsrcit$pe.resid)
## [1] 0.06364868
var(residualsrcit$pe.resid)
## [1] 0.8187228
acf_resrcit <- forecast::Acf(residualsrcit$pe.resid, lag = 25)

plot(residualsrcit$pe.resid,type="l")

Forecasting one-step-ahead

k <- 1

trendf <- ((n1+1):(n1+k) - n1/2) / n1 #adjusted to a vector
intv_coronaf <- 0


xcastkit <- cbind(constf,  q1f, intv_coronaf, trendf) #adjusted to create a matrix


fc_1_gp2_it <- predict(fit_1_gp2tq1_i,k=1,  simulate_one_step_ahead = F, xcast=xcastkit, julia=T)


#k=1
fc_1_gp2_it[[1]]$mode
## [1] 13
fc_1_gp2_it[[1]]$median
## [1] 13
plot(fc_1_gp2_it[[1]])

Specification 5:

Yet anther twist with the regressors: …

Data preparation:

trend_1 <- (1:n1 - n1/2) / n1
#intervention dummies

intv1_0 <- rep(0,158)
intv1_1 <- rep(1,80)
intv1_2 <- rep(0,127)
intv_corona <- c(intv1_0,intv1_1,intv1_2)
# a Covid19 Dummy covering the time span from mid April 2020 to end of Sept. 2021

intv2_1 <- rep(0, 238)  
intv2_2 <- rep(1,127)
intv2 <- c(intv2_1,intv2_2) 
# a Covid19 recovery dummy starting from Oktober 2021 up to the end of the data set

xreg_1_tq1_it2 = cbind(const_1,q1_1,intv_corona, trend_1*intv2)

fit_1_gp2tq1_it2 <- cocoReg(order=2, type="GP", data=X1, xreg=xreg_1_tq1_it2 , julia=T, link_function = "log")

summary(fit_1_gp2tq1_it2)
## Coefficients:
##               Estimate   Std. Error         t
## alpha1          0.0524       0.0080    6.5913
## alpha2          0.0165       0.0068    2.4401
## alpha3          0.8706       0.0179   48.5261
## eta             0.1943       0.0454    4.2757
## const_1        -0.9064       0.2106   -4.3039
## q1_1            0.5907       0.2731    2.1626
## intv_corona    -1.3298       0.3733   -3.5625
##                -4.6805       1.0593   -4.4185
## 
## Type: GP 
## Order: 2 
## 
## Log-likelihood: -451.6935

Diagnostic plots for this specification

bootrcit2 <- cocoBoot(fit_1_gp2tq1_it2, conf.alpha = 0.01, julia=TRUE)
plot(bootrcit2)

pitrcit2 <- cocoPit(fit_1_gp2tq1_it2, julia=TRUE)
plot(pitrcit2)

residualsrcit2 <- cocoResid(fit_1_gp2tq1_it2, val.num = 1e-10)
mean(residualsrcit2$pe.resid)
## [1] 0.06974777
var(residualsrcit2$pe.resid)
## [1] 0.7680785
acf_resrcit2 <- forecast::Acf(residualsrcit2$pe.resid, lag = 25)

plot(residualsrcit2$pe.resid,type="l")

Specification 6:

Yet anther twist with the regressors: …

Data preparation:

trend_1 <- (1:n1 - n1/2) / n1
#intervention dummies

intv3_0 <- rep(0,158)
intv3_1 <- rep(1,207)

intv_corona2 <- c(intv3_0,intv3_1)
# a Covid19 Dummy covering the time span from mid April 2020 to end of Sept. 2021

intv2_1 <- rep(0, 238)  
intv2_2 <- rep(1,127)
intv2 <- c(intv2_1,intv2_2) 
# a Covid19 recovery dummy starting from Oktober 2021 up to the end of the data set

xreg_1_tq1_it3 = cbind(const_1,q1_1,intv_corona2)

fit_1_gp2tq1_it3 <- cocoReg(order=2, type="GP", data=X1, xreg=xreg_1_tq1_it3 , julia=T, link_function = "log")

summary(fit_1_gp2tq1_it3)
## Coefficients:
##                Estimate   Std. Error         t
## alpha1           0.0507       0.0077    6.6253
## alpha2           0.0155       0.0066    2.3406
## alpha3           0.8736       0.0177   49.2499
## eta              0.1949       0.0466    4.1835
## const_1         -0.8693       0.2161   -4.0224
## q1_1             0.5714       0.2762    2.0690
## intv_corona2    -1.4707       0.2951   -4.9836
## 
## Type: GP 
## Order: 2 
## 
## Log-likelihood: -451.6711

Diagnostic plots for this specification

bootrcit3 <- cocoBoot(fit_1_gp2tq1_it3, conf.alpha = 0.01, julia=TRUE)
plot(bootrcit3)

pitrcit3 <- cocoPit(fit_1_gp2tq1_it3, julia=TRUE)
plot(pitrcit3)

residualsrcit3 <- cocoResid(fit_1_gp2tq1_it3, val.num = 1e-10)
mean(residualsrcit3$pe.resid)
## [1] 0.07573195
var(residualsrcit3$pe.resid)
## [1] 0.7845895
acf_resrcit2 <- forecast::Acf(residualsrcit2$pe.resid, lag = 25)

plot(residualsrcit2$pe.resid,type="l")

Summary

Table with scoring rule results for the data set 2017 to 2024 (not comprehensive)

rbind(Spez1 = cocoScore(fit_1_gp2tq1, julia = T), Spez3 =  cocoScore(fit_1_gp2tq1_i), Spez4 =  cocoScore(fit_1_gp2tq1_it))
##       log.score quad.score rps.score
## Spez1 1.255377  -0.4363836 0.4862753
## Spez3 1.243981  -0.4439594 0.4841648
## Spez4 1.251489  -0.437554  0.4854141

I think, I should try a different data span. Without the Covid19 period.

Spezification (II.1) with dataset 2014 to 2020 (March to March)

data2 <- read_excel("rigcountsAlaskaL2014-20-march2march.xlsx")

Explorative data analysis:

Number of observations is 313. The mean and variance of the data are: 7.7955272 and 6.6760056.

mean(data2$AlaskaL)
## [1] 7.795527
var(data2$AlaskaL)
## [1] 6.676006
plot(data2$AlaskaL, type="l")

forecast::Acf(data2$AlaskaL)

forecast::Pacf(data2$AlaskaL)

Are there quarterly seasonal effects?

Mean counts for quarters 1 to 4:

meanq1 <- mean(data2$AlaskaL[data2$Q1 == 1])
meanq1
## [1] 9.220779
meanq2 <- mean(data2$AlaskaL[data2$Q2 == 1])
meanq2
## [1] 7.743243
meanq3 <- mean(data2$AlaskaL[data2$Q3 == 1])
meanq3
## [1] 6.73494
meanq4 <- mean(data2$AlaskaL[data2$Q4 == 1])
meanq4
## [1] 7.56962

There is no need for a trend and the data seem to be slightly underdispersed. Preliminary tests show that the \(\eta\) parameter in the GPAR is not significantly different from zero. So I stick wit a PAR2 model and the Q1 Dummy.

# preparations for regression analysis
X2 <- data2$AlaskaL
n2 <- length(X2)

# generate trend variables and quarterly dummies
trend_2 <- (1:n2 - n2/2) / n2
const_2 <- rep(1, n2)

q1_2 <- data2$Q1



xreg_2_q1 <- cbind(const_2, q1_2 )

Fitting a PAR(2) model with Q1-Dummy only (log-link):

fit_2_p2q1 <- cocoReg(order=2, type="Poisson", data=X2, xreg=xreg_2_q1, link_function = "log" , julia=T)

summary(fit_2_p2q1)
## Coefficients:
##           Estimate   Std. Error         t
## alpha1      0.1002       0.0131    7.6659
## alpha2      0.0418       0.0125    3.3456
## alpha3      0.7770       0.0227   34.1635
## const_2    -0.6315       0.1752   -3.6037
## q1_2        0.5375       0.1924    2.7942
## 
## Type: Poisson 
## Order: 2 
## 
## Log-likelihood: -521.294

Evaluation of the fitted model:

boot <- cocoBoot(fit_2_p2q1, rep.Bootstrap=500, conf.alpha = 0.05, julia=TRUE)
plot(boot)

pit <- cocoPit(fit_2_p2q1,  julia=TRUE)
plot(pit)

residuals <- cocoResid(fit_2_p2q1, val.num = 1e-10)
mean(residuals$pe.resid)
## [1] 0.009020904
var(residuals$pe.resid)
## [1] 1.041562
forecast::Acf(residuals$pe.resid)

plot(residuals$pe.resid,type="l")

Forecasting one-step-ahead

constf2 <- 1
q1f2 <- 0

xcastk2 <- cbind(constf2, q1f2) #adjusted to create a matrix


fc_2_p2 <- predict(fit_2_p2q1, k=1, simulate_one_step_ahead = F, xcast=xcastk2, julia=T)


#k=1
fc_2_p2[[1]]$mode
## [1] 9
fc_2_p2[[1]]$median
## [1] 9
plot(fc_2_p2[[1]])

Spezification II.2 mit seasonal components

# generate trend variables and quarterly dummies
X2 <- data2$AlaskaL
n2 <- length(X2)

sin_x2 <- sin(2*pi*1:n2/52)
cos_x2 <- cos(2*pi*1:n2/52)

const_2 <- rep(1, n2)

q1_2 <- data2$Q1

xreg_2_q1har <- cbind(const_2, q1_2, sin_x2, cos_x2 )

Fitting a PAR(2) model with Q1-Dummy only (log-link)

fit_2_p2q1har <- cocoReg(order=2, type="Poisson", data=X2, xreg=xreg_2_q1har, link_function = "log" , julia=T)

summary(fit_2_p2q1har)
## Coefficients:
##           Estimate   Std. Error         t
## alpha1      0.0999       0.0130    7.6994
## alpha2      0.0427       0.0125    3.4133
## alpha3      0.7781       0.0226   34.3622
## const_2    -0.7227       0.2025   -3.5685
## q1_2        0.7897       0.3447    2.2912
## sin_x2      0.0631       0.1713    0.3682
## cos_x2     -0.2002       0.1982   -1.0100
## 
## Type: Poisson 
## Order: 2 
## 
## Log-likelihood: -520.7641

Evaluation of the fitted model:

boot <- cocoBoot(fit_2_p2q1har, rep.Bootstrap=500, conf.alpha = 0.05, julia=TRUE)
plot(boot)

pit <- cocoPit(fit_2_p2q1har,  julia=TRUE)
plot(pit)

residuals <- cocoResid(fit_2_p2q1har, val.num = 1e-10)
mean(residuals$pe.resid)
## [1] 0.008739587
var(residuals$pe.resid)
## [1] 1.047478
forecast::Acf(residuals$pe.resid)

plot(residuals$pe.resid,type="l")

Spezification (II.3) with dataset 2014 to 2020 (March to March) and intervention

… of course, the intervention has no inferential background, just eyeballing…

# generate trend variables and quarterly dummies
const_2 <- rep(1, n2)

q1_2 <- data2$Q1

intv2_1 <- rep(0,109)
intv2_2 <- rep(1,204)
interv <- c(intv2_1,intv2_2)

xreg_2_q1i <- cbind(const_2, q1_2, interv )

Fitting a PAR(2) model with Q1-Dummy and intervention (log-link)

fit_2_p2q1i <- cocoReg(order=2, type="Poisson", data=X2, xreg=xreg_2_q1i, link_function = "log" , julia=T)

summary(fit_2_p2q1i)
## Coefficients:
##           Estimate   Std. Error         t
## alpha1      0.1035       0.0139    7.4571
## alpha2      0.0427       0.0128    3.3460
## alpha3      0.7714       0.0240   32.0969
## const_2    -0.4724       0.2172   -2.1747
## q1_2        0.5336       0.1910    2.7941
## interv     -0.2230       0.1927   -1.1573
## 
## Type: Poisson 
## Order: 2 
## 
## Log-likelihood: -520.6517

The intervention dummy is not significant, but may help to improve the fit of the model as far as the dynamics is concerned.

Evaluation of the fitted model:

boot <- cocoBoot(fit_2_p2q1i, rep.Bootstrap=500, conf.alpha = 0.05, julia=TRUE)
plot(boot)

pit <- cocoPit(fit_2_p2q1i,  julia=TRUE)
plot(pit)

residuals <- cocoResid(fit_2_p2q1i, val.num = 1e-10)
mean(residuals$pe.resid)
## [1] 0.01108528
var(residuals$pe.resid)
## [1] 1.023794
forecast::Acf(residuals$pe.resid)

plot(residuals$pe.resid,type="l")

Looks all pretty ok to me.

Forecasting one-step-ahead

constf2i <- 1
q1f2i <- 0
intv2i <- 1

xcastk2i <- cbind(constf2, q1f2, intv2i) #adjusted to create a matrix


fc_2_p2i <- predict(fit_2_p2q1i,k=1, simulate_one_step_ahead = F, xcast=xcastk2i, julia=T)


#k=1
fc_2_p2i[[1]]$mode
## [1] 9
fc_2_p2i[[1]]$median
## [1] 9
plot(fc_2_p2i[[1]])

Spezification (II.4) with dataset 2014 to 2020 (March to March) and a simple trend

# generate trend variables and quarterly dummies
trend_2 <- (1:n2 - n2/2) / n2

const_2 <- rep(1, n2)

q1_2 <- data2$Q1



xreg_2_q1t <- cbind(const_2, trend_2, q1_2 )

Fitting a PAR(2) model with Q1-Dummy and trend (log-link)

fit_2_p2q1t <- cocoReg(order=2, type="Poisson", data=X2, xreg=xreg_2_q1t, link_function = "log" , julia=T)

summary(fit_2_p2q1t)
## Coefficients:
##           Estimate   Std. Error         t
## alpha1      0.1022       0.0136    7.5267
## alpha2      0.0432       0.0127    3.3943
## alpha3      0.7739       0.0235   32.9082
## const_2    -0.6420       0.1762   -3.6442
## trend_2    -0.2663       0.3363   -0.7918
## q1_2        0.5549       0.1936    2.8662
## 
## Type: Poisson 
## Order: 2 
## 
## Log-likelihood: -520.9809

Evaluation of the fitted model:

boot <- cocoBoot(fit_2_p2q1t, rep.Bootstrap=500, conf.alpha = 0.05, julia=TRUE)
plot(boot)

pit <- cocoPit(fit_2_p2q1t,  julia=TRUE)
plot(pit)

residuals <- cocoResid(fit_2_p2q1t, val.num = 1e-10)
mean(residuals$pe.resid)
## [1] 0.00979969
var(residuals$pe.resid)
## [1] 1.034343
forecast::Acf(residuals$pe.resid)

plot(residuals$pe.resid,type="l")

rbind(spec1 = cocoScore(fit_2_p2q1, julia=T), spec2 = cocoScore(fit_2_p2q1har, julia = T), spec3 = cocoScore(fit_2_p2q1i, julia = T), spec4=cocoScore(fit_2_p2q1t))
##       log.score quad.score rps.score
## spec1 1.676187  -0.2317115 0.7128052
## spec2 1.674483  -0.2327295 0.7116199
## spec3 1.674121  -0.2320096 0.7099365
## spec4 1.67518   -0.2315345 0.7115837

All close together.

Spezification (III.3) with dataset 2013 to 2020 (March to March)

data3 <- read_excel("rigcountsAlaskaL2013-20-march2march.xlsx")
#data2 <- read_excel("rigcountsAlaskaL2014-20-march2march.xlsx")

Explorative data analysis:

Number of observations is 365. The mean and variance of the data are: 8.0465753 and 7.1709017.

mean(data3$AlaskaL)
## [1] 8.046575
var(data3$AlaskaL)
## [1] 7.170902
plot(data3$AlaskaL, type="l")

forecast::Acf(data3$AlaskaL)

forecast::Pacf(data3$AlaskaL)

Hm dependence looks even more pronounced… Try another data span…

Spezification (IV.1) with dataset 2016 to 2020 (March to March)

data4 <- read_excel("rigcountsAlaskaL2016-20-march2march.xlsx")

Explorative data analysis:

Number of observations is 209. The mean and variance of the data are: 6.6124402 and 3.9884983.

The data are severely underdisperesed. I think, we should not pursue this here further.

mean(data4$AlaskaL)
## [1] 6.61244
var(data4$AlaskaL)
## [1] 3.988498
plot(data4$AlaskaL, type="l")

forecast::Acf(data4$AlaskaL)

forecast::Pacf(data4$AlaskaL)

Are there quarterly seasonal effects?

Mean counts for quarters 1 to 4:

meanq1 <- mean(data4$AlaskaL[data4$Q1 == 1])
meanq1
## [1] 8.288462
meanq2 <- mean(data4$AlaskaL[data4$Q2 == 1])
meanq2
## [1] 6.604167
meanq3 <- mean(data4$AlaskaL[data4$Q3 == 1])
meanq3
## [1] 5.578947
meanq4 <- mean(data4$AlaskaL[data4$Q4 == 1])
meanq4
## [1] 6.076923

There is no need for a trend and the data seem not overdispersed:

# preparations for regression analysis
X4 <- data4$AlaskaL
n4 <- length(X4)

const_4 <- rep(1, n4)

q1_4 <- data4$Q1


xreg_4_q1 <- cbind(const_4, q1_4 )

Fitting a GPAR(2) model with Q1-Dummy only (log-link)

fit_4_p2q1 <- cocoReg(order=2, type="GP", data=X4, xreg=xreg_4_q1, link_function = "log" , julia=T)

summary(fit_4_p2q1)
## Coefficients:
##           Estimate   Std. Error         t
## alpha1      0.1210       0.0240    5.0445
## alpha2      0.0590       0.0207    2.8543
## alpha3      0.7352       0.0459   16.0286
## eta         0.0171       0.0729    0.2349
## const_4    -0.7770       0.2845   -2.7308
## q1_4        0.6001       0.2424    2.4757
## 
## Type: GP 
## Order: 2 
## 
## Log-likelihood: -343.0093

Evaluation of the fitted model:

boot <- cocoBoot(fit_4_p2q1, rep.Bootstrap=500, conf.alpha = 0.05, julia=TRUE)
plot(boot)

pit <- cocoPit(fit_4_p2q1,  julia=TRUE)
plot(pit)

residuals <- cocoResid(fit_4_p2q1, val.num = 1e-10)
mean(residuals$pe.resid)
## [1] 0.01748717
var(residuals$pe.resid)
## [1] 0.9975233
forecast::Acf(residuals$pe.resid)

plot(residuals$pe.resid,type="l")