Commit 8271db2b authored by Jana Ulrich's avatar Jana Ulrich
Browse files

consistent parameter names, introduction examples

parent 047bc0d7
...@@ -5,7 +5,7 @@ export(IDF.plot) ...@@ -5,7 +5,7 @@ export(IDF.plot)
export(dgev.d) export(dgev.d)
export(gev.d.diag) export(gev.d.diag)
export(gev.d.fit) export(gev.d.fit)
export(gev.d.nll) export(gev.d.lik)
export(gev.d.params) export(gev.d.params)
export(pgev.d) export(pgev.d)
export(qgev.d) export(qgev.d)
......
...@@ -41,6 +41,43 @@ ...@@ -41,6 +41,43 @@
#' * Coles, S.An Introduction to Statistical Modeling of Extreme Values; Springer: New York, NY, USA, 2001, #' * Coles, S.An Introduction to Statistical Modeling of Extreme Values; Springer: New York, NY, USA, 2001,
#' https://doi.org/10.1198/tech.2002.s73 #' https://doi.org/10.1198/tech.2002.s73
#' @md #' @md
#'
#' @examples
#' ## Here are a few examples to illustrate the order in which the functions are intended to be used.
#'
#' ## Step 0: sample 20 years of example hourly 'precipitation' data
# dates <- seq(as.POSIXct("2000-01-01 00:00:00"),as.POSIXct("2019-12-31 23:00:00"),by = 'hour')
# sample.precip <- rgamma(n = length(dates), shape = 0.05, rate = 0.4)
# precip.df <- data.frame(date=dates,RR=sample.precip)
#
# ## Step 1: get annual maxima
# durations <- 2^(0:6) # accumulation durations [h]
# ann.max <- IDF.agg(list(precip.df),ds=durations,na.accept = 0.1)
# # plotting the annual maxima in log-log representation
# plot(ann.max$ds,ann.max$xdat,log='xy',xlab = 'Duration [h]',ylab='Intensity [mm/h]')
#
# ## Step 2: fit d-GEV to annual maxima
# fit <- gev.d.fit(xdat = ann.max$xdat,ds = ann.max$ds,sigma0link = make.link('log'))
# # checking the fit
# gev.d.diag(fit,pch=1,legend = FALSE)
# # parameter estimates
# params <- gev.d.params(fit)
# print(params)
# # plotting the probability density for a single duration
# q.min <- floor(min(ann.max$xdat[ann.max$ds%in%1:2]))
# q.max <- ceiling(max(ann.max$xdat[ann.max$ds%in%1:2]))
# q <- seq(q.min,q.max,0.2)
# plot(range(q),c(0,0.55),type = 'n',xlab = 'Intensity [mm/h]',ylab = 'Density')
# for(d in 1:2){ # d=1h and d=2h
# hist(ann.max$xdat[ann.max$ds==d],main = paste('d=',d),q.min:q.max
# ,freq = FALSE,add=TRUE,border = d) # sampled data
# lines(q,dgev.d(q,params$mut,params$sigma0,params$xi,params$theta,params$eta,d = d),col=d) # etimated prob. density
# }
# legend('topright',col=1:2,lwd=1,legend = paste('d=',1:2,'h'),title = 'Duration')
#
# ## Step 3: adding the IDF-curves to the data
# plot(ann.max$ds,ann.max$xdat,log='xy',xlab = 'Duration [h]',ylab='Intensity [mm/h]')
# IDF.plot(durations,params,add=TRUE)
NULL NULL
#### IDF.agg #### #### IDF.agg ####
...@@ -68,7 +105,8 @@ NULL ...@@ -68,7 +105,8 @@ NULL
#' different durations, IDF.agg needs to be run separately for the different groups of stations. #' different durations, IDF.agg needs to be run separately for the different groups of stations.
#' Afterwards the results can be joint together using `rbind`. #' Afterwards the results can be joint together using `rbind`.
#' #'
#' @return data.frame containing the annual intensity maxima [mm/h] in `$xdat`, the corresponding duration in `$ds` #' @return data.frame containing the annual intensity maxima [mm/h] in `$xdat`, the corresponding duration in `$ds`,
#' the `$year` and month (`$mon`) in which the maxima occured
#' and the station id or name in `$station`. #' and the station id or name in `$station`.
#' #'
#' @seealso \code{\link{pgev.d}} #' @seealso \code{\link{pgev.d}}
...@@ -82,13 +120,22 @@ NULL ...@@ -82,13 +120,22 @@ NULL
#' dates <- as.Date("2019-01-01")+0:729 #' dates <- as.Date("2019-01-01")+0:729
#' x <- rgamma(n = 730, shape = 0.4, rate = 0.5) #' x <- rgamma(n = 730, shape = 0.4, rate = 0.5)
#' df <- data.frame(date=dates,RR=x) #' df <- data.frame(date=dates,RR=x)
#' IDF.agg(list(df),ds=c(24,48))
#' #'
#'## xdat ds station #' # get annual maxima
#'## 1 0.3025660 24 1 #' IDF.agg(list('Sample'= df),ds=c(24,48),na.accept = 0.01)
#'## 2 0.4112304 24 1 #'
#'## 3 0.1650978 48 1 #' ## xdat ds year mon station
#'## 4 0.2356849 48 1 #' ## 0.2853811 24 2019 0:11 Sample
#' ## 0.5673122 24 2020 0:11 Sample
#' ## 0.1598448 48 2019 0:11 Sample
#' ## 0.3112713 48 2020 0:11 Sample
#'
#' # get monthly maxima for each month of june, july and august
#' IDF.agg(list('Sample'=df),ds=c(24,48),na.accept = 0.01,which.mon = list(5,6,7))
#'
#' # get maxima for time range from june to august
#' IDF.agg(list('Sample'=df),ds=c(24,48),na.accept = 0.01,which.mon = list(5:7))
#'
IDF.agg <- function(data,ds,na.accept = 0, IDF.agg <- function(data,ds,na.accept = 0,
which.stations = NULL,which.mon = list(0:11),names = c('date','RR'),cl = NULL){ which.stations = NULL,which.mon = list(0:11),names = c('date','RR'),cl = NULL){
...@@ -121,6 +168,7 @@ NULL ...@@ -121,6 +168,7 @@ NULL
max <- ifelse(n.na <= na.accept*length(vec),max(vec,na.rm = TRUE),NA) max <- ifelse(n.na <= na.accept*length(vec),max(vec,na.rm = TRUE),NA)
return(max)}) return(max)})
df <- data.frame(xdat=max,ds=ds,year=as.numeric(names(max)),mon=deparse(which.mon[[m.i]]), df <- data.frame(xdat=max,ds=ds,year=as.numeric(names(max)),mon=deparse(which.mon[[m.i]]),
station= station,
stringsAsFactors = FALSE) stringsAsFactors = FALSE)
return(df)}) return(df)})
df <- do.call(rbind,max.subset) df <- do.call(rbind,max.subset)
...@@ -132,7 +180,7 @@ NULL ...@@ -132,7 +180,7 @@ NULL
return(df) # maxima for all durations at one station return(df) # maxima for all durations at one station
} }
# which stations should be used? # which stations should be used?
if(is.null(which.stations))which.stations <- 1:length(data) if(is.null(which.stations))which.stations <- if(is.null(names(data))){1:length(data)}else{names(data)}
# call function 2 in lapply to aggregate over all durations at all stations # call function 2 in lapply to aggregate over all durations at all stations
station.list <- lapply(which.stations,agg.station) station.list <- lapply(which.stations,agg.station)
...@@ -148,7 +196,7 @@ NULL ...@@ -148,7 +196,7 @@ NULL
#' (modified location, scale offset, shape, duration offset, duration exponent) for chosen station #' (modified location, scale offset, shape, duration offset, duration exponent) for chosen station
#' as obtained from \code{\link{gev.d.fit}} #' as obtained from \code{\link{gev.d.fit}}
#' (or \code{\link{gev.d.params}} for model with covariates). #' (or \code{\link{gev.d.params}} for model with covariates).
#' @param probs vector of exeedance probabilities for which to plot IDF curves (p = 1-1/ReturnPeriod) #' @param probs vector of non-exeedance probabilities for which to plot IDF curves (p = 1-1/(Return Period))
#' @param cols vector of colors for IDF curves. Should have same length as \code{probs} #' @param cols vector of colors for IDF curves. Should have same length as \code{probs}
#' @param add logical indicating if plot should be added to existing plot, default is FALSE #' @param add logical indicating if plot should be added to existing plot, default is FALSE
#' @param legend logical indicating if legend should be plotted (TRUE, the default) #' @param legend logical indicating if legend should be plotted (TRUE, the default)
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#' for(i in 2:4){ #' for(i in 2:4){
#' lines(x,dens[[i]],lty=i) #' lines(x,dens[[i]],lty=i)
#' } #' }
#' legend('topright',title = 'duration',legend = 1:4,lty=1:4) #' legend('topright',title = 'Duration',legend = 1:4,lty=1:4)
dgev.d <- function(q,mut,sigma0,xi,theta,eta,d,...) { dgev.d <- function(q,mut,sigma0,xi,theta,eta,d,...) {
if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){ if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){
message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ', message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ',
...@@ -140,8 +140,8 @@ pgev.d <- function(q,mut,sigma0,xi,theta,eta,d,...) { ...@@ -140,8 +140,8 @@ pgev.d <- function(q,mut,sigma0,xi,theta,eta,d,...) {
#' for(i in 2:3){ #' for(i in 2:3){
#' lines(ds,qs[i,],lty=i) #' lines(ds,qs[i,],lty=i)
#' } #' }
#' legend('topright',title = 'Annual frequency of exceedance', #' legend('topright',title = 'p-quantile',
#' legend = 1-p,lty=1:3,bty = 'n') #' legend = p,lty=1:3,bty = 'n')
qgev.d <- function(p,mut,sigma0,xi,theta,eta,d,...) { qgev.d <- function(p,mut,sigma0,xi,theta,eta,d,...) {
if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){ if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){
message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ', message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ',
...@@ -182,15 +182,17 @@ qgev.d <- function(p,mut,sigma0,xi,theta,eta,d,...) { ...@@ -182,15 +182,17 @@ qgev.d <- function(p,mut,sigma0,xi,theta,eta,d,...) {
#' #'
#' @examples #' @examples
#' # random sample for one duration #' # random sample for one duration
#' rgev.d(n=100,mut=4,sigma=2,xi=0,theta=0.1,eta=0.3,d=1) #' rgev.d(n=100,mut=4,sigma0=2,xi=0,theta=0.1,eta=0.3,d=1)
#' #'
#' # compare randomn samples for different durations #' # compare randomn samples for different durations
#' ds <- c(1,4) #' ds <- c(1,4)
#' samp <- lapply(ds,rgev.d,n=100,mut=4,sigma0=2,xi=0,theta=0.1,eta=0.3) #' samp <- lapply(ds,rgev.d,n=100,mut=4,sigma0=2,xi=0,theta=0.1,eta=0.3)
#' #'
#' hist(samp[[1]],breaks = 10,col=rgb(1,0,0,0.5),freq = FALSE #' hist(samp[[1]],breaks = 10,col=rgb(1,0,0,0.5),freq = FALSE
#' ,ylim=c(0,0.3),xlab='x',main = 'd-GEV samples for two different durations') #' ,ylim=c(0,0.3),xlim=c(3,20),xlab='x',main = 'Random d-GEV samples')
#' hist(samp[[2]],breaks = 10,add=TRUE,col=rgb(0,0,1,0.5),freq = FALSE) #' hist(samp[[2]],breaks = 10,add=TRUE,col=rgb(0,0,1,0.5),freq = FALSE)
#' legend('topright',fill = c(rgb(1,0,0,0.5),rgb(0,0,1,0.5)),
#' legend = paste('d=',1:2,'h'),title = 'Duration')
rgev.d <- function(n,mut,sigma0,xi,theta,eta,d) { rgev.d <- function(n,mut,sigma0,xi,theta,eta,d) {
if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){ if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){
message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ', message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ',
......
...@@ -6,26 +6,26 @@ ...@@ -6,26 +6,26 @@
#### gev.d.fit #### #### gev.d.fit ####
#' @title Maximum-likelihood Fitting of the duration dependent GEV Distribution #' @title Maximum-likelihood Fitting of the duration-dependent GEV Distribution
#' @description Modified \code{\link[ismev]{gev.fit}} function for Maximum-likelihood fitting #' @description Modified \code{\link[ismev]{gev.fit}} function for Maximum-likelihood fitting
#' for the duration dependent generalized extreme #' for the duration-dependent generalized extreme
#' value distribution, following Koutsoyiannis et al. (1998), including generalized linear #' value distribution, following Koutsoyiannis et al. (1998), including generalized linear
#' modelling of each parameter. #' modeling of each parameter.
#' @param xdat A vector containing maxima for different durations. #' @param xdat A vector containing maxima for different durations.
#' This can be obtained from \code{\link{IDF.agg}}. #' This can be obtained from \code{\link{IDF.agg}}.
#' @param ds A vector of aggregation levels corresponding to the maxima in xdat. #' @param ds A vector of aggregation levels corresponding to the maxima in xdat.
#' 1/60 corresponds to 1 minute, 1 corresponds to 1 hour. #' 1/60 corresponds to 1 minute, 1 corresponds to 1 hour.
#' @param ydat A matrix of covariates for generalized linear modelling of the parameters #' @param ydat A matrix of covariates for generalized linear modeling of the parameters
#' (or NULL (the default) for stationary fitting). The number of rows should be the same as the #' (or NULL (the default) for stationary fitting). The number of rows should be the same as the
#' length of xdat. #' length of xdat.
#' @param mul,sigl,shl,thetal,etal Numeric vectors of integers, giving the columns of ydat that contain #' @param mutl,sigma0l,xil,thetal,etal Numeric vectors of integers, giving the columns of ydat that contain
#' covariates for generalized linear modelling of the parameters (or NULL (the default) #' covariates for generalized linear modeling of the parameters (or NULL (the default)
#' if the corresponding parameter is stationary). #' if the corresponding parameter is stationary).
#' Parameters are: modified location, scale_0, shape, duration offset, duration exponent repectively. #' Parameters are: modified location, scale offset, shape, duration offset, duration exponent, respectively.
#' @param mulink,siglink,shlink,thetalink,etalink Link functions for generalized linear #' @param mutlink,sigma0link,xilink,thetalink,etalink Link functions for generalized linear
#' modelling of the parameters, created with \code{\link{make.link}}. #' modeling of the parameters, created with \code{\link{make.link}}. The default is \code{make.link("identity")}.
#' @param init.vals list of length 5, giving initial values for all or some parameters #' @param init.vals list of length 5, giving initial values for all or some parameters
#' (order: mu, sigma, xi, theta, eta). If as.list(rep(NA,5)) (the default) is given, initial parameters are obtained #' (order: mut, sigma0, xi, theta, eta). If as.list(rep(NA,5)) (the default) is given, initial parameters are obtained
#' internally by fitting the GEV separately for each duration and applying a linear model to obtain the #' internally by fitting the GEV separately for each duration and applying a linear model to obtain the
#' duration dependency of the location and shape parameter. #' duration dependency of the location and shape parameter.
#' Initial values for covariate parameters are assumed as 0 if not given. #' Initial values for covariate parameters are assumed as 0 if not given.
...@@ -53,7 +53,8 @@ ...@@ -53,7 +53,8 @@
#' \item{vals}{Parameter values for every data point.} #' \item{vals}{Parameter values for every data point.}
#' \item{init.vals}{Initial values that were used.} #' \item{init.vals}{Initial values that were used.}
#' \item{ds}{Durations for every data point.} #' \item{ds}{Durations for every data point.}
#' @seealso \code{\link{dgev.d}}, \code{\link{IDF.agg}}, \code{\link{gev.fit}}, \code{\link{optim}} #' @details For details on the d-GEV and the parameter definitions, see \link{IDF-package}.
#' @seealso \code{\link{IDF-package}}, \code{\link{IDF.agg}}, \code{\link{gev.fit}}, \code{\link{optim}}
#' @export #' @export
#' @importFrom stats optim #' @importFrom stats optim
#' @importFrom stats make.link #' @importFrom stats make.link
...@@ -61,8 +62,8 @@ ...@@ -61,8 +62,8 @@
#' @examples #' @examples
#' # sampled random data from d-gev with covariates #' # sampled random data from d-gev with covariates
#' # GEV parameters: #' # GEV parameters:
#' # mu = 4 + 0.2*cov1 +0.5*cov2 #' # mut = 4 + 0.2*cov1 +0.5*cov2
#' # sigma = 2+0.5*cov1 #' # sigma0 = 2+0.5*cov1
#' # xi = 0.5 #' # xi = 0.5
#' # theta = 0 #' # theta = 0
#' # eta = 0.5 #' # eta = 0.5
...@@ -73,8 +74,8 @@ ...@@ -73,8 +74,8 @@
#' ,mul=c(1,2),sigl=1) #' ,mul=c(1,2),sigl=1)
gev.d.fit<- gev.d.fit<-
function(xdat, ds, ydat = NULL, mul = NULL, sigl = NULL, shl = NULL, thetal = NULL, etal = NULL, function(xdat, ds, ydat = NULL, mutl = NULL, sigma0l = NULL, xil = NULL, thetal = NULL, etal = NULL,
mulink = make.link("identity"), siglink = make.link("identity"), shlink = make.link("identity"), mutlink = make.link("identity"), sigma0link = make.link("identity"), xilink = make.link("identity"),
thetalink = make.link("identity"), etalink = make.link("identity"), thetalink = make.link("identity"), etalink = make.link("identity"),
init.vals = as.list(rep(NA,5)), theta_zero = FALSE, init.vals = as.list(rep(NA,5)), theta_zero = FALSE,
show = TRUE, method = "Nelder-Mead", maxit = 10000, ...) show = TRUE, method = "Nelder-Mead", maxit = 10000, ...)
...@@ -84,14 +85,14 @@ gev.d.fit<- ...@@ -84,14 +85,14 @@ gev.d.fit<-
} }
z <- list() z <- list()
# number of parameters (betas) to estimate for each parameter: # number of parameters (betas) to estimate for each parameter:
npmu <- length(mul) + 1 npmu <- length(mutl) + 1
npsc <- length(sigl) + 1 npsc <- length(sigma0l) + 1
npsh <- length(shl) + 1 npsh <- length(xil) + 1
npth <- ifelse(!theta_zero,length(thetal) + 1,0) npth <- ifelse(!theta_zero,length(thetal) + 1,0)
npet <- length(etal) + 1 npet <- length(etal) + 1
z$trans <- FALSE # indicates if fit is non-stationary z$trans <- FALSE # indicates if fit is non-stationary
z$model <- list(mul, sigl, shl, thetal, etal) z$model <- list(mutl, sigma0l, xil, thetal, etal)
z$link <- list(mulink=mulink, siglink=siglink, shlink=shlink, thetalink=thetalink, etalink=etalink) z$link <- list(mutlink=mutlink, sigma0link=sigma0link, xilink=xilink, thetalink=thetalink, etalink=etalink)
# test for NA values: # test for NA values:
if(any(is.na(xdat))) stop('xdat contains NA values. NA values need to be removed first.') if(any(is.na(xdat))) stop('xdat contains NA values. NA values need to be removed first.')
...@@ -122,29 +123,29 @@ gev.d.fit<- ...@@ -122,29 +123,29 @@ gev.d.fit<-
} }
# generate covariates matrices: # generate covariates matrices:
if (is.null(mul)) { #stationary if (is.null(mutl)) { #stationary
mumat <- as.matrix(rep(1, length(xdat))) mumat <- as.matrix(rep(1, length(xdat)))
muinit <- init.vals$mu muinit <- init.vals$mu
}else { #non stationary }else { #non stationary
z$trans <- TRUE z$trans <- TRUE
mumat <- cbind(rep(1, length(xdat)), ydat[, mul]) mumat <- cbind(rep(1, length(xdat)), ydat[, mutl])
muinit <- c(init.vals$mu, rep(0, length(mul)))[1:npmu] #fill with 0s to length npmu muinit <- c(init.vals$mu, rep(0, length(mutl)))[1:npmu] #fill with 0s to length npmu
} }
if (is.null(sigl)) { if (is.null(sigma0l)) {
sigmat <- as.matrix(rep(1, length(xdat))) sigmat <- as.matrix(rep(1, length(xdat)))
siginit <- init.vals$sigma siginit <- init.vals$sigma
}else { }else {
z$trans <- TRUE z$trans <- TRUE
sigmat <- cbind(rep(1, length(xdat)), ydat[, sigl]) sigmat <- cbind(rep(1, length(xdat)), ydat[, sigma0l])
siginit <- c(init.vals$sigma, rep(0, length(sigl)))[1:npsc] siginit <- c(init.vals$sigma, rep(0, length(sigma0l)))[1:npsc]
} }
if (is.null(shl)) { if (is.null(xil)) {
shmat <- as.matrix(rep(1, length(xdat))) shmat <- as.matrix(rep(1, length(xdat)))
shinit <- init.vals$xi shinit <- init.vals$xi
}else { }else {
z$trans <- TRUE z$trans <- TRUE
shmat <- cbind(rep(1, length(xdat)), ydat[, shl]) shmat <- cbind(rep(1, length(xdat)), ydat[, xil])
shinit <- c(init.vals$xi, rep(0, length(shl)))[1:npsh] shinit <- c(init.vals$xi, rep(0, length(xil)))[1:npsh]
} }
if (is.null(thetal)) { if (is.null(thetal)) {
thmat <- as.matrix(rep(1, length(xdat))) thmat <- as.matrix(rep(1, length(xdat)))
...@@ -174,9 +175,9 @@ gev.d.fit<- ...@@ -174,9 +175,9 @@ gev.d.fit<-
# function to calculate neg log-likelihood: # function to calculate neg log-likelihood:
gev.lik <- function(a) { gev.lik <- function(a) {
# computes neg log lik of d-gev model # computes neg log lik of d-gev model
mu <- mulink$linkinv(mumat %*% (a[1:npmu])) mu <- mutlink$linkinv(mumat %*% (a[1:npmu]))
sigma <- siglink$linkinv(sigmat %*% (a[seq(npmu + 1, length = npsc)])) sigma <- sigma0link$linkinv(sigmat %*% (a[seq(npmu + 1, length = npsc)]))
xi <- shlink$linkinv(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)])) xi <- xilink$linkinv(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)]))
# Next line will set the theta likelihood as non-existent in case user requested it. # Next line will set the theta likelihood as non-existent in case user requested it.
if(!theta_zero) {theta <- thetalink$linkinv(thmat %*% (a[seq(npmu + npsc + npsh + 1, length = npth)]))} if(!theta_zero) {theta <- thetalink$linkinv(thmat %*% (a[seq(npmu + npsc + npsh + 1, length = npth)]))}
eta <- etalink$linkinv(etmat %*% (a[seq(npmu + npsc + npsh + npth + 1, length = npet)])) eta <- etalink$linkinv(etmat %*% (a[seq(npmu + npsc + npsh + npth + 1, length = npet)]))
...@@ -202,9 +203,9 @@ gev.d.fit<- ...@@ -202,9 +203,9 @@ gev.d.fit<-
# saving output parameters: # saving output parameters:
z$conv <- x$convergence z$conv <- x$convergence
mut <- mulink$linkinv(mumat %*% (x$par[1:npmu])) mut <- mutlink$linkinv(mumat %*% (x$par[1:npmu]))
sc0 <- siglink$linkinv(sigmat %*% (x$par[seq(npmu + 1, length = npsc)])) sc0 <- sigma0link$linkinv(sigmat %*% (x$par[seq(npmu + 1, length = npsc)]))
xi <- shlink$linkinv(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)])) xi <- xilink$linkinv(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)]))
if(!theta_zero){ #When user does NOT set theta parameter to zero (default) if(!theta_zero){ #When user does NOT set theta parameter to zero (default)
theta <- thetalink$linkinv(thmat %*% (x$par[seq(npmu + npsc + npsh + 1, length = npth)])) theta <- thetalink$linkinv(thmat %*% (x$par[seq(npmu + npsc + npsh + 1, length = npth)]))
}else{ #When user requests theta_parameter to be zero }else{ #When user requests theta_parameter to be zero
...@@ -242,7 +243,7 @@ gev.d.fit<- ...@@ -242,7 +243,7 @@ gev.d.fit<-
print(z[c(2, 4)]) # print model, link (3) , conv print(z[c(2, 4)]) # print model, link (3) , conv
# print names of link functions: # print names of link functions:
cat('$link\n') cat('$link\n')
print(c(z$link$mulink$name,z$link$siglink$name,z$link$shlink$name,z$link$thetalink$name,z$link$etalink$name)) print(c(z$link$mutlink$name,z$link$sigma0link$name,z$link$xilink$name,z$link$thetalink$name,z$link$etalink$name))
cat('\n') cat('\n')
}else{print(z[4])} # for stationary fit print only conv }else{print(z[4])} # for stationary fit print only conv
if(!z$conv){ # if fit converged if(!z$conv){ # if fit converged
...@@ -281,7 +282,7 @@ gev.d.init <- function(xdat,ds,link){ ...@@ -281,7 +282,7 @@ gev.d.init <- function(xdat,ds,link){
durs <- unique(ds) durs <- unique(ds)
mles <- matrix(NA, nrow=length(durs), ncol= 3) mles <- matrix(NA, nrow=length(durs), ncol= 3)
for(i in 1:length(durs)){ for(i in 1:length(durs)){
test <- try(fit <- gev.fit(xdat[ds==durs[i]],show = FALSE),silent = TRUE) test <- try(fit <- ismev::gev.fit(xdat[ds==durs[i]],show = FALSE),silent = TRUE)
if("try-error" %in% class(test) | fit$conv!=0){mles[i,] <- rep(NA,3)}else{mles[i,] <- fit$mle} if("try-error" %in% class(test) | fit$conv!=0){mles[i,] <- rep(NA,3)}else{mles[i,] <- fit$mle}
} }
if(all(is.na(mles))){stop('Initial values could not be computed for this dataset.')} if(all(is.na(mles))){stop('Initial values could not be computed for this dataset.')}
...@@ -290,52 +291,60 @@ gev.d.init <- function(xdat,ds,link){ ...@@ -290,52 +291,60 @@ gev.d.init <- function(xdat,ds,link){
lmmu <- lm(log(mles[,1])~log(durs)) lmmu <- lm(log(mles[,1])~log(durs))
# sig0 <- exp Intercept # sig0 <- exp Intercept
siginit <- link$siglink$linkfun(exp(lmsig$coefficients[[1]])) siginit <- link$sigma0link$linkfun(exp(lmsig$coefficients[[1]]))
# eta <- mean of negativ slopes # eta <- mean of negativ slopes
etainit <- link$etalink$linkfun(mean(c(-lmsig$coefficients[[2]],-lmmu$coefficients[[2]]))) etainit <- link$etalink$linkfun(mean(c(-lmsig$coefficients[[2]],-lmmu$coefficients[[2]])))
# mean of mu_d/sig_d # mean of mu_d/sig_d
# could try: # could try:
# mu0/sig0 = exp(lmmu$coefficients[[1]])/exp(lmsig$coefficients[[1]]) # mu0/sig0 = exp(lmmu$coefficients[[1]])/exp(lmsig$coefficients[[1]])
muinit <- link$mulink$linkfun(median(c(mles[,1]/mles[,2]),na.rm = TRUE)) muinit <- link$mutlink$linkfun(median(c(mles[,1]/mles[,2]),na.rm = TRUE))
# mean of shape parameters # mean of shape parameters
shinit <- link$shlink$linkfun(median(mles[,3],na.rm = TRUE)) shinit <- link$xilink$linkfun(median(mles[,3],na.rm = TRUE))
thetainit <- link$thetalink$linkfun(0) thetainit <- link$thetalink$linkfun(0)
return(list(mu=muinit,sigma=siginit,xi=shinit,theta=thetainit,eta=etainit)) return(list(mu=muinit,sigma=siginit,xi=shinit,theta=thetainit,eta=etainit))
} }
#### gev.d.nll #### #### gev.d.lik ####
#' computes negative log-likelihood of d-gev model
#' d-GEV Likelihood
#' #'
#' Computes (log-) likelihood of d-GEV model
#' @param xdat numeric vector containing observations #' @param xdat numeric vector containing observations
#' @param ds numeric vector containing coresponding durations (1/60 corresponds to 1 minute, 1 corresponds to 1 hour) #' @param ds numeric vector containing corresponding durations (1/60 corresponds to 1 minute, 1 corresponds to 1 hour)
#' @param mut,sig0,xi,theta,eta numeric vectors containing corresponding mles for each of the parameters #' @param mut,sigma0,xi,theta,eta numeric vectors containing corresponding estimates for each of the parameters
#' @param log Logical; if TRUE, the log likelihood is returned.
#' #'
#' @return single value containing negative log likelihood #' @return single value containing (log) likelihood
#' @export #' @export
#' #'
#' @examples #' @examples
#' # compute nll of values not included in fit #' # compute log-likelihood of observation values not included in fit
#' train.set <- example[example$d!=2,] #' train.set <- example[example$d!=2,]
#' test.set <- example[example$d==2,] #' test.set <- example[example$d==2,]
#' fit <- gev.d.fit(train.set$dat,train.set$d,mul = c(1,2),sigl = 1 #' fit <- gev.d.fit(train.set$dat,train.set$d,mul = c(1,2),sigl = 1
#' ,ydat = as.matrix(train.set[c('cov1','cov2')])) #' ,ydat = as.matrix(train.set[c('cov1','cov2')]))
#' params <- gev.d.params(fit,ydat = as.matrix(test.set[c('cov1','cov2')])) #' params <- gev.d.params(fit,ydat = as.matrix(test.set[c('cov1','cov2')]))
#' gev.d.nll(xdat = test.set$dat,ds = test.set$d,mut = params[,1],sig0 = params[,2],xi = params[,3] #' gev.d.lik(xdat = test.set$dat,ds = test.set$d,mut = params[,1],sigma0 = params[,2],xi = params[,3]
#' ,theta = params[,4],eta = params[,5]) #' ,theta = params[,4],eta = params[,5],log=TRUE)
gev.d.nll <- function(xdat,ds,mut,sig0,xi,theta,eta) { gev.d.lik <- function(xdat,ds,mut,sigma0,xi,theta,eta,log=FALSE) {
# computes neg log lik of d-gev model if(any(xi==0)){stop('Function is not defined for shape parameter of zero.')}
if(any(! c(length(ds),length(mut),length(sig0),length(xi),length(theta),length(eta)) %in% if(any(! c(length(ds),length(mut),length(sigma0),length(xi),length(theta),length(eta)) %in%
c(1,length(xdat)))){ c(1,length(xdat)))){
warning('Input vectors differ in length, but must have the same length.') stop('Input vectors differ in length, but must have the same length.')
} }
ds.t <- ds+theta ds.t <- ds+theta
sigma.d <- sig0/(ds.t^eta) sigma.d <- sigma0/(ds.t^eta)
y <- xdat/sigma.d - mut y <- xdat/sigma.d - mut
y <- 1 + xi * y y <- 1 + xi * y
sum(log(sigma.d)) + sum(y^(-1/xi)) + sum(log(y) * (1/xi + 1)) if(log){
return(sum(log(sigma.d) + y^(-1/xi) + log(y) * (1/xi + 1)))
}else{
return(prod(sigma.d * exp(y^(-1/xi)) * y ^ (1/xi + 1)))
}
} }
#### gev.d.diag #### #### gev.d.diag ####
...@@ -347,13 +356,13 @@ gev.d.nll <- function(xdat,ds,mut,sig0,xi,theta,eta) { ...@@ -347,13 +356,13 @@ gev.d.nll <- function(xdat,ds,mut,sig0,xi,theta,eta) {
#' different colors of with different symbols. #' different colors of with different symbols.
#' @param fit object returned by \code{\link{gev.d.fit}} #' @param fit object returned by \code{\link{gev.d.fit}}
#' @param subset an optional vector specifying a subset of observations to be used in the plot #' @param subset an optional vector specifying a subset of observations to be used in the plot
#' @param cols optional either one value or vector of same length as \code{unique(durations)} to #' @param cols optional either one value or vector of same length as \code{unique(fit$ds)} to
#' specify the colors of plotting points. #' specify the colors of plotting points.
#' The default uses the \code{rainbow} function. #' The default uses the \code{rainbow} function.
#' @param pch optional either one value or vector of same length as \code{unique(durations)} containing #' @param pch optional either one value or vector of same length as \code{unique(fit$ds)} containing
#' integers or symbols to specify the plotting points. #' integers or symbols to specify the plotting points.
#' @param which string containing 'both', 'pp' or 'qq' to specify, which plots should be produced. #' @param which string containing 'both', 'pp' or 'qq' to specify, which plots should be produced.
#' @param mfrow vector specifying layout of plots. If both plots should be produced seperately, #' @param mfrow vector specifying layout of plots. If both plots should be produced separately,
#' set to \code{c(1,1)}. #' set to \code{c(1,1)}.
#' @param legend logical indicating if legends should be plotted #' @param legend logical indicating if legends should be plotted
#' @param title character vector of length 2, giving the titles for the pp- and the qq-plot #' @param title character vector of length 2, giving the titles for the pp- and the qq-plot
...@@ -368,11 +377,11 @@ gev.d.nll <- function(xdat,ds,mut,sig0,xi,theta,eta) { ...@@ -368,11 +377,11 @@ gev.d.nll <- function(xdat,ds,mut,sig0,xi,theta,eta) {
#' data('example',package ='IDF') #' data('example',package ='IDF')
#' #'
#' fit <- gev.d.fit(xdat=example$dat,ds = example$d,ydat=as.matrix(example[,c('cov1','cov2')]) #' fit <- gev.d.fit(xdat=example$dat,ds = example$d,ydat=as.matrix(example[,c('cov1','cov2')])
#' ,mul=c(1,2),sigl=1) #' ,mutl=c(1,2),sigma0l=1)
#' # diagnostic plots for complete data #' # diagnostic plots for complete data
#' gev.d.diag(fit) #' gev.d.diag(fit,pch=1)
#' # diagnostic plots for subset of data (e.g. one station) #' # diagnostic plots for subset of data (e.g. one station)
#' gev.d.diag(fit,subset = example$cov1==1) #' gev.d.diag(fit,subset = example$cov1==1,pch=1)
gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1,2),legend=TRUE, gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1,2),legend=TRUE,
title=c('Residual Probability Plot','Residual Quantile Plot'), title=c('Residual Probability Plot','Residual Quantile Plot'),
emp.lab='Empirical',mod.lab='Model',...){ emp.lab='Empirical',mod.lab='Model',...){
...@@ -381,7 +390,11 @@ gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1 ...@@ -381,7 +390,11 @@ gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1
" but only 'both','pp' or 'qq' are allowed.") " but only 'both','pp' or 'qq' are allowed.")
# subset data # subset data