Commit 8271db2b authored by Jana Ulrich's avatar Jana Ulrich
Browse files

consistent parameter names, introduction examples

parent 047bc0d7
......@@ -5,7 +5,7 @@ export(IDF.plot)
export(dgev.d)
export(gev.d.diag)
export(gev.d.fit)
export(gev.d.nll)
export(gev.d.lik)
export(gev.d.params)
export(pgev.d)
export(qgev.d)
......
......@@ -41,6 +41,43 @@
#' * Coles, S.An Introduction to Statistical Modeling of Extreme Values; Springer: New York, NY, USA, 2001,
#' https://doi.org/10.1198/tech.2002.s73
#' @md
#'
#' @examples
#' ## Here are a few examples to illustrate the order in which the functions are intended to be used.
#'
#' ## Step 0: sample 20 years of example hourly 'precipitation' data
# dates <- seq(as.POSIXct("2000-01-01 00:00:00"),as.POSIXct("2019-12-31 23:00:00"),by = 'hour')
# sample.precip <- rgamma(n = length(dates), shape = 0.05, rate = 0.4)
# precip.df <- data.frame(date=dates,RR=sample.precip)
#
# ## Step 1: get annual maxima
# durations <- 2^(0:6) # accumulation durations [h]
# ann.max <- IDF.agg(list(precip.df),ds=durations,na.accept = 0.1)
# # plotting the annual maxima in log-log representation
# plot(ann.max$ds,ann.max$xdat,log='xy',xlab = 'Duration [h]',ylab='Intensity [mm/h]')
#
# ## Step 2: fit d-GEV to annual maxima
# fit <- gev.d.fit(xdat = ann.max$xdat,ds = ann.max$ds,sigma0link = make.link('log'))
# # checking the fit
# gev.d.diag(fit,pch=1,legend = FALSE)
# # parameter estimates
# params <- gev.d.params(fit)
# print(params)
# # plotting the probability density for a single duration
# q.min <- floor(min(ann.max$xdat[ann.max$ds%in%1:2]))
# q.max <- ceiling(max(ann.max$xdat[ann.max$ds%in%1:2]))
# q <- seq(q.min,q.max,0.2)
# plot(range(q),c(0,0.55),type = 'n',xlab = 'Intensity [mm/h]',ylab = 'Density')
# for(d in 1:2){ # d=1h and d=2h
# hist(ann.max$xdat[ann.max$ds==d],main = paste('d=',d),q.min:q.max
# ,freq = FALSE,add=TRUE,border = d) # sampled data
# lines(q,dgev.d(q,params$mut,params$sigma0,params$xi,params$theta,params$eta,d = d),col=d) # etimated prob. density
# }
# legend('topright',col=1:2,lwd=1,legend = paste('d=',1:2,'h'),title = 'Duration')
#
# ## Step 3: adding the IDF-curves to the data
# plot(ann.max$ds,ann.max$xdat,log='xy',xlab = 'Duration [h]',ylab='Intensity [mm/h]')
# IDF.plot(durations,params,add=TRUE)
NULL
#### IDF.agg ####
......@@ -68,7 +105,8 @@ NULL
#' different durations, IDF.agg needs to be run separately for the different groups of stations.
#' Afterwards the results can be joint together using `rbind`.
#'
#' @return data.frame containing the annual intensity maxima [mm/h] in `$xdat`, the corresponding duration in `$ds`
#' @return data.frame containing the annual intensity maxima [mm/h] in `$xdat`, the corresponding duration in `$ds`,
#' the `$year` and month (`$mon`) in which the maxima occured
#' and the station id or name in `$station`.
#'
#' @seealso \code{\link{pgev.d}}
......@@ -82,13 +120,22 @@ NULL
#' dates <- as.Date("2019-01-01")+0:729
#' x <- rgamma(n = 730, shape = 0.4, rate = 0.5)
#' df <- data.frame(date=dates,RR=x)
#' IDF.agg(list(df),ds=c(24,48))
#'
#'## xdat ds station
#'## 1 0.3025660 24 1
#'## 2 0.4112304 24 1
#'## 3 0.1650978 48 1
#'## 4 0.2356849 48 1
#' # get annual maxima
#' IDF.agg(list('Sample'= df),ds=c(24,48),na.accept = 0.01)
#'
#' ## xdat ds year mon station
#' ## 0.2853811 24 2019 0:11 Sample
#' ## 0.5673122 24 2020 0:11 Sample
#' ## 0.1598448 48 2019 0:11 Sample
#' ## 0.3112713 48 2020 0:11 Sample
#'
#' # get monthly maxima for each month of june, july and august
#' IDF.agg(list('Sample'=df),ds=c(24,48),na.accept = 0.01,which.mon = list(5,6,7))
#'
#' # get maxima for time range from june to august
#' IDF.agg(list('Sample'=df),ds=c(24,48),na.accept = 0.01,which.mon = list(5:7))
#'
IDF.agg <- function(data,ds,na.accept = 0,
which.stations = NULL,which.mon = list(0:11),names = c('date','RR'),cl = NULL){
......@@ -121,6 +168,7 @@ NULL
max <- ifelse(n.na <= na.accept*length(vec),max(vec,na.rm = TRUE),NA)
return(max)})
df <- data.frame(xdat=max,ds=ds,year=as.numeric(names(max)),mon=deparse(which.mon[[m.i]]),
station= station,
stringsAsFactors = FALSE)
return(df)})
df <- do.call(rbind,max.subset)
......@@ -132,7 +180,7 @@ NULL
return(df) # maxima for all durations at one station
}
# which stations should be used?
if(is.null(which.stations))which.stations <- 1:length(data)
if(is.null(which.stations))which.stations <- if(is.null(names(data))){1:length(data)}else{names(data)}
# call function 2 in lapply to aggregate over all durations at all stations
station.list <- lapply(which.stations,agg.station)
......@@ -148,7 +196,7 @@ NULL
#' (modified location, scale offset, shape, duration offset, duration exponent) for chosen station
#' as obtained from \code{\link{gev.d.fit}}
#' (or \code{\link{gev.d.params}} for model with covariates).
#' @param probs vector of exeedance probabilities for which to plot IDF curves (p = 1-1/ReturnPeriod)
#' @param probs vector of non-exeedance probabilities for which to plot IDF curves (p = 1-1/(Return Period))
#' @param cols vector of colors for IDF curves. Should have same length as \code{probs}
#' @param add logical indicating if plot should be added to existing plot, default is FALSE
#' @param legend logical indicating if legend should be plotted (TRUE, the default)
......
......@@ -36,7 +36,7 @@
#' for(i in 2:4){
#' lines(x,dens[[i]],lty=i)
#' }
#' legend('topright',title = 'duration',legend = 1:4,lty=1:4)
#' legend('topright',title = 'Duration',legend = 1:4,lty=1:4)
dgev.d <- function(q,mut,sigma0,xi,theta,eta,d,...) {
if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){
message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ',
......@@ -140,8 +140,8 @@ pgev.d <- function(q,mut,sigma0,xi,theta,eta,d,...) {
#' for(i in 2:3){
#' lines(ds,qs[i,],lty=i)
#' }
#' legend('topright',title = 'Annual frequency of exceedance',
#' legend = 1-p,lty=1:3,bty = 'n')
#' legend('topright',title = 'p-quantile',
#' legend = p,lty=1:3,bty = 'n')
qgev.d <- function(p,mut,sigma0,xi,theta,eta,d,...) {
if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){
message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ',
......@@ -182,15 +182,17 @@ qgev.d <- function(p,mut,sigma0,xi,theta,eta,d,...) {
#'
#' @examples
#' # random sample for one duration
#' rgev.d(n=100,mut=4,sigma=2,xi=0,theta=0.1,eta=0.3,d=1)
#' rgev.d(n=100,mut=4,sigma0=2,xi=0,theta=0.1,eta=0.3,d=1)
#'
#' # compare randomn samples for different durations
#' ds <- c(1,4)
#' samp <- lapply(ds,rgev.d,n=100,mut=4,sigma0=2,xi=0,theta=0.1,eta=0.3)
#'
#' hist(samp[[1]],breaks = 10,col=rgb(1,0,0,0.5),freq = FALSE
#' ,ylim=c(0,0.3),xlab='x',main = 'd-GEV samples for two different durations')
#' ,ylim=c(0,0.3),xlim=c(3,20),xlab='x',main = 'Random d-GEV samples')
#' hist(samp[[2]],breaks = 10,add=TRUE,col=rgb(0,0,1,0.5),freq = FALSE)
#' legend('topright',fill = c(rgb(1,0,0,0.5),rgb(0,0,1,0.5)),
#' legend = paste('d=',1:2,'h'),title = 'Duration')
rgev.d <- function(n,mut,sigma0,xi,theta,eta,d) {
if(any(c(length(mut),length(sigma0),length(xi),length(theta),length(eta))>1)){
message('One of the parameters mut, sigma0, xi, theta, eta is a vector. ',
......
......@@ -6,26 +6,26 @@
#### gev.d.fit ####
#' @title Maximum-likelihood Fitting of the duration dependent GEV Distribution
#' @title Maximum-likelihood Fitting of the duration-dependent GEV Distribution
#' @description Modified \code{\link[ismev]{gev.fit}} function for Maximum-likelihood fitting
#' for the duration dependent generalized extreme
#' for the duration-dependent generalized extreme
#' value distribution, following Koutsoyiannis et al. (1998), including generalized linear
#' modelling of each parameter.
#' modeling of each parameter.
#' @param xdat A vector containing maxima for different durations.
#' This can be obtained from \code{\link{IDF.agg}}.
#' @param ds A vector of aggregation levels corresponding to the maxima in xdat.
#' 1/60 corresponds to 1 minute, 1 corresponds to 1 hour.
#' @param ydat A matrix of covariates for generalized linear modelling of the parameters
#' @param ydat A matrix of covariates for generalized linear modeling of the parameters
#' (or NULL (the default) for stationary fitting). The number of rows should be the same as the
#' length of xdat.
#' @param mul,sigl,shl,thetal,etal Numeric vectors of integers, giving the columns of ydat that contain
#' covariates for generalized linear modelling of the parameters (or NULL (the default)
#' @param mutl,sigma0l,xil,thetal,etal Numeric vectors of integers, giving the columns of ydat that contain
#' covariates for generalized linear modeling of the parameters (or NULL (the default)
#' if the corresponding parameter is stationary).
#' Parameters are: modified location, scale_0, shape, duration offset, duration exponent repectively.
#' @param mulink,siglink,shlink,thetalink,etalink Link functions for generalized linear
#' modelling of the parameters, created with \code{\link{make.link}}.
#' Parameters are: modified location, scale offset, shape, duration offset, duration exponent, respectively.
#' @param mutlink,sigma0link,xilink,thetalink,etalink Link functions for generalized linear
#' modeling of the parameters, created with \code{\link{make.link}}. The default is \code{make.link("identity")}.
#' @param init.vals list of length 5, giving initial values for all or some parameters
#' (order: mu, sigma, xi, theta, eta). If as.list(rep(NA,5)) (the default) is given, initial parameters are obtained
#' (order: mut, sigma0, xi, theta, eta). If as.list(rep(NA,5)) (the default) is given, initial parameters are obtained
#' internally by fitting the GEV separately for each duration and applying a linear model to obtain the
#' duration dependency of the location and shape parameter.
#' Initial values for covariate parameters are assumed as 0 if not given.
......@@ -53,7 +53,8 @@
#' \item{vals}{Parameter values for every data point.}
#' \item{init.vals}{Initial values that were used.}
#' \item{ds}{Durations for every data point.}
#' @seealso \code{\link{dgev.d}}, \code{\link{IDF.agg}}, \code{\link{gev.fit}}, \code{\link{optim}}
#' @details For details on the d-GEV and the parameter definitions, see \link{IDF-package}.
#' @seealso \code{\link{IDF-package}}, \code{\link{IDF.agg}}, \code{\link{gev.fit}}, \code{\link{optim}}
#' @export
#' @importFrom stats optim
#' @importFrom stats make.link
......@@ -61,8 +62,8 @@
#' @examples
#' # sampled random data from d-gev with covariates
#' # GEV parameters:
#' # mu = 4 + 0.2*cov1 +0.5*cov2
#' # sigma = 2+0.5*cov1
#' # mut = 4 + 0.2*cov1 +0.5*cov2
#' # sigma0 = 2+0.5*cov1
#' # xi = 0.5
#' # theta = 0
#' # eta = 0.5
......@@ -73,8 +74,8 @@
#' ,mul=c(1,2),sigl=1)
gev.d.fit<-
function(xdat, ds, ydat = NULL, mul = NULL, sigl = NULL, shl = NULL, thetal = NULL, etal = NULL,
mulink = make.link("identity"), siglink = make.link("identity"), shlink = make.link("identity"),
function(xdat, ds, ydat = NULL, mutl = NULL, sigma0l = NULL, xil = NULL, thetal = NULL, etal = NULL,
mutlink = make.link("identity"), sigma0link = make.link("identity"), xilink = make.link("identity"),
thetalink = make.link("identity"), etalink = make.link("identity"),
init.vals = as.list(rep(NA,5)), theta_zero = FALSE,
show = TRUE, method = "Nelder-Mead", maxit = 10000, ...)
......@@ -84,14 +85,14 @@ gev.d.fit<-
}
z <- list()
# number of parameters (betas) to estimate for each parameter:
npmu <- length(mul) + 1
npsc <- length(sigl) + 1
npsh <- length(shl) + 1
npmu <- length(mutl) + 1
npsc <- length(sigma0l) + 1
npsh <- length(xil) + 1
npth <- ifelse(!theta_zero,length(thetal) + 1,0)
npet <- length(etal) + 1
z$trans <- FALSE # indicates if fit is non-stationary
z$model <- list(mul, sigl, shl, thetal, etal)
z$link <- list(mulink=mulink, siglink=siglink, shlink=shlink, thetalink=thetalink, etalink=etalink)
z$model <- list(mutl, sigma0l, xil, thetal, etal)
z$link <- list(mutlink=mutlink, sigma0link=sigma0link, xilink=xilink, thetalink=thetalink, etalink=etalink)
# test for NA values:
if(any(is.na(xdat))) stop('xdat contains NA values. NA values need to be removed first.')
......@@ -122,29 +123,29 @@ gev.d.fit<-
}
# generate covariates matrices:
if (is.null(mul)) { #stationary
if (is.null(mutl)) { #stationary
mumat <- as.matrix(rep(1, length(xdat)))
muinit <- init.vals$mu
}else { #non stationary
z$trans <- TRUE
mumat <- cbind(rep(1, length(xdat)), ydat[, mul])
muinit <- c(init.vals$mu, rep(0, length(mul)))[1:npmu] #fill with 0s to length npmu
mumat <- cbind(rep(1, length(xdat)), ydat[, mutl])
muinit <- c(init.vals$mu, rep(0, length(mutl)))[1:npmu] #fill with 0s to length npmu
}
if (is.null(sigl)) {
if (is.null(sigma0l)) {
sigmat <- as.matrix(rep(1, length(xdat)))
siginit <- init.vals$sigma
}else {
z$trans <- TRUE
sigmat <- cbind(rep(1, length(xdat)), ydat[, sigl])
siginit <- c(init.vals$sigma, rep(0, length(sigl)))[1:npsc]
sigmat <- cbind(rep(1, length(xdat)), ydat[, sigma0l])
siginit <- c(init.vals$sigma, rep(0, length(sigma0l)))[1:npsc]
}
if (is.null(shl)) {
if (is.null(xil)) {
shmat <- as.matrix(rep(1, length(xdat)))
shinit <- init.vals$xi
}else {
z$trans <- TRUE
shmat <- cbind(rep(1, length(xdat)), ydat[, shl])
shinit <- c(init.vals$xi, rep(0, length(shl)))[1:npsh]
shmat <- cbind(rep(1, length(xdat)), ydat[, xil])
shinit <- c(init.vals$xi, rep(0, length(xil)))[1:npsh]
}
if (is.null(thetal)) {
thmat <- as.matrix(rep(1, length(xdat)))
......@@ -174,9 +175,9 @@ gev.d.fit<-
# function to calculate neg log-likelihood:
gev.lik <- function(a) {
# computes neg log lik of d-gev model
mu <- mulink$linkinv(mumat %*% (a[1:npmu]))
sigma <- siglink$linkinv(sigmat %*% (a[seq(npmu + 1, length = npsc)]))
xi <- shlink$linkinv(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)]))
mu <- mutlink$linkinv(mumat %*% (a[1:npmu]))
sigma <- sigma0link$linkinv(sigmat %*% (a[seq(npmu + 1, length = npsc)]))
xi <- xilink$linkinv(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)]))
# Next line will set the theta likelihood as non-existent in case user requested it.
if(!theta_zero) {theta <- thetalink$linkinv(thmat %*% (a[seq(npmu + npsc + npsh + 1, length = npth)]))}
eta <- etalink$linkinv(etmat %*% (a[seq(npmu + npsc + npsh + npth + 1, length = npet)]))
......@@ -202,9 +203,9 @@ gev.d.fit<-
# saving output parameters:
z$conv <- x$convergence
mut <- mulink$linkinv(mumat %*% (x$par[1:npmu]))
sc0 <- siglink$linkinv(sigmat %*% (x$par[seq(npmu + 1, length = npsc)]))
xi <- shlink$linkinv(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)]))
mut <- mutlink$linkinv(mumat %*% (x$par[1:npmu]))
sc0 <- sigma0link$linkinv(sigmat %*% (x$par[seq(npmu + 1, length = npsc)]))
xi <- xilink$linkinv(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)]))
if(!theta_zero){ #When user does NOT set theta parameter to zero (default)
theta <- thetalink$linkinv(thmat %*% (x$par[seq(npmu + npsc + npsh + 1, length = npth)]))
}else{ #When user requests theta_parameter to be zero
......@@ -242,7 +243,7 @@ gev.d.fit<-
print(z[c(2, 4)]) # print model, link (3) , conv
# print names of link functions:
cat('$link\n')
print(c(z$link$mulink$name,z$link$siglink$name,z$link$shlink$name,z$link$thetalink$name,z$link$etalink$name))
print(c(z$link$mutlink$name,z$link$sigma0link$name,z$link$xilink$name,z$link$thetalink$name,z$link$etalink$name))
cat('\n')
}else{print(z[4])} # for stationary fit print only conv
if(!z$conv){ # if fit converged
......@@ -281,7 +282,7 @@ gev.d.init <- function(xdat,ds,link){
durs <- unique(ds)
mles <- matrix(NA, nrow=length(durs), ncol= 3)
for(i in 1:length(durs)){
test <- try(fit <- gev.fit(xdat[ds==durs[i]],show = FALSE),silent = TRUE)
test <- try(fit <- ismev::gev.fit(xdat[ds==durs[i]],show = FALSE),silent = TRUE)
if("try-error" %in% class(test) | fit$conv!=0){mles[i,] <- rep(NA,3)}else{mles[i,] <- fit$mle}
}
if(all(is.na(mles))){stop('Initial values could not be computed for this dataset.')}
......@@ -290,52 +291,60 @@ gev.d.init <- function(xdat,ds,link){
lmmu <- lm(log(mles[,1])~log(durs))
# sig0 <- exp Intercept
siginit <- link$siglink$linkfun(exp(lmsig$coefficients[[1]]))
siginit <- link$sigma0link$linkfun(exp(lmsig$coefficients[[1]]))
# eta <- mean of negativ slopes
etainit <- link$etalink$linkfun(mean(c(-lmsig$coefficients[[2]],-lmmu$coefficients[[2]])))
# mean of mu_d/sig_d
# could try:
# mu0/sig0 = exp(lmmu$coefficients[[1]])/exp(lmsig$coefficients[[1]])
muinit <- link$mulink$linkfun(median(c(mles[,1]/mles[,2]),na.rm = TRUE))
muinit <- link$mutlink$linkfun(median(c(mles[,1]/mles[,2]),na.rm = TRUE))
# mean of shape parameters
shinit <- link$shlink$linkfun(median(mles[,3],na.rm = TRUE))
shinit <- link$xilink$linkfun(median(mles[,3],na.rm = TRUE))
thetainit <- link$thetalink$linkfun(0)
return(list(mu=muinit,sigma=siginit,xi=shinit,theta=thetainit,eta=etainit))
}
#### gev.d.nll ####
#' computes negative log-likelihood of d-gev model
#### gev.d.lik ####
#' d-GEV Likelihood
#'
#' Computes (log-) likelihood of d-GEV model
#' @param xdat numeric vector containing observations
#' @param ds numeric vector containing coresponding durations (1/60 corresponds to 1 minute, 1 corresponds to 1 hour)
#' @param mut,sig0,xi,theta,eta numeric vectors containing corresponding mles for each of the parameters
#' @param ds numeric vector containing corresponding durations (1/60 corresponds to 1 minute, 1 corresponds to 1 hour)
#' @param mut,sigma0,xi,theta,eta numeric vectors containing corresponding estimates for each of the parameters
#' @param log Logical; if TRUE, the log likelihood is returned.
#'
#' @return single value containing negative log likelihood
#' @return single value containing (log) likelihood
#' @export
#'
#' @examples
#' # compute nll of values not included in fit
#' # compute log-likelihood of observation values not included in fit
#' train.set <- example[example$d!=2,]
#' test.set <- example[example$d==2,]
#' fit <- gev.d.fit(train.set$dat,train.set$d,mul = c(1,2),sigl = 1
#' ,ydat = as.matrix(train.set[c('cov1','cov2')]))
#' params <- gev.d.params(fit,ydat = as.matrix(test.set[c('cov1','cov2')]))
#' gev.d.nll(xdat = test.set$dat,ds = test.set$d,mut = params[,1],sig0 = params[,2],xi = params[,3]
#' ,theta = params[,4],eta = params[,5])
gev.d.nll <- function(xdat,ds,mut,sig0,xi,theta,eta) {
# computes neg log lik of d-gev model
if(any(! c(length(ds),length(mut),length(sig0),length(xi),length(theta),length(eta)) %in%
#' gev.d.lik(xdat = test.set$dat,ds = test.set$d,mut = params[,1],sigma0 = params[,2],xi = params[,3]
#' ,theta = params[,4],eta = params[,5],log=TRUE)
gev.d.lik <- function(xdat,ds,mut,sigma0,xi,theta,eta,log=FALSE) {
if(any(xi==0)){stop('Function is not defined for shape parameter of zero.')}
if(any(! c(length(ds),length(mut),length(sigma0),length(xi),length(theta),length(eta)) %in%
c(1,length(xdat)))){
warning('Input vectors differ in length, but must have the same length.')
stop('Input vectors differ in length, but must have the same length.')
}
ds.t <- ds+theta
sigma.d <- sig0/(ds.t^eta)
sigma.d <- sigma0/(ds.t^eta)
y <- xdat/sigma.d - mut
y <- 1 + xi * y
sum(log(sigma.d)) + sum(y^(-1/xi)) + sum(log(y) * (1/xi + 1))
if(log){
return(sum(log(sigma.d) + y^(-1/xi) + log(y) * (1/xi + 1)))
}else{
return(prod(sigma.d * exp(y^(-1/xi)) * y ^ (1/xi + 1)))
}
}
#### gev.d.diag ####
......@@ -347,13 +356,13 @@ gev.d.nll <- function(xdat,ds,mut,sig0,xi,theta,eta) {
#' different colors of with different symbols.
#' @param fit object returned by \code{\link{gev.d.fit}}
#' @param subset an optional vector specifying a subset of observations to be used in the plot
#' @param cols optional either one value or vector of same length as \code{unique(durations)} to
#' @param cols optional either one value or vector of same length as \code{unique(fit$ds)} to
#' specify the colors of plotting points.
#' The default uses the \code{rainbow} function.
#' @param pch optional either one value or vector of same length as \code{unique(durations)} containing
#' @param pch optional either one value or vector of same length as \code{unique(fit$ds)} containing
#' integers or symbols to specify the plotting points.
#' @param which string containing 'both', 'pp' or 'qq' to specify, which plots should be produced.
#' @param mfrow vector specifying layout of plots. If both plots should be produced seperately,
#' @param mfrow vector specifying layout of plots. If both plots should be produced separately,
#' set to \code{c(1,1)}.
#' @param legend logical indicating if legends should be plotted
#' @param title character vector of length 2, giving the titles for the pp- and the qq-plot
......@@ -368,11 +377,11 @@ gev.d.nll <- function(xdat,ds,mut,sig0,xi,theta,eta) {
#' data('example',package ='IDF')
#'
#' fit <- gev.d.fit(xdat=example$dat,ds = example$d,ydat=as.matrix(example[,c('cov1','cov2')])
#' ,mul=c(1,2),sigl=1)
#' ,mutl=c(1,2),sigma0l=1)
#' # diagnostic plots for complete data
#' gev.d.diag(fit)
#' gev.d.diag(fit,pch=1)
#' # diagnostic plots for subset of data (e.g. one station)
#' gev.d.diag(fit,subset = example$cov1==1)
#' gev.d.diag(fit,subset = example$cov1==1,pch=1)
gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1,2),legend=TRUE,
title=c('Residual Probability Plot','Residual Quantile Plot'),
emp.lab='Empirical',mod.lab='Model',...){
......@@ -381,7 +390,11 @@ gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1
" but only 'both','pp' or 'qq' are allowed.")
# subset data
df <- data.frame(data=fit$data,ds=fit$ds)
if(!is.null(subset))df <- df[subset,]
if(!is.null(subset)){
if(dim(df)[1]!=length(subset)){stop("Length of 'subset' does not match length of data
'xdat' used for fitting.")}
df <- df[subset,]
}
# get single durations
durs <- sort(unique(df$ds))
# rescale durations to assign colors
......@@ -407,7 +420,7 @@ gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1
abline(0, 1, col = 1,lwd=1)
title(title[1])
if(legend){legend('bottomright',legend = round(durs,digits = 2),pch=pch,
col = cols[1:length(durs)],title = 'Durations[h]',ncol = 2)}
col = cols[1:length(durs)],title = 'Duration [h]',ncol = 2)}
}
if(which=='both'|which=='qq'){
# qq
......@@ -416,7 +429,7 @@ gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1
abline(0, 1, col = 1,lwd=1)
title(title[2])
if(legend){legend('bottomright',legend = round(durs,digits = 2),pch=pch,
col = cols[1:length(durs)],title = 'Durations [h]',ncol = 2)}
col = cols[1:length(durs)],title = 'Duration [h]',ncol = 2)}
}
if(which=='both') par(mfrow=c(1,1)) # reset par
}
......@@ -426,11 +439,11 @@ gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1
#' Calculate gev(d) parameters from \code{gev.d.fit} output
#'
#' @description function to calculate mut, sigma0, xi, theta, eta
#' (modified location, scale, shape, duration offset, duration exponent)
#' from results of \code{\link{gev.d.fit}} with covariates
#' @param fit fit object returned by \code{gev.d.fit} or \code{gev.fit}
#' (modified location, scale offset, shape, duration offset, duration exponent)
#' from results of \code{\link{gev.d.fit}} with covariates or link funktions other than identity.
#' @param fit fit object returned by \code{\link{gev.d.fit}} or \code{\link{gev.fit}}
#' @param ydat A matrix containing the covariates in the same order as used in \code{gev.d.fit}.
#' @seealso \code{\link{dgev.d}}
#' @seealso \code{\link{IDF-package}}
#' @return data.frame containing mu_tilde, sigma0, xi, theta, eta (or mu, sigma, xi for gev.fit objects)
#' @export
#'
......@@ -441,11 +454,14 @@ gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1
#' gev.d.params(fit = fit,ydat = cbind(c(0.9,1),c(0.5,1)))
gev.d.params <- function(fit,ydat){
gev.d.params <- function(fit,ydat=NULL){
if(!class(fit)%in%c("gev.d.fit","gev.fit"))stop("'fit' must be an object returned by 'gev.d.fit' or 'gev.fit'.")
if(!is.matrix(ydat))stop("'ydat' must be of class matrix.")
n.par <- max(sapply(fit$model,function(x){return(ifelse(is.null(x),0,max(x)))}))
if(n.par>ncol(ydat))stop("Covariates-Matrix 'ydat' has ",ncol(ydat), " columns, but ", n.par," are required.")
if(fit$trans){
# check covariates matrix
if(!is.matrix(ydat))stop("'ydat' must be of class matrix.")
n.par <- max(sapply(fit$model,function(x){return(ifelse(is.null(x),0,max(x)))}))
if(n.par>ncol(ydat))stop("Covariates-Matrix 'ydat' has ",ncol(ydat), " columns, but ", n.par," are required.")
}else(ydat <- matrix(1))
# number of parameters
npmu <- length(fit$model[[1]]) + 1
......@@ -459,9 +475,9 @@ gev.d.params <- function(fit,ydat){
# inverse link functions
if(class(fit)=="gev.d.fit"){
mulink <- fit$link$mulink$linkinv
siglink <- fit$link$siglink$linkinv
shlink <- fit$link$shlink$linkinv
mulink <- fit$link$mutlink$linkinv
siglink <- fit$link$sigma0link$linkinv
shlink <- fit$link$xilink$linkinv
if(!fit$theta_zero) thetalink <- fit$link$thetalink$linkinv
etalink <- fit$link$etalink$linkinv
}else{
......@@ -489,7 +505,7 @@ gev.d.params <- function(fit,ydat){
if(class(fit)=="gev.d.fit"){eta <- etalink(etmat %*% (fit$mle[seq(npmu + npsc + npsh + npth + 1, length = npet)]))}
if(class(fit)=="gev.d.fit"){
return(data.frame(mut=mut,sig0=sc0,xi=xi,theta=theta,eta=eta))
return(data.frame(mut=mut,sigma0=sc0,xi=xi,theta=theta,eta=eta))
}else{return(data.frame(mu=mut,sig=sc0,xi=xi))}
}
......
......@@ -30,6 +30,11 @@ generalized extreme value distribution (GEV) is provided by Coles (2001). It sho
the assumption that block maxima (of different durations or stations) are independent of each other.
}
}
\examples{
## Here are a few examples to illustrate the order in which the functions are intended to be used.
## Step 0: sample 20 years of example hourly 'precipitation' data
}
\references{
\itemize{
\item Ulrich, J.; Jurado, O.E.; Peter, M.; Scheibel, M.;
......
......@@ -36,7 +36,8 @@ containing names of elements in data. If not given, all elements in `data` will
\item{cl}{optional, number of cores to be used from \code{\link[pbapply]{pblapply}} for parallelization.}
}
\value{
data.frame containing the annual intensity maxima [mm/h] in `$xdat`, the corresponding duration in `$ds`
data.frame containing the annual intensity maxima [mm/h] in `$xdat`, the corresponding duration in `$ds`,
the `$year` and month (`$mon`) in which the maxima occured
and the station id or name in `$station`.
}
\description{
......@@ -53,13 +54,22 @@ Afterwards the results can be joint together using `rbind`.
dates <- as.Date("2019-01-01")+0:729
x <- rgamma(n = 730, shape = 0.4, rate = 0.5)
df <- data.frame(date=dates,RR=x)
IDF.agg(list(df),ds=c(24,48))
## xdat ds station
## 1 0.3025660 24 1
## 2 0.4112304 24 1
## 3 0.1650978 48 1
## 4 0.2356849 48 1
# get annual maxima
IDF.agg(list('Sample'= df),ds=c(24,48),na.accept = 0.01)
## xdat ds year mon station
## 0.2853811 24 2019 0:11 Sample
## 0.5673122 24 2020 0:11 Sample
## 0.1598448 48 2019 0:11 Sample
## 0.3112713 48 2020 0:11 Sample
# get monthly maxima for each month of june, july and august
IDF.agg(list('Sample'=df),ds=c(24,48),na.accept = 0.01,which.mon = list(5,6,7))
# get maxima for time range from june to august
IDF.agg(list('Sample'=df),ds=c(24,48),na.accept = 0.01,which.mon = list(5:7))
}
\seealso{
\code{\link{pgev.d}}
......
......@@ -22,7 +22,7 @@ IDF.plot(
as obtained from \code{\link{gev.d.fit}}
(or \code{\link{gev.d.params}} for model with covariates).}
\item{probs}{vector of exeedance probabilities for which to plot IDF curves (p = 1-1/ReturnPeriod)}
\item{probs}{vector of non-exeedance probabilities for which to plot IDF curves (p = 1-1/(Return Period))}
\item{cols}{vector of colors for IDF curves. Should have same length as \code{probs}}
......
......@@ -43,7 +43,7 @@ plot(x,dens[[1]],type='l',ylim = c(0,0.21),ylab = 'Probability Density')
for(i in 2:4){
lines(x,dens[[i]],lty=i)
}
legend('topright',title = 'duration',legend = 1:4,lty=1:4)
legend('topright',title = 'Duration',legend = 1:4,lty=1:4)
}
\seealso{
\code{\link{pgev.d}}, \code{\link{qgev.d}}, \code{\link{rgev.d}}
......
......@@ -23,16 +23,16 @@ gev.d.diag(
\item{subset}{an optional vector specifying a subset of observations to be used in the plot}
\item{cols}{optional either one value or vector of same length as \code{unique(durations)} to
\item{cols}{optional either one value or vector of same length as \code{unique(fit$ds)} to
specify the colors of plotting points.
The default uses the \code{rainbow} function.}