# This file contains the functions: # - gev.d.fit, gev.d.init for fitting # - gev.d.diag for diagnostic plots # - gev.d.params, gev.d.rl for calculation of parameters/ return levels # and the documentation of the example data #### gev.d.fit #### #' @title Maximum-likelihood Fitting of the duration dependent GEV Distribution #' @description Modified \code{\link[ismev]{gev.fit}} function for Maximum-likelihood fitting #' for the duration dependent generalized extreme #' value distribution, following Koutsoyiannis et al. (1988), including generalized linear #' modelling of each parameter. #' @param xdat A vector containing maxima for different durations. #' This can be obtained from \code{\link{IDF.agg}}. #' @param ds A vector of aggregation levels corresponding to the maxima in xdat. #' @param ydat A matrix of covariates for generalized linear modelling of the parameters #' (or NULL (the default) for stationary fitting). The number of rows should be the same as the #' length of xdat. #' @param mul,sigl,shl,thetal,etal Numeric vectors of integers, giving the columns of ydat that contain #' covariates for generalized linear modelling of the parameters (or NULL (the default) #' if the corresponding parameter is stationary). #' Parameters are: modified location, scale_0, shape, duration offset, duration exponent repectively. #' @param mulink,siglink,shlink,thetalink,etalink Inverse link functions for generalized linear #' modelling of the parameters. #' @param muinit,siginit,shinit,thetainit,etainit initial values as numeric of length #' equal to total number of parameters #' used to model the parameters. If NULL (the default) is given, initial parameters are obtained #' internally by fitting the GEV seperately for each duration and applying a linear model to optain the #' duration dependency of the location and shape parameter. #' @param show Logical; if TRUE (the default), print details of the fit. #' @param method The optimization method used in \code{\link{optim}}. #' @param maxit The maximum number of iterations. #' @param ... Other control parameters for the optimization. #' @return A list containing the following components. #' A subset of these components are printed after the fit. #' If show is TRUE, then assuming that successful convergence is indicated, #' the components nllh, mle and se are always printed. #' \item{nllh}{single numeric giving the negative log-likelihood value.} #' \item{mle}{numeric vector giving the MLE's for the modified location, scale_0, shape, #' duration offset and duration exponent, resp.} #' \item{se}{numeric vector giving the standard errors for the MLE's (in the same order).} #' \item{trans}{An logical indicator for a non-stationary fit.} #' \item{model}{A list with components mul, sigl, shl, thetal and etal.} #' \item{link}{A character vector giving inverse link functions.} #' \item{conv}{The convergence code, taken from the list returned by \code{\link{optim}}. #' A zero indicates successful convergence.} #' \item{data}{data is standardized to standart Gumbel.} #' \item{cov}{The covariance matrix.} #' @seealso \code{\link{dgev.d}}, \code{\link{IDF.agg}}, \code{\link{gev.fit}}, \code{\link{optim}} #' @export #' @importFrom stats optim #' #' @examples #' # sampled random data from d-gev with covariates #' # GEV parameters: #' # mu = 4 + 0.2*cov1 +0.5*cov2 #' # sigma = 2+0.5*cov1 #' # xi = 0.5 #' # theta = 0 #' # eta = 0.5 #' #' data('example',package ='IDF') #' #' gev.d.fit(xdat=example$dat,ds = example$d,ydat=as.matrix(example[,c('cov1','cov2')]) #' ,mul=c(1,2),sigl=1) gev.d.fit<- function(xdat, ds, ydat = NULL, mul = NULL, sigl = NULL, shl = NULL, thetal = NULL, etal = NULL, mulink = identity, siglink = identity, shlink = identity, thetalink = identity, etalink = identity, muinit = NULL, siginit = NULL, shinit = NULL, thetainit = NULL, etainit = NULL, show = TRUE, method = "Nelder-Mead", maxit = 10000, ...) { # # obtains mles etc for d-gev distn # # test for NA values: if(any(is.na(xdat))) stop('xdat contains NA values. NA values need to be removed first.') z <- list() # number of parameters (betas) to estimate for each parameter: npmu <- length(mul) + 1 npsc <- length(sigl) + 1 npsh <- length(shl) + 1 npth <- length(thetal) + 1 npet <- length(etal) + 1 z$trans <- FALSE # indicates if fit is non-stationary # calculate initial values for mu.d, sigma_0, xi, eta using IDF.init: (thetainit=0) init.vals <- gev.d.init(xdat,ds,ifelse(is.null(thetainit),0,thetainit[1])) # generate covariates matrices: if (is.null(mul)) { mumat <- as.matrix(rep(1, length(xdat))) if (is.null(muinit)) muinit <- init.vals$mu }else { z$trans <- TRUE mumat <- cbind(rep(1, length(xdat)), ydat[, mul]) if (is.null(muinit)) muinit <- c(init.vals$mu, rep(0, length(mul))) } if (is.null(sigl)) { sigmat <- as.matrix(rep(1, length(xdat))) if (is.null(siginit)) siginit <- init.vals$sigma }else { z$trans <- TRUE sigmat <- cbind(rep(1, length(xdat)), ydat[, sigl]) if (is.null(siginit)) siginit <- c(init.vals$sigma, rep(0, length(sigl))) } if (is.null(shl)) { shmat <- as.matrix(rep(1, length(xdat))) if (is.null(shinit)) shinit <- init.vals$xi }else { z$trans <- TRUE shmat <- cbind(rep(1, length(xdat)), ydat[, shl]) if (is.null(shinit)) shinit <- c(init.vals$xi, rep(0, length(shl))) } if (is.null(thetal)) { thmat <- as.matrix(rep(1, length(xdat))) if (is.null(thetainit)) thetainit <- 0 }else { z$trans <- TRUE thmat <- cbind(rep(1, length(xdat)), ydat[, thetal]) if (is.null(thetainit)) thetainit <- c(0, rep(0, length(thetal))) } if (is.null(etal)) { etmat <- as.matrix(rep(1, length(xdat))) if (is.null(etainit)) etainit <- init.vals$eta }else { z$trans <- TRUE etmat <- cbind(rep(1, length(xdat)), ydat[, etal]) if (is.null(etainit)) etainit <- c(init.vals$eta, rep(0, length(etal))) } z$model <- list(mul, sigl, shl, thetal, etal) z$link <- deparse(substitute(c(mulink, siglink, shlink, thetalink, etalink))) init <- c(muinit, siginit, shinit, thetainit, etainit) # function to calculate neg log-likelihood: gev.lik <- function(a) { # computes neg log lik of d-gev model mu <- mulink(mumat %*% (a[1:npmu])) sigma <- siglink(sigmat %*% (a[seq(npmu + 1, length = npsc)])) xi <- shlink(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)])) theta <- thetalink(thmat %*% (a[seq(npmu + npsc + npsh + 1, length = npth)])) eta <- etalink(etmat %*% (a[seq(npmu + npsc + npsh + npth + 1, length = npet)])) ds.t <- ds+theta sigma.d <- sigma/(ds.t^eta) y <- xdat/sigma.d - mu y <- 1 + xi * y if(any(eta <= 0) ||any(theta <= -0.01) || any(sigma.d <= 0) || any(y <= 0)) return(10^6) sum(log(sigma.d)) + sum(y^(-1/xi)) + sum(log(y) * (1/xi + 1)) } ##################################################################################### # derivations of nll after d-gev-parameters (for boosting): # get parameters from covariates and a (vector containing predictors) # mu <- mulink(mumat %*% (a[1:npmu])) # sigma <- siglink(sigmat %*% (a[seq(npmu + 1, length = npsc)])) # xi <- shlink(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)])) # theta <- thetalink(thmat %*% (a[seq(npmu + npsc + npsh + 1, length = npth)])) # eta <- etalink(etmat %*% (a[seq(npmu + npsc + npsh + npth + 1, length = npet)])) # xd <- xdat*(ds+theta)^eta # y <- 1 + xi * (xd/sigma - mu) # # nll <- log(sigma/(ds+theta)^eta) + y^(-1/xi) + log(y) * (1/xi + 1) # dnll.mu <- -xi/y # dnll.sigma <- 1/(sigma+xi*xd/(1-mu*xi)) # dnll.xi <- 1/(xi+sigma/(xd-mu*sigma)) # dnll.theta <- - eta*sigma*(mu*xi-1)/(ds+theta)/(-xi*xd+mu*xi*sigma-sigma) # dnll.eta <- -sigma*(mu*xi-1)*log(ds+theta)/(-xi*xd+mu*xi*sigma-sigma) ##################################################################################### # finding minimum of log-likelihood: x <- optim(init, gev.lik, hessian = TRUE, method = method, control = list(maxit = maxit, ...)) # saving output parameters: z$conv <- x$convergence mut <- mulink(mumat %*% (x$par[1:npmu])) sc0 <- siglink(sigmat %*% (x$par[seq(npmu + 1, length = npsc)])) xi <- shlink(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)])) theta <- thetalink(thmat %*% (x$par[seq(npmu + npsc + npsh + 1, length = npth)])) eta <- etalink(etmat %*% (x$par[seq(npmu + npsc + npsh + npth + 1, length = npet)])) z$nllh <- x$value # normalize data to standart gumbel: sc.d <- sc0/((ds+theta)^eta) z$data <- - log(as.vector((1 + xi * (xdat/sc.d-mut))^(-1/xi))) z$mle <- x$par z$cov <- solve(x$hessian) # invert hessian to get estimation on var-covar-matrix z$se <- sqrt(diag(z$cov)) # sqrt(digonal entries) = standart error of mle's z$vals <- cbind(mut, sc0, xi, theta, eta) colnames(z$vals) <- c('mut','sigma0','xi','theta','eta') z$ds <- ds if(show) { if(z$trans) # for nonstationary fit print(z[c(2, 3, 4)]) # print model, link, conv else print(z[4]) # for stationary fit print only conv if(!z$conv) # if fit converged print(z[c(5, 7, 9)]) # print nll, mle, se } class( z) <- "gev.d.fit" invisible(z) } #### gev.d.init #### # function to get initial values for gev.d.fit: # obtain initial values # by fitting every duration seperately # possible ways to improve: # take given initial values into accout, if there are any # xi -> mean vs. median ... how do we improve that? # mu_tilde -> is not very good for small sample sizes yet # improved inital value for eta, by fitting both mu~d and sigma~d in log-log scale #' @title get initial values for gev.d.fit #' @description obtain initial values by fitting every duration seperately #' @param xdat vector of maxima for differnt durations #' @param ds vector of durations belonging to maxima in xdat #' @param thetainit initial parameter for theta #' @return list of initail values for mu_tilde, sigma_0, xi, eta #' @importFrom stats lm #' @importFrom ismev gev.fit #' @keywords internal gev.d.init <- function(xdat,ds,thetainit){ durs <- unique(ds) mles <- matrix(NA, nrow=length(durs), ncol= 3) for(i in 1:length(durs)){ mles[i,] <- gev.fit(xdat[ds==durs[i]],show = FALSE)$mle } # get values for sig0 and eta (also mu_0) from linear model in log-log scale lmsig <- lm(log(mles[,2])~log(durs+thetainit)) lmmu <- lm(log(mles[,1])~log(durs+thetainit)) # sig0 <- exp Intercept siginit <- exp(lmsig$coefficients[[1]]) # eta <- mean of negativ slopes etainit <- mean(c(-lmsig$coefficients[[2]],-lmmu$coefficients[[2]])) # mean of mu_d/sig_d # could try: # mu0/sig0 is also an estimate but needs to be weighted in mean muinit <- mean(c(mles[,1]/mles[,2])) #exp(lmmu$coefficients[[1]])/exp(lmsig$coefficients[[1]]) # mean of shape parameters shinit <- mean(mles[,3]) return(list(mu=muinit,sigma=siginit,xi=shinit,eta=etainit)) } #### gev.d.diag #### #' Diagnostic Plots for d-gev Models #' #' @description Produces diagnostic plots for d-gev models using #' the output of the function \code{\link{gev.d.fit}}. Values for different durations can be plotted in #' different colors of with different symbols. #' @param fit object returned by \code{\link{gev.d.fit}} #' @param subset an optional vector specifying a subset of observations to be used in the plot #' @param cols optional either one value or vector of same length as \code{unique(durations)} to #' specify the colors of plotting points. #' The default uses the \code{rainbow} function. #' @param pch optional either one value or vector of same length as \code{unique(durations)} containing #' integers or symbols to specify the plotting points. #' @param which string containing 'both', 'pp' or 'qq' to specify, which plots should be produced. #' @param mfrow vector specifying layout of plots. If both plots should be produced seperately, #' set to \code{c(1,1)}. #' @param legend logical indicating if legends should be plotted #' #' @export #' @importFrom graphics plot abline par title #' @importFrom grDevices rainbow #' #' @examples #' data('example',package ='IDF') #' #' fit <- gev.d.fit(xdat=example$dat,ds = example$d,ydat=as.matrix(example[,c('cov1','cov2')]) #' ,mul=c(1,2),sigl=1) #' # diagnostic plots for complete data #' gev.d.diag(fit) #' # diagnostic plots for subset of data (e.g. one station) #' gev.d.diag(fit,subset = example$cov1==1) gev.d.diag <- function(fit,subset=NULL,cols=NULL,pch=NULL,which='both',mfrow=c(1,2),legend=TRUE){ # check parameter: if(!is.element(which,c('both','pp','qq'))) stop("Parameter `which`= ",which, " but only 'both','pp' or 'qq' are allowed.") # subset data df <- data.frame(data=fit$data,ds=fit$ds) if(!is.null(subset))df <- df[subset,] # rescale durations to assing colors df$cval <- sapply(df$ds,function(d){which(sort(unique(df$ds))==d)}) dcols <- unique(df[,c('ds',"cval")]) # sort data df <- df[order(df$data),] # plotting position n <- length(df$data) px <- (1:n)/(n + 1) # create plots: if(which=='both') par(mfrow=mfrow) # 2 subplots # colors and symbols if(is.null(cols))cols <- rainbow(length(unique(df$ds)))[df$cval] if(is.null(pch))pch <- df$cval if(which=='both'|which=='pp'){ # pp plot(px, exp( - exp( - df$data)), xlab = "Empirical", ylab = "Model",col=cols,pch=pch) abline(0, 1, col = 4) title("Residual Probability Plot") if(legend){legend('bottomright',legend = dcols$ds,pch=pch, col = cols,title = 'Durations',ncol = 2)} } if(which=='both'|which=='qq'){ # qq plot( - log( - log(px)), df$data, ylab = "Empirical", xlab = "Model",col=cols,pch=pch) abline(0, 1, col = 4) title("Residual Quantile Plot") if(legend){legend('bottomright',legend = dcols$ds,pch=pch, col = cols,title = 'Durations',ncol = 2)} } if(which=='both') par(mfrow=c(1,1)) # reset par } #### gev.d.params #### #' Calculate gev(d) parameters from \code{gev.d.fit} output #' #' @description function to calculate mut, sigma0, xi, theta, eta #' (modified location, scale, shape, duration offset, duration exponent) #' from results of \code{\link{gev.d.fit}} with covariates #' @param fit fit object returned by \code{gev.d.fit} #' @param cov.list list of covariates. Either single values - to calculate #' parameters at a single station or compatible vectors or matrices - to calculate #' parameters on a grid #' @seealso \code{\link{dgev.d}} #' @return list containing mu_tilde, sigma0, xi, theta, eta #' @export #' #' @examples #' data('example',package = 'IDF') #' fit <- gev.d.fit(example$dat,example$d,ydat = as.matrix(example[,c("cov1","cov2")]) #' ,mul = c(1,2),sigl = 1) #' gev.d.params(fit = fit,cov.list = list(0.9,0.5)) gev.d.params <- function(fit,cov.list){ mut <- fit$mle[1] if(is.null(fit$model[[1]])){i <- 1}else{ for(i in 1:length(fit$model[[1]])){ cov <- fit$model[[1]][i] mut <- mut + fit$mle[1+i]*cov.list[[cov]] } i <- i+1 } sig0 <- fit$mle[i+1] if(is.null(fit$model[[2]])){j <- 1}else{ for(j in 1:length(fit$model[[2]])){ cov <- fit$model[[2]][j] sig0 <- sig0 + fit$mle[1+i+j]*cov.list[[cov]] } j <- j+1 } xi <- fit$mle[i+j+1] if(is.null(fit$model[[3]])){k <- 1}else{ for(k in 1:length(fit$model[[3]])){ cov <- fit$model[[3]][k] xi <- xi + fit$mle[1+i+j+k]*cov.list[[cov]] } k <- k+1 } theta <- fit$mle[i+j+k+1] if(is.null(fit$model[[4]])){l <- 1}else{ for(l in 1:length(fit$model[[4]])){ cov <- fit$model[[4]][l] theta <- theta + fit$mle[i+j+k+l]*cov.list[[cov]] } l <- l+1 } eta <- fit$mle[i+j+k+l+1] if(!is.null(fit$model[[5]])){ for(m in 1:length(fit$model[[5]])){ cov <- fit$model[[5]][m] eta <- eta + fit$mle[1+i+j+k+l+m]*cov.list[[cov]] } } return(list(mut=mut,sig0=sig0,xi=xi,theta=theta,eta=eta)) } #### gev.d.rl #### #' Calculate (spatial) Returnlevel #' #' @description calculate (spatial) Returnlevel for chosen duration and return period #' from \code{\link{gev.d.fit}} parameters #' @param params list of parameters mu_tilde, sigma0, xi, theta, eta (single values and/or compatible matrices) #' as obtained from \code{\link{gev.d.fit}} or \code{\link{gev.d.params}} #' @param p.d numeric vector of length 2 containing one value the for exeedance probability p = 1-1/RP #' and one value for the duration at which to calculate the return level #' #' @return one return level value or matrix with return levels (depending on input to params) #' unit: e.g. mm/(given duration) #' @export #' #' @examples #' data('example',package = 'IDF') #' fit <- gev.d.fit(example$dat,example$d,ydat = as.matrix(example[,c("cov1","cov2")]) #' ,mul = c(1,2),sigl = 1) #' ### calculate rl on grid: #' # create matrixes for the covariates values #' cov1 <- matrix(seq(-1,1,0.1),ncol=11,nrow=21) #' cov2 <- matrix(seq(0,1,0.1),ncol=11,nrow=21,byrow = TRUE) #' # calculate parameters of d-gev on grid, based on output of gev.d.fit #' par <- gev.d.params(fit = fit,cov.list = list(cov1,cov2)) #' # calculate 100 year (p=0.99) rl for a duration of d=24 hours #' rl <- gev.d.rl(params = par,p.d = c(0.99,24)) #' # plot of 'spatial rl': #' image(x=seq(-1,1,0.1),y=seq(0,1,0.1),z=rl,xlab = 'cov1', ylab = 'cov2') gev.d.rl <- function(params,p.d){ sigma.d <- params[[2]]/((p.d[2]+params[[4]])^params[[5]]) mu <- params[[1]]*sigma.d yt <- -1/log(p.d[1]) rl <- mu+sigma.d/params[[3]]*(yt^params[[3]]-1) return(rl*p.d[2]) } #### example data #### #' Sampled data for duration dependent GEV #' #' A dataset containing: #' \itemize{ #' \item \code{$xdat}: 'annual' maxima values #' \item \code{$ds}: corresponding durations #' \item \code{$cov1}, \code{$cov2}: covariates} #' GEV parameters: #' \itemize{ #' \item mu = 4 + 0.2*cov1 +0.5*cov2 #' \item sigma = 2+0.5*cov1 #' \item xi = 0.5 #' \item theta = 0 #' \item eta = 0.5} #' #' @docType data #' @keywords datasets #' @name example #' @usage data('example',package ='IDF') NULL