Commit 32012b39 authored by Henning Rust's avatar Henning Rust
Browse files

first commit to branch covariates, IDF.nll not yet working

parent d7e36a8e
##################################################
##############################################################
## IDF package
## Authors: Sarah Joedicke, Carola Detring, Christoph Ritschel
## Update: 15.09.2017
###################################################
## Update: 15.09.2017
## revise for integration of covariates Sep. 2018
##############################################################
###############################################
############# Read Data function ##############
###############################################
##############################################################
## Read data
##############################################################
#' @title Reading precipitation data
#' @description The function \code{IDF.read} reads a file in table format and creates a \code{data.frame} from it
......@@ -108,10 +109,11 @@ IDF.read <- function(file,type){
return(Liste)
}
# End of function IDF.read
####################################################################################################################
## End of function IDF.read
##### Aggregation ###
##############################################################
## Accumulation
##############################################################
#' \code{TS.acc} accumulates a given time series \code{x} at a given accumulation level \code{acc.val}. Minimum value
#' for acc.val is 2 [unit time]. Option for using moving sum is given.
......@@ -154,13 +156,12 @@ TS.acc <- function(x,acc.val=2,moving.sum="FALSE") {
## return accumulated time series
return(x.acc)
} # End of function TS.acc
#####################################################################################
}
## End of function TS.acc
#######################
## Fitting Functions ##
#######################
##############################################################
## Define duration dependent GEV, d/p/q/rgev.d
##############################################################
#'@title Density function of modified generalized extreme value distribution
#'@description The function \code{dgev.d} is a modified version of the function \code{dgev} for different durations \code{d} developed by Koutsoyiannis et al. (1998).
......@@ -232,6 +233,10 @@ rgev.d <- function(n,mu=0,sigma=1,xi=0,theta=0,eta=1,d=1) {
}
##############################################################
## Define negative log-likelihood
##############################################################
#######################################################################
#' @title Negativ log-likelihood of modified GEV
#' @description The function \code{IDF.nll} calculates the negative log-likelihood for a given set of model parameters
......@@ -250,16 +255,9 @@ rgev.d <- function(n,mu=0,sigma=1,xi=0,theta=0,eta=1,d=1) {
#'@return retruns weightes negative log-likelihood by number of observatons uesd
#' @author Christoph Ritschel \email{christoph.ritschel@@met.fu-berlin.de}
IDF.nll <- function(mu=0,sigma=1,xi=0,theta=0,eta=1,x,d,use.log=F,DEBUG=F) {
IDF.nll <- function(mu=0,sigma=1,xi=0,theta=0,eta=1,x,d,DEBUG=F) {
## mu is the mu~ from Koutsoyiannis
if(use.log){
## ensure that critical parameters are positive
sigma <- exp(sigma)
theta <- exp(theta)
eta <- exp(eta)
}
sigma.d <- sigma/((d+theta)^eta)
if(DEBUG) debug.values <- c(mu,sigma,xi,theta,eta)
......@@ -289,8 +287,119 @@ IDF.nll <- function(mu=0,sigma=1,xi=0,theta=0,eta=1,x,d,use.log=F,DEBUG=F) {
return(nll/length(x))
} # end of function IDF.nll
######################################################################################################
}
## end of function IDF.nll
### copied gev.fit from ismev to be adapted to IDF.nll
gev.d.fit <- function (xdat, ds, ydat = NULL,
mul = NULL, sigl = NULL, shl = NULL, thetal = NULL, etal = NULL,
mulink = identity, siglink = identity, shlink = identity, thetalink = identity, etalink = identity,
muinit = NULL, siginit = NULL, shinit = NULL, thetainit = NULL, etainit = NULL,
show = TRUE, method = "Nelder-Mead", maxit = 10000, ...) {
z <- list()
npmu <- length(mul) + 1
npsc <- length(sigl) + 1
npsh <- length(shl) + 1
npth <- length(thetal) + 1
npet <- length(etal) + 1
z$trans <- FALSE
### guess initial values, this is done by Christoph's routine
init.vals <- IDF.init(xdat,ds)
if (is.null(mul)) {
mumat <- as.matrix(rep(1, length(xdat)))
if (is.null(muinit))
muinit <- init.vals$mu
}else {
z$trans <- TRUE
mumat <- cbind(rep(1, length(xdat)), ydat[, mul])
if (is.null(muinit))
muinit <- c(init.vals$mu, rep(0, length(mul)))
}
if (is.null(sigl)) {
sigmat <- as.matrix(rep(1, length(xdat)))
if (is.null(siginit))
siginit <- init.vals$sigma
}else {
z$trans <- TRUE
sigmat <- cbind(rep(1, length(xdat)), ydat[, sigl])
if (is.null(siginit))
siginit <- c(init.vals$sigma, rep(0, length(sigl)))
}
if (is.null(shl)) {
shmat <- as.matrix(rep(1, length(xdat)))
if (is.null(shinit))
shinit <- init.vals$xi #0.1
}else {
z$trans <- TRUE
shmat <- cbind(rep(1, length(xdat)), ydat[, shl])
if (is.null(shinit))
shinit <- c(init.vals$xi, rep(0, length(shl)))
}
if (is.null(thetal)) {
thmat <- as.matrix(rep(1, length(xdat)))
if (is.null(thetainit))
thetainit <- 0
}else {
z$trans <- TRUE
thmat <- cbind(rep(1, length(xdat)), ydat[, thetal])
if (is.null(thetainit))
thetainit <- c(0, rep(0, length(thetal)))
}
if (is.null(etal)) {
etmat <- as.matrix(rep(1, length(xdat)))
if (is.null(etainit))
etainit <- init.vals$eta
}else {
z$trans <- TRUE
etmat <- cbind(rep(1, length(xdat)), ydat[, etal])
if (is.null(etainit))
etainit <- c(init.vals$eta, rep(0, length(thetal)))
}
z$model <- list(mul, sigl, shl, thetal, etal)
z$link <- deparse(substitute(c(mulink, siglink, shlink, thetalink, etalink)))
init <- c(muinit, siginit, shinit, thetainit, etainit)
### define the likelihood function for the gev.d
gev.d.lik <- function(a) {
mu <- mulink(mumat %*% (a[1:npmu]))
sigma <- siglink(sigmat %*% (a[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)]))
theta <- shlink(thmat %*% (a[seq(npmu + npsc + npsh + 1, length = npth)]))
eta <- shlink(etmat %*% (a[seq(npmu + npsc + npsh + npth + 1, length = npet)]))
return(IDF.nll(mu,sigma,xi,theta,eta,xdat,ds))
}
x <- optim(init, gev.d.lik, hessian = TRUE, method = method,
control = list(maxit = maxit, ...))
z$conv <- x$convergence
mu <- mulink(mumat %*% (x$par[1:npmu]))
sc <- siglink(sigmat %*% (x$par[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)]))
theta <- thlink(thmat %*% (x$par[seq(npmu + npsc + npsh + 1, length = npth)]))
eta <- etlink(etmat %*% (x$par[seq(npmu + npsc + npsh + npth + 1, length = npet)]))
z$nllh <- x$value
z$data <- xdat
if (z$trans) {
zdata <- NULL
### Here I need to adjust the formular for d.gev
# z$data <- -log(as.vector((1 + (xi * (xdat - mu))/sc)^(-1/xi)))
}
z$mle <- x$par
z$cov <- solve(x$hessian)
z$se <- sqrt(diag(z$cov))
z$vals <- cbind(mu, sc, xi, theta, eta)
if (FALSE) { ## this is if(show) but I don't understand the lower part
if (z$trans)
print(z[c(2, 3, 4)])
else print(z[4])
if (!z$conv)
print(z[c(5, 7, 9)])
}
class(z) <- "gev.d.fit"
invisible(z)
}
#' @title Fitting function to optimize IDF model parameters
#' @description The function \code{fit.fun} fits IDF model parameters \code{mu,sigma,xi,theta,eta} to a set of given observations \code{obs},
......@@ -391,7 +500,10 @@ fit.fun <- function(obs,dur,mu=1,sigma=1,xi=0.5,theta=1,eta=1,use.log=F,DEBUG=F,
return(list("min"=fit.min,"par"=fit.par))
} ## end of function fit.fun
}
## end of function fit.fun
##################################################################################
#' @title Data aggregation for IDF parameter estimation
#' @description The function \code{IDF.agg} aggregates a data.frame of observations \code{data} with temporal inforamtion (at least years) and values of precipitation
......@@ -454,76 +566,54 @@ IDF.agg <- function(data,agg.lev=c(2,3,6,12,24,48,72,96),month="all",moving.sum=
return(list(int.vec=int.vec,durs=durs,n.y=n.y))
} #
###############################################################################
#' @title Estimation of initial values for IDF fitting.
#' @description The function \code{IDF.init} estimates inital values for \code{mu,sigma,xi and eta} assuming \code{theta}
#' equals zero. A generalized extreme value distribution is fitted individually for each year and then the inital values
#' for the duration dependent gev fit are estimated from those by applying a linear regression to the scale parameters of each year.
#' @param int.vec a \code{vector} of yearly maxima of intensity sorted by year and aggregatin level
#' @param durs a \code{vector} of durations used to fit the model. Has to have same length and order as \code{int.vec}
#' @param n.y \code{integer} value specifying the number of years of data.
#' @param method \code{character} defining the method to be used in \code{optim}, preferences are: "Nelder-Mead", "BFGS", "L-BFGS-B"e
#' @param xdat a \code{vector} of yearly maxima of intensity sorted by year and aggregatin level
#' @param ds a \code{vector} of durations used to fit the model. Has to have same length and order as \code{int.vec}
#' @return $mu initial estimation of location parameter
#' @return $sigma initial estimation of scale parameter
#' @return $xi inital estimation of shape parameter
#' @return $eta intial estimation of slope parameter for sigma-power law.
#' @examples
#' RR <- rgamma(10*30*24,shape=1)
#' year <- sort(rep(1:(10),30*24))
#' data <- data.frame(RR,year)
#' data.agg <- IDF.agg(data,agg.lev=c(2,6,12,24))
#' pars.init <- IDF.init(data.agg$int.vec,data.agg$durs,data.agg$n.y)
#' @author Christoph Ritschel \email{christoph.ritschel@@met.fu-berlin.de}
#' @author Henning Rust \email{henning.rust@fu-berlin.de}
IDF.init <- function(int.vec,durs,n.y,method="Nelder-Mead") {
IDF.init <- function(xdat,ds) {
## Fit a generalized extreme value distribution to the maximum intensities of each year for a single
## aggregation level and write the estimated parameters in an array for further analyisis.
d.all <- unique(durs)
ints.all <- matrix(int.vec,nrow=n.y) ## sort intensities in a matrix, rows are years, columns are aggregation levels
pars <- array(NA,dim=c(3,length(d.all)))
## In case of NA values the optimization fails, therefore years with NA values need to be removed.
ints.all <- matrix(ints.all[rowSums(!is.na(ints.all)) == length(d.all)],ncol=length(d.all))
if(nrow(ints.all)<3) {
cat("Warning: optimization did not converge and no parameters were estimated. Time Series contains less than 3 years of valid data. \n")
mu=NA
sigma=NA
xi=NA
eta=NA
}else{
## loop over all aggregation levels
for(d in 1:length(d.all)) {
#fit <- fit.fun.emp(obs=ints.all[,d],mu=mu,sigma=sigma,xi=xi,use.log=use.log,
# DEBUG=DEBUG,method=method,upper=upper,lower=lower)
fit <- gev.fit(xdat=ints.all[,d],method=method,show=F)
pars[,d] <- fit$mle
} ## end loop over aggregation levels
## Fit a generalized extreme value distribution to the maximum intensities of each year for a single
## aggregation level and write the estimated parameters in an array for further analyisis.
pars <- simplify2array(tapply(xdat,ds,function(xdat) gev.fit(xdat,show=FALSE)$mle,simplify=TRUE))
#############################################################
### Derive starting parameters for duration-dependent GEV ###
#############################################################
## Fit a linear model to the individual sigmas for individual aggregation times in a log-log environment
## The slope coefficient is an estimate for the slope in the duration-dependent GEV, namely parameter eta
## The intersection is an estimation of the starting parameter sigma
## Parameter mu is estimated as mean value of individual mus divided by indiviudal sigmas
## The initial value for xi will be the mean of all individual xi, since it is approximately independent of duration
formel <- lm(log(pars[2,]) ~ log(d.all))
sigma <- as.numeric(exp(formel$coefficients[1]))
mu <- mean(pars[1,]/pars[2,])
eta <- as.numeric(-formel$coefficients[2])
if(anyNA(pars)){
cat("Warning: optimization did not converge in some cases and no parameters were estimated.\n")
mu <- sigma <- xi <- eta <- NA
}else{
#############################################################
### Derive starting parameters for duration-dependent GEV ###
#############################################################
## Fit a linear model to the individual sigmas for individual aggregation times in a log-log environment
## The slope coefficient is an estimate for the slope in the duration-dependent GEV, namely parameter eta
## The intersection is an estimation of the starting parameter sigma
## Parameter mu is estimated as mean value of individual mus divided by indiviudal sigmas
## The initial value for xi will be the mean of all individual xi, since it is approximately independent of duration
formel <- lm(log(pars[2,]) ~ log(as.numeric(dimnames(pars)[[2]])))
sigma <- as.numeric(exp(formel$coefficients[1]))
mu <- mean(pars[1,]/pars[2,])
eta <- as.numeric(-formel$coefficients[2])
xi <- max(0,mean(pars[3,],na.rm=T))
}
xi <- max(0,mean(pars[3,],na.rm=T))
}
return(list("mu"=mu,"sigma"=sigma,"xi"=xi,"eta"=eta))
} # EOF
}
#################################################################################
......@@ -625,10 +715,9 @@ IDF <- function(data,agg.lev=c(2,3,6,12,24,48,72,96),month="all",moving.sum="FAL
return(list("ints"=int.vec,"durs"=durs,"min"=fit$min,"par"=fit$par))
} ## End of function IDF.fit
######################################################################################################################
}
## End of function IDF.fit
####
#' @title Fitting IDF model parameters to annual maximum intensity time series
#' @description The function \code{IDF.short} fits the IDF model parameters \code{mu,sigma,xi,eta,theta}
......@@ -784,3 +873,22 @@ IDF.plot <- function(pars,probs=c(0.5,0.9,0.99),
###################################################################################
ydat = NULL
mul = NULL
sigl = NULL
shl = NULL
thetal = NULL
etal = NULL
mulink = identity
siglink = identity
shlink = identity
thetalink = identity
etalink = identity
muinit = NULL
siginit = NULL
shinit = NULL
thetainit = NULL
etainit = NULL
show = TRUE
method = "Nelder-Mead"
maxit = 10000
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment