Commit 2b117368 authored by Jana Ulrich's avatar Jana Ulrich
Browse files

gev.d.fit is working+ manual for gev.d.fit

parent 32012b39
......@@ -290,116 +290,178 @@ IDF.nll <- function(mu=0,sigma=1,xi=0,theta=0,eta=1,x,d,DEBUG=F) {
}
## end of function IDF.nll
### copied gev.fit from ismev to be adapted to IDF.nll
gev.d.fit <- function (xdat, ds, ydat = NULL,
mul = NULL, sigl = NULL, shl = NULL, thetal = NULL, etal = NULL,
mulink = identity, siglink = identity, shlink = identity, thetalink = identity, etalink = identity,
muinit = NULL, siginit = NULL, shinit = NULL, thetainit = NULL, etainit = NULL,
show = TRUE, method = "Nelder-Mead", maxit = 10000, ...) {
##################################################################################
### copied gev.fit from ismev to be adapted to gev.d.fit
##################################################################################
#' @title Maximum-likelihood Fitting of the duration dependent GEV Distribution
#' @description Maximum-likelihood fitting for the duration dependent generalized extreme
#' value distribution, following Koutsoyiannis et al. (1988), including generalized linear
#' modelling of each parameter based on \code{\link{gev.fit}}.
#' @param xdat A vector containing maxima for different durations. This can be obtained from \code{\link{IDF.agg}}.
#' @param ds A vector of aggregation levels corresponding to the maxima in xdat.
#' @param n.y integer value specifying the number of years of data. Needed for estimation of initial
#' values with \code{\link{IDF.init}}.
#' @param ydat A matrix of covariates for generalized linear modelling of the parameters (or NULL (the default)
#' for stationary fitting). The number of rows should be the same as the length of xdat.
#' @param mul,sigl,shl,thetal,etal Numeric vectors of integers, giving the columns of ydat that contain
#' covariates for generalized linear modelling of the parameters (or NULL (the default).
#' if the corresponding parameter is stationary).
#' Parameters are: modified location, scale_0, shape, duration offset, duration exponent repectively.
#' @param mulink,siglink,shlink,thetalink,etalink Inverse link functions for generalized linear
#' modelling of the parameters.
#' @param muinit,siginit,shinit,thetainit,etainit initial values as numeric of length equal to total number of parameters
#' used to model the parameters. Default (NULL).
#' @param show Logical; if TRUE (the default), print details of the fit.
#' @param method The optimization method used in \code{\link{optim}}.
#' @param maxit The maximum number of iterations.
#' @param ... Other control parameters for the optimization.
#' @return A list containing the following components.
#' A subset of these components are printed after the fit.
#' If show is TRUE, then assuming that successful convergence is indicated, the components nllh, mle and se
#' are always printed.
#' \item{nllh}{single numeric giving the negative log-likelihood value.}
#' \item{mle}{numeric vector giving the MLE's for the modified location, scale_0, shape,
#' duration offset and duration exponent, resp.}
#' \item{se}{numeric vector giving the standard errors for the MLE's (in the same order).}
#' \item{trans}{An logical indicator for a non-stationary fit.}
#' \item{model}{A list with components mul, sigl, shl, thetal and etal.}
#' \item{link}{A character vector giving inverse link functions.}
#' \item{conv}{The convergence code, taken from the list returned by \code{\link{optim}}.
#' A zero indicates successful convergence.}
#' \item{data}{data is standardized to standart Gumbel.}
#' \item{cov}{The covariance matrix.}
#' @seealso \code{\link{IDF.agg}}, \code{\link{gev.fit}}, \code{\link{optim}}
#' @author Jana Ulrich \email{jana.ulrich@@met.fu-berlin.de}
#' @export
'gev.d.fit'<-
function(xdat, ds, n.y, ydat = NULL, mul = NULL, sigl = NULL, shl = NULL, thetal = NULL, etal = NULL,
mulink = identity, siglink = identity, shlink = identity, thetalink = identity, etalink = identity,
muinit = NULL, siginit = NULL, shinit = NULL, thetainit = NULL, etainit = NULL,
show = TRUE, method = "Nelder-Mead", maxit = 10000, ...)
{
#
# obtains mles etc for gev(d) distn
#
z <- list()
# number of parameters (betas) to estimate for each parameter:
npmu <- length(mul) + 1
npsc <- length(sigl) + 1
npsh <- length(shl) + 1
npth <- length(thetal) + 1
npet <- length(etal) + 1
z$trans <- FALSE
### guess initial values, this is done by Christoph's routine
init.vals <- IDF.init(xdat,ds)
z$trans <- FALSE # indicates if fit is non-stationary
# calculate initial values for mu.d, sigma_0, xi, eta using IDF.init: (thetainit=0)
init.vals <- IDF.init(xdat,ds,n.y)
# generate covariates matrices:
if (is.null(mul)) {
mumat <- as.matrix(rep(1, length(xdat)))
if (is.null(muinit))
muinit <- init.vals$mu
mumat <- as.matrix(rep(1, length(xdat)))
if (is.null(muinit))
muinit <- init.vals$mu
}else {
z$trans <- TRUE
mumat <- cbind(rep(1, length(xdat)), ydat[, mul])
if (is.null(muinit))
muinit <- c(init.vals$mu, rep(0, length(mul)))
z$trans <- TRUE
mumat <- cbind(rep(1, length(xdat)), ydat[, mul])
if (is.null(muinit))
muinit <- c(init.vals$mu, rep(0, length(mul)))
}
if (is.null(sigl)) {
sigmat <- as.matrix(rep(1, length(xdat)))
if (is.null(siginit))
siginit <- init.vals$sigma
sigmat <- as.matrix(rep(1, length(xdat)))
if (is.null(siginit))
siginit <- init.vals$sigma
}else {
z$trans <- TRUE
sigmat <- cbind(rep(1, length(xdat)), ydat[, sigl])
if (is.null(siginit))
siginit <- c(init.vals$sigma, rep(0, length(sigl)))
z$trans <- TRUE
sigmat <- cbind(rep(1, length(xdat)), ydat[, sigl])
if (is.null(siginit))
siginit <- c(init.vals$sigma, rep(0, length(sigl)))
}
if (is.null(shl)) {
shmat <- as.matrix(rep(1, length(xdat)))
if (is.null(shinit))
shinit <- init.vals$xi #0.1
shmat <- as.matrix(rep(1, length(xdat)))
if (is.null(shinit))
shinit <- init.vals$xi
}else {
z$trans <- TRUE
shmat <- cbind(rep(1, length(xdat)), ydat[, shl])
if (is.null(shinit))
shinit <- c(init.vals$xi, rep(0, length(shl)))
z$trans <- TRUE
shmat <- cbind(rep(1, length(xdat)), ydat[, shl])
if (is.null(shinit))
shinit <- c(init.vals$xi, rep(0, length(shl)))
}
if (is.null(thetal)) {
thmat <- as.matrix(rep(1, length(xdat)))
if (is.null(thetainit))
thetainit <- 0
thmat <- as.matrix(rep(1, length(xdat)))
if (is.null(thetainit))
thetainit <- 0
}else {
z$trans <- TRUE
thmat <- cbind(rep(1, length(xdat)), ydat[, thetal])
if (is.null(thetainit))
thetainit <- c(0, rep(0, length(thetal)))
z$trans <- TRUE
thmat <- cbind(rep(1, length(xdat)), ydat[, thetal])
if (is.null(thetainit))
thetainit <- c(0, rep(0, length(thetal)))
}
if (is.null(etal)) {
etmat <- as.matrix(rep(1, length(xdat)))
if (is.null(etainit))
etainit <- init.vals$eta
etmat <- as.matrix(rep(1, length(xdat)))
if (is.null(etainit))
etainit <- init.vals$eta
}else {
z$trans <- TRUE
etmat <- cbind(rep(1, length(xdat)), ydat[, etal])
if (is.null(etainit))
etainit <- c(init.vals$eta, rep(0, length(thetal)))
z$trans <- TRUE
etmat <- cbind(rep(1, length(xdat)), ydat[, etal])
if (is.null(etainit))
etainit <- c(init.vals$eta, rep(0, length(thetal)))
}
z$model <- list(mul, sigl, shl, thetal, etal)
z$link <- deparse(substitute(c(mulink, siglink, shlink, thetalink, etalink)))
init <- c(muinit, siginit, shinit, thetainit, etainit)
### define the likelihood function for the gev.d
gev.d.lik <- function(a) {
mu <- mulink(mumat %*% (a[1:npmu]))
sigma <- siglink(sigmat %*% (a[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)]))
theta <- shlink(thmat %*% (a[seq(npmu + npsc + npsh + 1, length = npth)]))
eta <- shlink(etmat %*% (a[seq(npmu + npsc + npsh + npth + 1, length = npet)]))
return(IDF.nll(mu,sigma,xi,theta,eta,xdat,ds))
# function to calculate neg log-likelihood:
gev.lik <- function(a) {
# computes neg log lik of gev(d) model
mu <- mulink(mumat %*% (a[1:npmu]))
sigma <- siglink(sigmat %*% (a[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)]))
theta <- thetalink(thmat %*% (a[seq(npmu + npsc + npsh + 1, length = npth)]))
eta <- etalink(etmat %*% (a[seq(npmu + npsc + npsh + npth + 1, length = npet)]))
ds.t <- ds+theta
sigma.d <- sigma/(ds.t^eta)
y <- xdat/sigma.d - mu
y <- 1 + xi * y
if(any(eta <= 0) ||any(ds.t <= 0) || any(sigma.d <= 0) || any(y <= 0)) return(10^6)
sum(log(sigma.d)) + sum(y^(-1/xi)) + sum(log(y) * (1/xi + 1))
}
x <- optim(init, gev.d.lik, hessian = TRUE, method = method,
control = list(maxit = maxit, ...))
# finding minimum of log-likelihood:
x <- optim(init, gev.lik, hessian = TRUE, method = method,
control = list(maxit = maxit, ...))
# saving output parameters:
z$conv <- x$convergence
mu <- mulink(mumat %*% (x$par[1:npmu]))
sc <- siglink(sigmat %*% (x$par[seq(npmu + 1, length = npsc)]))
mut <- mulink(mumat %*% (x$par[1:npmu]))
sc0 <- siglink(sigmat %*% (x$par[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)]))
theta <- thlink(thmat %*% (x$par[seq(npmu + npsc + npsh + 1, length = npth)]))
eta <- etlink(etmat %*% (x$par[seq(npmu + npsc + npsh + npth + 1, length = npet)]))
theta <- thetalink(thmat %*% (x$par[seq(npmu + npsc + npsh + 1, length = npth)]))
eta <- etalink(etmat %*% (x$par[seq(npmu + npsc + npsh + npth + 1, length = npet)]))
z$nllh <- x$value
z$data <- xdat
if (z$trans) {
zdata <- NULL
### Here I need to adjust the formular for d.gev
# z$data <- -log(as.vector((1 + (xi * (xdat - mu))/sc)^(-1/xi)))
}
# normalize data to standart gumbel:
sc.d <- sc0/((ds+theta)^eta)
z$data <- - log(as.vector((1 + xi * (xdat/sc.d-mut))^(-1/xi)))
z$mle <- x$par
z$cov <- solve(x$hessian)
z$se <- sqrt(diag(z$cov))
z$vals <- cbind(mu, sc, xi, theta, eta)
if (FALSE) { ## this is if(show) but I don't understand the lower part
if (z$trans)
print(z[c(2, 3, 4)])
else print(z[4])
if (!z$conv)
print(z[c(5, 7, 9)])
z$cov <- solve(x$hessian) # invert hessian to get estimation on var-covar-matrix
z$se <- sqrt(diag(z$cov)) # sqrt(digonal entries) = standart error of mle's
z$vals <- cbind(mut, sc0, xi, theta, eta)
z$ds <- ds
if(show) {
if(z$trans) # for nonstationary fit
print(z[c(2, 3, 4)]) # print model, link, conv
else print(z[4]) # for stationary fit print only conv
if(!z$conv) # if fit converged
print(z[c(5, 7, 9)]) # print nll, mle, se
}
class(z) <- "gev.d.fit"
class( z) <- "gev.d.fit"
invisible(z)
}
}
#########################################################################################
#' @title Fitting function to optimize IDF model parameters
#' @description The function \code{fit.fun} fits IDF model parameters \code{mu,sigma,xi,theta,eta} to a set of given observations \code{obs},
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/IDF.R
\name{gev.d.fit}
\alias{gev.d.fit}
\title{Maximum-likelihood Fitting of the duration dependent GEV Distribution}
\usage{
gev.d.fit(xdat, ds, n.y, ydat = NULL, mul = NULL, sigl = NULL,
shl = NULL, thetal = NULL, etal = NULL, mulink = identity,
siglink = identity, shlink = identity, thetalink = identity,
etalink = identity, muinit = NULL, siginit = NULL, shinit = NULL,
thetainit = NULL, etainit = NULL, show = TRUE,
method = "Nelder-Mead", maxit = 10000, ...)
}
\arguments{
\item{xdat}{A vector containing maxima for different durations. This can be obtained from \code{\link{IDF.agg}}.}
\item{ds}{A vector of aggregation levels corresponding to the maxima in xdat.}
\item{n.y}{integer value specifying the number of years of data. Needed for estimation of initial
values with \code{\link{IDF.init}}.}
\item{ydat}{A matrix of covariates for generalized linear modelling of the parameters (or NULL (the default)
for stationary fitting). The number of rows should be the same as the length of xdat.}
\item{mul, sigl, shl, thetal, etal}{Numeric vectors of integers, giving the columns of ydat that contain
covariates for generalized linear modelling of the parameters (or NULL (the default).
if the corresponding parameter is stationary).
Parameters are: modified location, scale_0, shape, duration offset, duration exponent repectively.}
\item{mulink, siglink, shlink, thetalink, etalink}{Inverse link functions for generalized linear
modelling of the parameters.}
\item{muinit, siginit, shinit, thetainit, etainit}{initial values as numeric of length equal to total number of parameters
used to model the parameters. Default (NULL).}
\item{show}{Logical; if TRUE (the default), print details of the fit.}
\item{method}{The optimization method used in \code{\link{optim}}.}
\item{maxit}{The maximum number of iterations.}
\item{...}{Other control parameters for the optimization.}
}
\value{
A list containing the following components.
A subset of these components are printed after the fit.
If show is TRUE, then assuming that successful convergence is indicated, the components nllh, mle and se
are always printed.
\item{nllh}{single numeric giving the negative log-likelihood value.}
\item{mle}{numeric vector giving the MLE's for the modified location, scale_0, shape,
duration offset and duration exponent, resp.}
\item{se}{numeric vector giving the standard errors for the MLE's (in the same order).}
\item{trans}{An logical indicator for a non-stationary fit.}
\item{model}{A list with components mul, sigl, shl, thetal and etal.}
\item{link}{A character vector giving inverse link functions.}
\item{conv}{The convergence code, taken from the list returned by \code{\link{optim}}.
A zero indicates successful convergence.}
\item{data}{data is standardized to standart Gumbel.}
\item{cov}{The covariance matrix.}
}
\description{
Maximum-likelihood fitting for the duration dependent generalized extreme
value distribution, following Koutsoyiannis et al. (1988), including generalized linear
modelling of each parameter based on \code{\link{gev.fit}}.
}
\seealso{
\code{\link{IDF.agg}}, \code{\link{gev.fit}}, \code{\link{optim}}
}
\author{
Jana Ulrich \email{jana.ulrich@met.fu-berlin.de}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment