IDF.R 10.4 KB
Newer Older
1
2
# This file contains:
# -IDF-package description
3
# -IDF.agg for preparing the data
4
5
6
# -IDF.plot for plotting of IDF curves at a chosen station


7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#### IDF-package ####

#' Introduction
#' @name IDF-package
#' @docType package
#' @description This package provides functions to estimate IDF relations for given
#' precipitation time series on the basis of a duration-dependent
#' generalized extreme value distribution (d-GEV). 
#' The central function is \code{\link{gev.d.fit}}, which uses the method 
#' of maximum-likelihood estimation for the d-GEV parameters, whereby it is 
#' possible to include generalized linear modeling 
#' for each parameter. This function was implemented on the basis of \code{\link[ismev]{gev.fit}}.
#' For more detailed information on the methods and the application of the package for estimating 
#' IDF curves with spatial covariates, see Ulrich et. al (2020). 
#' @details 
#' * The __d-GEV__ is defined following Koutsoyannis et al. (1998): 
#' \deqn{G(x)= \exp[-( 1+\xi(x/\sigma(d)- \tilde{\mu}) ) ^{-1/\xi}] } 
#' defined on \eqn{ \{ x: 1+\xi(x/\sigma(d)- \tilde{\mu} > 0) \} },
#' with the duration dependent scale parameter \eqn{\sigma(d)=\sigma_0/(d+\theta)^\eta > 0},
#' modified location parameter \eqn{\tilde{\mu}=\mu/\sigma(d)\in R}
#' and shape parameter \eqn{\xi\in R}, \eqn{\xi\neq 0}.
#' The parameters \eqn{\theta \leq 0} and \eqn{0<\eta<1} are duration offset and duration exponent
#' and describe the slope and curvature in the resulting IDF curves, respectively.
#' * A useful introduction to __Maximum Likelihood Estimation__ for fitting for the 
#' generalized extreme value distribution (GEV) is provided by Coles (2001). It should be noted, however, that this method uses
#' the assumption that block maxima (of different durations or stations) are independent of each other. 
#' @references 
#' * Ulrich, J.; Jurado, O.E.; Peter, M.; Scheibel, M.; 
#' Rust, H.W. Estimating IDF Curves Consistently over Durations with Spatial Covariates. Water 2020, 12, 3119,
#' https://doi.org/10.3390/w12113119
#' * Demetris Koutsoyiannis, Demosthenes Kozonis, Alexandros Manetas,
#' A mathematical framework for studying rainfall intensity-duration-frequency relationships,
#' Journal of Hydrology,
#' Volume 206, Issues 1–2,1998,Pages 118-135,ISSN 0022-1694, https://doi.org/10.1016/S0022-1694(98)00097-3
#' * Coles, S.An Introduction to Statistical Modeling of Extreme Values;   Springer:  New York, NY, USA, 2001,
#' https://doi.org/10.1198/tech.2002.s73
#' @md
NULL

46
47
#### IDF.agg ####

48
#' Aggregation and annual maxima for chosen durations
49
50
51
52
53
#' @description Aggregates several time series for chosen durations and finds annual maxima 
#' (either for the whole year or chosen months). Returns data.frame that can be used for
#' the function \code{\link{gev.d.fit}}.
#'
#' @param data list of data.frames containing time series for every station. 
Laura Mack's avatar
typos    
Laura Mack committed
54
#' The data.frame must have the columns 'date' and 'RR' unless other names 
55
56
57
58
#' are specified in the parameter `names`. The column 'date' must contain strings with 
#' standard date format.
#' @param ds numeric vector of aggregation durations. 
#' (Must be multiples of time resolution at all stations.)
59
60
#' @param na.accept numeric in [0,1) giving maximum percentage of missing values 
#' for which block max. should still be calculated.
61
62
#' @param which.stations optional, subset of stations. Either numeric vector or character vector 
#' containing names of elements in data. If not given, all elements in `data` will be used.
63
#' @param which.mon optional, subset of months (as list containing values from 0 to 11) of which to calculate the annual maxima from. 
64
65
66
67
#' @param names optional, character vector of length 2, containing the names of the columns to be used. 
#' @param cl optional, number of cores to be used from \code{\link[pbapply]{pblapply}} for parallelization.
#'
#' @details If data contains stations with different time resolutions that need to be aggregated at
Laura Mack's avatar
typos    
Laura Mack committed
68
69
#' different durations, IDF.agg needs to be run separately for the different groups of stations. 
#' Afterwards the results can be joint together using `rbind`.
70
#'
Jana Ulrich's avatar
Jana Ulrich committed
71
#' @return data.frame containing the annual intensity maxima [mm/h] in `$xdat`, the corresponding duration in `$ds` 
72
73
74
75
76
#' and the station id or name in `$station`.
#' 
#' @seealso \code{\link{pgev.d}}
#' 
#' @export
77
#' @importFrom pbapply pblapply 
78
#' @importFrom RcppRoll roll_sum
Laura Mack's avatar
Laura Mack committed
79
#' @importFrom fastmatch ctapply
80
#'
Christoph Ritschel's avatar
Christoph Ritschel committed
81
#' @examples
82
83
84
85
86
87
88
89
90
91
#' dates <- as.Date("2019-01-01")+0:729
#' x <- rgamma(n = 730, shape = 0.4, rate = 0.5)
#' df <- data.frame(date=dates,RR=x)
#' IDF.agg(list(df),ds=c(24,48))
#' 
#'##        xdat ds station
#'## 1 0.3025660 24       1
#'## 2 0.4112304 24       1
#'## 3 0.1650978 48       1
#'## 4 0.2356849 48       1
92
    IDF.agg <- function(data,ds,na.accept = 0,
93
                        which.stations = NULL,which.mon = list(0:11),names = c('date','RR'),cl = NULL){
94
95
96
97
98
99
100
101
102
103
104
105
106
107
      
      if(!inherits(data, "list"))stop("Argument 'data' must be a list, instead it is a: ", class(data))
      
      # function 2: aggregate station data over durations and find annual maxima:                                
      agg.station <- function(station){
        data.s <- data[[station]]
        if(!is.data.frame(data.s)){stop("Elements of 'data' must be data.frames. But element "
                                        ,station," contains: ", class(data.s))}
        if(sum(is.element(names[1:2],names(data.s)))!=2){stop('Dataframe of station ', station 
                                                              ,' does not contain $', names[1]
                                                              ,' or $', names[2], '.')}
        dtime<-as.numeric((data.s[,names[1]][2]-data.s[,names[1]][1]),units="hours")
        
        if(any(ds %% dtime > 10e-16)){
108
          stop('At least one of the given aggregation durations is not multiple of the time resolution = '
109
110
111
112
               ,dtime,' of station ',station,'.')}
        
        # function 1: aggregate over single durations and find annual maxima:
        agg.ts <- function(ds){
113
          runsum = RcppRoll::roll_sum(data.s[,names[2]],ds/dtime,fill=NA,align='right')
114
115
          #runmean <- rollapplyr(as.zoo(data.s[,names[2]]),ds/dtime,FUN=sum,fill =NA,align='right')
          runsum <- runsum/ds #intensity per hour
116
117
          max.subset <- lapply(1:length(which.mon),function(m.i){
            subset <- is.element(as.POSIXlt(data.s[,names[1]])$mon,which.mon[[m.i]])
118
            max <- fastmatch::ctapply(runsum[subset],(as.POSIXlt(data.s[,names[1]])$year+1900)[subset],
119
120
121
122
123
124
125
126
                          function(vec){
                            n.na <- sum(is.na(vec))
                            max <- ifelse(n.na <= na.accept*length(vec),max(vec,na.rm = TRUE),NA)
                            return(max)})
            df <- data.frame(xdat=max,ds=ds,year=as.numeric(names(max)),mon=deparse(which.mon[[m.i]]),
                             stringsAsFactors = FALSE)
            return(df)})
          df <- do.call(rbind,max.subset)  
127
          return(df) # maxima for single durations
128
129
        }
        # call function 1 in lapply to aggregate over all durations at single station
130
        data.agg <- pbapply::pblapply(ds,agg.ts,cl=cl)  
131
        df <- do.call(rbind,data.agg)
132
133
134
135
136
137
138
139
        return(df) # maxima for all durations at one station
      }
      # which stations should be used?
      if(is.null(which.stations))which.stations <- 1:length(data)
      # call function 2 in lapply to aggregate over all durations at all stations
      station.list <- lapply(which.stations,agg.station)
      
      return(do.call('rbind',station.list))
Christoph Ritschel's avatar
Christoph Ritschel committed
140
    }
Rust Henning's avatar
Rust Henning committed
141

142
143
144
145
#### IDF.plot ####

#' Plotting of IDF curves at a chosen station
#'
146
#' @param durations vector of durations for which to calculate the quantiles. 
147
#' @param fitparams vector containing parameters mut, sigma0, xi, theta, eta
148
#' (modified location, scale offset, shape, duration offset, duration exponent) for chosen station
149
150
#' as obtained from \code{\link{gev.d.fit}}
#' (or \code{\link{gev.d.params}} for model with covariates).
151
152
#' @param probs vector of exeedance probabilities for which to plot IDF curves (p = 1-1/ReturnPeriod)
#' @param cols vector of colors for IDF curves. Should have same length as \code{probs}
153
154
#' @param add logical indicating if plot should be added to existing plot, default is FALSE
#' @param legend logical indicating if legend should be plotted (TRUE, the default)
155
156
#' @param ... additional parameters passed on to the \code{plot} function
#'
157
#' @export
158
159
160
161
#' @importFrom grDevices rgb
#' @importFrom graphics axis box lines plot points 
#' @examples
#' data('example',package = 'IDF')
162
#' # fit d-gev
163
164
#' fit <- gev.d.fit(example$dat,example$d,ydat = as.matrix(example[,c("cov1","cov2")])
#'                  ,mul = c(1,2),sigl = 1)
165
#' # get parameters for cov1 = 1, cov2 = 1
166
#' par <- gev.d.params(fit = fit, ydat = matrix(1,1,2))
167
168
169
170
171
172
173
#' # plot quantiles
#' IDF.plot(durations = seq(0.5,35,0.2),fitparams = par)
#' # add data points
#' points(example[example$cov1==1,]$d,example[example$cov1==1,]$dat)
IDF.plot <- function(durations,fitparams,probs=c(0.5,0.9,0.99),
                     cols=4:2,add=FALSE,
                     legend=TRUE,...){
174
  
175
  # if cols is to short, make longer    
176
177
178
  if(length(cols)!=length(probs))cols <- rep_len(cols,length.out=length(probs))
  
  ## calculate IDF values for given probability and durations
179
180
  qs <- lapply(durations,qgev.d,p=probs,mut=fitparams[1],sigma0=fitparams[2],xi=fitparams[3],
         theta=fitparams[4],eta=fitparams[5])
181
  idf.array <- matrix(unlist(qs),length(probs),length(durations)) # array[probs,durs]
182
183
  if(!add){ #new plot
    ## initialize plot window
184
185
186
187
    # check if limits were passed
    if(is.element('ylim',names(list(...)))){
      ylim <- list(...)[['ylim']]
    }else{ylim <- range(idf.array,na.rm=TRUE)}
Laura Mack's avatar
Laura Mack committed
188
    if(is.element('xlim',names(list(...)))){
189
190
      xlim <- list(...)[['xlim']]
    }else{xlim <- range(durations,na.rm=TRUE)}
Laura Mack's avatar
Laura Mack committed
191
192
193
    if(is.element('main',names(list(...)))){
      main <- list(...)[['main']]
    }else{main <- ''}
194
    
195
    # empty plot
Laura Mack's avatar
Laura Mack committed
196
    plot(NA,xlim=xlim,ylim=ylim,xlab="Duration [h]",ylab="Intensity [mm/h]",log="xy",main=main)
197
198
  }
  
199
  ## plot IDF curves
200
201
202
203
  for(i in 1:length(probs)){
    lines(durations,idf.array[i,],col=cols[i],...)
  }
  
204
  if(legend){## plot legend
205
206
207
208
209
210
211
212
213
214
215
    # check if lwd, lty were passed
    if(is.element('lwd',names(list(...)))){
      lwd <- list(...)[['lwd']]
    }else{lwd <- 1}
    if(is.element('lty',names(list(...)))){
      lty <- list(...)[['lty']]
    }else{lty <- 1}
    
    legend(x="topright",title = 'p-quantile',legend=probs,
           col=cols,lty=lty,lwd=lwd)
  }
216
}