Commit 7b78ee3a authored by Jana Ulrich's avatar Jana Ulrich
Browse files

IDF.agg changed progress bar

parent 1fe6dd19
......@@ -5,6 +5,7 @@ Version: 2.0.0
Date: 2020-11-22
Authors@R: c(person("Jana", "Ulrich", email = "jana.ulrich@fu-berlin.de", role = c("aut", "cre")),
person("Laura","Mack", email= "laura.mack@fu-berlin.de",role="ctb"),
person("Oscar E.","Jurado", email= "jurado@zedat.fu-berlin.de",role="ctb"),
person("Christoph", "Ritschel", role = "aut"),
person("Carola", "Detring", role = "ctb"),
person("Sarah", "Joedicke", role = "ctb"))
......
......@@ -27,6 +27,9 @@ importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,title)
importFrom(ismev,gev.fit)
importFrom(parallel,makeCluster)
importFrom(parallel,parLapply)
importFrom(parallel,stopCluster)
importFrom(pbapply,pblapply)
importFrom(stats,lm)
importFrom(stats,make.link)
......
......@@ -91,7 +91,7 @@ NULL
#' The data.frame must have the columns 'date' and 'RR' unless other names
#' are specified in the parameter `names`. The column 'date' must contain strings with
#' standard date format.
#' @param ds numeric vector of aggregation durations.
#' @param ds numeric vector of aggregation durations in hours.
#' (Must be multiples of time resolution at all stations.)
#' @param na.accept numeric in [0,1) giving maximum percentage of missing values
#' for which block max. should still be calculated.
......@@ -99,7 +99,7 @@ NULL
#' containing names of elements in data. If not given, all elements in `data` will be used.
#' @param which.mon optional, subset of months (as list containing values from 0 to 11) of which to calculate the annual maxima from.
#' @param names optional, character vector of length 2, containing the names of the columns to be used.
#' @param cl optional, number of cores to be used from \code{\link[pbapply]{pblapply}} for parallelization.
#' @param cl optional, number of cores to be used from \code{\link[parallel]{parLapply}} for parallel computing.
#'
#' @details If data contains stations with different time resolutions that need to be aggregated at
#' different durations, IDF.agg needs to be run separately for the different groups of stations.
......@@ -112,6 +112,7 @@ NULL
#' @seealso \code{\link{pgev.d}}
#'
#' @export
#' @importFrom parallel parLapply makeCluster stopCluster
#' @importFrom pbapply pblapply
#' @importFrom RcppRoll roll_sum
#' @importFrom fastmatch ctapply
......@@ -137,7 +138,7 @@ NULL
#' IDF.agg(list('Sample'=df),ds=c(24,48),na.accept = 0.01,which.mon = list(5:7))
#'
IDF.agg <- function(data,ds,na.accept = 0,
which.stations = NULL,which.mon = list(0:11),names = c('date','RR'),cl = NULL){
which.stations = NULL,which.mon = list(0:11),names = c('date','RR'),cl = 1){
if(!inherits(data, "list"))stop("Argument 'data' must be a list, instead it is a: ", class(data))
......@@ -151,13 +152,13 @@ NULL
,' or $', names[2], '.')}
dtime<-as.numeric((data.s[,names[1]][2]-data.s[,names[1]][1]),units="hours")
if(any(ds %% dtime > 10e-16)){
if(any((ds/dtime)%%1 > 10e-8)){
stop('At least one of the given aggregation durations is not multiple of the time resolution = '
,dtime,' of station ',station,'.')}
,dtime,'hours at station ',station,'.')}
# function 1: aggregate over single durations and find annual maxima:
agg.ts <- function(ds){
runsum = RcppRoll::roll_sum(data.s[,names[2]],ds/dtime,fill=NA,align='right')
runsum = RcppRoll::roll_sum(data.s[,names[2]],round(ds/dtime),fill=NA,align='right')
#runmean <- rollapplyr(as.zoo(data.s[,names[2]]),ds/dtime,FUN=sum,fill =NA,align='right')
runsum <- runsum/ds #intensity per hour
max.subset <- lapply(1:length(which.mon),function(m.i){
......@@ -175,14 +176,16 @@ NULL
return(df) # maxima for single durations
}
# call function 1 in lapply to aggregate over all durations at single station
data.agg <- pbapply::pblapply(ds,agg.ts,cl=cl)
clust <- parallel::makeCluster(cl)
data.agg <- parallel::parLapply(cl = clust,ds,agg.ts)
parallel::stopCluster(clust)
df <- do.call(rbind,data.agg)
return(df) # maxima for all durations at one station
}
# which stations should be used?
if(is.null(which.stations))which.stations <- if(is.null(names(data))){1:length(data)}else{names(data)}
# call function 2 in lapply to aggregate over all durations at all stations
station.list <- lapply(which.stations,agg.station)
station.list <- pbapply::pblapply(which.stations,agg.station)
return(do.call('rbind',station.list))
}
......
......@@ -44,7 +44,7 @@
#' duration offset and duration exponent, resp.}
#' \item{se}{numeric vector giving the standard errors for the MLE's (in the same order)}
#' \item{trans}{A logical indicator for a non-stationary fit.}
#' \item{model}{A list with components mul, sigl, shl, thetal and etal.}
#' \item{model}{A list with components mutl, sigma0l, xil, thetal and etal.}
#' \item{link}{A character vector giving inverse link functions.}
#' \item{conv}{The convergence code, taken from the list returned by \code{\link{optim}}.
#' A zero indicates successful convergence.}
......
......@@ -11,7 +11,7 @@ IDF.agg(
which.stations = NULL,
which.mon = list(0:11),
names = c("date", "RR"),
cl = NULL
cl = 1
)
}
\arguments{
......@@ -20,7 +20,7 @@ The data.frame must have the columns 'date' and 'RR' unless other names
are specified in the parameter `names`. The column 'date' must contain strings with
standard date format.}
\item{ds}{numeric vector of aggregation durations.
\item{ds}{numeric vector of aggregation durations in hours.
(Must be multiples of time resolution at all stations.)}
\item{na.accept}{numeric in [0,1) giving maximum percentage of missing values
......@@ -33,7 +33,7 @@ containing names of elements in data. If not given, all elements in `data` will
\item{names}{optional, character vector of length 2, containing the names of the columns to be used.}
\item{cl}{optional, number of cores to be used from \code{\link[pbapply]{pblapply}} for parallelization.}
\item{cl}{optional, number of cores to be used from \code{\link[parallel]{parLapply}} for parallel computing.}
}
\value{
data.frame containing the annual intensity maxima [mm/h] in `$xdat`, the corresponding duration in `$ds`,
......
......@@ -72,7 +72,7 @@ the components nllh, mle and se are always printed.
duration offset and duration exponent, resp.}
\item{se}{numeric vector giving the standard errors for the MLE's (in the same order)}
\item{trans}{A logical indicator for a non-stationary fit.}
\item{model}{A list with components mul, sigl, shl, thetal and etal.}
\item{model}{A list with components mutl, sigma0l, xil, thetal and etal.}
\item{link}{A character vector giving inverse link functions.}
\item{conv}{The convergence code, taken from the list returned by \code{\link{optim}}.
A zero indicates successful convergence.}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment