API limits and loops

The following APIs limit the period that can be retrieved in a single request: a query cannot span more than the maximum period each API allows.

AEMET API

The AEMET API limits daily data downloads to 31 days per request:

# This request covers the whole of 1990, which is more than the 31 days of
# daily data that AEMET serves in a single call:
get_meteo_from(
  "aemet",
  aemet_options(
    resolution = "daily",
    start_date = as.Date("1990-01-01"),
    end_date = as.Date("1990-12-31"),
    api_key = keyring::key_get("aemet")
  )
)

This means that a single call to get_meteo_from for the AEMET service can download at most 31 days of data. If the period needed is longer than that, one option is to perform all the necessary calls and join the results:

# January 1990: one request, kept within the 31-day limit
res_1990_jan <- get_meteo_from(
  "aemet",
  aemet_options(
    resolution = "daily",
    start_date = as.Date("1990-01-01"),
    end_date = as.Date("1990-01-31"),
    api_key = keyring::key_get("aemet")
  )
)

# February 1990: a second, independent request
res_1990_feb <- get_meteo_from(
  "aemet",
  aemet_options(
    resolution = "daily",
    start_date = as.Date("1990-02-01"),
    end_date = as.Date("1990-02-28"),
    api_key = keyring::key_get("aemet")
  )
)

# Stack both months into a single result
res <- rbind(res_1990_jan, res_1990_feb)
res

While this is easy to do for short periods, for long periods (years, decades) it becomes tedious and error-prone (or, at the very least, it involves a lot of copy and paste and produces longer scripts).
To avoid this, we can use loops, either in a tidyverse way (purrr::map) or with a more classic approach (for). In both cases, the first thing to do is to create the vectors of dates to retrieve:

# Build one start date and one end date per month to request.
start_dates <- seq(
  from = as.Date("1990-01-01"),
  to = as.Date("1990-06-01"),
  by = "month"
)
# Each end date is the day before the first day of the following month.
end_dates <- seq(
  from = as.Date("1990-02-01"),
  to = as.Date("1990-07-01"),
  by = "month"
) - 1

# The two vectors are iterated in parallel, so their lengths must match
length(start_dates) == length(end_dates)
#> [1] TRUE

# Side-by-side view of the resulting periods
data.frame(start_dates, end_dates)
#>   start_dates  end_dates
#> 1  1990-01-01 1990-01-31
#> 2  1990-02-01 1990-02-28
#> 3  1990-03-01 1990-03-31
#> 4  1990-04-01 1990-04-30
#> 5  1990-05-01 1990-05-31
#> 6  1990-06-01 1990-06-30

tidyverse loop

We are going to use purrr::map2_dfr to iterate over both date vectors at the same time and return a data frame with all the results directly:

# purrr version: map2_dfr() walks both date vectors in parallel and returns
# all the results as a single data frame
res_tidyverse <- purrr::map2_dfr(
  .x = start_dates,
  .y = end_dates,
  .f = function(period_start, period_end) {
    # one API request per (start, end) pair
    get_meteo_from(
      "aemet",
      aemet_options(
        resolution = "daily",
        start_date = period_start,
        end_date = period_end,
        api_key = keyring::key_get("aemet")
      )
    )
  }
)

res_tidyverse

for loop

We use base::for, iterating by the index of the dates vectors:

# Base R version: start from an empty data frame and append each period to it
res_for <- data.frame()

for (index in seq_along(start_dates)) {
  # request the period delimited by the index-th start and end dates
  temp_res <- get_meteo_from(
    "aemet",
    aemet_options(
      resolution = "daily",
      start_date = start_dates[index],
      end_date = end_dates[index],
      api_key = keyring::key_get("aemet")
    )
  )

  # add the rows just downloaded to the accumulated result
  res_for <- rbind(res_for, temp_res)
}

res_for

Both methods return identical results:

# Compare the purrr result with the for-loop result for exact equality
identical(x = res_tidyverse, y = res_for)

In a loop, whether it is a purrr::map or a for loop, each iteration connects to the API, consuming requests from the user quota. Take this into consideration when creating loops for longer periods, as you can reach your API request limit for the day, month, etc. (it depends on the service API).

MeteoCat API

When using MeteoCat with the daily, monthly and yearly resolutions, there are restrictions on the period that can be accessed.

`daily`

daily always returns the whole month the date selected is in, i.e. for start_date = as.Date('2020-04-10') it will return all days in April, 2020:

# Ask for a single day in April 2020 at daily resolution
api_options <- meteocat_options(
  "daily",
  api_key = keyring::key_get("meteocat"),
  start_date = as.Date("2020-04-10")
)
april_2020 <- get_meteo_from("meteocat", api_options)
# List the distinct timestamps present in the response
unique(april_2020$timestamp)

This means that if we want more than one month, we need to use loops in a similar way as described previously for AEMET:

# First day of each month in the first quarter of 2020 (January to March),
# so the data retrieved matches the `2020q1` result names used below.
start_dates <- seq(as.Date('2020-01-01'), as.Date('2020-03-01'), 'months')
# purrr version: one MeteoCat daily request per start date, with all the
# results combined into a single data frame
meteocat_2020q1_tidyverse <- purrr::map_dfr(
  .x = start_dates,
  .f = function(month_start) {
    get_meteo_from(
      "meteocat",
      meteocat_options(
        resolution = "daily",
        start_date = month_start,
        api_key = keyring::key_get("meteocat")
      )
    )
  }
)
meteocat_2020q1_tidyverse

# Base R version: start from an empty data frame and append each request
meteocat_2020q1_for <- data.frame()

for (index in seq_along(start_dates)) {
  # request the data for the index-th start date
  temp_res <- get_meteo_from(
    "meteocat",
    meteocat_options(
      resolution = "daily",
      start_date = start_dates[index],
      api_key = keyring::key_get("meteocat")
    )
  )

  # add the rows just downloaded to the accumulated result
  meteocat_2020q1_for <- rbind(meteocat_2020q1_for, temp_res)
}
meteocat_2020q1_for

# Compare the purrr result with the for-loop result for exact equality
identical(x = meteocat_2020q1_tidyverse, y = meteocat_2020q1_for)

`monthly`

monthly always returns the whole year the date selected is in, i.e. for start_date = as.Date('2020-04-10') it will return all months in 2020:

# Ask for a single date in 2020 at monthly resolution
api_options <- meteocat_options(
  "monthly",
  api_key = keyring::key_get("meteocat"),
  start_date = as.Date("2020-04-10")
)
year_2020 <- get_meteo_from("meteocat", api_options)
# List the distinct timestamps present in the response
unique(year_2020$timestamp)

Which means that if we need more than one year of monthly data, we need to use loops again:

# One start date per year to request: 2019 and 2020
start_dates <- seq(
  from = as.Date("2019-01-01"),
  to = as.Date("2020-01-01"),
  by = "year"
)
# purrr version: one MeteoCat monthly request per start date, with all the
# results combined into a single data frame
meteocat_2019_20_tidyverse <- purrr::map_dfr(
  .x = start_dates,
  .f = function(year_start) {
    get_meteo_from(
      "meteocat",
      meteocat_options(
        resolution = "monthly",
        start_date = year_start,
        api_key = keyring::key_get("meteocat")
      )
    )
  }
)
meteocat_2019_20_tidyverse

# Base R version: start from an empty data frame and append each request
meteocat_2019_20_for <- data.frame()

for (index in seq_along(start_dates)) {
  # request the data for the index-th start date
  temp_res <- get_meteo_from(
    "meteocat",
    meteocat_options(
      resolution = "monthly",
      start_date = start_dates[index],
      api_key = keyring::key_get("meteocat")
    )
  )

  # add the rows just downloaded to the accumulated result
  meteocat_2019_20_for <- rbind(meteocat_2019_20_for, temp_res)
}
meteocat_2019_20_for

# Compare the purrr result with the for-loop result for exact equality
identical(x = meteocat_2019_20_tidyverse, y = meteocat_2019_20_for)

`yearly`

yearly always returns all available years and the start_date argument is ignored, i.e. using start_date = as.Date('2020-04-10') will return all years, regardless of the date supplied:

# Ask for a single date in 2020 at yearly resolution
api_options <- meteocat_options(
  "yearly",
  api_key = keyring::key_get("meteocat"),
  start_date = as.Date("2020-04-10")
)
all_years <- get_meteo_from("meteocat", api_options)
# List the distinct timestamps present in the response
unique(all_years$timestamp)

This means that with yearly we always get all the data available, so there is no need for loops.