---
title: "ERDDAP Extraction"
params:
yml: "meta/erddap_sst.yml"
subtitle: "metadata: *`r basename(params$yml)`*"
execute:
cache: false
---
```{r}
#| label: setup
# dir_extractr = "/share/github/marinebon/extractr"
# dir_extractr = "~/Github/marinebon/extractr" # DEBUG
# devtools::load_all(dir_extractr)
# devtools::install_local(dir_extractr, force = T)
# devtools::install_github("marinebon/extractr", force = T)
librarian::shelf(
dplyr, DT, fs, glue, here, logger, lubridate, mapview, purrr, RColorBrewer, readr,
sf, stringr, terra, tibble, tidyr, units, yaml,
marinebon/extractr,
quiet = T)
options(readr.show_col_types = F)
mapviewOptions(
basemaps = "Esri.OceanBasemap",
vector.palette = colorRampPalette(brewer.pal(n=5, name="Spectral")))
if (!exists("params"))
params <- list(yml = here("meta/erddap_sst.yml")) # DEBUG
```
## Polygons
```{r}
#| label: polygons
sanctuaries_rds <- here("../climate-dashboard/data/sanctuaries.rds")
buf_pct <- 0.20 # 20% of area
ply_sanctuaries <- readRDS(sanctuaries_rds)
# buffer polygon and get bounding boxes
ply_sanctuaries <- ply_sanctuaries |>
mutate(
area_km2 = st_area(geom) |>
set_units("km^2") |>
as.numeric() |>
round(2),
buf_km = (sqrt(area_km2) * buf_pct) |>
round(2),
bbox_geom = map2(geom, area_km2, \(g,a) {
st_sfc(g, crs = 4326) |>
st_buffer(buf_km * 1000) |>
st_bbox() |>
round(2) |>
st_as_sfc() }),
bbox_chr = map_chr(bbox_geom, \(x) { # xmin, ymin, xmax and ymax
st_bbox(x) |>
as.numeric() |>
paste(collapse = ",") } ) ) |>
unnest(bbox_geom)
bb_sanctuaries <- st_set_geometry(ply_sanctuaries, "bbox_geom") |>
select(-geom)
ply_sanctuaries <- select(ply_sanctuaries, -bbox_geom)
# show map
mapView(ply_sanctuaries, zcol = "nms") +
mapView(bb_sanctuaries, zcol = "nms", legend = F)
ply_sanctuaries |>
st_drop_geometry() |>
datatable()
```
## Dataset
### Metadata
Contents of `r basename(params$yml)`:
```{r}
#| label: yml
#| results: asis
cat(
"```yaml",
readLines(params$yml),
"```",
sep = "\n")
```
### Information
```{r}
#| label: ed_info
meta <- read_yaml(params$yml)
proc_prod <- path_ext_remove(basename(params$yml))
dir_out <- glue("/share/data/noaa-onms/climate-dashboard-app/{proc_prod}")
dir_create(dir_out)
v <- meta$erddap_variable
(ed <- ed_info(meta$erddap_url))
times <- ed_dim(ed, "time")
```
## Polygon years
```{r}
#| label: d_nms_yr_todo
d_nms_yr_todo <- ply_sanctuaries |>
st_drop_geometry() |>
arrange(nms) |>
select(nms) |>
cross_join(
tibble(
year = year(min(times)):year(max(times)))) |>
mutate(
d = map2(nms, year, \(nms, year) { # nms = "CBNMS"; year = 2025
times_yr <- times[year(times) == year]
tif <- glue("{dir_out}/{nms}/{year}.tif")
# if missing tif,return all times
if (!file.exists(tif))
return(list(
time_min = min(times_yr),
time_max = max(times_yr),
n_times = length(times_yr)))
# get times from tif
r <- rast(tif)
times_tif <- time(r)
# if all times done, return NAs
if (all(times_yr %in% times_tif))
return(list(
n_times = 0,
time_min = NA,
time_max= NA))
# otherwise, return time range missing
i <- !times_yr %in% times_tif
list(
n_times = sum(i),
time_min = min(times_yr[i]),
time_max = max(times_yr[i]))
}),
time_min = map_vec(d, pluck, "time_min"),
time_max = map_vec(d, pluck, "time_max"),
n_times = map_int(d, pluck, "n_times") ) |>
select(-d) |>
filter(n_times > 0) |>
rowid_to_column("i") |>
relocate(i)
d_nms_yr_todo |>
group_by(nms) %>%
{if (nrow(.) > 0)
summarize(
.,
time_min = min(time_min, na.rm = T),
time_max = max(time_max, na.rm = T),
n_times = sum(n_times)) else .} |>
datatable(
caption = "Sanctuaries missing available ERDDAP times.",
rownames = F,
options = list(
pageLength = 5,
lengthMenu = c(5, 50, nrow(d_nms_yr_todo))))
```
## Extract dataset per polygon year
Using [`extractr::ed_extract()`](https://marinebon.github.io/extractr/reference/ed_extract.html), .
```{r}
#| label: iterate_ed_extract
# rerddap::cache_delete_all()
fxn <- function(i, nms, time_min, time_max, ...){ # nms = "MNMS"; year = 2010 ; i = 97
err <- tryCatch({
log_info("{sprintf('%03d', i)}: {nms}, {time_min} to {time_max}")
yr <- year(time_min)
ply <- ply_sanctuaries |>
filter(nms == !!nms)
bb <- bb_sanctuaries |>
filter(nms == !!nms) |>
st_bbox()
extractr::ed_extract(
ed,
var = v,
sf_zones = ply,
bbox = bb,
mask_tif = F,
rast_tif = glue("{dir_out}/{nms}/{yr}.tif"),
zonal_fun = "mean",
zonal_csv = glue("{dir_out}/{nms}/{yr}.csv"),
dir_nc = glue("{dir_out}/{nms}/{yr}_nc"),
keep_nc = F,
n_max_vals_per_req = 1e+05,
time_min = time_min,
time_max = time_max,
verbose = T)
return(NA)
}, error = function(e) {
log_error(conditionMessage(e))
return(conditionMessage(e))
})
err
}
res <- d_nms_yr_todo |>
mutate(
error = pmap_chr(list(i, nms, time_min, time_max), fxn))
```
### Successes
```{r}
#| label: success_summary
res |>
mutate(
success = is.na(error)) |>
group_by(success) |>
summarize(
n = n()) |>
datatable()
```
### Errors (if any)
```{r}
#| label: error_summary
# rast(glue("{dir_out}/{nms}/{yr}.tif")) |> names()
res |>
filter(!is.na(error)) |>
group_by(nms) |>
summarize(
n_years = n(),
errors = unique(error) |> paste(collapse = "\n\n----\n\n")) |>
datatable()
```
::: {.content-hidden}
## TODO
`ed_extract()`:
- [x] delete *_nc dir
- [x] differentiate existing done vs todo for given year
- [ ] allow irregular datasets, like `meta/irregular/*.yaml`: `ERROR: x cell sizes are not regular`
- [ ] break up into functions, not exported
`erddap.qmd`:
- [ ] make `ed_extract()` to single MBNMS with features for main vs david; add "ALL" option to `ed_extract()`
- [ ] add buffer to all (and redo)
- [ ] wrap retry with `ed_dim()` too
:::
```{r}
#| label: rm_empty_nc_dirs
#| eval: false
#| echo: false
# find empty directories ending in _nc and delete them
d_dirs <- tibble(
dir = list.dirs(here("data"), full.names = T, recursive = T)) |>
filter(str_detect(dir, "_nc$")) |>
mutate(
n_files = map_int(dir, \(x) {length(list.files(x))}))
# show non-empty directories
d_dirs |>
filter(n_files > 0) |>
datatable()
# delete empty directories
d_dirs |>
filter(n_files == 0) |>
pull(dir) |>
walk(\(x) {
message(glue("Deleting {x}"))
unlink(x, recursive = T) })
```
::: {.callout-caution collapse="true"}
## R package versions
```{r}
devtools::session_info()
```
:::