
The goal of sooty is to provide access to data of relevance to Southern Ocean research.
To see what files we know about in object storage, use
library(sooty)
(files <- sooty_files())
#> # A tibble: 118,292 × 7
#> date source Dataset Bucket Key Protocol Host
#> <dttm> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 1981-09-01 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 2 1981-09-02 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 3 1981-09-03 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 4 1981-09-04 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 5 1981-09-05 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 6 1981-09-06 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 7 1981-09-07 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 8 1981-09-08 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 9 1981-09-09 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> 10 1981-09-10 00:00:00 /vsicurl/https://pro… oisst-… idea-… www.… /vsicurl http…
#> # ℹ 118,282 more rows
The main columns of interest are date, source, and Dataset. The source is
a directly usable source identifier that a GDAL-enabled package can
read (see examples below).
We can get a very simple model of a “logical dataset” by homing in on
one Dataset in particular. First let’s make a summary from
everything:
library(sooty)
sooty_files() |>
dplyr::mutate(date = as.Date(date)) |> dplyr::group_by(Dataset) |>
dplyr::summarize(earliest = min(date), latest = max(date), n = dplyr::n()) |>
dplyr::arrange(Dataset, earliest)
#> # A tibble: 14 × 4
#> Dataset earliest latest n
#> <chr> <date> <date> <int>
#> 1 BREMEN-SEAICE-SMOS-north 2010-05-01 2025-05-20 5474
#> 2 BREMEN-SEAICE-SMOS-south 2010-10-01 2016-05-03 12
#> 3 NSIDC_SEAICE_PS_N25km 1978-10-26 2025-05-20 17003
#> 4 NSIDC_SEAICE_PS_S25km 1978-10-26 2025-05-20 17003
#> 5 SEALEVEL_GLO_PHY_L4 1993-01-01 2025-05-21 11715
#> 6 antarctica-amsr2-asi-s3125-tif 2012-07-02 2025-05-20 4695
#> 7 ccmp-wind-product-v2 1993-01-02 2024-08-31 11552
#> 8 esacci-oc-l3s-chlor-a-merged-5day 1997-09-03 2025-03-27 2019
#> 9 esacci-oc-l3s-chlor-a-merged-annual 1997-05-22 2024-05-22 28
#> 10 esacci-oc-l3s-chlor-a-merged-daily 1997-09-04 2025-03-31 10045
#> 11 esacci-oc-l3s-chlor-a-merged-monthly 1997-09-01 2025-03-01 331
#> 12 ghrsst-tif 2002-06-01 2025-05-20 8390
#> 13 oisst-avhrr-v02r01 1981-09-01 2025-05-20 15968
#> 14 oisst-tif 1981-09-01 2025-05-18 14057
library(terra)
#> terra 1.8.50
file <- files$source[which.max(files$date) ]
print(file)
#> [1] "/vsicurl/https://projects.pawsey.org.au/idea-sealevel-glo-phy-l4-nrt-008-046/data.marine.copernicus.eu/SEALEVEL_GLO_PHY_L4_NRT_008_046/cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.125deg_P1D_202411/2025/05/nrt_global_allsat_phy_l4_20250521_20250521.nc"
rast(file)
#> class : SpatRaster
#> dimensions : 1440, 2880, 10 (nrow, ncol, nlyr)
#> resolution : 0.125, 0.125 (x, y)
#> extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
#> coord. ref. : +proj=longlat +a=6378136.3 +rf=298.257 +no_defs
#> sources : nrt_global_allsat_phy_l4_20250521_20250521.nc:sla
#> nrt_global_allsat_phy_l4_20250521_20250521.nc:err_sla
#> nrt_global_allsat_phy_l4_20250521_20250521.nc:ugosa
#> ... and 7 more sources
#> varnames : sla (Sea level anomaly)
#> err_sla (Formal mapping error)
#> ugosa (Geostrophic velocity anomalies: zonal component)
#> ...
#> names : sla, err_sla, ugosa, err_ugosa, vgosa, err_vgosa, ...
#> unit : m, m, m/s, m/s, m/s, m/s, ...
#> time (days) : 2025-05-21
The available datasets can be found from the table.
dplyr::distinct(sooty_files(), Dataset)
#> # A tibble: 14 × 1
#> Dataset
#> <chr>
#> 1 oisst-avhrr-v02r01
#> 2 SEALEVEL_GLO_PHY_L4
#> 3 NSIDC_SEAICE_PS_S25km
#> 4 NSIDC_SEAICE_PS_N25km
#> 5 antarctica-amsr2-asi-s3125-tif
#> 6 ghrsst-tif
#> 7 ccmp-wind-product-v2
#> 8 BREMEN-SEAICE-SMOS-south
#> 9 BREMEN-SEAICE-SMOS-north
#> 10 oisst-tif
#> 11 esacci-oc-l3s-chlor-a-merged-monthly
#> 12 esacci-oc-l3s-chlor-a-merged-daily
#> 13 esacci-oc-l3s-chlor-a-merged-5day
#> 14 esacci-oc-l3s-chlor-a-merged-annual
There are data sets like the 25km resolution south polar stereographic sea ice concentration, available via the dataset identifier `NSIDC_SEAICE_PS_S25km`:
icefiles <- sooty_files() |> dplyr::filter(Dataset == "NSIDC_SEAICE_PS_S25km")
dplyr::glimpse(icefiles)
#> Rows: 17,003
#> Columns: 7
#> $ date <dttm> 1978-10-26, 1978-10-27, 1978-10-28, 1978-10-29, 1978-10-30, …
#> $ source <chr> "/vsicurl/https://projects.pawsey.org.au/idea-10.5067-mpyg15w…
#> $ Dataset <chr> "NSIDC_SEAICE_PS_S25km", "NSIDC_SEAICE_PS_S25km", "NSIDC_SEAI…
#> $ Bucket <chr> "idea-10.5067-mpyg15waa4wx", "idea-10.5067-mpyg15waa4wx", "id…
#> $ Key <chr> "n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978.10.26/NSIDC005…
#> $ Protocol <chr> "/vsicurl", "/vsicurl", "/vsicurl", "/vsicurl", "/vsicurl", "…
#> $ Host <chr> "https://projects.pawsey.org.au", "https://projects.pawsey.or…
You can immediately see what range of dates is available:
range(icefiles$date)
#> [1] "1978-10-26 UTC" "2025-05-20 UTC"
And explore exactly what source files are available:
range(diff(icefiles$date)) ## there are some gaps, it's every two days to start and some are missing
#> Time differences in days
#> [1] 1 7
diff(range(icefiles$date)) ## the number of potential data days
#> Time difference of 17008 days
nrow(icefiles) ## the actual number of data days
#> [1] 17003
This is a very experimental start at replacing raadtools with a package anyone can use.
We can read from the datasets with a GDAL-ready package, such as terra. But note that we need to set a configuration first, and we need to unset it afterwards; this is a work in progress (WIP).
amsrfiles <- dataset("antarctica-amsr2-asi-s3125-tif")@source
#> Warning in dataset("antarctica-amsr2-asi-s3125-tif"): 'dataset' is deprecated.
#> Use 'datasource' instead.
#> See help("Deprecated")
library(terra)
(r <- rast(tail(amsrfiles$source, 1)))
#> class : SpatRaster
#> dimensions : 2656, 2528, 1 (nrow, ncol, nlyr)
#> resolution : 3125, 3125 (x, y)
#> extent : -3950000, 3950000, -3950000, 4350000 (xmin, xmax, ymin, ymax)
#> coord. ref. : WGS 84 / NSIDC Sea Ice Polar Stereographic South (EPSG:3976)
#> source : asi-AMSR2-s3125-20250520-v5.4.tif
#> color table : 1
#> name : asi-AMSR2-s3125-20250520-v5.4
r[r > 100] <- NA
plot(r[[nlyr(r)]] * 1, main = format(max(icefiles$date)))
ghrsst <- dataset("ghrsst-tif")
#> Warning in dataset("ghrsst-tif"): 'dataset' is deprecated.
#> Use 'datasource' instead.
#> See help("Deprecated")
sstfile <- ghrsst@source$source[match(max(icefiles$date), ghrsst@source$date)]
sst <- rast(sprintf("vrt://%s?ovr=4", sstfile))
ct <- as.contour(crop(sst, ext(-180, 180, -90, -40)))
plot(project(ct, crs(r)), add = TRUE, col = "hotpink")
That should be the state of the sea ice in the Southern Ocean at the latest available date, sea ice concentration from passive microwave at 3.125km resolution, by the AWI artist sea ice group.
sooty_files()
Dataset is the main grouping value, and files from
different Datasets are otherwise non-relatable, be it by
file format, available variable/s, grid specification (spatial extent
and resolution), or temporal organization. We guarantee that within a
Dataset, the files are ordered and unique by date(-time) and that they
are exactly relatable spatially and (mostly, or usually also) by
variable/s. The time series may not be regular or complete, but usually
is.
We retain the component parts of source, as
Bucket, Key, Protocol, and
Host as this is object storage and these are the separable
parts of addressing objects in different ways, and we’ve reserved that
for future usage. (We use https:// protocol by default to
avoid any required configurations). Get in touch if you have any
questions about this.
We have an in-progress class for dataset handling. Create a ‘dataset()’ and set an id.
ds <- dataset()
#> Warning in dataset(): 'dataset' is deprecated.
#> Use 'datasource' instead.
#> See help("Deprecated")
ds@id <- "NSIDC_SEAICE_PS_S25km"
ds
#> <sooty::dataset>
#> @ id : chr "NSIDC_SEAICE_PS_S25km"
#> @ n : int 17003
#> @ mindate: POSIXct[1:1], format: "1978-10-26"
#> @ maxdate: POSIXct[1:1], format: "2025-05-20"
#> @ source : tibble [17,003 × 7] (S3: tbl_df/tbl/data.frame)
#> $ date : POSIXct[1:17003], format: "1978-10-26" "1978-10-27" ...
#> $ source : chr [1:17003] "/vsicurl/https://projects.pawsey.org.au/idea-10.5067-mpyg15waa4wx/n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978"| __truncated__ "/vsicurl/https://projects.pawsey.org.au/idea-10.5067-mpyg15waa4wx/n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978"| __truncated__ "/vsicurl/https://projects.pawsey.org.au/idea-10.5067-mpyg15waa4wx/n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978"| __truncated__ "/vsicurl/https://projects.pawsey.org.au/idea-10.5067-mpyg15waa4wx/n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978"| __truncated__ ...
#> $ Dataset : chr [1:17003] "NSIDC_SEAICE_PS_S25km" "NSIDC_SEAICE_PS_S25km" "NSIDC_SEAICE_PS_S25km" "NSIDC_SEAICE_PS_S25km" ...
#> $ Bucket : chr [1:17003] "idea-10.5067-mpyg15waa4wx" "idea-10.5067-mpyg15waa4wx" "idea-10.5067-mpyg15waa4wx" "idea-10.5067-mpyg15waa4wx" ...
#> $ Key : chr [1:17003] "n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978.10.26/NSIDC0051_SEAICE_PS_S25km_19781026_v2.0.nc" "n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978.10.27/NSIDC0051_SEAICE_PS_S25km_19781027_v2.0.nc" "n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978.10.28/NSIDC0051_SEAICE_PS_S25km_19781028_v2.0.nc" "n5eil01u.ecs.nsidc.org/PM/NSIDC-0051.002/1978.10.29/NSIDC0051_SEAICE_PS_S25km_19781029_v2.0.nc" ...
#> $ Protocol: chr [1:17003] "/vsicurl" "/vsicurl" "/vsicurl" "/vsicurl" ...
#> $ Host : chr [1:17003] "https://projects.pawsey.org.au" "https://projects.pawsey.org.au" "https://projects.pawsey.org.au" "https://projects.pawsey.org.au" ...
ds@source
#> # A tibble: 17,003 × 7
#> date source Dataset Bucket Key Protocol Host
#> <dttm> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 1978-10-26 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 2 1978-10-27 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 3 1978-10-28 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 4 1978-10-29 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 5 1978-10-30 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 6 1978-10-31 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 7 1978-11-01 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 8 1978-11-02 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 9 1978-11-03 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> 10 1978-11-04 00:00:00 /vsicurl/https://pro… NSIDC_… idea-… n5ei… /vsicurl http…
#> # ℹ 16,993 more rows
Please note that the sooty project is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide by its terms.