Example of creating a trajectory dataset in xarray¶
Example of creating a simple trajectory in xarray with attributes for S-ENDA
In [ ]:
%pip install xarray netCDF4
In [ ]:
from datetime import datetime, timezone

import numpy as np
import pandas as pd
import xarray as xr
Create a trajectory dataset¶
The cells below write a small sample CSV file and load it into an xarray dataset indexed by time.
In [ ]:
%%bash
# Write a small sample trajectory to a CSV file in the working directory:
# one header row followed by nine observations (timestamp, three measured
# values and the position of each measurement).
# NOTE: the file name is spelled "trajecory.csv"; the cell that reads the
# file uses the same spelling, so the two must be changed together.
cat <<EOF >./trajecory.csv
time,temperature,salinity,oxygen,latitude,longitude
2024-01-01T13:40:59,15.694,15.314,90.618,60.39799,5.320411
2024-01-01T13:50:59,15.721,15.4,90.084,60.39789,5.319856
2024-01-01T14:00:59,5.633,15.22,88.002,60.39790,5.319894
2024-01-01T14:10:59,25.609,15.27,88.128,60.39780,5.319334
2024-01-01T14:20:00,15.573,15.2,89.927,60.39778,5.319025
2024-01-01T14:30:00,35.529,16.24,89.994,60.39778,5.318615
2024-01-01T14:40:00,15.476,15.22,90.345,60.39777,5.317961
2024-01-01T14:50:00,15.422,17.2,89.079,60.39776,5.317048
2024-01-01T15:00:00,15.423,15.1,91.195,60.39774,5.316310
EOF
In [ ]:
# Read the sample file, parsing the "time" column into datetimes.
df = pd.read_csv("./trajecory.csv", parse_dates=["time"])

# Index the table by time and convert it to an xarray Dataset, so that
# "time" becomes the dimension shared by every column.
ds = df.set_index("time").to_xarray()
ds
Out[ ]:
<xarray.Dataset> Dimensions: (time: 9) Coordinates: * time (time) datetime64[ns] 2024-01-01T13:40:59 ... 2024-01-01T15:... Data variables: temperature (time) float64 15.69 15.72 5.633 25.61 ... 15.48 15.42 15.42 salinity (time) float64 15.31 15.4 15.22 15.27 ... 16.24 15.22 17.2 15.1 oxygen (time) float64 90.62 90.08 88.0 88.13 ... 90.34 89.08 91.19 latitude (time) float64 60.4 60.4 60.4 60.4 60.4 60.4 60.4 60.4 60.4 longitude (time) float64 5.32 5.32 5.32 5.319 ... 5.319 5.318 5.317 5.316
Update coordinates with location and metadata¶
A dataset supports metadata (attributes) on each variable.
In [ ]:
# Turn longitude and latitude into coordinates and attach descriptive
# attributes to all three coordinates. Each one is defined along the
# "time" dimension.
ds = ds.assign_coords(
    longitude=xr.Variable(
        "time",
        ds.longitude,
        attrs={
            "standard_name": "longitude",
            "long_name": "Longitude",
            "units": "degree_east",
            "axis": "X",
        },
    ),
    latitude=xr.Variable(
        "time",
        ds.latitude,
        attrs={
            "standard_name": "latitude",
            "long_name": "Latitude",
            "units": "degree_north",
            "axis": "Y",
        },
    ),
    time=xr.Variable(
        "time",
        ds.time,
        attrs={
            "standard_name": "time",
            "long_name": "Time of measurement",
            "axis": "T",
        },
    ),
)
ds
Out[ ]:
<xarray.Dataset> Dimensions: (time: 9) Coordinates: latitude (time) float64 60.4 60.4 60.4 60.4 60.4 60.4 60.4 60.4 60.4 longitude (time) float64 5.32 5.32 5.32 5.319 ... 5.319 5.318 5.317 5.316 * time (time) datetime64[ns] 2024-01-01T13:40:59 ... 2024-01-01T15:... Data variables: temperature (time) float64 15.69 15.72 5.633 25.61 ... 15.48 15.42 15.42 salinity (time) float64 15.31 15.4 15.22 15.27 ... 16.24 15.22 17.2 15.1 oxygen (time) float64 90.62 90.08 88.0 88.13 ... 90.34 89.08 91.19
Add trajectory name¶
In [ ]:
# Store the trajectory identifier as a scalar (dimensionless) variable;
# the cf_role attribute tags it as the trajectory id.
ds["trajectory_name"] = xr.DataArray(
    "vaagen_swim",
    dims=(),
    attrs={"cf_role": "trajectory_id"},
)
Add metadata for each data variable¶
In [ ]:
# Describe each measured variable with a standard name, a human-readable
# long name and its units.
ds.temperature.attrs.update(
    standard_name="sea_water_temperature",
    long_name="Sea Water Temperature",
    # The unit is spelled "Celsius"; the previous "degree_Celcius" is not a
    # unit that UDUNITS-based tools can parse.
    units="degree_Celsius",
    comment="I lost the thermometer in Store Lundgårdsvann",
)

# NOTE(review): "PSU" is not a UDUNITS unit and CF discourages it; if these
# values are practical salinity, "sea_water_practical_salinity" with units
# "1" may be the better fit -- confirm before changing.
ds.salinity.attrs.update(
    standard_name="sea_water_salinity",
    long_name="Sea Water Salinity",
    units="PSU",
)

ds.oxygen.attrs.update(
    standard_name="fractional_saturation_of_oxygen_in_sea_water",
    long_name="Sea Water Oxygen Saturation",
    units="%",
)
Assign global attributes¶
In [ ]:
# Attach the global (dataset-level) attributes: identification, descriptive
# text, keywords, temporal/spatial coverage derived from the data, and
# creator/publisher/licence information.
ds = ds.assign_attrs(
    dict(
        id="92377f4b-099f",  # can also be set later
        naming_authority="bb.badebussen",  # can also be set later
        title="Measurements during a swim over Vågen",
        title_no="Målinger under ein svømmetur over Vågen",
        summary="Measurment during a swim in Vågen, I followed Beffen",
        summary_no="Målinger tatt under ein svømmetur over Vågen, eg følgte Beffen si rute",
        # Each keyword is prefixed with the short name of its vocabulary,
        # which is declared in keywords_vocabulary below.
        keywords=",".join(
            [
                "GCMDSK:EARTH SCIENCE > OCEANS > OCEAN TEMPERATURE > SEA SURFACE TEMPERATURE",
                "GCMDLOC:CONTINENT > EUROPE > NORTHERN EUROPE > SCANDINAVIA > NORWAY",
            ]
        ),
        keywords_vocabulary=",".join(
            [
                "GCMDSK:GCMD Science Keywords:https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/sciencekeywords",
                "GCMDLOC:GCMD Locations:https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/locations",
            ]
        ),
        iso_topic_category="Not available",
        featureType="trajectory",
        # Use a timezone-aware "now": datetime.utcnow() is deprecated since
        # Python 3.12 and returns a naive datetime. The literal "Z" suffix is
        # correct because the value is explicitly in UTC.
        date_created=datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        project="swimming",
        # Coverage is computed from the data itself so it cannot drift from
        # the actual contents of the dataset.
        time_coverage_start=np.datetime_as_string(ds.time.min().values, unit="s", timezone="UTC"),
        time_coverage_end=np.datetime_as_string(ds.time.max().values, unit="s", timezone="UTC"),
        geospatial_lat_min=float(ds.latitude.min()),
        geospatial_lat_max=float(ds.latitude.max()),
        geospatial_lon_min=float(ds.longitude.min()),
        geospatial_lon_max=float(ds.longitude.max()),
        spatial_representation="trajectory",
        creator_type="institution",
        creator_institution="Badebussen",
        institution="Badebussen",
        institution_short_name="BB",
        creator_email="badebussen@lungen.bb",
        creator_url="https://badebussen.bb",
        data_owner="Badebussen",
        processing_level="Experimental",
        Conventions="CF-1.7, ACDD-1.3",
        publisher_name="badebussen",
        publisher_email="publisher@badebussen.bb",
        publisher_url="https://badebussen.bb",
        license="http://spdx.org/licenses/CC-BY-4.0(CC-BY-4.0)",
        history="Created on jupyterhub",
    )
)
ds
Out[ ]:
<xarray.Dataset> Dimensions: (time: 9) Coordinates: latitude (time) float64 60.4 60.4 60.4 60.4 60.4 60.4 60.4 60.4 60.4 longitude (time) float64 5.32 5.32 5.32 5.319 ... 5.318 5.317 5.316 * time (time) datetime64[ns] 2024-01-01T13:40:59 ... 2024-01-01... Data variables: temperature (time) float64 15.69 15.72 5.633 ... 15.48 15.42 15.42 salinity (time) float64 15.31 15.4 15.22 15.27 ... 15.22 17.2 15.1 oxygen (time) float64 90.62 90.08 88.0 88.13 ... 90.34 89.08 91.19 trajectory_name <U11 'vaagen_swim' Attributes: (12/33) id: 92377f4b-099f naming_authority: bb.badebussen title: Measurements during a swim over Vågen title_no: Målinger under ein svømmetur over Vågen summary: Measurment during a swim in Vågen, I followed Be... summary_no: Målinger tatt under ein svømmetur over Vågen, eg... ... ... Conventions: CF-1.7, ACDD-1.3 publisher_name: badebussen publisher_email: publisher@badebussen.bb publisher_url: https://badebussen.bb license: http://spdx.org/licenses/CC-BY-4.0(CC-BY-4.0) history: Created on jupyterhub
Store the dataset¶
You can specify the encoding as a dictionary. CF doesn't use a fill value in coordinates, and some programs don't handle int64.
In [ ]:
# Write the dataset to a netCDF file with "time" as an unlimited dimension.
# The per-variable encoding disables the fill value on the coordinates and
# stores time as 32-bit integer seconds since the Unix epoch.
ds.to_netcdf(
    "badebussen-trajectory.nc",
    unlimited_dims=["time"],
    encoding={
        "time": {
            "dtype": "int32",
            "_FillValue": None,
            "units": "seconds since 1970-01-01 00:00:00",
        },
        "longitude": {"_FillValue": None},
        "latitude": {"_FillValue": None},
    },
)