domino.deseasonaliser
1import numpy as np 2import xarray as xr 3import cftime 4import pandas as pd 5from scipy.optimize import leastsq 6import os 7from domino.util import xarr_times_to_ints 8 9class Agg_Deseasonaliser(object): 10 """ 11 Computes a seasonal cycle from an xarray dataset based on an aggregation 12 criteria i.e. month, day of year, week of year etc. 13 14 """ 15 def __init__(self): 16 17 self.data=None 18 """@private""" 19 20 self.cycle_coeffs=None 21 """@private""" 22 23 def fit_cycle(self,arr,dim="time",agg="dayofyear"): 24 """ 25 Computes a seasonal cycle for *arr* by aggregating over the coordinate *dim* and then taking the mean. Default functionality is to compute the average by day of year. 26 Seasonal cycle coefficients are stored in self.cycle_coeffs 27 28 **Arguments** 29 30 *arr* 31 32 An xarray.DataArray for which a seasonal cycle is to be computed. 33 34 **Optional Arguments** 35 36 *dim* 37 38 Defaults to 'time': the time-like dimension of *arr* along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex. 39 40 *agg* 41 42 Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'. 43 44 45 """ 46 var=dim+"."+agg 47 self.data=arr 48 self.dim=dim 49 self.agg=agg 50 self.cycle_coeffs=arr.groupby(var).mean(dim=dim) 51 return 52 53 def evaluate_cycle(self,data=None,smooth=1): 54 55 """ 56 57 When data is None, creates a seasonal cycle DataArray the same shape as the input data used in fit_cycle. 58 data can be a DataArray with a differing dim coordinate, but matching other coordinates. 59 This can be used to fit a seasonal cycle on one dataset, but evaluate it on another, as long as the grid, levels, etc are the same. 60 Returns The seasonal cycle as a DataArray A centred rolling mean can be optionally applied to the cycle. 61 62 **Optional Arguments** 63 64 *data* 65 66 An xarray.DataArray or Dataset with a time coordinate with the same name as the *arr* used in fit_cycle. 67 68 *smooth* 69 70 An integer specifying the width of the rolling mean to apply in terms of timesteps. No checks are made that timesteps are evenly spaced! 71 """ 72 if data is None: 73 data=self.data 74 cycle=self.cycle_coeffs.sel({self.agg:getattr(data[self.dim].dt,self.agg).data}) 75 return self.smooth(cycle,smooth) 76 77 def smooth(self,arr,w): 78 """@private""" 79 if w==1: 80 return arr 81 else: 82 return arr.rolling({self.agg:w},min_periods=1,center=True).mean() 83 84class Agg_FlexiDeseasonaliser(Agg_Deseasonaliser): 85 """Modifies Agg_Deseasonaliser, with the ability 86 to handle custom summary funcs other than the mean. 87 summary_func must take an xarray.groupby object, and 88 accept a keyword 'dim'.""" 89 90 def fit_cycle(self,arr,summary_func,dim="time",agg="dayofyear"): 91 var=dim+"."+agg 92 self.data=arr 93 self.dim=dim 94 self.agg=agg 95 self.cycle_coeffs=summary_func(arr.groupby(var),dim=dim) 96 return 97 98 99class Sinefit_Deseasonaliser(Agg_Deseasonaliser): 100 101 """ 102 Same functionalityy as Agg_Deseasonaliser but fits a number of sin modes 103 to the data using least squares fitting. This produces a smoother cycle than 104 aggregating but can be slow for large datasets. 105 106 """ 107 def _func_residual(self,p,x,t,N,period): 108 return x - self._evaluate_fit(t,p,N,period) 109 110 def _evaluate_fit(self,t,p,N,period): 111 ans=p[1] 112 for i in range(0,N): 113 ans+=p[2*i+2] * np.sin(2 * np.pi * (i+1)/period * t + p[2*i+3]) 114 return ans 115 116 def _lstsq_sine_fit(self,arr,t,N,period): 117 118 #Guess initial parameters 119 std=arr.std() 120 p=np.zeros(2*(N+1)) 121 for i in range(0,N): 122 p[2+2*i]=std/(i+1.0) 123 plsq=leastsq(self._func_residual,p,args=(arr,t,N,period)) 124 return plsq[0] 125 126 def fit_cycle(self,arr,dim="time",N=4,period=365.25): 127 """N sets the number of sine modes to fit as fractions of the period. i.e. the default of N=4 fits waves with periods of period, period/2, period/3 and period/4. period is a float setting the length of a year in days. 128 """ 129 dims=arr.dims 130 self.dim=dim 131 self.data=arr 132 self.N=N 133 self.period=period 134 t=xarr_times_to_ints(arr[dim]) 135 self.coeffs= xr.apply_ufunc( 136 self._lstsq_sine_fit, 137 arr, 138 input_core_dims=[[dim]], 139 output_core_dims=[["coeffs"]], 140 vectorize=True, 141 kwargs={"t":t,"N":N,"period":period}) 142 return 143 144 def evaluate_cycle(self,data=None): 145 """As for Agg_Deseasonaliser but without the option for further smoothing.""" 146 if data is None: 147 data=self.data 148 dims=data.dims 149 t=xarr_times_to_ints(data[self.dim]) 150 print(self.coeffs.shape) 151 cycle=np.array([[self._evaluate_fit(t,c2.data,self.N,self.period)\ 152 for c2 in c1] for c1 in np.atleast_3d(self.coeffs)]) 153 154 return data.transpose(...,"time").copy(data=cycle).transpose(*dims)
10class Agg_Deseasonaliser(object): 11 """ 12 Computes a seasonal cycle from an xarray dataset based on an aggregation 13 criteria i.e. month, day of year, week of year etc. 14 15 """ 16 def __init__(self): 17 18 self.data=None 19 """@private""" 20 21 self.cycle_coeffs=None 22 """@private""" 23 24 def fit_cycle(self,arr,dim="time",agg="dayofyear"): 25 """ 26 Computes a seasonal cycle for *arr* by aggregating over the coordinate *dim* and then taking the mean. Default functionality is to compute the average by day of year. 27 Seasonal cycle coefficients are stored in self.cycle_coeffs 28 29 **Arguments** 30 31 *arr* 32 33 An xarray.DataArray for which a seasonal cycle is to be computed. 34 35 **Optional Arguments** 36 37 *dim* 38 39 Defaults to 'time': the time-like dimension of *arr* along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex. 40 41 *agg* 42 43 Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'. 44 45 46 """ 47 var=dim+"."+agg 48 self.data=arr 49 self.dim=dim 50 self.agg=agg 51 self.cycle_coeffs=arr.groupby(var).mean(dim=dim) 52 return 53 54 def evaluate_cycle(self,data=None,smooth=1): 55 56 """ 57 58 When data is None, creates a seasonal cycle DataArray the same shape as the input data used in fit_cycle. 59 data can be a DataArray with a differing dim coordinate, but matching other coordinates. 60 This can be used to fit a seasonal cycle on one dataset, but evaluate it on another, as long as the grid, levels, etc are the same. 61 Returns The seasonal cycle as a DataArray A centred rolling mean can be optionally applied to the cycle. 62 63 **Optional Arguments** 64 65 *data* 66 67 An xarray.DataArray or Dataset with a time coordinate with the same name as the *arr* used in fit_cycle. 68 69 *smooth* 70 71 An integer specifying the width of the rolling mean to apply in terms of timesteps. No checks are made that timesteps are evenly spaced! 72 """ 73 if data is None: 74 data=self.data 75 cycle=self.cycle_coeffs.sel({self.agg:getattr(data[self.dim].dt,self.agg).data}) 76 return self.smooth(cycle,smooth) 77 78 def smooth(self,arr,w): 79 """@private""" 80 if w==1: 81 return arr 82 else: 83 return arr.rolling({self.agg:w},min_periods=1,center=True).mean()
Computes a seasonal cycle from an xarray dataset based on an aggregation criteria i.e. month, day of year, week of year etc.
24 def fit_cycle(self,arr,dim="time",agg="dayofyear"): 25 """ 26 Computes a seasonal cycle for *arr* by aggregating over the coordinate *dim* and then taking the mean. Default functionality is to compute the average by day of year. 27 Seasonal cycle coefficients are stored in self.cycle_coeffs 28 29 **Arguments** 30 31 *arr* 32 33 An xarray.DataArray for which a seasonal cycle is to be computed. 34 35 **Optional Arguments** 36 37 *dim* 38 39 Defaults to 'time': the time-like dimension of *arr* along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex. 40 41 *agg* 42 43 Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'. 44 45 46 """ 47 var=dim+"."+agg 48 self.data=arr 49 self.dim=dim 50 self.agg=agg 51 self.cycle_coeffs=arr.groupby(var).mean(dim=dim) 52 return
Computes a seasonal cycle for arr by aggregating over the coordinate dim and then taking the mean. Default functionality is to compute the average by day of year. Seasonal cycle coefficients are stored in self.cycle_coeffs
Arguments
arr
An xarray.DataArray for which a seasonal cycle is to be computed.
Optional Arguments
dim
Defaults to 'time': the time-like dimension of arr along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex.
agg
Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'.
54 def evaluate_cycle(self,data=None,smooth=1): 55 56 """ 57 58 When data is None, creates a seasonal cycle DataArray the same shape as the input data used in fit_cycle. 59 data can be a DataArray with a differing dim coordinate, but matching other coordinates. 60 This can be used to fit a seasonal cycle on one dataset, but evaluate it on another, as long as the grid, levels, etc are the same. 61 Returns The seasonal cycle as a DataArray A centred rolling mean can be optionally applied to the cycle. 62 63 **Optional Arguments** 64 65 *data* 66 67 An xarray.DataArray or Dataset with a time coordinate with the same name as the *arr* used in fit_cycle. 68 69 *smooth* 70 71 An integer specifying the width of the rolling mean to apply in terms of timesteps. No checks are made that timesteps are evenly spaced! 72 """ 73 if data is None: 74 data=self.data 75 cycle=self.cycle_coeffs.sel({self.agg:getattr(data[self.dim].dt,self.agg).data}) 76 return self.smooth(cycle,smooth)
When data is None, creates a seasonal cycle DataArray the same shape as the input data used in fit_cycle. data can be a DataArray with a differing dim coordinate, but matching other coordinates. This can be used to fit a seasonal cycle on one dataset, but evaluate it on another, as long as the grid, levels, etc are the same. Returns The seasonal cycle as a DataArray A centred rolling mean can be optionally applied to the cycle.
Optional Arguments
data
An xarray.DataArray or Dataset with a time coordinate with the same name as the arr used in fit_cycle.
smooth
An integer specifying the width of the rolling mean to apply in terms of timesteps. No checks are made that timesteps are evenly spaced!
85class Agg_FlexiDeseasonaliser(Agg_Deseasonaliser): 86 """Modifies Agg_Deseasonaliser, with the ability 87 to handle custom summary funcs other than the mean. 88 summary_func must take an xarray.groupby object, and 89 accept a keyword 'dim'.""" 90 91 def fit_cycle(self,arr,summary_func,dim="time",agg="dayofyear"): 92 var=dim+"."+agg 93 self.data=arr 94 self.dim=dim 95 self.agg=agg 96 self.cycle_coeffs=summary_func(arr.groupby(var),dim=dim) 97 return
Modifies Agg_Deseasonaliser, with the ability to handle custom summary funcs other than the mean. summary_func must take an xarray.groupby object, and accept a keyword 'dim'.
91 def fit_cycle(self,arr,summary_func,dim="time",agg="dayofyear"): 92 var=dim+"."+agg 93 self.data=arr 94 self.dim=dim 95 self.agg=agg 96 self.cycle_coeffs=summary_func(arr.groupby(var),dim=dim) 97 return
Computes a seasonal cycle for arr by aggregating over the coordinate dim and then taking the mean. Default functionality is to compute the average by day of year. Seasonal cycle coefficients are stored in self.cycle_coeffs
Arguments
arr
An xarray.DataArray for which a seasonal cycle is to be computed.
Optional Arguments
dim
Defaults to 'time': the time-like dimension of arr along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex.
agg
Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'.
Inherited Members
100class Sinefit_Deseasonaliser(Agg_Deseasonaliser): 101 102 """ 103 Same functionalityy as Agg_Deseasonaliser but fits a number of sin modes 104 to the data using least squares fitting. This produces a smoother cycle than 105 aggregating but can be slow for large datasets. 106 107 """ 108 def _func_residual(self,p,x,t,N,period): 109 return x - self._evaluate_fit(t,p,N,period) 110 111 def _evaluate_fit(self,t,p,N,period): 112 ans=p[1] 113 for i in range(0,N): 114 ans+=p[2*i+2] * np.sin(2 * np.pi * (i+1)/period * t + p[2*i+3]) 115 return ans 116 117 def _lstsq_sine_fit(self,arr,t,N,period): 118 119 #Guess initial parameters 120 std=arr.std() 121 p=np.zeros(2*(N+1)) 122 for i in range(0,N): 123 p[2+2*i]=std/(i+1.0) 124 plsq=leastsq(self._func_residual,p,args=(arr,t,N,period)) 125 return plsq[0] 126 127 def fit_cycle(self,arr,dim="time",N=4,period=365.25): 128 """N sets the number of sine modes to fit as fractions of the period. i.e. the default of N=4 fits waves with periods of period, period/2, period/3 and period/4. period is a float setting the length of a year in days. 129 """ 130 dims=arr.dims 131 self.dim=dim 132 self.data=arr 133 self.N=N 134 self.period=period 135 t=xarr_times_to_ints(arr[dim]) 136 self.coeffs= xr.apply_ufunc( 137 self._lstsq_sine_fit, 138 arr, 139 input_core_dims=[[dim]], 140 output_core_dims=[["coeffs"]], 141 vectorize=True, 142 kwargs={"t":t,"N":N,"period":period}) 143 return 144 145 def evaluate_cycle(self,data=None): 146 """As for Agg_Deseasonaliser but without the option for further smoothing.""" 147 if data is None: 148 data=self.data 149 dims=data.dims 150 t=xarr_times_to_ints(data[self.dim]) 151 print(self.coeffs.shape) 152 cycle=np.array([[self._evaluate_fit(t,c2.data,self.N,self.period)\ 153 for c2 in c1] for c1 in np.atleast_3d(self.coeffs)]) 154 155 return data.transpose(...,"time").copy(data=cycle).transpose(*dims)
Same functionalityy as Agg_Deseasonaliser but fits a number of sin modes to the data using least squares fitting. This produces a smoother cycle than aggregating but can be slow for large datasets.
127 def fit_cycle(self,arr,dim="time",N=4,period=365.25): 128 """N sets the number of sine modes to fit as fractions of the period. i.e. the default of N=4 fits waves with periods of period, period/2, period/3 and period/4. period is a float setting the length of a year in days. 129 """ 130 dims=arr.dims 131 self.dim=dim 132 self.data=arr 133 self.N=N 134 self.period=period 135 t=xarr_times_to_ints(arr[dim]) 136 self.coeffs= xr.apply_ufunc( 137 self._lstsq_sine_fit, 138 arr, 139 input_core_dims=[[dim]], 140 output_core_dims=[["coeffs"]], 141 vectorize=True, 142 kwargs={"t":t,"N":N,"period":period}) 143 return
N sets the number of sine modes to fit as fractions of the period. i.e. the default of N=4 fits waves with periods of period, period/2, period/3 and period/4. period is a float setting the length of a year in days.
145 def evaluate_cycle(self,data=None): 146 """As for Agg_Deseasonaliser but without the option for further smoothing.""" 147 if data is None: 148 data=self.data 149 dims=data.dims 150 t=xarr_times_to_ints(data[self.dim]) 151 print(self.coeffs.shape) 152 cycle=np.array([[self._evaluate_fit(t,c2.data,self.N,self.period)\ 153 for c2 in c1] for c1 in np.atleast_3d(self.coeffs)]) 154 155 return data.transpose(...,"time").copy(data=cycle).transpose(*dims)
As for Agg_Deseasonaliser but without the option for further smoothing.