domino.deseasonaliser

  1import numpy as np
  2import xarray as xr
  3import cftime
  4import pandas as pd
  5from scipy.optimize import leastsq
  6import os
  7from domino.util import xarr_times_to_ints
  8
  9class Agg_Deseasonaliser(object):
 10    """
 11    Computes a seasonal cycle from an xarray dataset based on an aggregation
 12    criteria i.e. month, day of year, week of year etc.
 13    
 14    """
 15    def __init__(self):
 16        
 17        self.data=None
 18        """@private"""
 19        
 20        self.cycle_coeffs=None
 21        """@private"""
 22
 23    def fit_cycle(self,arr,dim="time",agg="dayofyear"):
 24        """
 25        Computes a seasonal cycle for *arr* by aggregating over the coordinate *dim* and then taking the mean. Default functionality is to compute the average by day of year.
 26        Seasonal cycle coefficients are stored in self.cycle_coeffs
 27        
 28        **Arguments**
 29        
 30        *arr*
 31        
 32        An xarray.DataArray for which a seasonal cycle is to be computed.
 33        
 34        **Optional Arguments**
 35        
 36        *dim*
 37        
 38        Defaults to 'time': the time-like dimension of *arr* along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex.
 39        
 40        *agg*
 41        
 42        Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'.
 43        
 44        
 45        """
 46        var=dim+"."+agg
 47        self.data=arr
 48        self.dim=dim
 49        self.agg=agg
 50        self.cycle_coeffs=arr.groupby(var).mean(dim=dim)
 51        return
 52
 53    def evaluate_cycle(self,data=None,smooth=1):
 54        
 55        """
 56        
 57        When data is None, creates a seasonal cycle DataArray the same shape as the input data used in fit_cycle.
 58        data can be a DataArray with a differing dim coordinate, but matching other coordinates. 
 59        This can be used to fit a seasonal cycle on one dataset, but evaluate it on  another, as long as the grid, levels, etc are the same.
 60        Returns The seasonal cycle as a DataArray A centred rolling mean can be optionally applied to the cycle.
 61        
 62        **Optional Arguments**
 63        
 64        *data*
 65        
 66        An xarray.DataArray or Dataset with a time coordinate with the same name as the *arr* used in fit_cycle.
 67        
 68        *smooth*
 69        
 70        An integer specifying the width of the rolling mean to apply in terms of timesteps. No checks are made that timesteps are evenly spaced!
 71        """
 72        if data is None:
 73            data=self.data
 74        cycle=self.cycle_coeffs.sel({self.agg:getattr(data[self.dim].dt,self.agg).data})
 75        return self.smooth(cycle,smooth)
 76    
 77    def smooth(self,arr,w):
 78        """@private"""
 79        if w==1:
 80            return arr
 81        else:
 82            return arr.rolling({self.agg:w},min_periods=1,center=True).mean()
 83        
 84class Agg_FlexiDeseasonaliser(Agg_Deseasonaliser):
 85    """Modifies Agg_Deseasonaliser, with the ability
 86    to handle custom summary funcs other than the mean.
 87    summary_func must take an xarray.groupby object, and
 88    accept a keyword 'dim'."""
 89    
 90    def fit_cycle(self,arr,summary_func,dim="time",agg="dayofyear"):
 91        var=dim+"."+agg
 92        self.data=arr
 93        self.dim=dim
 94        self.agg=agg
 95        self.cycle_coeffs=summary_func(arr.groupby(var),dim=dim)
 96        return
 97
 98    
 99class Sinefit_Deseasonaliser(Agg_Deseasonaliser):
100
101    """
102    Same functionalityy as Agg_Deseasonaliser but fits a number of sin modes
103    to the data using least squares fitting. This produces a smoother cycle than
104    aggregating but can be slow for large datasets.
105    
106    """
107    def _func_residual(self,p,x,t,N,period):
108        return x - self._evaluate_fit(t,p,N,period)
109
110    def _evaluate_fit(self,t,p,N,period):
111        ans=p[1]
112        for i in range(0,N):
113            ans+=p[2*i+2] * np.sin(2 * np.pi * (i+1)/period * t + p[2*i+3])
114        return ans
115
116    def _lstsq_sine_fit(self,arr,t,N,period):
117
118        #Guess initial parameters
119        std=arr.std()
120        p=np.zeros(2*(N+1))
121        for i in range(0,N):
122            p[2+2*i]=std/(i+1.0)
123        plsq=leastsq(self._func_residual,p,args=(arr,t,N,period))
124        return plsq[0]
125
126    def fit_cycle(self,arr,dim="time",N=4,period=365.25):
127        """N sets the number of sine modes to fit as fractions of the period. i.e. the default of N=4 fits waves with periods of period, period/2, period/3 and period/4. period is a float setting the length of a year in days.
128        """
129        dims=arr.dims
130        self.dim=dim
131        self.data=arr
132        self.N=N
133        self.period=period
134        t=xarr_times_to_ints(arr[dim])
135        self.coeffs= xr.apply_ufunc(
136            self._lstsq_sine_fit,
137            arr,
138            input_core_dims=[[dim]],
139            output_core_dims=[["coeffs"]],
140            vectorize=True,
141            kwargs={"t":t,"N":N,"period":period})
142        return
143
144    def evaluate_cycle(self,data=None):
145        """As for Agg_Deseasonaliser but without the option for further smoothing."""
146        if data is None:
147            data=self.data
148        dims=data.dims
149        t=xarr_times_to_ints(data[self.dim])
150        print(self.coeffs.shape)
151        cycle=np.array([[self._evaluate_fit(t,c2.data,self.N,self.period)\
152                for c2 in c1] for c1 in np.atleast_3d(self.coeffs)])
153
154        return data.transpose(...,"time").copy(data=cycle).transpose(*dims)
class Agg_Deseasonaliser:
10class Agg_Deseasonaliser(object):
11    """
12    Computes a seasonal cycle from an xarray dataset based on an aggregation
13    criteria i.e. month, day of year, week of year etc.
14    
15    """
16    def __init__(self):
17        
18        self.data=None
19        """@private"""
20        
21        self.cycle_coeffs=None
22        """@private"""
23
24    def fit_cycle(self,arr,dim="time",agg="dayofyear"):
25        """
26        Computes a seasonal cycle for *arr* by aggregating over the coordinate *dim* and then taking the mean. Default functionality is to compute the average by day of year.
27        Seasonal cycle coefficients are stored in self.cycle_coeffs
28        
29        **Arguments**
30        
31        *arr*
32        
33        An xarray.DataArray for which a seasonal cycle is to be computed.
34        
35        **Optional Arguments**
36        
37        *dim*
38        
39        Defaults to 'time': the time-like dimension of *arr* along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex.
40        
41        *agg*
42        
43        Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'.
44        
45        
46        """
47        var=dim+"."+agg
48        self.data=arr
49        self.dim=dim
50        self.agg=agg
51        self.cycle_coeffs=arr.groupby(var).mean(dim=dim)
52        return
53
54    def evaluate_cycle(self,data=None,smooth=1):
55        
56        """
57        
58        When data is None, creates a seasonal cycle DataArray the same shape as the input data used in fit_cycle.
59        data can be a DataArray with a differing dim coordinate, but matching other coordinates. 
60        This can be used to fit a seasonal cycle on one dataset, but evaluate it on  another, as long as the grid, levels, etc are the same.
61        Returns The seasonal cycle as a DataArray A centred rolling mean can be optionally applied to the cycle.
62        
63        **Optional Arguments**
64        
65        *data*
66        
67        An xarray.DataArray or Dataset with a time coordinate with the same name as the *arr* used in fit_cycle.
68        
69        *smooth*
70        
71        An integer specifying the width of the rolling mean to apply in terms of timesteps. No checks are made that timesteps are evenly spaced!
72        """
73        if data is None:
74            data=self.data
75        cycle=self.cycle_coeffs.sel({self.agg:getattr(data[self.dim].dt,self.agg).data})
76        return self.smooth(cycle,smooth)
77    
78    def smooth(self,arr,w):
79        """@private"""
80        if w==1:
81            return arr
82        else:
83            return arr.rolling({self.agg:w},min_periods=1,center=True).mean()

Computes a seasonal cycle from an xarray dataset based on an aggregation criteria i.e. month, day of year, week of year etc.

def fit_cycle(self, arr, dim='time', agg='dayofyear'):
24    def fit_cycle(self,arr,dim="time",agg="dayofyear"):
25        """
26        Computes a seasonal cycle for *arr* by aggregating over the coordinate *dim* and then taking the mean. Default functionality is to compute the average by day of year.
27        Seasonal cycle coefficients are stored in self.cycle_coeffs
28        
29        **Arguments**
30        
31        *arr*
32        
33        An xarray.DataArray for which a seasonal cycle is to be computed.
34        
35        **Optional Arguments**
36        
37        *dim*
38        
39        Defaults to 'time': the time-like dimension of *arr* along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex.
40        
41        *agg*
42        
43        Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'.
44        
45        
46        """
47        var=dim+"."+agg
48        self.data=arr
49        self.dim=dim
50        self.agg=agg
51        self.cycle_coeffs=arr.groupby(var).mean(dim=dim)
52        return

Computes a seasonal cycle for arr by aggregating over the coordinate dim and then taking the mean. Default functionality is to compute the average by day of year. Seasonal cycle coefficients are stored in self.cycle_coeffs

Arguments

arr

An xarray.DataArray for which a seasonal cycle is to be computed.

Optional Arguments

dim

Defaults to 'time': the time-like dimension of arr along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex.

agg

Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'.

def evaluate_cycle(self, data=None, smooth=1):
54    def evaluate_cycle(self,data=None,smooth=1):
55        
56        """
57        
58        When data is None, creates a seasonal cycle DataArray the same shape as the input data used in fit_cycle.
59        data can be a DataArray with a differing dim coordinate, but matching other coordinates. 
60        This can be used to fit a seasonal cycle on one dataset, but evaluate it on  another, as long as the grid, levels, etc are the same.
61        Returns The seasonal cycle as a DataArray A centred rolling mean can be optionally applied to the cycle.
62        
63        **Optional Arguments**
64        
65        *data*
66        
67        An xarray.DataArray or Dataset with a time coordinate with the same name as the *arr* used in fit_cycle.
68        
69        *smooth*
70        
71        An integer specifying the width of the rolling mean to apply in terms of timesteps. No checks are made that timesteps are evenly spaced!
72        """
73        if data is None:
74            data=self.data
75        cycle=self.cycle_coeffs.sel({self.agg:getattr(data[self.dim].dt,self.agg).data})
76        return self.smooth(cycle,smooth)

When data is None, creates a seasonal cycle DataArray the same shape as the input data used in fit_cycle. data can be a DataArray with a differing dim coordinate, but matching other coordinates. This can be used to fit a seasonal cycle on one dataset, but evaluate it on another, as long as the grid, levels, etc are the same. Returns The seasonal cycle as a DataArray A centred rolling mean can be optionally applied to the cycle.

Optional Arguments

data

An xarray.DataArray or Dataset with a time coordinate with the same name as the arr used in fit_cycle.

smooth

An integer specifying the width of the rolling mean to apply in terms of timesteps. No checks are made that timesteps are evenly spaced!

class Agg_FlexiDeseasonaliser(Agg_Deseasonaliser):
85class Agg_FlexiDeseasonaliser(Agg_Deseasonaliser):
86    """Modifies Agg_Deseasonaliser, with the ability
87    to handle custom summary funcs other than the mean.
88    summary_func must take an xarray.groupby object, and
89    accept a keyword 'dim'."""
90    
91    def fit_cycle(self,arr,summary_func,dim="time",agg="dayofyear"):
92        var=dim+"."+agg
93        self.data=arr
94        self.dim=dim
95        self.agg=agg
96        self.cycle_coeffs=summary_func(arr.groupby(var),dim=dim)
97        return

Modifies Agg_Deseasonaliser, with the ability to handle custom summary funcs other than the mean. summary_func must take an xarray.groupby object, and accept a keyword 'dim'.

def fit_cycle(self, arr, summary_func, dim='time', agg='dayofyear'):
91    def fit_cycle(self,arr,summary_func,dim="time",agg="dayofyear"):
92        var=dim+"."+agg
93        self.data=arr
94        self.dim=dim
95        self.agg=agg
96        self.cycle_coeffs=summary_func(arr.groupby(var),dim=dim)
97        return

Computes a seasonal cycle for arr by aggregating over the coordinate dim and then taking the mean. Default functionality is to compute the average by day of year. Seasonal cycle coefficients are stored in self.cycle_coeffs

Arguments

arr

An xarray.DataArray for which a seasonal cycle is to be computed.

Optional Arguments

dim

Defaults to 'time': the time-like dimension of arr along which the seasonal cycle is to be computed. Values of the coordinate must be a Pandas.DatetimeIndex.

agg

Defaults to 'dayofyear': The time parameter along which aggregation will be carried out. This can be any valid string which specifies a property of Pandas.DatetimeIndex. Likely alternate choices are 'month','day' and 'quarter'.

class Sinefit_Deseasonaliser(Agg_Deseasonaliser):
100class Sinefit_Deseasonaliser(Agg_Deseasonaliser):
101
102    """
103    Same functionalityy as Agg_Deseasonaliser but fits a number of sin modes
104    to the data using least squares fitting. This produces a smoother cycle than
105    aggregating but can be slow for large datasets.
106    
107    """
108    def _func_residual(self,p,x,t,N,period):
109        return x - self._evaluate_fit(t,p,N,period)
110
111    def _evaluate_fit(self,t,p,N,period):
112        ans=p[1]
113        for i in range(0,N):
114            ans+=p[2*i+2] * np.sin(2 * np.pi * (i+1)/period * t + p[2*i+3])
115        return ans
116
117    def _lstsq_sine_fit(self,arr,t,N,period):
118
119        #Guess initial parameters
120        std=arr.std()
121        p=np.zeros(2*(N+1))
122        for i in range(0,N):
123            p[2+2*i]=std/(i+1.0)
124        plsq=leastsq(self._func_residual,p,args=(arr,t,N,period))
125        return plsq[0]
126
127    def fit_cycle(self,arr,dim="time",N=4,period=365.25):
128        """N sets the number of sine modes to fit as fractions of the period. i.e. the default of N=4 fits waves with periods of period, period/2, period/3 and period/4. period is a float setting the length of a year in days.
129        """
130        dims=arr.dims
131        self.dim=dim
132        self.data=arr
133        self.N=N
134        self.period=period
135        t=xarr_times_to_ints(arr[dim])
136        self.coeffs= xr.apply_ufunc(
137            self._lstsq_sine_fit,
138            arr,
139            input_core_dims=[[dim]],
140            output_core_dims=[["coeffs"]],
141            vectorize=True,
142            kwargs={"t":t,"N":N,"period":period})
143        return
144
145    def evaluate_cycle(self,data=None):
146        """As for Agg_Deseasonaliser but without the option for further smoothing."""
147        if data is None:
148            data=self.data
149        dims=data.dims
150        t=xarr_times_to_ints(data[self.dim])
151        print(self.coeffs.shape)
152        cycle=np.array([[self._evaluate_fit(t,c2.data,self.N,self.period)\
153                for c2 in c1] for c1 in np.atleast_3d(self.coeffs)])
154
155        return data.transpose(...,"time").copy(data=cycle).transpose(*dims)

Same functionalityy as Agg_Deseasonaliser but fits a number of sin modes to the data using least squares fitting. This produces a smoother cycle than aggregating but can be slow for large datasets.

def fit_cycle(self, arr, dim='time', N=4, period=365.25):
127    def fit_cycle(self,arr,dim="time",N=4,period=365.25):
128        """N sets the number of sine modes to fit as fractions of the period. i.e. the default of N=4 fits waves with periods of period, period/2, period/3 and period/4. period is a float setting the length of a year in days.
129        """
130        dims=arr.dims
131        self.dim=dim
132        self.data=arr
133        self.N=N
134        self.period=period
135        t=xarr_times_to_ints(arr[dim])
136        self.coeffs= xr.apply_ufunc(
137            self._lstsq_sine_fit,
138            arr,
139            input_core_dims=[[dim]],
140            output_core_dims=[["coeffs"]],
141            vectorize=True,
142            kwargs={"t":t,"N":N,"period":period})
143        return

N sets the number of sine modes to fit as fractions of the period. i.e. the default of N=4 fits waves with periods of period, period/2, period/3 and period/4. period is a float setting the length of a year in days.

def evaluate_cycle(self, data=None):
145    def evaluate_cycle(self,data=None):
146        """As for Agg_Deseasonaliser but without the option for further smoothing."""
147        if data is None:
148            data=self.data
149        dims=data.dims
150        t=xarr_times_to_ints(data[self.dim])
151        print(self.coeffs.shape)
152        cycle=np.array([[self._evaluate_fit(t,c2.data,self.N,self.period)\
153                for c2 in c1] for c1 in np.atleast_3d(self.coeffs)])
154
155        return data.transpose(...,"time").copy(data=cycle).transpose(*dims)

As for Agg_Deseasonaliser but without the option for further smoothing.