domino.categorical_analysis

 1import numpy as np
 2import xarray as xr
 3
 4def get_occurrence(states,state_combinations=None):
 5    
 6    if np.ndim(states[0])==0:
 7        states=np.atleast_2d(states)
 8
 9    states=np.array([a for b in states for a in b])
10    
11    if state_combinations is None:
12        state_combinations=np.unique(states)
13    K=len(state_combinations)
14    occ=np.zeros(K)
15
16    occ=np.array([sum(states==k) for k in state_combinations])
17    occ=occ/len(states)
18    
19    assert np.abs(np.sum(occ)-1)<1e-5
20    return occ
21
22def get_xr_occurrence(da,dim='time',s=None,coord_name='regime'):
23
24    da=da.dropna(dim)
25    if s is None:
26        states=np.unique(da)
27    elif type(s)==xr.Dataset:
28        states=s[da.name].values
29        states=states[~np.isnan(states)]
30    else:
31        states=s
32        
33    if len(da)==0:
34        occ=np.zeros_like(states)
35    else:
36        occ=get_occurrence(da,state_combinations=states)
37    
38    return xr.DataArray(data=occ,coords={coord_name:np.arange(len(occ))},attrs=da.attrs)
39
40
41#Takes a state sequence or list of state sequences and calculates a transition matrix.
42#states can be grouped with state_combinations, and the diag removed with exclude_diag.
43
44def get_transmat(states,state_combinations=None,exclude_diag=False,t=1):
45    
46    #Rest of function assumes a list of state sequences
47    if np.ndim(states[0])==0:
48        states=np.atleast_2d(states)
49        
50    #Data setup
51    if state_combinations is None:
52        state_combinations=np.unique([x for y in states for x in y])
53    K=len(state_combinations)
54    trans=np.zeros([K,K])
55
56    #We loop over transition matrix elements, using list comprehensions
57    #to unravel our list of sequences
58    for i,state1 in enumerate(state_combinations):
59        for j,state2 in enumerate(state_combinations):
60            trans[i,j]=np.sum([sum((np.isin(s[t:],state2))&(np.isin(s[:-t],state1)))\
61                               for s in states])/np.sum([sum(np.isin(s[:-t],state1)) for s in states])
62
63    if exclude_diag:
64        trans -= np.diag(trans)*np.eye(K)
65        trans /= trans.sum(axis=1)[:,None]
66    return trans    
67
68def synthetic_states_from_transmat(T,L,init_its=50,state_labs=None):
69    
70    if state_labs is None:
71        K=T.shape[0]
72        state_labs=np.arange(K)
73        
74    #get the occurrence distribution
75    init_T=T
76    for n in range(init_its):
77        init_T=np.matmul(init_T,T)
78    s0=np.digitize(np.random.rand(1).item(),np.cumsum(np.diagonal(init_T)))
79    
80    probs=np.random.rand(L)
81    
82    states=[s0]
83    Tcumsum=np.cumsum(T,axis=1)
84    for l in range(L):
85        states.append(np.digitize(probs[l],Tcumsum[states[-1]]).item())
86        
87    states=[state_labs[s] for s in states]
88    return np.array(states)
def get_occurrence(states, state_combinations=None):
 5def get_occurrence(states,state_combinations=None):
 6    
 7    if np.ndim(states[0])==0:
 8        states=np.atleast_2d(states)
 9
10    states=np.array([a for b in states for a in b])
11    
12    if state_combinations is None:
13        state_combinations=np.unique(states)
14    K=len(state_combinations)
15    occ=np.zeros(K)
16
17    occ=np.array([sum(states==k) for k in state_combinations])
18    occ=occ/len(states)
19    
20    assert np.abs(np.sum(occ)-1)<1e-5
21    return occ
def get_xr_occurrence(da, dim='time', s=None, coord_name='regime'):
23def get_xr_occurrence(da,dim='time',s=None,coord_name='regime'):
24
25    da=da.dropna(dim)
26    if s is None:
27        states=np.unique(da)
28    elif type(s)==xr.Dataset:
29        states=s[da.name].values
30        states=states[~np.isnan(states)]
31    else:
32        states=s
33        
34    if len(da)==0:
35        occ=np.zeros_like(states)
36    else:
37        occ=get_occurrence(da,state_combinations=states)
38    
39    return xr.DataArray(data=occ,coords={coord_name:np.arange(len(occ))},attrs=da.attrs)
def get_transmat(states, state_combinations=None, exclude_diag=False, t=1):
45def get_transmat(states,state_combinations=None,exclude_diag=False,t=1):
46    
47    #Rest of function assumes a list of state sequences
48    if np.ndim(states[0])==0:
49        states=np.atleast_2d(states)
50        
51    #Data setup
52    if state_combinations is None:
53        state_combinations=np.unique([x for y in states for x in y])
54    K=len(state_combinations)
55    trans=np.zeros([K,K])
56
57    #We loop over transition matrix elements, using list comprehensions
58    #to unravel our list of sequences
59    for i,state1 in enumerate(state_combinations):
60        for j,state2 in enumerate(state_combinations):
61            trans[i,j]=np.sum([sum((np.isin(s[t:],state2))&(np.isin(s[:-t],state1)))\
62                               for s in states])/np.sum([sum(np.isin(s[:-t],state1)) for s in states])
63
64    if exclude_diag:
65        trans -= np.diag(trans)*np.eye(K)
66        trans /= trans.sum(axis=1)[:,None]
67    return trans    
def synthetic_states_from_transmat(T, L, init_its=50, state_labs=None):
69def synthetic_states_from_transmat(T,L,init_its=50,state_labs=None):
70    
71    if state_labs is None:
72        K=T.shape[0]
73        state_labs=np.arange(K)
74        
75    #get the occurrence distribution
76    init_T=T
77    for n in range(init_its):
78        init_T=np.matmul(init_T,T)
79    s0=np.digitize(np.random.rand(1).item(),np.cumsum(np.diagonal(init_T)))
80    
81    probs=np.random.rand(L)
82    
83    states=[s0]
84    Tcumsum=np.cumsum(T,axis=1)
85    for l in range(L):
86        states.append(np.digitize(probs[l],Tcumsum[states[-1]]).item())
87        
88    states=[state_labs[s] for s in states]
89    return np.array(states)