Demonstration¶
This is a demonstration of my work. This is neither the paper or the software. Simply a naive walk through of my program.
1. Generate time series data from a dynamic system¶
Setting up libraries¶
In [1]:
from collections import defaultdict
import plotly.graph_objects as go
import numpy as np
from tools.lorenz import Lorenz63
from tools.utils import iterate_solver, Runge_Kutta
%matplotlib widget
In [2]:
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
Here, I am using Lorenz system with parameter s
= 10, r
= 28, and b
= 8/3. This set of parameter gives all the solution bounded (do not escape to infinity). With initial condition: -1, 1, 18.4¶
In [3]:
x, t = iterate_solver(Runge_Kutta, Lorenz63, [-1., 1., 18.4], 0, 0.01, 50.)
fig = go.Figure()
fig.add_trace(go.Scatter(x=t, y=x[:, 0],mode='lines'))
fig.update_layout(
title="Lorenz Attractor Simulation",
xaxis_title="Time (t)",
yaxis_title="Variable State",
)
fig.show()
In [4]:
fig = go.Figure()
fig.add_trace(go.Scatter3d(x=x[:,0], y=x[:,1], z=x[:,2], mode='lines', line=dict(color='blue', width=2)))
fig.update_layout(
title="3D Trajectory of the Lorenz Attractor",
scene=dict(
xaxis_title='X Axis',
yaxis_title='Y Axis',
zaxis_title='Z Axis'
)
)
fig.show()
In [5]:
type(x), len(x)
Out[5]:
(numpy.ndarray, 5001)
making a gap¶
In [6]:
import copy
x_gapped = copy.deepcopy(x)
In The demonstration, I will fill in the gap of 100 losing data¶
In [7]:
x_gapped[2000:2100] = np.nan
len(x_gapped), len(t)
Out[7]:
(5001, 5001)
Checking everything¶
In [8]:
nan_count = np.sum(np.isnan(x_gapped)) / 3
nan_count
Out[8]:
100.0
In [9]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=t, y=x_gapped[:, 0],mode='lines', name="x"))
fig.add_trace(go.Scatter(x=t, y=x_gapped[:, 1],mode='lines', name="y"))
fig.add_trace(go.Scatter(x=t, y=x_gapped[:, 2],mode='lines', name="z"))
fig.update_layout(
title="Lorenz Attractor Simulation in separate coordinate",
xaxis_title="Time (t)",
yaxis_title="Variable State",
)
fig.show()
In [10]:
len(x_gapped[:, 0]), len(t), t.shape, x_gapped.shape, x_gapped[:, 0].shape
Out[10]:
(5001, 5001, (5001,), (5001, 3), (5001,))
In [11]:
ts = x_gapped[:, 0] # The time series data we will fill out
fig = go.Figure()
fig.add_trace(go.Scatter(x=t, y=ts, mode='lines', name="Time Series"))
fig.update_layout(
title="Time series data with gap",
xaxis_title="Time (t)",
yaxis_title="data",
)
fig.show()
In [12]:
len(ts)
Out[12]:
5001
Next, we extract (reconstructed vectors) vectors out from the time series. In this example, we use t = 10, m = 500¶
In [13]:
indices = np.arange(0, len(ts), 10)
In [14]:
len(indices) # the last one will be obmitted
Out[14]:
501
In [15]:
v_1_s = ts[indices]
In [16]:
indices2 = (indices + 1)
indices2 = indices2[:-1]
v_2_s = ts[indices2]
indices3 = (indices + 2)
indices3 = indices3[:-1]
v_3_s = ts[indices3]
v_1_s = v_1_s[:-1] # delete the last one as mentioned above
In [17]:
vectors = column_stack((v_1_s, v_2_s, v_3_s))
In [18]:
np.sum(np.isnan(vectors)) # 30 entries in total, indicating 10 vectors with all entry nan
Out[18]:
30
We get every points' distance between each other by this method¶
In [19]:
def get_dis_matrix(vectors):
dis_matrix = np.zeros((len(vectors), len(vectors)))
for m, i in enumerate(vectors):
for n, j in enumerate(vectors):
if any(isnan(i)) or any(isnan(j)):
dis_matrix[m, n] = np.inf
continue
if m == n:
dis_matrix[m, n] = np.inf
continue
dis_matrix[m, n] = linalg.norm(i - j)
return dis_matrix
In [20]:
vectors
Out[20]:
array([[-1. , -0.81455935, -0.65544343], [ 0.04204991, 0.08979239, 0.13369679], [ 0.44743763, 0.4918592 , 0.53934628], ..., [ 8.33262969, 7.44005897, 6.58650641], [ 1.67715964, 1.29532625, 0.95466352], [-0.74803086, -0.88332236, -1.01056978]])
In [21]:
dis_matrix = get_dis_matrix(vectors)
In [22]:
dis_matrix
Out[22]:
array([[ inf, 1.58948493, 2.2867723 , ..., 14.41119488, 3.76978054, 0.4408305 ], [ 1.58948493, inf, 0.70039163, ..., 12.82181282, 2.19109153, 1.69721122], [ 2.2867723 , 0.70039163, inf, ..., 12.12525791, 1.5265202 , 2.39217663], ..., [14.41119488, 12.82181282, 12.12525791, ..., inf, 10.66633377, 14.47247869], [ 3.76978054, 2.19109153, 1.5265202 , ..., 10.66633377, inf, 3.80659962], [ 0.4408305 , 1.69721122, 2.39217663, ..., 14.47247869, 3.80659962, inf]])
In [23]:
fig = go.Figure()
fig.add_trace(go.Scatter3d(x=vectors[:,0], y=vectors[:,1], z=vectors[:,2], mode='lines', line=dict(color='blue', width=2)))
fig.update_layout(
title="3D Trajectory of the reconstructed vector field from tiem series",
scene=dict(
xaxis_title='X Axis',
yaxis_title='Y Axis',
zaxis_title='Z Axis'
)
)
fig.show()
In [24]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=arange(len(vectors)), y=vectors[:, 0], mode='lines', name="First Entry"))
fig.add_trace(go.Scatter(x=arange(len(vectors)), y=vectors[:, 1], mode='lines', name="Second Entry"))
fig.add_trace(go.Scatter(x=arange(len(vectors)), y=vectors[:, 2], mode='lines', name="Third Entry"))
fig.update_layout(
title="Reconstructed Time Series in separate coordinate",
xaxis_title="Time (t)",
yaxis_title="data",
)
fig.show()
get_breaking_points
function filter out the index of last valid last_valid_v_index
vector before the gap and first valid gap after the gap next_valid_v_index
¶
In [25]:
def get_breaking_points(vectors):
contains_nan = np.array([np.any(np.isnan(vector)) for vector in vectors])
# Find the index of the first sub-array containing NaN
first_nan_index = np.argmax(contains_nan)
# Find the index of the last sub-array containing NaN
# np.max(np.where(contains_nan)[0]) provides the last index with True
last_nan_index = np.max(np.where(contains_nan)[0]) if np.any(contains_nan) else None
return first_nan_index, last_nan_index
In [26]:
first_nan_index, last_nan_index = get_breaking_points(vectors)
In [27]:
first_nan_index, last_nan_index, vectors[first_nan_index - 1], vectors[last_nan_index]
Out[27]:
(200, 209, array([10.2923708 , 9.59186354, 8.888964 ]), array([nan, nan, nan]))
In [28]:
last_valid_v_index = first_nan_index - 1
next_valid_v_index = last_nan_index + 1
l = next_valid_v_index - last_valid_v_index
In [29]:
dis_matrix[last_valid_v_index]
Out[29]:
array([18.08056936, 16.49282081, 15.7939515 , 14.55868094, 11.34621452, 3.43052589, 11.79431908, 6.01438878, 12.73234273, 20.94818707, 24.86383871, 29.75267702, 36.01171858, 37.21026881, 30.59424624, 25.00563872, 23.83610122, 26.26390284, 32.00495651, 38.13459794, 35.75209119, 27.64237263, 23.2567466 , 23.24113133, 26.63121593, 33.51724264, 39.465039 , 34.38130148, 25.54439079, 21.76719105, 22.09693809, 25.58942048, 32.99979166, 40.55240709, 35.3326563 , 24.86128511, 20.19643136, 19.68679589, 21.56120911, 26.61798336, 36.55200709, 42.62057083, 31.04727168, 20.35856243, 16.54192804, 15.10698036, 13.3708168 , 9.41853622, 2.00523035, 10.94161178, 2.31989196, 12.23303053, 18.25791743, 21.02389959, 24.85674441, 32.3251493 , 40.80054661, 36.25425601, 24.94958347, 19.65073596, 18.66078005, 19.66591821, 22.86181591, 30.46463897, 42.17574317, 39.15417754, 23.98101921, 16.46489889, 13.64168595, 10.81336668, 5.22913498, 5.15161899, 7.66375768, 4.16795814, 12.86291859, 15.59547376, 16.0239991 , 15.64694879, 14.41244364, 11.11674389, 3.17729555, 11.70768085, 5.53833598, 12.62055802, 20.55085186, 24.35611594, 29.24869482, 35.9405341 , 37.83587573, 30.92327281, 24.71965142, 23.17175753, 25.26581346, 30.82537073, 37.97263354, 37.19176902, 28.42150522, 22.91629196, 22.14071588, 24.70030845, 30.92379119, 39.06536237, 37.57863135, 27.32304831, 21.42751217, 20.37612536, 22.17620813, 27.26530673, 36.73890065, 41.63549124, 30.79544749, 21.10659863, 17.81247798, 17.18856607, 17.41483277, 18.33101341, 20.81666487, 27.4298308 , 41.19842924, 43.88240852, 24.8169096 , 14.02858279, 9.7258526 , 5.48910834, 1.85171646, 4.93422003, 0.47618983, 7.24470934, 9.7858495 , 8.4792974 , 3.93296847, 3.74213707, 4.82877463, 3.08195818, 9.37445275, 10.81519548, 8.7462075 , 3.39579953, 5.13106673, 4.83189743, 4.75583317, 10.99742948, 12.24527282, 10.36487619, 5.18198943, 4.57391401, 7.25270408, 3.46911857, 11.89633068, 14.46931655, 14.28218857, 12.27616809, 7.18595579, 4.21556997, 10.45860239, 2.82001436, 14.30966705, 18.52619592, 20.93305995, 24.98795277, 33.1410157 , 41.78447031, 35.36656065, 23.54379913, 18.45856095, 17.2445776 , 17.2410077 , 17.8387951 , 19.51079303, 24.0227167 , 35.0933664 , 46.75480502, 32.3468746 , 16.95697685, 11.08233305, 7.26637181, 2.15775066, 4.41833732, 2.48867857, 4.96931082, 9.23773035, 9.25154168, 5.88795026, 1.8108125 , 5.62125866, 0.32649134, 7.58234996, 10.61608452, 9.69694297, 5.48097519, 2.88094586, 6.35959362, 1.39832854, 9.40845611, 12.0309156 , 11.04057228, 6.9373703 , 2.30948902, 7.90863898, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, 19.56949075, 22.51116331, 28.977103 , 39.76487294, 40.69818854, 27.02092147, 18.62774962, 15.87997212, 14.44891064, 12.03115101, 6.43456176, 5.48260966, 9.99623352, 4.57427924, 15.22595501, 19.08484454, 21.69313561, 26.40550963, 35.24574493, 41.68617143, 32.65592245, 22.30124277, 18.47550174, 17.87972323, 18.62494655, 20.93985134, 26.84632211, 38.99385698, 43.98630488, 27.75409344, 16.61175408, 12.43464869, 9.00560543, 3.24443734, 5.49688812, 4.70759095, 5.23694946, 11.39145221, 12.58566806, 10.78059135, 5.73507046, 4.21425007, 7.88804558, 2.89866999, 12.0009825 , 14.96614079, 15.20423526, 14.10761803, 11.04226787, 3.82943611, 9.56597352, 6.79706532, 9.84240115, 18.04068814, 21.39949116, 25.23033865, 32.24167359, 39.91260855, 36.10935644, 25.86819569, 20.88593309, 20.32438181, 22.45773642, 28.05528625, 37.94259845, 41.20262357, 29.28515494, 20.29984894, 17.40690475, 16.76503674, 16.64257755, 16.600669 , 16.53205063, 16.34645068, 15.81021053, 14.2201692 , 9.50742248, 4.26329707, 15.80646829, 4.28023538, 20.25897429, 26.02120226, 29.69786471, 33.44091597, 34.85080735, 32.08305183, 28.53337912, 27.40424534, 29.07627739, 32.63478287, 35.17748573, 33.28771975, 29.20537714, 27.13206944, 28.07532576, 31.44428325, 35.01762125, 34.5383596 , 30.26181567, 27.13469508, 27.16101098, 30.02937248, 34.25981026, 35.60525122, 31.75445848, 27.5362695 , 26.43196632, 28.50632485, 32.86174373, 36.11794015, 33.63841055, 28.50725211, 26.02186248, 27.02137888, 30.91915234, 35.6461945 , 35.62707169, 30.25465449, 26.13423837, 25.74836631, 28.66479166, 33.91299953, 36.99774862, 32.91122688, 27.10380384, 24.9151955 , 26.41809422, 31.05116128, 36.61965194, 36.12014667, 29.456033 , 24.89953752, 24.53794515, 27.64745695, 33.7024342 , 38.16179272, 33.68697751, 26.46979375, 23.48633477, 24.48521084, 28.90225831, 36.00330966, 38.52332318, 31.11321904, 24.24650917, 22.33977164, 24.06337236, 29.27395217, 37.33249706, 38.94804653, 29.68449847, 22.63268207, 20.85088417, 22.31324018, 27.01664949, 35.88307236, 41.3141758 , 31.83329298, 22.14318745, 18.75136282, 18.48339324, 19.83604264, 23.58298113, 32.32214562, 43.77716527, 36.61291185, 21.68947382, 15.25394405, 12.44961123, 8.81837743, 2.30114429, 7.42215217, 3.69537204, 7.78562853, 13.47521277, 14.64425882, 13.76178741, 10.80366183, 3.95767508, 8.48916431, 6.87253782, 8.46708902, 16.54543998, 19.52168519, 22.28778349, 27.79818744, 37.43829589, 41.24176282, 29.90294146, 20.77680968, 17.81199084, 17.3391602 , 17.75659742, 19.12506184, 22.78161279, 31.98653165, 45.42467813, 36.96792781, 19.79075378, 12.62428922, 8.95643152, 4.07674805, 3.46445181, 4.70130264, 2.80926378, 9.01299518, 10.46484121, 8.35985475, 3.04301894, 5.12353727, 4.29155981, 4.93505304, 10.7267826 , 11.68841036, 9.43485352, 3.8048251 , 5.59765929, 5.67797317, 5.07405651, 12.02804483, 13.73156231, 12.71808658, 9.12926964, 2.06800021, 9.18991529, 3.21983193, 9.95030136, 15.83366222, 17.73083878, 19.16220175, 22.08831672, 29.0454814 , 41.15679652, 41.10185459, 25.28467028, 16.53687862, 13.26745675, 10.22702602, 4.5145592 , 5.40761174, 6.65233816, 4.64029353, 12.42568483, 14.63106522, 14.32076005, 12.23683718, 6.99581849, 4.62007072, 10.43730108, 3.40146298, 14.70638343, 18.84013898, 21.37785067, 25.78445912, 34.30260765, 41.7590872 , 33.85800851, 22.89343419, 18.55886648, 17.74331299, 18.27459731, 20.12379317, 24.93740256, 35.87583914, 45.40785929, 31.61613588, 17.98824093, 12.8056392 , 9.45917878, 4.1736582 , 4.36326807, 5.4878608 , 3.6315996 , 10.47539939, 12.14140616, 10.52673053, 5.66844971, 3.86836207, 7.51002832, 2.48763882, 11.33033828, 14.18345686, 14.02744603, 11.89619586, 6.54682528, 4.86812614, 9.78659248, 3.71108597, 14.352962 , 18.15546082])
In [30]:
closest_point_index = np.argmin(dis_matrix[last_valid_v_index])
In [31]:
closest_point_index, dis_matrix[last_valid_v_index, closest_point_index]
Out[31]:
(185, 0.3264913367868948)
In [32]:
vectors[closest_point_index], vectors[first_nan_index - 1], first_nan_index - 1, closest_point_index
Out[32]:
(array([10.19749426, 9.69550573, 9.18367289]), array([10.2923708 , 9.59186354, 8.888964 ]), 199, 185)
Functions for getting branches, forward and backward¶
The function get_branches_forward
is designed to get branch every r
step for (n_f
) times. Just like the charm above, we will form a tree structure-like branches rooted on (forward) and (backward). We call get_one_branch
in get_branches_forward
to finish storing one branch.
For later use, we not only store the branch in a tree-like structure from root to branches, we will also store a dictionary of every node to its parent. By doing this, one we found two closes point synchronously on two one forward and one backward branch, we can easily track back of where these nodes come from.
In [33]:
def get_one_branch(index: int, vectors: np.array, rest_steps: int, forward_branches_df: dict ,forward_branches_df_reverse, forward: bool = True):
# root_node = vectors[index]
# forward_branches_df[index]
# rest steps = l - j
# next_node
# /
# vectors[index]
# \
# next_node_2
step = 1 if forward else -1
curr_root = index
curr_child = curr_root + step
for i in range(rest_steps):
if curr_child >= len(vectors):
break
if np.any(isnan(vectors[curr_child])):
break
forward_branches_df[curr_root].add(curr_child)
forward_branches_df_reverse[curr_child] = curr_root
curr_root = curr_root + step
curr_child = curr_root + step
return forward_branches_df, forward_branches_df_reverse
def get_branches_forward(last_valid_v_index: int, next_valid_v_index: int, n_f: int, r: int, vectors: np.array, dis_matrix: np.array):
l = next_valid_v_index - last_valid_v_index - 1
forward_branches_df = defaultdict(set)
forward_branches_df_reverse = dict()
curr_node_index = last_valid_v_index
for i in range(n_f): # + 1 because zero included
if last_valid_v_index + (i * (1 + r)) >= next_valid_v_index:
break
jump_to_index = np.argmin(dis_matrix[curr_node_index])
forward_branches_df[curr_node_index].add(jump_to_index)
forward_branches_df_reverse[jump_to_index] = curr_node_index
forward_branches_df, forward_branches_df_reverse = get_one_branch(index = jump_to_index, vectors=vectors, rest_steps= l - (i * (r + 1)) - 1, forward_branches_df=forward_branches_df, forward = True, forward_branches_df_reverse = forward_branches_df_reverse)
curr_node_index = jump_to_index + r
return forward_branches_df, forward_branches_df_reverse
def get_branches_backward(last_valid_v_index: int, next_valid_v_index: int, n_b: int, r: int, vectors: np.array, dis_matrix: np.array):
l = next_valid_v_index - last_valid_v_index - 1
back_branches_df = defaultdict(set)
back_branches_df_reverse = dict()
curr_node_index = next_valid_v_index
for i in range(n_b): # + 1 because zero included
if last_valid_v_index + (i * (1 + r)) >= next_valid_v_index:
break
jump_to_index = np.argmin(dis_matrix[curr_node_index])
back_branches_df[curr_node_index].add(jump_to_index)
back_branches_df_reverse[jump_to_index] = curr_node_index
back_branches_df, back_branches_df_reverse = get_one_branch(index = jump_to_index, vectors=vectors, rest_steps= l - (i * (r + 1)) - 1, forward_branches_df=back_branches_df, forward_branches_df_reverse=back_branches_df_reverse, forward = False)
curr_node_index = jump_to_index - r
return back_branches_df, back_branches_df_reverse
In [34]:
forward_branches_df, forward_branches_df_reverse = get_branches_forward(last_valid_v_index=last_valid_v_index,
next_valid_v_index=next_valid_v_index,
n_f=5, r=2, vectors=vectors, dis_matrix=dis_matrix)
In [35]:
vectors[closest_point_index], vectors[first_nan_index - 1], last_valid_v_index, next_valid_v_index, next_valid_v_index - last_valid_v_index
Out[35]:
(array([10.19749426, 9.69550573, 9.18367289]), array([10.2923708 , 9.59186354, 8.888964 ]), 199, 210, 11)
In [36]:
forward_branches_df_reverse, forward_branches_df
Out[36]:
({185: 199, 186: 185, 187: 186, 188: 187, 189: 188, 190: 189, 191: 190, 192: 191, 193: 192, 194: 193, 416: 187, 417: 416, 418: 417, 419: 418, 420: 419, 421: 420, 422: 421, 243: 418, 244: 243, 245: 244, 246: 245, 139: 245}, defaultdict(set, {199: {185}, 185: {186}, 186: {187}, 187: {188, 416}, 188: {189}, 189: {190}, 190: {191}, 191: {192}, 192: {193}, 193: {194}, 416: {417}, 417: {418}, 418: {243, 419}, 419: {420}, 420: {421}, 421: {422}, 243: {244}, 244: {245}, 245: {139, 246}}))
In [37]:
backward_branches_df, back_branches_df_reverse = get_branches_backward(last_valid_v_index=last_valid_v_index,
next_valid_v_index=next_valid_v_index,
n_b=5, r=2, vectors=vectors, dis_matrix=dis_matrix)
In [38]:
back_branches_df_reverse, backward_branches_df
Out[38]:
({393: 210, 392: 393, 391: 392, 390: 391, 389: 390, 388: 389, 387: 388, 386: 387, 385: 386, 384: 385, 383: 391, 382: 383, 381: 382, 380: 381, 379: 380, 378: 379, 377: 378, 198: 381, 197: 198, 196: 197, 195: 196, 456: 196}, defaultdict(set, {210: {393}, 393: {392}, 392: {391}, 391: {383, 390}, 390: {389}, 389: {388}, 388: {387}, 387: {386}, 386: {385}, 385: {384}, 383: {382}, 382: {381}, 381: {198, 380}, 380: {379}, 379: {378}, 378: {377}, 198: {197}, 197: {196}, 196: {195, 456}}))
In [39]:
def tree_to_layers(tree, queue):
layers = []
# queue = [("root", 0)] # Queue of tuples (node, layer_index)
while queue:
current_node, layer = queue.pop(0)
# Ensure the layer exists in the layers list
if len(layers) <= layer:
layers.append([])
# Append the current node to its respective layer
layers[layer].append(current_node)
# Enqueue all children of the current node
for child in tree.get(current_node, []):
queue.append((child, layer + 1))
return layers
In [40]:
forward_layer = tree_to_layers(forward_branches_df, queue = [(199, 0)])
backward_layer = tree_to_layers(backward_branches_df, queue = [(210, 0)])
In [41]:
forward_layer, len(forward_layer), backward_layer, len(backward_layer)
Out[41]:
([[199], [185], [186], [187], [416, 188], [417, 189], [418, 190], [419, 243, 191], [420, 244, 192], [421, 245, 193], [422, 139, 246, 194]], 11, [[210], [393], [392], [391], [390, 383], [389, 382], [388, 381], [387, 380, 198], [386, 379, 197], [385, 378, 196], [384, 377, 456, 195]], 11)
In [42]:
def get_closest_points_layer(vectors, forward_layer, backward_layer):
l = len(forward_layer) - 1
closest_dis = float('inf')
closest_forward_index = 0
closest_forward_index_sub = 0
closest_backward_index_sub = 0
for i in range(l):
forward_vectors = forward_layer[i]
backward_vectors = backward_layer[l - i]
forward_sub_index, backward_sub_index, dis = get_closest_points_one_layer(vectors, forward_vectors, backward_vectors)
if dis < closest_dis:
closest_dis = dis
closest_forward_index = i
closest_forward_index_sub = forward_sub_index
closest_backward_index_sub = backward_sub_index
return closest_dis, closest_forward_index, closest_forward_index_sub, closest_backward_index_sub
def get_closest_points_one_layer(vectors, forward_vectors, backward_vectors):
min_dis = float('inf')
forward_sub_index = 0
backward_sub_index = 0
for n, i in enumerate(forward_vectors):
for m, j in enumerate(backward_vectors):
curr_dis = linalg.norm(vectors[i] - vectors[j])
if curr_dis < min_dis:
min_dis = curr_dis
forward_sub_index = n
backward_sub_index = m
return forward_sub_index, backward_sub_index, min_dis
In [43]:
closest_dis, closest_forward_index, closest_forward_index_sub, closest_backward_index_sub = get_closest_points_layer(vectors=vectors, forward_layer=forward_layer, backward_layer=backward_layer)
closest_dis, closest_forward_index, closest_forward_index_sub, closest_backward_index_sub
Out[43]:
(0.46095557331336956, 3, 0, 0)
In [44]:
linalg.norm(vectors[387] - vectors[187]), closest_forward_index
Out[44]:
(0.46095557331336956, 3)
This function generate a initial guess for filling vectors.¶
In [45]:
def get_gap_vector_index_list(closest_forward_index, closest_forward_index_sub, closest_backward_index_sub, forward_branches_df_reverse, backward_branches_df_reverse, forward_layer, backward_layer, l:int):
forward_vector_index = forward_layer[closest_forward_index][closest_forward_index_sub]
backward_vector_index = backward_layer[l - closest_forward_index][closest_backward_index_sub]
vector_index_list = list()
curr_backward_vector_index = backward_vector_index
curr_forward_vector_index = forward_vector_index
for i in range(closest_forward_index):
vector_index_list.insert(0, curr_forward_vector_index)
curr_forward_vector_index = forward_branches_df_reverse[curr_forward_vector_index]
for i in range(l - closest_forward_index):
vector_index_list.append(curr_backward_vector_index)
curr_backward_vector_index = backward_branches_df_reverse[curr_backward_vector_index]
return vector_index_list
In [46]:
gap_vector_index_list = get_gap_vector_index_list(closest_forward_index=closest_forward_index, closest_forward_index_sub=closest_forward_index_sub, closest_backward_index_sub=closest_backward_index_sub, forward_layer=forward_layer, backward_layer=backward_layer, forward_branches_df_reverse=forward_branches_df_reverse, backward_branches_df_reverse=back_branches_df_reverse, l = next_valid_v_index - last_valid_v_index - 1)
In [47]:
gap_vector_index_list, len(gap_vector_index_list)
Out[47]:
([185, 186, 187, 387, 388, 389, 390, 391, 392, 393], 10)
In [48]:
vector_list = [vectors[i] for i in gap_vector_index_list]
In [49]:
vector_list = row_stack(vector_list)
vector_list
Out[49]:
array([[10.19749426, 9.69550573, 9.18367289], [ 5.54997895, 5.21396194, 4.90955097], [ 3.53373263, 3.47802345, 3.44442361], [ 3.14215157, 3.39585566, 3.67331826], [ 6.97908427, 7.55251047, 8.1634549 ], [13.78651416, 14.40624893, 14.9452472 ], [14.32184219, 13.59056173, 12.76266367], [ 5.44333402, 4.68294492, 3.98172886], [ 0.30680013, 0.03603931, -0.20642966], [-1.53792912, -1.6702225 , -1.80241638]])
In [50]:
fig = go.Figure()
fig.add_trace(go.Scatter3d(x=vector_list[:,0], y=vector_list[:,1], z=vector_list[:,2], mode='lines', line=dict(color='blue', width=2)))
fig.update_layout(
title="3D Trajectory of the filling vectors",
scene=dict(
xaxis_title='X Axis',
yaxis_title='Y Axis',
zaxis_title='Z Axis'
)
)
fig.show()
In [51]:
vector_sample = vectors[190:220]
fig = go.Figure()
fig.add_trace(go.Scatter3d(x=vector_sample[:,0], y=vector_sample[:,1], z=vector_sample[:,2], mode='lines', line=dict(color='blue', width=2)))
fig.update_layout(
title="3D Trajectory sampled vectors",
scene=dict(
xaxis_title='X Axis',
yaxis_title='Y Axis',
zaxis_title='Z Axis'
)
)
fig.show()
In [52]:
vector_draw = np.vstack([vectors[199], vector_list, vectors[210]])
fig = go.Figure()
fig.add_trace(go.Scatter(x=arange(len(vectors)), y=vectors[:, 0], mode='lines', name="Time Series"))
fig.add_trace(go.Scatter(x=arange(199, 211), y=vector_draw[:, 0], mode='lines', name="Time Series"))
fig.update_layout(
title="Time series data with gap",
xaxis_title="Time (t)",
yaxis_title="data",
)
fig.show()
In [53]:
vectors[209], len(vector_list)
Out[53]:
(array([nan, nan, nan]), 10)
In this part, we introduce a measure to quantify how smooth our initial guess is. Then, we will use a optimizer to find the minimum of the function. By doing this, we find the best filling vectors.¶
In [54]:
def get_distance_list(vector, vectors):
dis_list = np.zeros(len(vectors))
for n, j in enumerate(vectors):
if any(isnan(j)):
dis_list[n] = np.inf
continue
dis_list[n] = linalg.norm(vector - j)
return dis_list
def F_j_minus_one_half(j: int, vector_list, vectors, t):
# vector_list is the gapped vector
# closes to x_j
# second close to x_j
# predecesor of both
x_j = vector_list[j]
dis_list = get_distance_list(vector=x_j, vectors=vectors)
closest_x_j_index = np.argmin(dis_list)
while closest_x_j_index - 1 < 0 or any(isnan(vectors[closest_x_j_index - 1])):
dis_list[closest_x_j_index] = np.inf
closest_x_j_index = np.argmin(dis_list)
x_j_bar = vectors[closest_x_j_index]
p_x_j_bar = vectors[closest_x_j_index - 1]
dis_list[closest_x_j_index] = np.inf
sec_closest_x_j_index = np.argmin(dis_list)
while sec_closest_x_j_index - 1 < 0 or any(isnan(vectors[sec_closest_x_j_index - 1])):
dis_list[sec_closest_x_j_index] = np.inf
sec_closest_x_j_index = np.argmin(dis_list)
x_j_bar_bar = vectors[sec_closest_x_j_index]
p_x_j_bar_bar = vectors[sec_closest_x_j_index - 1]
delta_t = t[1] - t[0]
return (x_j_bar - p_x_j_bar + x_j_bar_bar - p_x_j_bar_bar) / (2 * delta_t)
F_j_minus_one_half(1, vector_list[:, 0], vectors, t)
Out[54]:
array([-97.48662606, -88.24742037, -76.2015698 ])
In [55]:
dis_list = zeros_like(len(vectors))
In [56]:
def J1(vector_list, vectors, t):
w = vector_list
l = len(w)
sum_squares = 0
for j in range(l):
delta_w = (w[j] - w[j-1]) / t[j + 1] - t[j]
F_value = F_j_minus_one_half(j=j, vectors=vectors, vector_list=vector_list, t=t)
sum_squares += linalg.norm(delta_w - F_value) ** 2
return sum_squares
# This is for storing some intermidate steps. We can check how the trajactory being smoothed.
def objective_function_to_minimize(v_f):
vector_list = v_f.reshape((10, 3))
return J1(vector_list, vectors, t)
In [57]:
from scipy.optimize import minimize
optimization_steps = []
# Callback function to store each step
def record_step(x):
optimization_steps.append(x.copy()) # Use x.copy() to avoid overwriting
result = minimize(objective_function_to_minimize, vector_list.flatten(), callback=record_step, method='L-BFGS-B')
In [58]:
objective_function_to_minimize(vector_list)
Out[58]:
9807457.787389794
In [59]:
result.x
Out[59]:
array([ 5.05551444, 4.8266988 , 4.6199308 , 3.71559817, 3.33777618, 3.01817514, 3.0326533 , 3.00760192, 3.00107178, 2.99669559, 3.22070579, 3.46935596, 6.81363322, 7.43705029, 8.1118388 , 14.36624821, 15.09203264, 15.72522276, 14.68743542, 13.81981325, 12.85225815, 5.24925082, 4.49102369, 3.79571783, 0.36018015, 0.07813449, -0.17586045, 5.44578309, 5.07061376, 4.63751934])
In [60]:
vector_list
Out[60]:
array([[10.19749426, 9.69550573, 9.18367289], [ 5.54997895, 5.21396194, 4.90955097], [ 3.53373263, 3.47802345, 3.44442361], [ 3.14215157, 3.39585566, 3.67331826], [ 6.97908427, 7.55251047, 8.1634549 ], [13.78651416, 14.40624893, 14.9452472 ], [14.32184219, 13.59056173, 12.76266367], [ 5.44333402, 4.68294492, 3.98172886], [ 0.30680013, 0.03603931, -0.20642966], [-1.53792912, -1.6702225 , -1.80241638]])
In [61]:
result.fun
Out[61]:
3671046.651039092
In [62]:
result_array = result.x.reshape((10, 3))
intermediate_vectors = [i.reshape((10, 3)) for i in optimization_steps]
In [63]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=arange(len(vector_list)), y=vector_list[:, 0],mode='lines', name="initial guess"))
fig.add_trace(go.Scatter(x=arange(len(intermediate_vectors[0])), y=intermediate_vectors[0][:, 0],mode='lines', name="intermediate step 1"))
fig.add_trace(go.Scatter(x=arange(len(intermediate_vectors[1])), y=intermediate_vectors[1][:, 0],mode='lines', name="intermediate step 2"))
fig.add_trace(go.Scatter(x=arange(len(intermediate_vectors[2])), y=intermediate_vectors[2][:, 0],mode='lines', name="intermediate step 3"))
fig.add_trace(go.Scatter(x=arange(len(vector_list)), y=result_array[:, 0],mode='lines', name="final curve"))
fig.update_layout(
title="Intermediate Steps",
xaxis_title="Time (t)",
yaxis_title="Filling Vectors",
)
fig.show()
To see clearly, zoom in the 150 ~ 250 area.¶
In [65]:
# fig = plt.figure()
# # plot(arange(len(vectors)), vectors[:, 0])
vector_list_draw = np.vstack([vectors[199], vector_list, vectors[210]])
result_draw = np.vstack([vectors[199], result_array, vectors[210]])
result_draw_intermediate = [np.vstack([vectors[199], i, vectors[210]]) for i in intermediate_vectors]
# plot(arange(199, 211), result_draw[:, 0])
original_data = x[:, 0]
original_ts = original_data[indices][: -1]
fig = go.Figure()
fig.add_trace(go.Scatter(x=arange(len(vectors)), y=vectors[:, 0],mode='lines', name="Gapped Data Set"))
fig.add_trace(go.Scatter(x=arange(len(original_ts)), y=original_ts,mode='lines', name="Original Data Set"))
fig.add_trace(go.Scatter(x=arange(199, 211), y=result_draw[:, 0],mode='lines', name="final curve"))
fig.add_trace(go.Scatter(x=arange(199, 211), y=vector_list_draw[:, 0],mode='lines', name="initial guess"))
fig.add_trace(go.Scatter(x=arange(199, 211), y=result_draw_intermediate[0][:, 0],mode='lines', name="intermediate step 1"))
fig.add_trace(go.Scatter(x=arange(199, 211), y=result_draw_intermediate[1][:, 0],mode='lines', name="intermediate step 2"))
fig.add_trace(go.Scatter(x=arange(199, 211), y=result_draw_intermediate[2][:, 0],mode='lines', name="intermediate step 3"))
# fig.add_trace(go.Scatter(x=arange(len(vector_list)), y=result_array[:, 0],mode='lines', name="final curve"))
fig.update_layout(
title="Intermediate Steps",
xaxis_title="Time (t)",
yaxis_title="Filling Vectors",
)
fig.show()
In [ ]: