|
|
|
|
@ -49,7 +49,10 @@
|
|
|
|
|
"\n", |
|
|
|
|
"_Loading the audio data, and embedding the audio time series_\n", |
|
|
|
|
" Our algorithm operates on time series\n", |
|
|
|
|
"(here, it's audio, what a coincidence!).\n" |
|
|
|
|
"(here, it's audio, what a coincidence!).\n", |
|
|
|
|
"\n", |
|
|
|
|
"Then, we'll create a special function that creates an embedding of several time\n", |
|
|
|
|
"series for our audio data.\n" |
|
|
|
|
], |
|
|
|
|
"metadata": { |
|
|
|
|
"collapsed": false, |
|
|
|
|
@ -82,10 +85,12 @@
|
|
|
|
|
"outputs": [], |
|
|
|
|
"source": [ |
|
|
|
|
"def embed_time_series(data: np.ndarray, dim: int, tau: int):\n", |
|
|
|
|
" \"\"\"This creates an special embedding for the input data. The\n", |
|
|
|
|
" \"\"\"This creates a special embedding for the input data. The\n", |
|
|
|
|
" output shape of this function is (N, D) with\n", |
|
|
|
|
" N = len(data) - (dim - 1) * tau\n", |
|
|
|
|
" D = dim\"\"\"\n", |
|
|
|
|
" D = dim\n", |
|
|
|
|
"\n", |
|
|
|
|
" N is the number of time series in the embedding\"\"\"\n", |
|
|
|
|
" embed_points_nb = data.shape[0] - (dim - 1) * tau\n", |
|
|
|
|
" embed_mask = np.arange(embed_points_nb)[:, np.newaxis] + np.arange(dim)\n", |
|
|
|
|
" tau_offsets = np.arange(dim) * (tau - 1)\n", |
|
|
|
|
@ -100,7 +105,7 @@
|
|
|
|
|
"outputs": [], |
|
|
|
|
"source": [ |
|
|
|
|
"# this creates an embedding of a slice of the time series\n", |
|
|
|
|
"ts = embed_time_series(data[:2000], dim=4, tau=22)\n", |
|
|
|
|
"ts = embed_time_series(data[:2000], dim=3, tau=22)\n", |
|
|
|
|
"print(ts.shape)" |
|
|
|
|
] |
|
|
|
|
}, |
|
|
|
|
@ -156,7 +161,14 @@
|
|
|
|
|
"as it outputs a value close to the actual value, but some race conditions make it\n", |
|
|
|
|
"undeterministic.\n", |
|
|
|
|
"3. The third one is a fixed parallelized version of the previous numba implementation,\n", |
|
|
|
|
"fully adapted to work in parallel" |
|
|
|
|
"fully adapted to work in parallel\n", |
|
|
|
|
"\n", |
|
|
|
|
"The goal of our RPDE algorithm is to find, for each one of the\n", |
|
|
|
|
" times series contained in the embeddings:\n", |
|
|
|
|
"- the first point to exit a sphere of radius epsilon (index k_0)\n", |
|
|
|
|
"- the first point, after k_0, to re-enter the sphere (index k_1)\n", |
|
|
|
|
"\n", |
|
|
|
|
"" |
|
|
|
|
], |
|
|
|
|
"metadata": { |
|
|
|
|
"collapsed": false, |
|
|
|
|
@ -172,11 +184,12 @@
|
|
|
|
|
"source": [ |
|
|
|
|
"def plain_python_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n", |
|
|
|
|
" distances_histogram = np.zeros(len(ts))\n", |
|
|
|
|
" epsilon = 0.25\n", |
|
|
|
|
" for i in tqdm(np.arange(len(ts))):\n", |
|
|
|
|
" # finding the first \"out of ball\" index\n", |
|
|
|
|
" first_out = len(ts) # security\n", |
|
|
|
|
" for j in np.arange(i + 1, len(ts)):\n", |
|
|
|
|
" if 0 < t_max < j - i:\n", |
|
|
|
|
" break\n", |
|
|
|
|
" d = norm(ts[i] - ts[j])\n", |
|
|
|
|
" if d > epsilon:\n", |
|
|
|
|
" first_out = j\n", |
|
|
|
|
@ -184,6 +197,8 @@
|
|
|
|
|
"\n", |
|
|
|
|
" # finding the first \"back to the ball\" index\n", |
|
|
|
|
" for j in np.arange(first_out + 1, len(ts)):\n", |
|
|
|
|
" if 0 < t_max < j - i:\n", |
|
|
|
|
" break\n", |
|
|
|
|
" d = norm(ts[i] - ts[j])\n", |
|
|
|
|
" if d < epsilon:\n", |
|
|
|
|
" distances_histogram[j - i] += 1\n", |
|
|
|
|
@ -213,13 +228,13 @@
|
|
|
|
|
"outputs": [], |
|
|
|
|
"source": [ |
|
|
|
|
"@jit(int32[:](float32[:,:], float32, int32), nopython=True)\n", |
|
|
|
|
"def recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n", |
|
|
|
|
"def numba_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n", |
|
|
|
|
" distances_histogram = np.zeros(len(ts), dtype=np.int32)\n", |
|
|
|
|
" for i in np.arange(len(ts)):\n", |
|
|
|
|
" # finding the first \"out of ball\" index\n", |
|
|
|
|
" first_out = len(ts) # security\n", |
|
|
|
|
" for j in np.arange(i + 1, len(ts)):\n", |
|
|
|
|
" if t_max > 0 and j - i > t_max:\n", |
|
|
|
|
" if 0 < t_max < j - i:\n", |
|
|
|
|
" break\n", |
|
|
|
|
" d = norm(ts[i] - ts[j])\n", |
|
|
|
|
" if d > epsilon:\n", |
|
|
|
|
@ -250,7 +265,7 @@
|
|
|
|
|
" # finding the first \"out of ball\" index\n", |
|
|
|
|
" first_out = len(ts) # security\n", |
|
|
|
|
" for j in prange(i + 1, len(ts)):\n", |
|
|
|
|
" if t_max > 0 and j - i > t_max:\n", |
|
|
|
|
" if 0 < t_max < j - i:\n", |
|
|
|
|
" break\n", |
|
|
|
|
" d = norm(ts[i] - ts[j])\n", |
|
|
|
|
" if d > epsilon:\n", |
|
|
|
|
@ -287,7 +302,7 @@
|
|
|
|
|
" # finding the first \"out of ball\" index\n", |
|
|
|
|
" first_out = len(ts) # security\n", |
|
|
|
|
" for j in np.arange(i + 1, len(ts)):\n", |
|
|
|
|
" if t_max > 0 and j - i > t_max:\n", |
|
|
|
|
" if 0 < t_max < j - i:\n", |
|
|
|
|
" break\n", |
|
|
|
|
" d = norm(ts[i] - ts[j])\n", |
|
|
|
|
" if d > epsilon:\n", |
|
|
|
|
@ -296,7 +311,7 @@
|
|
|
|
|
"\n", |
|
|
|
|
" # finding the first \"back to the ball\" index\n", |
|
|
|
|
" for j in np.arange(first_out + 1, len(ts)):\n", |
|
|
|
|
" if t_max > 0 and j - i > t_max:\n", |
|
|
|
|
" if 0 < t_max < j - i:\n", |
|
|
|
|
" break\n", |
|
|
|
|
" d = norm(ts[i] - ts[j])\n", |
|
|
|
|
" if d < epsilon:\n", |
|
|
|
|
@ -354,14 +369,34 @@
|
|
|
|
|
} |
|
|
|
|
], |
|
|
|
|
"source": [ |
|
|
|
|
"# taking a look at our histogram\n", |
|
|
|
|
"\n", |
|
|
|
|
"distances_histogram = plain_python_recurrence_histogram(ts, 0.12, 10000)\n", |
|
|
|
|
"px.line(distances_histogram, x=range(len(distances_histogram)), y=distances_histogram)" |
|
|
|
|
] |
|
|
|
|
}, |
|
|
|
|
{ |
|
|
|
|
"cell_type": "code", |
|
|
|
|
"execution_count": null, |
|
|
|
|
"outputs": [], |
|
|
|
|
"source": [ |
|
|
|
|
"# checking that values match\n", |
|
|
|
|
"numba_histogram = numba_recurrence_histogram(ts, 0.12, 10000)\n", |
|
|
|
|
"np.isclose(distances_histogram, numba_histogram)" |
|
|
|
|
], |
|
|
|
|
"metadata": { |
|
|
|
|
"collapsed": false, |
|
|
|
|
"pycharm": { |
|
|
|
|
"name": "#%%\n" |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
}, |
|
|
|
|
{ |
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
"source": [ |
|
|
|
|
"### The ~~hour~~ seconds of truth" |
|
|
|
|
"### The ~~hour~~ seconds of truth\n", |
|
|
|
|
"\n", |
|
|
|
|
"Let's now test the performance of each implementation." |
|
|
|
|
], |
|
|
|
|
"metadata": { |
|
|
|
|
"collapsed": false |
|
|
|
|
|