Browse Source

Mostly done with the RPDE notebook.

master
hadware 6 years ago
parent
commit
0a7c32d7f4
  1. BIN
      numba_examples/principe_rpde.png
  2. 59
      numba_examples/rpde.ipynb

BIN
numba_examples/principe_rpde.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

59
numba_examples/rpde.ipynb

@ -49,7 +49,10 @@
"\n", "\n",
"_Loading the audio data, and embedding the audio time series_\n", "_Loading the audio data, and embedding the audio time series_\n",
" Our algorithm operates on time series\n", " Our algorithm operates on time series\n",
"(here, it's audio, what a coincidence!).\n" "(here, it's audio, what a coincidence!).\n",
"\n",
"Then, we'll create a special function that creates an embedding of several time\n",
"series for our audio data.\n"
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
@ -82,10 +85,12 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def embed_time_series(data: np.ndarray, dim: int, tau: int):\n", "def embed_time_series(data: np.ndarray, dim: int, tau: int):\n",
" \"\"\"This creates an special embedding for the input data. The\n", " \"\"\"This creates a special embedding for the input data. The\n",
" output shape of this function is (N, D) with\n", " output shape of this function is (N, D) with\n",
" N = len(data) - (dim - 1) * tau\n", " N = len(data) - (dim - 1) * tau\n",
" D = dim\"\"\"\n", " D = dim\n",
"\n",
" N is the number of time series in the embedding\"\"\"\n",
" embed_points_nb = data.shape[0] - (dim - 1) * tau\n", " embed_points_nb = data.shape[0] - (dim - 1) * tau\n",
" embed_mask = np.arange(embed_points_nb)[:, np.newaxis] + np.arange(dim)\n", " embed_mask = np.arange(embed_points_nb)[:, np.newaxis] + np.arange(dim)\n",
" tau_offsets = np.arange(dim) * (tau - 1)\n", " tau_offsets = np.arange(dim) * (tau - 1)\n",
@ -100,7 +105,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# this creates an embedding of a slice of the time series\n", "# this creates an embedding of a slice of the time series\n",
"ts = embed_time_series(data[:2000], dim=4, tau=22)\n", "ts = embed_time_series(data[:2000], dim=3, tau=22)\n",
"print(ts.shape)" "print(ts.shape)"
] ]
}, },
@ -156,7 +161,14 @@
"as it outputs a value close to the actual value, but some race conditions make it\n", "as it outputs a value close to the actual value, but some race conditions make it\n",
"undeterministic.\n", "undeterministic.\n",
"3. The third one is a fixed parallelized version of the previous numba implementation,\n", "3. The third one is a fixed parallelized version of the previous numba implementation,\n",
"fully adapted to work in parallel" "fully adapted to work in parallel\n",
"\n",
"The goal of our RPDE algorithm is to find, for each one of the\n",
" times series contained in the embeddings:\n",
"- the first point to exit a sphere of radius epsilon (index k_0)\n",
"- the first point, after k_0, to re-enter the sphere (index k_1)\n",
"\n",
"![RPDE algorithm's basic principle](principe_rpde.png)"
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
@ -172,11 +184,12 @@
"source": [ "source": [
"def plain_python_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n", "def plain_python_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n",
" distances_histogram = np.zeros(len(ts))\n", " distances_histogram = np.zeros(len(ts))\n",
" epsilon = 0.25\n",
" for i in tqdm(np.arange(len(ts))):\n", " for i in tqdm(np.arange(len(ts))):\n",
" # finding the first \"out of ball\" index\n", " # finding the first \"out of ball\" index\n",
" first_out = len(ts) # security\n", " first_out = len(ts) # security\n",
" for j in np.arange(i + 1, len(ts)):\n", " for j in np.arange(i + 1, len(ts)):\n",
" if 0 < t_max < j - i:\n",
" break\n",
" d = norm(ts[i] - ts[j])\n", " d = norm(ts[i] - ts[j])\n",
" if d > epsilon:\n", " if d > epsilon:\n",
" first_out = j\n", " first_out = j\n",
@ -184,6 +197,8 @@
"\n", "\n",
" # finding the first \"back to the ball\" index\n", " # finding the first \"back to the ball\" index\n",
" for j in np.arange(first_out + 1, len(ts)):\n", " for j in np.arange(first_out + 1, len(ts)):\n",
" if 0 < t_max < j - i:\n",
" break\n",
" d = norm(ts[i] - ts[j])\n", " d = norm(ts[i] - ts[j])\n",
" if d < epsilon:\n", " if d < epsilon:\n",
" distances_histogram[j - i] += 1\n", " distances_histogram[j - i] += 1\n",
@ -213,13 +228,13 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"@jit(int32[:](float32[:,:], float32, int32), nopython=True)\n", "@jit(int32[:](float32[:,:], float32, int32), nopython=True)\n",
"def recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n", "def numba_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n",
" distances_histogram = np.zeros(len(ts), dtype=np.int32)\n", " distances_histogram = np.zeros(len(ts), dtype=np.int32)\n",
" for i in np.arange(len(ts)):\n", " for i in np.arange(len(ts)):\n",
" # finding the first \"out of ball\" index\n", " # finding the first \"out of ball\" index\n",
" first_out = len(ts) # security\n", " first_out = len(ts) # security\n",
" for j in np.arange(i + 1, len(ts)):\n", " for j in np.arange(i + 1, len(ts)):\n",
" if t_max > 0 and j - i > t_max:\n", " if 0 < t_max < j - i:\n",
" break\n", " break\n",
" d = norm(ts[i] - ts[j])\n", " d = norm(ts[i] - ts[j])\n",
" if d > epsilon:\n", " if d > epsilon:\n",
@ -250,7 +265,7 @@
" # finding the first \"out of ball\" index\n", " # finding the first \"out of ball\" index\n",
" first_out = len(ts) # security\n", " first_out = len(ts) # security\n",
" for j in prange(i + 1, len(ts)):\n", " for j in prange(i + 1, len(ts)):\n",
" if t_max > 0 and j - i > t_max:\n", " if 0 < t_max < j - i:\n",
" break\n", " break\n",
" d = norm(ts[i] - ts[j])\n", " d = norm(ts[i] - ts[j])\n",
" if d > epsilon:\n", " if d > epsilon:\n",
@ -287,7 +302,7 @@
" # finding the first \"out of ball\" index\n", " # finding the first \"out of ball\" index\n",
" first_out = len(ts) # security\n", " first_out = len(ts) # security\n",
" for j in np.arange(i + 1, len(ts)):\n", " for j in np.arange(i + 1, len(ts)):\n",
" if t_max > 0 and j - i > t_max:\n", " if 0 < t_max < j - i:\n",
" break\n", " break\n",
" d = norm(ts[i] - ts[j])\n", " d = norm(ts[i] - ts[j])\n",
" if d > epsilon:\n", " if d > epsilon:\n",
@ -296,7 +311,7 @@
"\n", "\n",
" # finding the first \"back to the ball\" index\n", " # finding the first \"back to the ball\" index\n",
" for j in np.arange(first_out + 1, len(ts)):\n", " for j in np.arange(first_out + 1, len(ts)):\n",
" if t_max > 0 and j - i > t_max:\n", " if 0 < t_max < j - i:\n",
" break\n", " break\n",
" d = norm(ts[i] - ts[j])\n", " d = norm(ts[i] - ts[j])\n",
" if d < epsilon:\n", " if d < epsilon:\n",
@ -354,14 +369,34 @@
} }
], ],
"source": [ "source": [
"# taking a look at our histogram\n",
"\n",
"distances_histogram = plain_python_recurrence_histogram(ts, 0.12, 10000)\n", "distances_histogram = plain_python_recurrence_histogram(ts, 0.12, 10000)\n",
"px.line(distances_histogram, x=range(len(distances_histogram)), y=distances_histogram)" "px.line(distances_histogram, x=range(len(distances_histogram)), y=distances_histogram)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# checking that values match\n",
"numba_histogram = numba_recurrence_histogram(ts, 0.12, 10000)\n",
"np.isclose(distances_histogram, numba_histogram)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [ "source": [
"### The ~~hour~~ seconds of truth" "### The ~~hour~~ seconds of truth\n",
"\n",
"Let's now test the performance of each implementation."
], ],
"metadata": { "metadata": {
"collapsed": false "collapsed": false

Loading…
Cancel
Save