diff --git a/numba_examples/principe_rpde.png b/numba_examples/principe_rpde.png new file mode 100644 index 0000000..9317932 Binary files /dev/null and b/numba_examples/principe_rpde.png differ diff --git a/numba_examples/rpde.ipynb b/numba_examples/rpde.ipynb index 1b87642..9539681 100644 --- a/numba_examples/rpde.ipynb +++ b/numba_examples/rpde.ipynb @@ -49,7 +49,10 @@ "\n", "_Loading the audio data, and embedding the audio time series_\n", " Our algorithm operates on time series\n", - "(here, it's audio, what a coincidence!).\n" + "(here, it's audio, what a coincidence!).\n", + "\n", + "Then, we'll create a special function that creates an embedding of several time\n", + "series for our audio data.\n" ], "metadata": { "collapsed": false, @@ -82,10 +85,12 @@ "outputs": [], "source": [ "def embed_time_series(data: np.ndarray, dim: int, tau: int):\n", - " \"\"\"This creates an special embedding for the input data. The\n", + " \"\"\"This creates a special embedding for the input data. The\n", " output shape of this function is (N, D) with\n", " N = len(data) - (dim - 1) * tau\n", - " D = dim\"\"\"\n", + " D = dim\n", + "\n", + " N is the number of time series in the embedding\"\"\"\n", " embed_points_nb = data.shape[0] - (dim - 1) * tau\n", " embed_mask = np.arange(embed_points_nb)[:, np.newaxis] + np.arange(dim)\n", " tau_offsets = np.arange(dim) * (tau - 1)\n", @@ -100,7 +105,7 @@ "outputs": [], "source": [ "# this creates an embedding of a slice of the time series\n", - "ts = embed_time_series(data[:2000], dim=4, tau=22)\n", + "ts = embed_time_series(data[:2000], dim=3, tau=22)\n", "print(ts.shape)" ] }, @@ -156,7 +161,14 @@ "as it outputs a value close to the actual value, but some race conditions make it\n", "undeterministic.\n", "3. The third one is a fixed parallelized version of the previous numba implementation,\n", - "fully adapted to work in parallel" + "fully adapted to work in parallel\n", + "\n", + "The goal of our RPDE algorithm is to find, for each one of the\n", + " times series contained in the embeddings:\n", + "- the first point to exit a sphere of radius epsilon (index k_0)\n", + "- the first point, after k_0, to re-enter the sphere (index k_1)\n", + "\n", + "![RPDE algorithm's basic principle](principe_rpde.png)" ], "metadata": { "collapsed": false, @@ -172,11 +184,12 @@ "source": [ "def plain_python_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n", " distances_histogram = np.zeros(len(ts))\n", - " epsilon = 0.25\n", " for i in tqdm(np.arange(len(ts))):\n", " # finding the first \"out of ball\" index\n", " first_out = len(ts) # security\n", " for j in np.arange(i + 1, len(ts)):\n", + " if 0 < t_max < j - i:\n", + " break\n", " d = norm(ts[i] - ts[j])\n", " if d > epsilon:\n", " first_out = j\n", @@ -184,6 +197,8 @@ "\n", " # finding the first \"back to the ball\" index\n", " for j in np.arange(first_out + 1, len(ts)):\n", + " if 0 < t_max < j - i:\n", + " break\n", " d = norm(ts[i] - ts[j])\n", " if d < epsilon:\n", " distances_histogram[j - i] += 1\n", @@ -213,13 +228,13 @@ "outputs": [], "source": [ "@jit(int32[:](float32[:,:], float32, int32), nopython=True)\n", - "def recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n", + "def numba_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n", " distances_histogram = np.zeros(len(ts), dtype=np.int32)\n", " for i in np.arange(len(ts)):\n", " # finding the first \"out of ball\" index\n", " first_out = len(ts) # security\n", " for j in np.arange(i + 1, len(ts)):\n", - " if t_max > 0 and j - i > t_max:\n", + " if 0 < t_max < j - i:\n", " break\n", " d = norm(ts[i] - ts[j])\n", " if d > epsilon:\n", @@ -250,7 +265,7 @@ " # finding the first \"out of ball\" index\n", " first_out = len(ts) # security\n", " for j in prange(i + 1, len(ts)):\n", - " if t_max > 0 and j - i > t_max:\n", + " if 0 < t_max < j - i:\n", " break\n", " d = norm(ts[i] - ts[j])\n", " if d > epsilon:\n", @@ -287,7 +302,7 @@ " # finding the first \"out of ball\" index\n", " first_out = len(ts) # security\n", " for j in np.arange(i + 1, len(ts)):\n", - " if t_max > 0 and j - i > t_max:\n", + " if 0 < t_max < j - i:\n", " break\n", " d = norm(ts[i] - ts[j])\n", " if d > epsilon:\n", @@ -296,7 +311,7 @@ "\n", " # finding the first \"back to the ball\" index\n", " for j in np.arange(first_out + 1, len(ts)):\n", - " if t_max > 0 and j - i > t_max:\n", + " if 0 < t_max < j - i:\n", " break\n", " d = norm(ts[i] - ts[j])\n", " if d < epsilon:\n", @@ -354,14 +369,34 @@ } ], "source": [ + "# taking a look at our histogram\n", + "\n", "distances_histogram = plain_python_recurrence_histogram(ts, 0.12, 10000)\n", "px.line(distances_histogram, x=range(len(distances_histogram)), y=distances_histogram)" ] }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# checking that values match\n", + "numba_histogram = numba_recurrence_histogram(ts, 0.12, 10000)\n", + "np.isclose(distances_histogram, numba_histogram)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, { "cell_type": "markdown", "source": [ - "### The ~~hour~~ seconds of truth" + "### The ~~hour~~ seconds of truth\n", + "\n", + "Let's now test the performance of each implementation." ], "metadata": { "collapsed": false