Mostly done with the RPDE notebook.

6 years ago · 0a7c32d7f4
2 changed files with 47 additions and 12 deletions
--- a/numba_examples/principe_rpde.png
+++ b/numba_examples/principe_rpde.png
--- a/numba_examples/rpde.ipynb
+++ b/numba_examples/rpde.ipynb
@ -49,7 +49,10 @@
    "\n",
    "_Loading the audio data, and embedding the audio time series_\n",
    " Our algorithm operates on time series\n",
-    "(here, it's audio, what a coincidence!).\n"
+    "(here, it's audio, what a coincidence!).\n",
+    "\n",
+    "Then, we'll create a special function that creates an embedding of several time\n",
+    "series for our audio data.\n"
   ],
   "metadata": {
    "collapsed": false,
@ -82,10 +85,12 @@
   "outputs": [],
   "source": [
    "def embed_time_series(data: np.ndarray, dim: int, tau: int):\n",
-    "    \"\"\"This creates an special embedding for the input data. The\n",
+    "    \"\"\"This creates a special embedding for the input data. The\n",
    "    output shape of this function is (N, D) with\n",
    "    N = len(data) - (dim - 1) * tau\n",
-    "    D = dim\"\"\"\n",
+    "    D = dim\n",
+    "\n",
+    "    N is the number of time series in the embedding\"\"\"\n",
    "    embed_points_nb = data.shape[0] - (dim - 1) * tau\n",
    "    embed_mask = np.arange(embed_points_nb)[:, np.newaxis] + np.arange(dim)\n",
    "    tau_offsets = np.arange(dim) * (tau - 1)\n",
@ -100,7 +105,7 @@
   "outputs": [],
   "source": [
    "# this creates an embedding of a slice of the time series\n",
-    "ts = embed_time_series(data[:2000], dim=4, tau=22)\n",
+    "ts = embed_time_series(data[:2000], dim=3, tau=22)\n",
    "print(ts.shape)"
   ]
  },
@ -156,7 +161,14 @@
    "as it outputs a value close to the actual value, but some race conditions make it\n",
    "undeterministic.\n",
    "3. The third one is a fixed parallelized version of the previous numba implementation,\n",
-    "fully adapted to work in parallel"
+    "fully adapted to work in parallel\n",
+    "\n",
+    "The goal of our RPDE algorithm is to find, for each one of the\n",
+    " times series contained in the embeddings:\n",
+    "- the first point to exit a sphere of radius epsilon (index k_0)\n",
+    "- the first point, after k_0, to re-enter the sphere (index k_1)\n",
+    "\n",
+    "![RPDE algorithm's basic principle](principe_rpde.png)"
   ],
   "metadata": {
    "collapsed": false,
@ -172,11 +184,12 @@
   "source": [
    "def plain_python_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n",
    "    distances_histogram = np.zeros(len(ts))\n",
-    "    epsilon = 0.25\n",
    "    for i in tqdm(np.arange(len(ts))):\n",
    "        # finding the first \"out of ball\" index\n",
    "        first_out = len(ts) # security\n",
    "        for j in np.arange(i + 1, len(ts)):\n",
+    "            if 0 < t_max < j - i:\n",
+    "                break\n",
    "            d = norm(ts[i] - ts[j])\n",
    "            if d > epsilon:\n",
    "                first_out = j\n",
@ -184,6 +197,8 @@
    "\n",
    "        # finding the first \"back to the ball\" index\n",
    "        for j in np.arange(first_out + 1, len(ts)):\n",
+    "            if 0 < t_max < j - i:\n",
+    "                break\n",
    "            d = norm(ts[i] - ts[j])\n",
    "            if d < epsilon:\n",
    "                distances_histogram[j - i] += 1\n",
@ -213,13 +228,13 @@
   "outputs": [],
   "source": [
    "@jit(int32[:](float32[:,:], float32, int32), nopython=True)\n",
-    "def recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n",
+    "def numba_recurrence_histogram(ts: np.ndarray, epsilon: float, t_max: int):\n",
    "    distances_histogram = np.zeros(len(ts), dtype=np.int32)\n",
    "    for i in np.arange(len(ts)):\n",
    "        # finding the first \"out of ball\" index\n",
    "        first_out = len(ts) # security\n",
    "        for j in np.arange(i + 1, len(ts)):\n",
-    "            if t_max > 0 and j - i > t_max:\n",
+    "            if 0 < t_max < j - i:\n",
    "                break\n",
    "            d = norm(ts[i] - ts[j])\n",
    "            if d > epsilon:\n",
@ -250,7 +265,7 @@
    "        # finding the first \"out of ball\" index\n",
    "        first_out = len(ts) # security\n",
    "        for j in prange(i + 1, len(ts)):\n",
-    "            if t_max > 0 and j - i > t_max:\n",
+    "            if 0 < t_max < j - i:\n",
    "                break\n",
    "            d = norm(ts[i] - ts[j])\n",
    "            if d > epsilon:\n",
@ -287,7 +302,7 @@
    "        # finding the first \"out of ball\" index\n",
    "        first_out = len(ts) # security\n",
    "        for j in np.arange(i + 1, len(ts)):\n",
-    "            if t_max > 0 and j - i > t_max:\n",
+    "            if 0 < t_max < j - i:\n",
    "                break\n",
    "            d = norm(ts[i] - ts[j])\n",
    "            if d > epsilon:\n",
@ -296,7 +311,7 @@
    "\n",
    "        # finding the first \"back to the ball\" index\n",
    "        for j in np.arange(first_out + 1, len(ts)):\n",
-    "            if t_max > 0 and j - i > t_max:\n",
+    "            if 0 < t_max < j - i:\n",
    "                break\n",
    "            d = norm(ts[i] - ts[j])\n",
    "            if d < epsilon:\n",
@ -354,14 +369,34 @@
    }
   ],
   "source": [
+    "# taking a look at our histogram\n",
+    "\n",
    "distances_histogram = plain_python_recurrence_histogram(ts, 0.12, 10000)\n",
    "px.line(distances_histogram, x=range(len(distances_histogram)), y=distances_histogram)"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "# checking that values match\n",
+    "numba_histogram = numba_recurrence_histogram(ts, 0.12, 10000)\n",
+    "np.isclose(distances_histogram, numba_histogram)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
  {
   "cell_type": "markdown",
   "source": [
-    "### The ~~hour~~ seconds of truth"
+    "### The ~~hour~~ seconds of truth\n",
+    "\n",
+    "Let's now test the performance of each implementation."
   ],
   "metadata": {
    "collapsed": false