Added some examples to 09

lyubolp · lyubolp · commit 9dea882f94a6 · 2023-11-29T23:41:17.000+02:00
diff --git a/09 - Multithreading/09 - Multithreading.ipynb b/09 - Multithreading/09 - Multithreading.ipynb
@@ -1387,7 +1387,200 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "WIP"
+    "### Пример 1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Нека имаме функция, която умножава две матрици. \n",
+    "\n",
+    "Нека напишем функция, която умножава две матрици паралелно. Нека използваме `multiprocessing` библиотеката за целта."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "def timeit():\n",
+    "    \"\"\"Build a decorator that prints how long each call of the wrapped function takes.\n",
+    "\n",
+    "    Returns:\n",
+    "        A decorator. The decorated function prints its elapsed time in seconds\n",
+    "        (two decimal places) and then returns the wrapped function's result unchanged.\n",
+    "    \"\"\"\n",
+    "    # Local import: functools is not imported at the top of this cell.\n",
+    "    from functools import wraps\n",
+    "\n",
+    "    def wrapper(func):\n",
+    "        @wraps(func)  # keep the wrapped function's __name__ and docstring\n",
+    "        def inner(*args, **kwargs):\n",
+    "            # perf_counter is monotonic, so clock adjustments cannot skew the measurement.\n",
+    "            start = time.perf_counter()\n",
+    "            result = func(*args, **kwargs)\n",
+    "            end = time.perf_counter()\n",
+    "            print(f'The function took {(end-start):.2f} seconds')\n",
+    "            return result\n",
+    "        return inner\n",
+    "    return wrapper\n",
+    "\n",
+    "\n",
+    "@timeit()\n",
+    "def multiply_matrix(a: list[list[int]], b: list[list[int]]) -> list[list[int]]:\n",
+    "    \"\"\"Multiply two matrices, given as lists of rows, in a single process.\n",
+    "\n",
+    "    Args:\n",
+    "        a: Left operand with m rows and n columns.\n",
+    "        b: Right operand with n rows and p columns.\n",
+    "\n",
+    "    Returns:\n",
+    "        The m x p product as a new list of rows; an empty list when `a` has no rows.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If a matrix has rows of different lengths, or if the number\n",
+    "            of columns of `a` differs from the number of rows of `b`.\n",
+    "    \"\"\"\n",
+    "    if not a:\n",
+    "        return []\n",
+    "\n",
+    "    n = len(a[0])\n",
+    "    p = len(b[0]) if b else 0\n",
+    "\n",
+    "    # zip() below stops at the shortest operand, so ragged rows must be rejected explicitly.\n",
+    "    if any(len(row) != n for row in a) or any(len(row) != p for row in b):\n",
+    "        raise ValueError(\"Every row of a matrix must have the same length\")\n",
+    "\n",
+    "    if n != len(b):\n",
+    "        raise ValueError(\"The matrixes cannot be multiplied\")\n",
+    "\n",
+    "    # Transpose b once so each column is built a single time instead of\n",
+    "    # being re-indexed for every cell of the result.\n",
+    "    columns_of_b = list(zip(*b))\n",
+    "\n",
+    "    return [[sum(x * y for x, y in zip(row, column)) for column in columns_of_b] for row in a]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Решение на пример 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multiprocessing import Array, Process\n",
+    "\n",
+    "@timeit()\n",
+    "def multithreaded_multiply_matrix(a: list[list[int]], b: list[list[int]]) -> list[list[int]]:\n",
+    "    \"\"\"Multiply two matrices in parallel using worker processes.\n",
+    "\n",
+    "    Despite the name, the work is done by `multiprocessing.Process` workers, not\n",
+    "    threads. The rows of the result are shared out between at most `os.cpu_count()`\n",
+    "    processes, which all write into one shared array.\n",
+    "\n",
+    "    Args:\n",
+    "        a: Left operand with m rows and n columns.\n",
+    "        b: Right operand with n rows and p columns.\n",
+    "\n",
+    "    Returns:\n",
+    "        The m x p product as a new list of rows; an empty list when `a` has no rows.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If the number of columns of `a` differs from the number of rows of `b`.\n",
+    "    \"\"\"\n",
+    "    # Local import: os is not imported at the top of this cell.\n",
+    "    from os import cpu_count\n",
+    "\n",
+    "    if not a:\n",
+    "        return []\n",
+    "\n",
+    "    m, n = len(a), len(a[0])\n",
+    "    p = len(b[0]) if b else 0\n",
+    "\n",
+    "    if n != len(b):\n",
+    "        raise ValueError(\"The matrixes cannot be multiplied\")\n",
+    "\n",
+    "    if p == 0:\n",
+    "        return [[] for _ in range(m)]\n",
+    "\n",
+    "    # 'q' is a signed 64-bit integer. ctypes does no overflow checking, so the\n",
+    "    # narrower 'i' would silently wrap around for large products.\n",
+    "    result = Array('q', m * p)\n",
+    "\n",
+    "    # One process per row exhausts OS resources on big inputs. Cap the number of\n",
+    "    # workers and give the worker that starts at row w the rows w, w + k, w + 2k, ...\n",
+    "    worker_count = min(m, cpu_count() or 1)\n",
+    "    workers = [\n",
+    "        Process(target=_multiply_rows, args=(result, n, p, range(first_row, m, worker_count), a, b))\n",
+    "        for first_row in range(worker_count)\n",
+    "    ]\n",
+    "\n",
+    "    for worker in workers:\n",
+    "        worker.start()\n",
+    "\n",
+    "    for worker in workers:\n",
+    "        worker.join()\n",
+    "\n",
+    "    # The shared array is flat; cut it back into m rows of p values.\n",
+    "    return [result[i*p:(i+1)*p] for i in range(m)]\n",
+    "\n",
+    "def _multiply_rows(shared_memory, n: int, p: int, rows, a: list[list[int]], b: list[list[int]]):\n",
+    "    \"\"\"Compute every result row listed in `rows` by delegating to `multiply_row`.\"\"\"\n",
+    "    for i in rows:\n",
+    "        multiply_row(shared_memory, n, p, i, a, b)\n",
+    "\n",
+    "def multiply_row(shared_memory: Array, n: int, p: int, i: int, a: list[list[int]], b: list[list[int]]):\n",
+    "    \"\"\"Fill row `i` of the flattened result stored in `shared_memory`.\n",
+    "\n",
+    "    Cell (i, j) lives at index `i * p + j` and receives the sum of\n",
+    "    `a[i][k] * b[k][j]` for k from 0 to n - 1.\n",
+    "    \"\"\"\n",
+    "    offset = i * p\n",
+    "    for j in range(p):\n",
+    "        total = 0\n",
+    "        for k in range(n):\n",
+    "            total += a[i][k] * b[k][j]\n",
+    "        shared_memory[offset + j] = total"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The function took 3.87 seconds\n",
+      "The function took 0.99 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "from random import randint, seed\n",
+    "\n",
+    "# Fixed seed so every run multiplies the same matrices.\n",
+    "seed(42)\n",
+    "\n",
+    "m, n, p = 400, 400, 400\n",
+    "value_range = (0, 10)\n",
+    "\n",
+    "a = [[randint(*value_range) for _ in range(n)] for _ in range(m)]\n",
+    "b = [[randint(*value_range) for _ in range(p)] for _ in range(n)]\n",
+    "\n",
+    "res_1 = multiply_matrix(a, b)\n",
+    "res_2 = multithreaded_multiply_matrix(a, b)\n",
+    "\n",
+    "# The timing comparison is only meaningful if both implementations agree.\n",
+    "assert res_1 == res_2, 'The sequential and parallel results differ'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Пример 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Нека напишем функция `parallel_file_search`, която приема път до входен файл, низ, по който търсим, брой на нишките, които ще използваме за търсенето, и път до изходен файл. Функцията трябва да запише редовете, които съдържат низа, в изходния файл.\n",
+    "\n",
+    "Файлът трябва да се раздели на `N` части, в които да се търси паралелно.\n",
+    "\n",
+    "Казваме, че един ред съдържа търсения низ, ако той се среща в него."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Решение на пример 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multiprocessing import Process, Semaphore\n",
+    "\n",
+    "def search_in_lines(lines: list[str], to_search: str, output_path: str, semaphore: Semaphore) -> list[str]:\n",
+    "    \"\"\"Return the lines that contain `to_search`, in their original order.\n",
+    "\n",
+    "    Args:\n",
+    "        lines: Lines to inspect.\n",
+    "        to_search: Text that a line must contain to be selected.\n",
+    "        output_path: Accepted for signature compatibility; not used here.\n",
+    "        semaphore: Accepted for signature compatibility; not used here.\n",
+    "\n",
+    "    Returns:\n",
+    "        A new list with the matching lines, unmodified.\n",
+    "    \"\"\"\n",
+    "    # `results` used to be an undefined name, so the first match raised NameError.\n",
+    "    results = [line for line in lines if to_search in line]\n",
+    "    return results\n",
+    "\n",
+    "@timeit()\n",
+    "def parallel_file_search(input_path: str, to_search: str, n: int, output_path: str) -> None:\n",
+    "    \"\"\"Write every line of `input_path` that contains `to_search` to `output_path`.\n",
+    "\n",
+    "    The input is split into at most `n` contiguous ranges of lines and each range\n",
+    "    is scanned by its own process. The output file is created (or emptied) first\n",
+    "    and the workers append to it one at a time, so the relative order of matches\n",
+    "    coming from different ranges is not guaranteed.\n",
+    "\n",
+    "    Args:\n",
+    "        input_path: File to search, read as UTF-8.\n",
+    "        to_search: Text that a line must contain to be written out.\n",
+    "        n: Maximum number of worker processes.\n",
+    "        output_path: File that receives the matching lines, written as UTF-8.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If `n` is smaller than 1.\n",
+    "    \"\"\"\n",
+    "    if n < 1:\n",
+    "        raise ValueError(\"n must be at least 1\")\n",
+    "\n",
+    "    with open(input_path, encoding='utf-8') as input_file_descriptor:\n",
+    "        # Count lazily instead of loading the whole file with readlines().\n",
+    "        amount_of_lines = sum(1 for _ in input_file_descriptor)\n",
+    "\n",
+    "    # Ceiling division: at most n regions are produced and none of them is empty.\n",
+    "    chunk_size = max(1, -(-amount_of_lines // n))\n",
+    "    regions = [(start, min(start + chunk_size, amount_of_lines)) for start in range(0, amount_of_lines, chunk_size)]\n",
+    "\n",
+    "    # Start from an empty output file; the workers only ever append to it.\n",
+    "    with open(output_path, encoding='utf-8', mode='w'):\n",
+    "        pass\n",
+    "\n",
+    "    # A semaphore with a single permit: only one worker may write at a time.\n",
+    "    semaphore = Semaphore(1)\n",
+    "\n",
+    "    procs = [Process(target=search_string_in_file, args=(to_search, input_path, region, output_path, semaphore)) for region in regions]\n",
+    "\n",
+    "    for proc in procs:\n",
+    "        proc.start()\n",
+    "\n",
+    "    for proc in procs:\n",
+    "        proc.join()\n",
+    "\n",
+    "def search_string_in_file(string: str, file_path: str, region: tuple[int, int], output_file_path: str, semaphore: Semaphore):\n",
+    "    \"\"\"Append the matching lines from one range of a file to the output file.\n",
+    "\n",
+    "    Args:\n",
+    "        string: Text to look for.\n",
+    "        file_path: File to read, as UTF-8.\n",
+    "        region: `(start, end)` zero-based line numbers; `start` is included, `end` is not.\n",
+    "        output_file_path: File that receives the matches, one per line, with\n",
+    "            surrounding whitespace stripped.\n",
+    "        semaphore: Guards the output file; held while this worker writes.\n",
+    "    \"\"\"\n",
+    "    results = []\n",
+    "    start, end = region\n",
+    "    with open(file_path, encoding='utf-8') as input_file_descriptor:\n",
+    "        for line_number, line in enumerate(input_file_descriptor):\n",
+    "            if line_number >= end:\n",
+    "                break  # past this worker's region, nothing left to scan\n",
+    "            # A line is reported once, no matter how many times the text occurs in it.\n",
+    "            if line_number >= start and string in line:\n",
+    "                results.append(line.strip())\n",
+    "\n",
+    "    # `with` releases the semaphore even if writing fails.\n",
+    "    with semaphore:\n",
+    "        # Append mode: opening with 'w+' would wipe what other workers already wrote.\n",
+    "        with open(output_file_path, encoding='utf-8', mode='a') as output_file_descriptor:\n",
+    "            for result in results:\n",
+    "                output_file_descriptor.write(result + '\\n')"
    ]
   }
  ],
@@ -1407,7 +1600,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.8"
+   "version": "3.10.12"
   },
   "vscode": {
    "interpreter": {