diff --git a/examples/html_2_xlsx.ipynb b/examples/html_2_xlsx.ipynb new file mode 100644 index 0000000..e212178 --- /dev/null +++ b/examples/html_2_xlsx.ipynb @@ -0,0 +1,153 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# HTML 2 XLSX\n", + "Notebook to convert the HTML tables in the output of `AnyParser` to an XLSX file." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Install Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip3 install BeautifulSoup4\n", + "# !pip3 install lxml\n", + "# !pip3 install openpyxl\n", + "# !pip3 install pandas" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Input Data\n", + "First, we load the `input_html` string from the `AnyParser` output, located in the `html_2_xlsx/input` folder." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from html_2_xlsx.input.html_input import input_html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Convert HTML to XLSX\n", + "Next, we define the `html_to_excel` function, which reads the HTML string and converts it to an XLSX file using `BeautifulSoup` and `pandas`."
import os
from io import StringIO

from bs4 import BeautifulSoup
import pandas as pd


def html_to_excel(html_string: str, output_folder: str, output_filename: str) -> None:
    """Export every ``<table>`` in an HTML string to a single XLSX workbook.

    Each table found by BeautifulSoup is parsed with ``pandas.read_html`` and
    written to its own sheet, named ``Table_1``, ``Table_2``, ... in document
    order. A confirmation line with the output path is printed on success.

    Args:
        html_string: HTML markup containing one or more ``<table>`` elements.
        output_folder: Directory for the workbook; created if missing. An
            empty string means the current working directory.
        output_filename: File name of the workbook inside ``output_folder``.

    Raises:
        ValueError: If ``html_string`` contains no ``<table>`` element.
    """
    soup = BeautifulSoup(html_string, 'html.parser')
    tables = soup.find_all('table')

    # Fail early with a clear message: a workbook with no sheets cannot be
    # saved, and the writer's own error for that case is hard to interpret.
    if not tables:
        raise ValueError("No <table> elements found in the HTML input; nothing to export.")

    # Parse everything before touching the filesystem so that bad input does
    # not leave an empty output folder behind.
    dfs = {
        f"Table_{i}": pd.read_html(StringIO(str(table)))[0]
        for i, table in enumerate(tables, start=1)
    }

    # exist_ok avoids the check-then-create race; the guard keeps an empty
    # folder name from reaching os.makedirs(''), which raises.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    output_file = os.path.join(output_folder, output_filename)
    with pd.ExcelWriter(output_file) as writer:
        for name, df in dfs.items():
            df.to_excel(writer, sheet_name=name, index=False)

    print(f"Excel file saved to {output_file}")
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Excel file saved to html_2_xlsx/output/html_2_excel_output.xlsx\n" + ] + } + ], + "source": [ + "output_file = 'html_2_excel_output.xlsx'\n", + "output_folder = 'html_2_xlsx/output'\n", + "html_to_excel(input_html, output_folder, output_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## End of the notebook\n", + "\n", + "Check more [case studies](https://www.cambioml.com/blog) of CambioML!\n", + "\n", + "\n", + " \n", + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "open-parser", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/html_2_xlsx/input/html_input.py b/examples/html_2_xlsx/input/html_input.py new file mode 100644 index 0000000..e7478ad --- /dev/null +++ b/examples/html_2_xlsx/input/html_input.py @@ -0,0 +1,46 @@ +input_html = """
Summary | Current Month | - March-2024 | Three | Months - | 1/1/2024 to 3/31/2024 | Twelve | Months - | 4/1/2023 to 3/31/2024 | |||||||
Number | of Reporting | Licensees - | 444 | Number | of Reporting | Licensees - 445 | Number | of Reporting | Licensees | 453 | |||||
Unit Description | # Of Loc* | # Of Units | Win* Amount | % Chg | Win Percent | # Of Loc* | Avg Units | Win** Amount | % Chg | Win Percent | # Of Loc* | Avg Units | Win** Amount | % Chg | Win Percent |
Table, Counter and Card | Games | ||||||||||||||
Twenty One | 114 | 2,065 | 108,802 | (18.17) | 13.59 | 114 | 2,063 | 339,520 | (7.64) | 13.85 | 115 | 2,017 | 1,297,555 | (3.75) | 14.19 |
Craps | 101 | 336 | 41,567 | 2.11 | 16.48 | 102 | 324 | 118,285 | 1.03 | 16.70 | 105 | 319 | 468,742 | 2.60 | 16.82 |
Roulette | 100 | 435 | 37,678 | 8.00 | 20.41 | 100 | 441 | 96,295 | (20.68) | 14.96 | 101 | 434 | 435,203 | (7.09) | 18.01 |
Ultim Texas Hold'em | 70 | 194 | 17,256 | (0.22) | 23.08 | 71 | 194 | 51,882 | 0.45 | 24.06 | 72 | 182 | 188,983 | 0.36 | 21.77 |
3-Card Poker | 90 | 165 | 9,997 | (23.54) | 27.97 | 90 | 167 | 29,993 | (15.13) | 31.02 | 93 | 169 | 119,934 | (8.19) | 31.53 |
Baccarat | 34 | 447 | 114,694 | 75.51 | 18.81 | 34 | 446 | 392,829 | 51.81 | 17.45 | 36 | 405 | 1,622,476 | 39.79 | 17.85 |
Mini-Baccarat | 37 | 115 | 4,980 | (24.61) | 7.65 | 38 | 119 | 21,183 | (8.28) | 10.14 | 41 | 115 | 88,698 | (9.65) | 11.85 |
Keno | 31 | 41 | 1,487 | (6.03) | 30.51 | 31 | 41 | 4,148 | (8.07) | 29.02 | 33 | 42 | 17,648 | (5.14) | 29.57 |
Bingo | 31 | 31 | 6,890 | 1.51 | 30.58 | 31 | 31 | 12,486 | (5.93) | 23.22 | 34 | 31 | 44,769 | (3.15) | 21.65 |
Let It Ride | 30 | 35 | 1,487 | (0.12) | 24.21 | 31 | 35 | 4,184 | (16.04) | 25.66 | 33 | 37 | 16,970 | (17.15) | 25.37 |
Pai Gow | 13 | 25 | 2,164 | 368.92 | 46.16 | 15 | 26 | 2,731 | 19.56 | 19.10 | 16 | 25 | 11,112 | 19.01 | 22.00 |
Pai Gow Poker | 76 | 208 | 10,875 | (6.82) | 20.99 | 76 | 209 | 32,533 | 22.28 | 21.69 | 78 | 208 | 130,222 | (5.34) | 21.84 |
Race Book (1) | 165 | 163 | 2,815 | (2.94) | 16.52 | 167 | 164 | 7,076 | (6.26) | 16.41 | 169 | 160 | 29,962 | (9.65) | 15.64 |
Sports Pool (2) | 181 | 181 | 29,762 | (32.13) | 3.79 | 181 | 180 | 142,411 | 5.06 | 6.25 | 182 | 177 | 488,194 | 5.12 | 6.02 |
Card Games | 35 | 454 | 14,664 | (4.42) | 35 | 452 | 39,840 | (6.00) | 38 | 481 | 226,354 | 1.71 | |||
Other | 153 | 10,773 | 1.63 | 23.76 | 164 | 28,620 | (14.25) | 18.40 | 158 | 129,395 | 1.44 | 20.27 | |||
Total | 263 | 5,048 | 415,892 | 2.54 | 13.98 | 263 | 5,056 | 1,324,017 | 6.31 | 14.17 | 266 | 4,960 | 5,316,219 | 7.81 | 14.94 |
Slot Machines | |||||||||||||||
1 Cent | 247 | 40,371 | 220,252 | (23.43) | 8,97 | 248 | 41,784 | 659,530 | (21.36) | 9.66 | 256 | 43,757 | 2,973,572 | (15.68) | 9.58 |
5 Cent | 90 | 847 | 3,307 | (8.54) | 6.56 | 94 | 815 | 9,880 | (5.73) | 6.61 | 104 | 834 | 41,975 | (1.27) | 5.69 |
25 Cent | 163 | 2,692 | 15,903 | (15.81) | 7.33 | 167 | 2,761 | 46,711 | (17.96) | 7.99 | 174 | 2,958 | 209,647 | (12.20) | 7.93 |
1 Dollar | 169 | 5,679 | 43,643 | (26.86) | 6.23 | 171 | 5,873 | 138,351 | (20.30) | 6.93 | 179 | 6,157 | 602,837 | (16.78) | 6.43 |
5 Dollars | 87 | 574 | 4,988 | (20.77) | 5.15 | 88 | 599 | 15,291 | (23.43) | 5.11 | 92 | 621 | 65,712 | (22.31) | 5.06 |
25 Dollars | 46 | 109 | 1,329 | (39.77) | 5.90 | 47 | 113 | 4,796 | (22.22) | 6.39 | 52 | 121 | 21,451 | (27.94) | 5.21 |
100 Dollars | 32 | 86 | 2,196 | (36.11) | 6.35 | 32 | 87 | 7,722 | (8.40) | 7.08 | 35 | 86 | 31,216 | (14.11) | 6.24 |
Multi Denomination | 309 | 76,718 | 573,675 | 12.37 | 6.41 | 310 | 75,315 | 1,677,377 | 17.60 | 6.73 | 317 | 70,289 | 6,224,784 | 15.94 | 6.50 |
Other | 917 | 9,220 | (34.95) | 1,009 | 28,173 | (21.78) | 1,037 | 124,892 | (7.27) | ||||||
Total | 316 | 127,993 | 874,515 | (3.53) | 6.88 | 317 | 128,356 | 2,587,831 | 0.44 | 7.29 | 325 | 125,860 | 10,296,085 | 1.08 | 7.15 |
Total Gaming | 1,290,407 | (1.65) | 3,911,849 | 2.35 | 15,612,304 | 3.27 | |||||||||
(1) Race Book Parimutuel | 60 | 60 | 2,786 | (3.60) | 16.51 | 62 | 62 | 7,017 | (6.54) | 16.44 | 66 | 66 | 29,587 | (8.81) | 15.91 |
Sports Mobile | 56 | 26,010 | 1.66 | 5.13 | 58 | 79,867 | 33.25 | 5.55 | 68 | 260,585 | 18.54 | 4.86 | |||
(2) Sports Football | 120 | 120 | (12,881) | (3.82) | (4,049.07) | 181 | 181 | 37,565 | 51.81 | 6.90 | 182 | 182 | 170,755 | 9.25 | 6.42 |
Sports Basketball | 180 | 180 | 32,464 | (27.64) | 5.31 | 180 | 180 | 75,596 | (7.81) | 5.74 | 182 | 182 | 127,091 | (4.29) | 5.24 |
Sports Baseball | 179 | 179 | 1,974 | 5.92 | 179 | 179 | 16 | (101.44) | 0.05 | 181 | 181 | 94,320 | 17.15 | 5.82 | |
Sports Parlay Cards | 26 | 26 | (155) | (496.78) | (90.37) | 153 | 153 | 77 | (96.21) | 2.91 | 163 | 163 | 6,510 | (43.39) | 23.22 |
Sports Hockey | 178 | 178 | 730 | (85.23) | 1.60 | 178 | 178 | 8,117 | (34.42) | 6.35 | 181 | 181 | 14,706 | (41.12) | 3.76 |
Other | 181 | 7,630 | 4.05 | 8.14 | 180 | 21,039 | 35.67 | 8.33 | 177 | 74,812 | 28.23 | 7.56 |