# CompressHtml.py

#########################################################################################################################################################
#
# CompressHtml
#
# This script is intended for joining an html-file with its js & css files, then compressing it and converting it to Arduino (ESP32/ESP8266) PROGMEM format
#
# Why man why ???
# Ever had the wish to use full-blown frameworks on the ESP ? JQuery, W3 or even HTMX ? Well, here is your answer.
# This will compress the html, resulting in a smaller memory footprint (~30%), which also results in faster serving and thus shorter response times.
# Further, by joining the html-file with its css and js-files, the webpage will be loaded even faster.
# And you just can easily build and debug your webpage with css and javascript the normal way without having to convert back and forth to embed it in C++ 
# string format.
# Intended use is to let the script run at (pre-)compile time and it will compress and join the html-files in the HTML-directory.
#
# As a small example, an index.html (800B), w3.css (24kB) & app.js (400B), together approx. 25.2kBytes, will result in a gzipped size of approx. 5.8kB, which is
# served by an ESP32 within 120ms.
# To push things further, the script was also tested with jquery.js and htmx.js included, resulting in 155kB, compressed 50kB, served in approx. 200ms.
# Think of the possibilities !
# 
# remarks
# - this is meant to be used in PlatformIO
# - for optimal performance you should create a single-page app on the ESP (use modals etc.)
# - you still need to keep an eye on filesize, for example only use one framework, lightweight if possible (w3.css)
# - the gz format demands the webpage to be served as binary data ! AsyncWebServer example:
#
#   server.on("/", HTTP_GET, [](AsyncWebServerRequest *request)   // send gzipped index (gzip))
#            {
#              AsyncWebServerResponse *response = request->beginResponse_P(200, "text/html", index_html_gz, sizeof(index_html_gz));
#              response->addHeader("Content-Encoding", "gzip");
#              request->send(response); 
#            });
#
# useful tips:
# For writing and debugging your webpage, use a "live server" in platformIO

import os, gzip, re
from datetime import datetime

#from slimit import minify as jsminify            # minify(js_code, mangle=True, mangle_toplevel=True)
#from htmlmin import minify as htmlminify         # minify(html_code)
#from csscompressor import compress as cssminify  # compress(css_code)

# File names earmarked for conversion.
# NOTE(review): nothing in the visible part of this script reads this list - confirm whether it is still needed.
filesToProcess = [
    "index.html",
    "favicon.png",
]

def GetProgMemString(data, progmemName, comment = ""):
    """Render *data* as the C source of a ``const uint8_t ...[] PROGMEM`` array.

    data        -- iterable of integer byte values (e.g. a ``bytes`` object)
    progmemName -- identifier used for the generated C array
    comment     -- text placed in a ``//`` comment line above the array

    Returns the generated C snippet as a string. Bytes are written as
    comma-separated two-digit hex literals, with a line break inserted after
    every 64th byte.
    """
    pieces = [f' \n\n// {comment}\nconst uint8_t {progmemName}[] PROGMEM = {{\n  ']

    for position, value in enumerate(data, start=1):
        # every byte except the first is preceded by a separator
        if position > 1:
            pieces.append(", ")
        pieces.append("0x%02x" % value)
        # wrap the generated line once 64 bytes have been written
        if position % 64 == 0:
            pieces.append("\n")

    pieces.append("\n};")
    return "".join(pieces)

def GetCompressedProgMemStringFromFile(fileName, compress = True):
    """Read a file and return it as a PROGMEM C array, optionally gzip-compressed.

    fileName -- path of the file to embed; it is read in binary mode
    compress -- when true the content is gzip-compressed, otherwise it is
                embedded unchanged

    Returns the C snippet produced by GetProgMemString(). A summary line with
    the sizes and the compression ratio is printed and also used as the
    comment above the generated array.

    Raises OSError (e.g. FileNotFoundError) when the file cannot be read.
    """
    # 'with' guarantees the handle is released even if reading fails
    with open(fileName, "rb") as inFile:
        inData = inFile.read()
    inSize = len(inData)

    compressedData = gzip.compress(inData) if compress else inData

    # An empty input file would otherwise raise ZeroDivisionError; report 0% instead.
    ratio = int(len(compressedData) * 100 / inSize) if inSize else 0

    comment = f'File {fileName} is compressed with {compress}-compression, ratio {ratio}% ({inSize}/{len(compressedData)}Bytes).'
    print (comment)

    # Build the C identifier from the bare file name: a path such as
    # "html/index.html" would otherwise produce "html/index_html", which is not
    # valid C. Every character that is not allowed in an identifier becomes "_".
    progmemName = re.sub(r"[^0-9A-Za-z_]", "_", os.path.basename(fileName))

    return GetProgMemString(compressedData, progmemName, comment)

def ProcessCssFiles(inData, indir):
    """Replace local stylesheet links in *inData* by inline ``<style>`` blocks.

    Works only for local files, e.g.:
        <link rel="stylesheet" href="css/w3.css">
    The file needs to be inside the *indir* directory, subfolders are allowed.

    inData -- html source as a string
    indir  -- directory the href values are relative to

    Returns the html source with every processed ``<link ...>`` tag replaced by
    the content of the file it refers to, wrapped in ``<style>`` tags. The path
    of every inlined file is appended to the module-level ``joinedFilesList``
    string (comma separated).

    ``<link>`` tags without an href, or with a rel attribute that is not a
    stylesheet (e.g. rel="icon"), are left untouched.

    Raises OSError (e.g. FileNotFoundError) when a referenced file cannot be read.
    """
    # Without this declaration the "+=" below makes the name local and the first
    # css link raises UnboundLocalError.
    global joinedFilesList
    # This function is defined before the script creates joinedFilesList, so
    # tolerate being called while it does not exist yet.
    joined = globals().get("joinedFilesList", "")

    # dict.fromkeys(): str.replace() below already handles every occurrence of
    # a tag, so identical tags only need to be processed once.
    for cssLink in dict.fromkeys(re.findall("<link .+?>", inData)):
        relMatch = re.search(r'rel="(.*?)"', cssLink)
        if relMatch and "stylesheet" not in relMatch.group(1).lower():
            # e.g. a favicon link: inlining it as css would corrupt the page
            continue

        hrefMatch = re.search(r'href="(.+?)"', cssLink)
        if hrefMatch is None:
            continue

        cssFilePath = f'{indir}/' + hrefMatch.group(1)
        with open(cssFilePath, "r", encoding="utf-8") as cssFile:
            cssFileContent = cssFile.read()

        joined = f"{joined}, {cssFilePath}" if joined else cssFilePath

        # and replace
        inData = inData.replace(cssLink, "\n<style>\n" + cssFileContent + "\n</style>\n")

    joinedFilesList = joined
    return inData

    


#GetProgMemStringFromFile("html/index.html", "none")                  
#GetProgMemStringFromFile("html/index.html", "zlib")                  
#GetCompressedProgMemStringFromFile("html/index.html")

# Script body: for every html page in `indir`, inline its local css and js
# files, gzip the joined page and append it as a PROGMEM array to the generated
# header `outfileName`.
# NOTE(review): kept as top-level statements (no __main__ guard) because the
# file header says this runs as a (pre-)compile step - confirm how the build
# invokes it before wrapping it in a function.

indir = "html"
outfileName = "include/apphtml.h"

outfileData  = f'// This file is auto-generated by CompressHtml.py @{datetime.now().strftime("%d/%m/%Y %H:%M:%S")}\n'
outfileData += "// Do not edit, since it will be overwritten\n\n"
outfileData += "#include <Arduino.h>\n"

joinedFilesList = ""

# A <script> tag that loads a local .js file:  <script src="myscripts.js"></script>
# [^>] keeps the match inside ONE opening tag, so an inline <script> block
# earlier on the same line is not swallowed and replaced along with it; the
# dot of ".js" is escaped so that only real .js references match.
jsTagPattern = re.compile(r'<script\b[^>]*?\ssrc="([^"]+?\.js)"></script>')

# sorted(): os.listdir() returns names in arbitrary order; sorting keeps the
# order of the arrays in the generated header stable between runs.
for file in sorted(os.listdir(indir)):
    if not file.lower().endswith(".html"):
        continue

    print (f'processing file: {file}')

    # Collected per page, so the comment of one page does not list the files
    # that were joined into a previous page.
    joinedPaths = []

    # Explicit utf-8: the page is encoded as UTF-8 below, so it must not be
    # decoded with whatever the platform default happens to be.
    with open(f'{indir}/{file}', "r", encoding="utf-8") as inFile:
        inData = inFile.read()

    # find css files
    # <link rel="stylesheet" href="css/w3.css">
    # dict.fromkeys(): str.replace() handles all occurrences of a tag at once,
    # so identical tags only need to be processed once.
    for cssLink in dict.fromkeys(re.findall("<link .+?>", inData)):
        relMatch = re.search(r'rel="(.*?)"', cssLink)
        hrefMatch = re.search(r'href="(.+?)"', cssLink)
        isOtherRel = relMatch is not None and "stylesheet" not in relMatch.group(1).lower()
        if hrefMatch is None or isOtherRel:
            # e.g. <link rel="icon" ...>: inlining it as css would corrupt the page
            print (f"Skipping non-css link: {cssLink}")
            continue

        print (f"Processing css-link: {cssLink}")

        cssFilePath = f'{indir}/' + hrefMatch.group(1)
        joinedPaths.append(cssFilePath)

        with open(cssFilePath, "r", encoding="utf-8") as cssFile:
            cssFileContent = cssFile.read()

        # and replace
        inData = inData.replace(cssLink, "\n<style>\n" + cssFileContent + "\n</style>\n")

    # find javascript files (tag text -> src value, duplicates collapsed)
    jsLinks = {match.group(0): match.group(1) for match in jsTagPattern.finditer(inData)}
    for jsLink, jsSource in jsLinks.items():
        jsFilePath = f'{indir}/' + jsSource
        joinedPaths.append(jsFilePath)

        with open(jsFilePath, "r", encoding="utf-8") as jsFile:
            jsFileContent = jsFile.read()

        # and replace
        inData = inData.replace(jsLink, "\n<script>\n" + jsFileContent + "\n</script>\n")

    joinedFilesList = ", ".join(joinedPaths)

    # now compress (once - the same result is used for the statistics and the array)
    rawData = inData.encode("UTF-8")
    compressedData = gzip.compress(rawData)

    # Sizes are taken from the encoded bytes, not the character count, and an
    # empty page must not raise ZeroDivisionError.
    ratio = int(len(compressedData) * 100 / len(rawData)) if rawData else 0
    dataComment = f'File {file} is joined with: {joinedFilesList}, total size: {len(rawData)} bytes, compressed size: {len(compressedData)} bytes, ratio {ratio}%.'

    print (f"\n{dataComment}\n")

    # File names may contain characters (e.g. "-" or spaces) that are not
    # allowed in a C identifier; map all of them to "_".
    progMemName = re.sub(r"[^0-9A-Za-z_]", "_", file) + "_gz"
    outfileData += GetProgMemString(compressedData, progMemName, dataComment)

# write buffer to targetfile, creating the target directory on a fresh checkout
outDir = os.path.dirname(outfileName)
if outDir:
    os.makedirs(outDir, exist_ok=True)
with open(outfileName, "w", encoding="utf-8") as outfile:
    outfile.write(outfileData)