From b07b49b3e6006f1080cc0a176e58246a343aecc2 Mon Sep 17 00:00:00 2001 From: BrightShard Date: Fri, 25 Oct 2024 11:59:30 -0400 Subject: [PATCH] Initial commit --- .gitignore | 1 + Cargo.lock | 23 ++ Cargo.toml | 12 + README.md | 103 +++++++ src/compiler.rs | 121 ++++++++ src/lib.rs | 71 +++++ src/main.rs | 99 +++++++ src/minifier.rs | 5 + src/minifier/css.rs | 79 +++++ src/minifier/html.rs | 589 +++++++++++++++++++++++++++++++++++++ src/translator.rs | 4 + src/translator/gemtext.rs | 119 ++++++++ src/translator/markdown.rs | 0 tests/gemtext.rs | 21 ++ tests/gemtext/header.gmi | 3 + tests/gemtext/header.html | 1 + tests/gemtext/link.gmi | 4 + tests/gemtext/link.html | 1 + tests/gemtext/list.gmi | 3 + tests/gemtext/list.html | 1 + tests/gemtext/text.gmi | 1 + tests/gemtext/text.html | 1 + 22 files changed, 1262 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 src/compiler.rs create mode 100644 src/lib.rs create mode 100644 src/main.rs create mode 100644 src/minifier.rs create mode 100644 src/minifier/css.rs create mode 100644 src/minifier/html.rs create mode 100644 src/translator.rs create mode 100644 src/translator/gemtext.rs create mode 100644 src/translator/markdown.rs create mode 100644 tests/gemtext.rs create mode 100644 tests/gemtext/header.gmi create mode 100644 tests/gemtext/header.html create mode 100644 tests/gemtext/link.gmi create mode 100644 tests/gemtext/link.html create mode 100644 tests/gemtext/list.gmi create mode 100644 tests/gemtext/list.html create mode 100644 tests/gemtext/text.gmi create mode 100644 tests/gemtext/text.html diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..fe1a8f8 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,23 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "boml" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85fdb93f04c73bff54305fa437ffea5449c41edcaadfe882f35836206b166ac5" + +[[package]] +name = "webby" +version = "0.1.0" +dependencies = [ + "base64", + "boml", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a3a24b8 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "webby" +version = "0.1.0" +edition = "2021" + +[dependencies] +base64 = "0.22.1" +boml = "0.3.1" + +[features] +default = [] +log = [] diff --git a/README.md b/README.md new file mode 100644 index 0000000..9063717 --- /dev/null +++ b/README.md @@ -0,0 +1,103 @@ +# webby + +> The smol web compiler + +As seen in [my website](https://github.com/bright-shard/website). + +**webby** is a small and efficient compiler for making static sites. It adds macros, minifiers, and translators to compile your project into a tiny static site. + +> Note: Webby is WIP. The above is a summary of what I want it to do when it's finished. For the current project status, see [todo](#todo). + +# macros + +webby adds a few simple macros to make writing HTML simpler. 
Macros open with `#!`, followed by the macro name, followed by arguments in parentheses, like so: + +``` +#!MACRO_NAME(args) +``` + +Macros can be combined, like this: + +``` +#!MACRO1(#!MACRO2(args)) +``` + +- `#!INCLUDE(path/to/file)`: Webby will compile the given file, then embed it at the macro's location. The file must contain valid UTF-8 text. +- `#!BASE64(text)`: Base64-encode the given text. +- `#!INCLUDE_BASE64(path/to/file)`: Base64-encode the given file. This differs from `#!BASE64(#!INCLUDE(path/to/file))` because it can also base64-encode binary files. + +# minifiers + +webby will automatically strip comments and unneeded whitespace from your code to make it as small as possible. + +# translators + +Translators cross-compile between languages - for example, Markdown to HTML, or Gemtext to HTML. + + + +# usage + +webby projects have a `webby.toml` in the root of their project, just like Rust projects have a `Cargo.toml` in the root of theirs. The format of `webby.toml` is given in [config](#config). + +To install webby, just install it with Cargo: + +```sh +cargo install --git https://github.com/bright-shard/webby +``` + +Then just run `webby` in your webby project. + +# config + +In its simplest form, the `webby.toml` file will look like this: + +```toml +# For every file you want to compile with webby, add a `[[target]]` section +[[target]] +# The path to the file to compile +path = "index.html" + +[[target]] +path = "blog.html" +``` + +However, webby allows customising more if you need it: + +```toml +# (Optional) the directory to put the output files at +# If this isn't specified it defaults to `webby` +# The path is relative to the webby.toml file +output = "my/custom/build/dir" + +[[target]] +# The path to the file, relative to the webby.toml file +# If you list a folder instead of a file, webby will compile all of the files +# in that folder +path = "path/to/file.html" +# (Optional) Where to put the compiled file +# If this isn't specified it defaults to the name of the file given in path +# The path is relative to the output directory +output = "file.out.html" +# (Optional) The compilation mode +# This can be "compile", "copy", or "link". Compile will compile the file. Copy +# will just copy the file as-is and will not compile it at all. Link is the same +# as copy, but it creates a hard link (not a symlink) to the file instead of +# copying it. +# If this isn't specified, webby will infer if it should compile or copy the +# file based on the file's ending. +mode = "compile" +``` + +# todo + +- [x] Macros + - [x] INCLUDE + - [x] BASE64 + - [x] BASE64_INCLUDE +- [x] HTML minifier +- [x] CSS minifier +- [ ] JS minifier +- [x] Gemtext translator +- [ ] Markdown translator +- [ ] Redo macro compiler... it's old and has bugs diff --git a/src/compiler.rs b/src/compiler.rs new file mode 100644 index 0000000..6c14d41 --- /dev/null +++ b/src/compiler.rs @@ -0,0 +1,121 @@ +use { + crate::{line_number_of_offset, Cow}, + base64::{engine::general_purpose::STANDARD, Engine}, + std::{fs, path::Path}, +}; + +pub fn compile_macros<'a>(original: &'a str, source_path: &'a Path) -> Cow<'a> { + let mut output = String::default(); + let mut offset = 0; + + while let Some(start_idx) = original[offset..].find("#!") { + if original[offset..] 
+ .as_bytes() + .get(start_idx.saturating_sub(1)) + .copied() + == Some(b'\\') + { + if !output.is_empty() { + output += &original[offset..offset + start_idx + 1] + } + offset += start_idx + 1; + continue; + } + + output += &original[offset..offset + start_idx]; + offset += start_idx; + + let macro_src = &original[offset..]; + let paren_open = macro_src.find('(').unwrap_or_else(|| { + panic!( + "Expected ( in macro invocation at {source_path:?}:{}", + line_number_of_offset(original, offset) + ) + }); + let mut paren_close = macro_src.find(')').unwrap_or_else(|| { + panic!( + "Expected ) to end macro invocation at {source_path:?}:{}", + line_number_of_offset(original, offset) + ) + }); + while macro_src.as_bytes().get(paren_close + 1).copied() == Some(b')') { + paren_close += 1; + } + + let macro_name = ¯o_src[2..paren_open]; + let macro_args = ¯o_src[paren_open + 1..paren_close]; + let macro_args = compile_macros(macro_args, source_path); + let macro_args = macro_args.as_ref(); + + match macro_name { + "INCLUDE" => { + let path = source_path.parent().unwrap().join(macro_args); + let src = fs::read_to_string(&path).unwrap_or_else(|err| { + panic!( + "Error in INCLUDE macro at {source_path:?}:{}: {err}", + line_number_of_offset(original, offset) + ) + }); + let compiled = compile_macros(&src, &path); + output += compiled.as_ref(); + } + "BASE64" => { + output += STANDARD.encode(macro_args).as_str(); + } + "INCLUDE_BASE64" => { + let path = source_path.parent().unwrap().join(macro_args); + let src = fs::read(&path).unwrap_or_else(|err| { + panic!( + "Error in INCLUDE_BASE64 macro at {source_path:?}:{}: {err}", + line_number_of_offset(original, offset) + ) + }); + output += STANDARD.encode(&src).as_str(); + } + other => panic!( + "Unknown macro '{other}' in macro invocation at {source_path:?}:{}", + line_number_of_offset(original, offset) + ), + } + + offset += paren_close + 1; + } + + if output.is_empty() { + Cow::Borrowed(original) + } else { + output += &original[offset..]; + Cow::Owned(output) + } +} + +pub fn copy_batch_target(src: &Path, dest: &Path) { + if dest.is_file() { + fs::remove_file(dest).unwrap_or_else(|err| { + panic!("Failed to copy batch target {src:?}. There was already a file where its output should go ({dest:?}), which couldn't be removed: {err}"); + }); + } + if !dest.exists() { + fs::create_dir_all(dest).unwrap_or_else(|err| { + panic!("Failed to copy batch target {src:?}. Couldn't create its output folder at {dest:?} because: {err}"); + }); + } + + let src = src.read_dir().unwrap_or_else(|err| { + panic!( + "Failed to copy batch target {dest:?}. Couldn't open its source directory because: {err}" + ); + }); + + for dir_entry in src.filter_map(|dir_entry| dir_entry.ok()) { + let dir_entry = &dir_entry.path(); + + if dir_entry.is_file() { + fs::copy(dir_entry, dest.join(dir_entry.file_name().unwrap())).unwrap_or_else(|err| { + panic!("Failed to copy batch target {dest:?}. 
Couldn't copy file at {dir_entry:?} because: {err}"); + }); + } else { + copy_batch_target(dir_entry, &dest.join(dir_entry.file_name().unwrap())); + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..797d9ff --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,71 @@ +pub mod compiler; +pub mod minifier; +pub mod translator; + +use std::{fs, path::PathBuf}; + +type Cow<'a> = std::borrow::Cow<'a, str>; + +pub enum Mode { + Compile, + Copy, + Link, +} + +pub struct Target { + pub path: PathBuf, + pub output: PathBuf, + pub mode: Mode, +} + +pub fn build_target(target: Target) -> Result<(), Cow<'static>> { + match target.mode { + Mode::Copy => { + if target.path.is_file() | target.path.is_symlink() { + fs::copy(target.path, target.output).unwrap(); + } else { + compiler::copy_batch_target(&target.path, &target.output); + } + } + Mode::Link => { + if target.output.exists() { + fs::remove_file(&target.output) + .unwrap_or_else(|err| panic!("Failed to link target {:?}: {err}", &target.path)) + } + fs::hard_link(&target.path, target.output) + .unwrap_or_else(|err| panic!("Failed to link target {:?}: {err}", &target.path)); + } + Mode::Compile => { + let original = fs::read_to_string(&target.path).unwrap_or_else(|err| { + panic!( + "Failed to compile target {:?}: Error occurred while reading the source file: {err}", + &target.path + ) + }); + let compiled_macros = compiler::compile_macros(&original, &target.path); + + let output = match target.path.extension().and_then(|val| val.to_str()) { + Some("gmi") => Cow::Owned(translator::translate_gemtext( + &target.path, + compiled_macros.as_ref(), + )?), + Some("html") => Cow::Owned(minifier::minify_html( + target.path.to_str().unwrap(), + &compiled_macros, + &original, + )?), + Some("css") => Cow::Owned(minifier::minify_css(&compiled_macros)), + _ => compiled_macros, + }; + + fs::write(&target.output, output.as_ref()) + .unwrap_or_else(|err| panic!("Failed to compile target {:?}: Error occured while writing the compiled file: {err}", &target.path)); + } + } + + Ok(()) +} + +fn line_number_of_offset(src: &str, offset: usize) -> usize { + src[..offset].bytes().filter(|byte| *byte == b'\n').count() +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..ea0a3c5 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,99 @@ +use { + boml::{table::TomlGetError, Toml}, + std::{borrow::Cow, env, fs, thread}, + webby::{build_target, Mode, Target}, +}; + +pub fn main() -> Result<(), Cow<'static, str>> { + let cwd = env::current_dir().expect("Failed to find current directory"); + let mut root = cwd.as_path(); + + while !root + .read_dir() + .expect("Failed to list files in current folder") + .any(|file| { + if let Ok(ref file) = file { + if let Some(name) = file.file_name().to_str() { + if name == "webby.toml" && file.path().is_file() { + return true; + } + } + } + + false + }) + { + let Some(parent) = root.parent() else { + return Err("Failed to find webby.toml".into()); + }; + root = parent; + } + + let cfg = fs::read_to_string(root.join("webby.toml")).expect("Failed to read webby.toml"); + let toml = Toml::parse(&cfg).unwrap(); + + let output_dir = if let Ok(output) = toml.get_string("output") { + root.join(output) + } else { + root.join("webby") + }; + + if !output_dir.exists() { + fs::create_dir(&output_dir).expect("Failed to create output directory"); + } + + let mut tasks = Vec::default(); + + match toml.get_array("target") { + Ok(targets) => { + for target in targets { + let Some(table) = target.table() else { + return Err("All 
target entries in webby.toml must be a TOML table.".into()); + }; + let Ok(path) = table.get_string("path") else { + return Err("Target in webby.toml didn't have a path".into()); + }; + let path = root.join(path); + let mode = if let Ok(mode) = table.get_string("mode") { + match mode { + "compile" => Mode::Compile, + "copy" => Mode::Copy, + "link" => Mode::Link, + other => panic!("Unknown mode: {other} for target: {path:?}"), + } + } else { + match path.extension().and_then(|osstr| osstr.to_str()) { + Some("gmi" | "html" | "svg" | "md" | "css") => Mode::Compile, + _ => Mode::Copy, + } + }; + let output = if let Ok(output_name) = table.get_string("output") { + output_dir.join(output_name) + } else { + output_dir.join(path.file_name().unwrap()) + }; + + let target = Target { path, output, mode }; + let worker = thread::spawn(move || build_target(target)); + tasks.push(worker); + } + } + Err(e) => match e { + TomlGetError::InvalidKey => { + return Err("No targets specified. See the GitHub for an example on setting up a webby project: https://github.com/bright-shard/webby".into()); + } + TomlGetError::TypeMismatch(_, _) => { + return Err("The 'target' entry has to an array in webby.toml".into()); + } + }, + } + + for task in tasks { + match task.join().unwrap() { + Ok(()) => {} + Err(err) => println!("{err}"), + } + } + + Ok(()) +} diff --git a/src/minifier.rs b/src/minifier.rs new file mode 100644 index 0000000..17107a1 --- /dev/null +++ b/src/minifier.rs @@ -0,0 +1,5 @@ +mod css; +mod html; + +pub use css::minify_css; +pub use html::minify_html; diff --git a/src/minifier/css.rs b/src/minifier/css.rs new file mode 100644 index 0000000..e7203ae --- /dev/null +++ b/src/minifier/css.rs @@ -0,0 +1,79 @@ +pub fn minify_css(source: &str) -> String { + let mut out = String::new(); + let mut chars = source.chars().peekable(); + + let mut function_depth = 0; + let mut maybe_in_rule = false; + + while let Some(char) = chars.next() { + match char { + '/' if chars.peek().copied() == Some('*') => { + chars.next(); + + while let Some(char) = chars.next() { + if char == '*' && chars.peek().copied() == Some('/') { + chars.next(); + break; + } + } + continue; + } + '\'' | '"' => { + out.push(char); + + while let Some(subchar) = chars.next() { + if subchar == char { + out.push(char); + break; + } else if subchar == '\\' { + if let Some(char) = chars.next() { + out.push(char); + } + } + + out.push(subchar); + } + + continue; + } + '\n' => { + while chars.peek().map(|c| c.is_whitespace()).unwrap_or(false) { + chars.next(); + } + + if function_depth > 0 || maybe_in_rule { + out.push(' '); + } + continue; + } + '(' => { + function_depth += 1; + out.truncate(out.trim_end().len()); + while chars.peek().map(|c| c.is_whitespace()).unwrap_or(false) { + chars.next(); + } + } + ')' => { + function_depth -= 1; + out.truncate(out.trim_end().len()); + } + '{' | '}' | ',' => { + out.truncate(out.trim_end().len()); + while chars.peek().map(|c| c.is_whitespace()).unwrap_or(false) { + chars.next(); + } + } + ':' => { + while chars.peek().map(|c| c.is_whitespace()).unwrap_or(false) { + chars.next(); + } + maybe_in_rule = true; + } + ';' => maybe_in_rule = false, + _ => {} + } + out.push(char); + } + + out +} diff --git a/src/minifier/html.rs b/src/minifier/html.rs new file mode 100644 index 0000000..5080d7d --- /dev/null +++ b/src/minifier/html.rs @@ -0,0 +1,589 @@ +use crate::{line_number_of_offset, minifier, Cow}; + +macro_rules! 
log {
+    ($($t:tt)*) => {
+        #[cfg(any(test, feature = "log"))]
+        println!($($t)*);
+    };
+}
+
+pub fn minify_html(
+    source_path: &str,
+    source: &str,
+    original: &str,
+) -> Result<String, Cow<'static>> {
+    let mut result = String::new();
+    let mut handled_bytes = 0;
+
+    while handled_bytes < source.len() {
+        let (tag, bytes) = handle_tag(
+            source_path,
+            &source[handled_bytes..],
+            (original, handled_bytes),
+        )?;
+        result += &tag;
+        handled_bytes += bytes + 1;
+    }
+
+    Ok(result)
+}
+
+/// Parses an individual HTML tag and minifies it.
+///
+/// This is a rough overview of the strategy this function uses to parse HTML,
+/// handle its edge cases, and then minify it:
+///
+/// DISCLAIMER: The following is what this minifier will do when it's complete.
+/// See the TODO at the bottom for what's not yet implemented.
+///
+/// 1. Tags begin with a <, then have the tag type. If there is whitespace after
+///    the <, it's not considered a tag.
+/// 2. If the tag's type is `!--`, it is a comment and will be removed from
+///    the resulting HTML.
+/// 3. The tag may have properties in the format `name=value`, with optional
+///    whitespace around the `=` and optional quotes around the value. The tag
+///    may also have properties in the format `name`. Properties are never
+///    minimised except to remove whitespace around the `=`.
+/// 4. The tag may be closed with either a `/>`, a closing tag, or may not be
+///    closed properly at all.
+/// 5. If the tag is closed with a closing tag, this function will classify the
+///    tag as either a *text* tag or a *content* tag. Text tags store text (`p`,
+///    `a`, `h1`, etc), while content tags store other HTML elements (`head`,
+///    `body`, etc). If the tag is not closed, this function just returns the
+///    tag. If the tag is a style tag, it will be run through the CSS minifier.
+///    Script tags will only be minified with the `js-minify` feature enabled.
+/// 6. If the tag is a text tag, the only minification that will occur is
+///    removing newlines around tags inside it. Any content tags inside the text
+///    tag will be minified as normal for a content tag.
+/// 7. If the tag is a content tag, newlines will be stripped from it.
+///    Whitespace that isn't in a tag property's value will also be stripped.
+///    Any nested tags inside that content tag will be re-run through this
+///    minifier.
+///
+/// # TODO
+/// - Find a decent JS minifier, add it as a dep, and feature flag it. JS is
+///   too complicated to write a minifier for, when I don't even use it.
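+// NOTE (editor's illustration, not from the original source): assuming the
+// strategy described above, a content tag wrapping a textual tag such as
+//
+//     <body>
+//         <p>
+//             hello   world
+//         </p>
+//     </body>
+//
+// should minify to roughly `<body><p>hello world</p></body>` - the newlines
+// and indentation inside `body` are stripped entirely, while runs of
+// whitespace inside the textual `p` tag collapse to single spaces.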
+fn handle_tag<'a>( + source_path: &'a str, + source: &'a str, + error_meta: (&'a str, usize), +) -> Result<(Cow<'a>, usize), Cow<'static>> { + if source + .chars() + .next() + .map(|char| char.is_whitespace()) + .unwrap_or(false) + { + return Ok((Cow::Borrowed("<"), 1)); + } else if source.starts_with("") else { + return Err(Cow::Owned(format!( + "HTML error: Unclosed HTML comment at {source_path}:{}", + line_number_of_offset(error_meta.0, error_meta.1) + ))); + }; + return Ok((Cow::Borrowed(""), ending + 2)); + } + + let mut output = String::from("<"); + let mut chars = source.char_indices().peekable(); + chars.next(); // discard opening < + + let tag_name_end; + let tag_closed; + loop { + let Some((byte_idx, char)) = chars.next() else { + return Ok((Cow::Owned(output), source.len() - 1)); + }; + + if char == '/' && chars.peek().map(|(_, char)| *char) == Some('>') { + output += "/>"; + chars.next(); + let mut end = byte_idx + 1; + if chars.peek().map(|(_, char)| *char) == Some('\n') { + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + end = chars.next().unwrap().0; + } + } + + return Ok((Cow::Owned(output), end)); + } else if char == '>' { + tag_name_end = byte_idx; + tag_closed = true; + break; + } else if char.is_whitespace() { + tag_name_end = byte_idx; + tag_closed = false; + + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + chars.next(); + } + + output.push(' '); + break; + } + + output.push(char); + } + + let tag_name = &source[1..tag_name_end]; + log!("Parsing tag `{tag_name}`"); + + if tag_closed { + log!(" Opening tag closed w/o properties"); + output.push('>'); + } else { + // Each loop parses 1 property + 'parse_properties: loop { + let Some((byte_idx, char)) = chars.next() else { + return Ok((Cow::Owned(output), source.len() - 1)); + }; + log!(" 'parse_properties: Found `{char}`"); + + if char == '/' && chars.peek().map(|(_, char)| *char) == Some('>') { + output += "/>"; + chars.next(); + let mut end = byte_idx + 1; + if chars.peek().map(|(_, char)| *char) == Some('\n') { + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + end = chars.next().unwrap().0; + } + } + + return Ok((Cow::Owned(output), end)); + } else if char == '>' { + log!(" Opening tag closed in 'parsed_properties"); + output.push('>'); + break 'parse_properties; + } else if char == '\n' { + continue 'parse_properties; + } else if char.is_whitespace() { + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + chars.next(); + } + output.push(' '); + continue 'parse_properties; + } + + // Parse property's name + output.push(char); + 'parse_property_name: loop { + let Some((byte_idx, char)) = chars.next() else { + return Ok((Cow::Owned(output), source.len() - 1)); + }; + + if char == '=' { + break 'parse_property_name; + } else if char == '/' && chars.peek().map(|(_, char)| *char) == Some('>') { + output += "/>"; + chars.next(); + let mut end = byte_idx + 1; + if chars.peek().map(|(_, char)| *char) == Some('\n') { + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + end = chars.next().unwrap().0; + } + } + + return Ok((Cow::Owned(output), end)); + } else if char == '>' { + output += ">"; + break 'parse_properties; + } else if char.is_whitespace() { + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + chars.next(); + } + + if chars.peek().map(|(_, char)| *char) == Some('=') { + // Whitespace followed by an `=` - should break 
name + // parsing and start parsing the value + chars.next(); + break 'parse_property_name; + } else { + // Whitespace followed by other characters - this + // property didn't have a value and we should go parse + // the next one + output.push(' '); + continue 'parse_properties; + } + } + + output.push(char); + } + + // If we get to this point, the property has a value. The chars + // iterator will pick up after the =. + output.push('='); + + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + chars.next(); + } + + // Parse property's value + let Some((idx, char)) = chars.next() else { + return Ok((Cow::Owned(output), source.len() - 1)); + }; + match char { + '\'' | '"' => { + output.push(char); + loop { + let Some((_, next)) = chars.next() else { + return Err(Cow::Owned(format!( + "Unclosed quotation in HTML property at {source_path}:{}", + line_number_of_offset(error_meta.0, error_meta.1 + idx) + ))); + }; + if next == '\\' { + if let Some((_, next)) = chars.next() { + output.push(next); + } + continue; + } + + output.push(next); + + if next == char { + break; + } + } + } + _ => { + output.push(char); + loop { + let Some((idx, char)) = chars.next() else { + return Err(Cow::Owned(format!( + "Unclosed quotation in HTML property at {source_path}:{}", + line_number_of_offset(error_meta.0, error_meta.1 + idx) + ))); + }; + + if char == '/' && chars.peek().map(|(_, char)| *char) == Some('>') { + output += "/>"; + chars.next(); + let mut end = idx + 1; + if chars.peek().map(|(_, char)| *char) == Some('\n') { + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + end = chars.next().unwrap().0; + } + } + + return Ok((Cow::Owned(output), end)); + } else if char == '>' { + output += ">"; + break 'parse_properties; + } else if char.is_whitespace() { + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + chars.next(); + } + output.push(char); + + continue 'parse_properties; + } else { + output.push(char); + } + } + } + } + } + } + + // By now the > of the opening tag has been reached + // We need to find the closing tag, then minify the contents of the tag + // as needed + debug_assert!(output.ends_with('>'), "output is: `{output}`"); + + if chars.peek().map(|(_, char)| *char) == Some('\n') { + while chars + .peek() + .map(|(_, char)| char.is_whitespace()) + .unwrap_or(false) + { + chars.next(); + } + } + + if tag_name == "script" || tag_name == "style" { + // TODO: Actually parse and minify JS + let closing_tag = if tag_name == "script" { + "" + } else { + "" + }; + let Some(search_start_idx) = chars.next().map(|(idx, _)| idx) else { + return Ok((Cow::Owned(output), source.len() - 1)); + }; + + let Some(closing) = source[search_start_idx..].find(closing_tag) else { + return Err(Cow::Owned(format!( + "Unclosed style or script tag at {source_path}:{}", + line_number_of_offset(error_meta.0, error_meta.1 + search_start_idx) + ))); + }; + let closing = search_start_idx + closing + closing_tag.len(); + + if tag_name == "style" { + output += &minifier::minify_css(&source[search_start_idx..closing]); + } else { + output += &source[search_start_idx..closing]; + } + + return Ok((Cow::Owned(output), closing)); + } + + let textual_tag = matches!( + tag_name, + "a" | "abbr" + | "acronym" + | "aside" + | "b" + | "bdi" + | "bdo" + | "big" + | "blockquote" + | "button" + | "caption" + | "cite" + | "code" + | "dd" + | "del" + | "details" + | "dfn" + | "dt" + | "em" + | "figcaption" + | "h1" + | "h2" + | "h3" + | "h4" + | "h5" 
+ | "h6" + | "i" + | "ins" + | "kbd" + | "label" + | "legend" + | "li" + | "mark" + | "marquee" + | "meter" + | "nobr" + | "option" + | "output" + | "p" + | "pre" + | "progress" + | "q" + | "rb" + | "rp" + | "rt" + | "s" + | "sample" + | "small" + | "span" + | "strong" + | "sub" + | "summary" + | "sup" + | "td" + | "textarea" + | "th" + | "time" + | "title" + | "u" + | "var" + ); + let preformatted = tag_name == "pre"; + log!(" Textual tag? {textual_tag}"); + + while let Some((byte_idx, char)) = chars.next() { + if char == '<' { + let Some((next_idx, next_char)) = chars.peek().copied() else { + return Ok((Cow::Owned(output), source.len() - 1)); + }; + + if next_char == '/' { + chars.next(); + + if !preformatted { + let mut trim = false; + let mut chars_rev = source[..byte_idx].chars(); + while let Some(char) = chars_rev.next_back() { + if !char.is_whitespace() { + break; + } else if char == '\n' { + trim = true; + } + } + if trim { + output = output.trim_end().to_string(); + } + } + + output += "' { + let mut end = idx; + if !preformatted && chars.peek().map(|(_, char)| *char) == Some('\n') { + while chars + .peek() + .map(|(_, c)| c.is_whitespace()) + .unwrap_or(false) + { + end = chars.next().unwrap().0; + } + } + + if output.ends_with(tag_name) { + output.push('>'); + return Ok((Cow::Owned(output), end)); + } else { + output.push('>'); + break; + } + } + + if !char.is_whitespace() { + output.push(char); + } + } + } else if !next_char.is_whitespace() { + let (subtag, used) = handle_tag(source_path, &source[byte_idx..], error_meta)?; + log!(" Found subtag `{subtag}`. Ends at {used}, current char is {next_idx}."); + let used = used + byte_idx; + + loop { + let (next_idx, _) = chars.next().unwrap(); + if next_idx == used { + break; + } + } + + output += &subtag; + } else { + output.push('<'); + } + } else if !preformatted { + match char { + '\n' => {} + _ if char.is_whitespace() => { + while chars + .peek() + .map(|(_, char)| char.is_whitespace()) + .unwrap_or(false) + { + chars.next(); + } + if textual_tag { + output.push(' '); + } + } + _ => output.push(char), + } + } else { + output.push(char); + } + } + + Ok((Cow::Owned(output), source.len() - 1)) +} + +#[cfg(test)] +mod tests { + use crate::minifier::minify_html; + + struct Tester { + name: &'static str, + source: &'static str, + expected: &'static str, + } + impl Tester { + fn test(self) { + log!("\nSTARTING TEST '{}'", self.name); + let result = minify_html("test/path", self.source, self.source).unwrap(); + assert_eq!(&result, self.expected, "Test name: {}", self.name); + } + } + + #[test] + fn test() { + let cases = [ + Tester { + name: "Trim whitespace between tags", + source: "

hi

", + expected: "

hi

", + }, + Tester { + name: "Trim comments", + source: "

hi

", + expected: "

hi

", + }, + Tester { + name: "Includes whitespace in textual comments", + source: "

This has weird whitespace!!!\n

", + expected: "

This has weird whitespace!!!\n

", + }, + Tester { + name: "Element properties", + source: "

hewwo

", + expected: "

hewwo

", + }, + Tester { + name: "Element properties 2", + source: "

hewwo

", + expected: "

hewwo

", + }, + Tester { + name: "Unclosed Elements", + source: "

hello

\n
", + expected: "

hello


" + } + ]; + + for case in cases { + case.test(); + } + } +} diff --git a/src/translator.rs b/src/translator.rs new file mode 100644 index 0000000..d80002b --- /dev/null +++ b/src/translator.rs @@ -0,0 +1,4 @@ +mod gemtext; +mod markdown; + +pub use gemtext::translate_gemtext; diff --git a/src/translator/gemtext.rs b/src/translator/gemtext.rs new file mode 100644 index 0000000..1e92c42 --- /dev/null +++ b/src/translator/gemtext.rs @@ -0,0 +1,119 @@ +use { + crate::Cow, + std::{fmt::Write, path::Path}, +}; + +#[derive(PartialEq, Eq)] +enum ParserState { + Text, + List, + Preformatted, +} + +/// Escapes characters from an input string so valid Gemtext doesn't get +/// misinterpreted as HTML. +// +// This should prevent any form of HTML injection... but other programs filter +// more characters than are being filtered here, which should be looked into... +// +// Cases covered by Canvas LMS: +// '&' => *out += "&", +// '<' => *out += "<", +// '>' => *out += ">", +// '"' => *out += """, +// '\'' => *out += "'", +// '/' => *out += "/", +// '`' => *out += "`", +// '=' => *out += "=", +// From https://github.com/instructure/canvas-lms/blob/master/packages/html-escape/index.js#L85 +fn html_escape_into(input: &str, out: &mut String) { + for char in input.chars() { + match char { + '<' => *out += "<", + '>' => *out += ">", + '"' => *out += """, + '&' => *out += "&", + other => out.push(other), + } + } +} + +pub fn translate_gemtext(source_path: &Path, source: &str) -> Result> { + let mut output = String::new(); + let mut state = ParserState::Text; + output += "

"; + + for (line_num, line) in source.lines().enumerate() { + if state == ParserState::Preformatted { + if line.starts_with("```") { + state = ParserState::Text; + output += ""; + continue; + } + + html_escape_into(line, &mut output); + continue; + } + + if let Some(list_line) = line.strip_prefix("* ") { + if state != ParserState::List { + state = ParserState::List; + output += "

    "; + } + output += "
  • "; + html_escape_into(list_line, &mut output); + output += "
  • "; + continue; + } else if state == ParserState::List { + state = ParserState::Text; + output += "
"; + } + + if let Some(link_line) = line.strip_prefix("=>") { + let mut line = link_line.split_whitespace(); + let link = line.next().ok_or(Cow::Owned(format!( + "Expected URL in link at {source_path:?}:{line_num}" + )))?; + + output += ""; + + if let Some(link_text) = line.next() { + html_escape_into(link_text, &mut output); + } else { + html_escape_into(link, &mut output); + } + + output += "
"; + } else if let Some(alt) = line.strip_prefix("```") { + output += "
";
+            state = ParserState::Preformatted;
+        } else if let Some(quote) = line.strip_prefix("> ") {
+            output += "

"; + html_escape_into(quote, &mut output); + output += "

"; + } else if line.starts_with('#') { + let mut chars = line.bytes(); + let mut level = 0; + while chars.next() == Some(b'#') { + level += 1; + } + + write!(output, "").unwrap(); + html_escape_into(line[level..].trim_start(), &mut output); + write!(output, "").unwrap(); + } else { + output += line; + } + } + + if state == ParserState::List { + output += ""; + } + + output += "

"; + Ok(output) +} diff --git a/src/translator/markdown.rs b/src/translator/markdown.rs new file mode 100644 index 0000000..e69de29 diff --git a/tests/gemtext.rs b/tests/gemtext.rs new file mode 100644 index 0000000..cfbe520 --- /dev/null +++ b/tests/gemtext.rs @@ -0,0 +1,21 @@ +use { + std::{fs, path::PathBuf}, + webby::translator, +}; + +#[test] +fn test() { + let tests = ["link", "header", "text", "list"]; + + for test in tests { + let gmi_path = PathBuf::from(format!("tests/gemtext/{test}.gmi")); + let html_path = PathBuf::from(format!("tests/gemtext/{test}.html")); + let html = + translator::translate_gemtext(&gmi_path, &fs::read_to_string(&gmi_path).unwrap()) + .unwrap(); + assert_eq!( + html, + format!("

{}

", fs::read_to_string(&html_path).unwrap()) + ) + } +} diff --git a/tests/gemtext/header.gmi b/tests/gemtext/header.gmi new file mode 100644 index 0000000..5da5539 --- /dev/null +++ b/tests/gemtext/header.gmi @@ -0,0 +1,3 @@ +# header1 +## header2 +### header3 diff --git a/tests/gemtext/header.html b/tests/gemtext/header.html new file mode 100644 index 0000000..ff686c4 --- /dev/null +++ b/tests/gemtext/header.html @@ -0,0 +1 @@ +

header1

header2

header3

\ No newline at end of file diff --git a/tests/gemtext/link.gmi b/tests/gemtext/link.gmi new file mode 100644 index 0000000..e02069a --- /dev/null +++ b/tests/gemtext/link.gmi @@ -0,0 +1,4 @@ +=> https://google.com +=> https://google.com google +=> https://google.com +=> https://google.com google diff --git a/tests/gemtext/link.html b/tests/gemtext/link.html new file mode 100644 index 0000000..9e84ed5 --- /dev/null +++ b/tests/gemtext/link.html @@ -0,0 +1 @@ +https://google.com
google
https://google.com
google
\ No newline at end of file diff --git a/tests/gemtext/list.gmi b/tests/gemtext/list.gmi new file mode 100644 index 0000000..e61e1d4 --- /dev/null +++ b/tests/gemtext/list.gmi @@ -0,0 +1,3 @@ +* one +* two +* three diff --git a/tests/gemtext/list.html b/tests/gemtext/list.html new file mode 100644 index 0000000..a66f58a --- /dev/null +++ b/tests/gemtext/list.html @@ -0,0 +1 @@ +
  • one
  • two
  • three
\ No newline at end of file diff --git a/tests/gemtext/text.gmi b/tests/gemtext/text.gmi new file mode 100644 index 0000000..840b569 --- /dev/null +++ b/tests/gemtext/text.gmi @@ -0,0 +1 @@ +ipsum lorem I don't speak latinum diff --git a/tests/gemtext/text.html b/tests/gemtext/text.html new file mode 100644 index 0000000..02ee347 --- /dev/null +++ b/tests/gemtext/text.html @@ -0,0 +1 @@ +ipsum lorem I don't speak latinum \ No newline at end of file
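For reference, a minimal sketch of driving the compiler through the library API this patch adds in `src/lib.rs` (`Target`, `Mode`, and `build_target`) rather than through the `webby` binary. This usage is an assumption based only on the exported items, not something the patch itself demonstrates, and the file paths are placeholders.

```rust
use std::{fs, path::PathBuf};
use webby::{build_target, Mode, Target};

fn main() {
    // As in webby's own main.rs, the output directory has to exist before
    // a target is built into it.
    fs::create_dir_all("webby").expect("failed to create output directory");

    // Placeholder paths: compile ./index.html into ./webby/index.html.
    let target = Target {
        path: PathBuf::from("index.html"),
        output: PathBuf::from("webby/index.html"),
        mode: Mode::Compile,
    };

    // Translator/minifier errors come back as Err; most I/O failures panic
    // inside build_target, mirroring the binary's behaviour.
    if let Err(err) = build_target(target) {
        eprintln!("{err}");
    }
}
```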