From 30aef7be5830c8ff353a917e170d9ab544b4df6e Mon Sep 17 00:00:00 2001
From: boatbomber
Date: Sat, 7 Jan 2023 15:54:49 -0500
Subject: [PATCH] Support string interpolation syntax

---
 package.json       |   2 +-
 src/lexer/init.lua | 170 +++++++++++++++++++++++++++++++--------------
 wally.toml         |   2 +-
 3 files changed, 120 insertions(+), 54 deletions(-)

diff --git a/package.json b/package.json
index c8b8fb9..5c58b63 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@boatbomber/highlighter",
-	"version": "0.6.2",
+	"version": "0.7.0",
 	"license": "MIT",
 	"repository": {
 		"type": "git",
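
Note on the pattern introduced below: STRING_INTER matches Luau's backtick-quoted
interpolated strings lazily, within a single line. A quick sketch of its behavior
(illustrative only, not part of the patch):

	local STRING_INTER = "`[^\n]-`"

	-- Matches a whole backtick literal up to the first closing backtick:
	print(string.match([[local s = `Hello {name}!`]], STRING_INTER)) --> `Hello {name}!`

	-- [^\n] excludes newlines, so a backtick literal spanning lines is not matched:
	print(string.match("`spans\nlines`", STRING_INTER)) --> nil

Because the match is lazy and unguarded, an escaped backtick inside the literal ends
the match early; that is the same escape-handling caveat already noted in the TODO on
STRING_PLAIN.
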
diff --git a/src/lexer/init.lua b/src/lexer/init.lua
index 3611182..2afe49b 100644
--- a/src/lexer/init.lua
+++ b/src/lexer/init.lua
@@ -38,6 +38,7 @@ local BRACKETS = "[%[%]]+" -- needs to be separate pattern from other operators
 local IDEN = "[%a_][%w_]*"
 local STRING_EMPTY = "(['\"])%1" --Empty String
 local STRING_PLAIN = "(['\"])[^\n]-([^\\]%1)" --TODO: Handle escaping escapes
+local STRING_INTER = "`[^\n]-`"
 local STRING_INCOMP_A = "(['\"]).-\n" --Incompleted String with next line
 local STRING_INCOMP_B = "(['\"])[^\n]*" --Incompleted String without next line
 local STRING_MULTI = "%[(=*)%[.-%]%1%]" --Multiline-String
@@ -72,6 +73,7 @@ local lua_matches = {
 	{ Prefix .. STRING_INCOMP_B .. Suffix, "string" },
 	{ Prefix .. STRING_MULTI .. Suffix, "string" },
 	{ Prefix .. STRING_MULTI_INCOMP .. Suffix, "string" },
+	{ Prefix .. STRING_INTER .. Suffix, "string_inter" },
 
 	-- Comments
 	{ Prefix .. COMMENT_MULTI .. Suffix, "comment" },
@@ -90,74 +92,138 @@ local lua_matches = {
 	{ "^.", "iden" },
 }
 
+-- To reduce the amount of table indexing during lexing, we separate the matches now
+local PATTERNS, TOKENS = {}, {}
+for i, m in lua_matches do
+	PATTERNS[i] = m[1]
+	TOKENS[i] = m[2]
+end
+
 --- Create a plain token iterator from a string.
 -- @tparam string s a string.
 function lexer.scan(s: string)
-	-- local startTime = os.clock()
-	lexer.finished = false
-	local index = 1
-	local sz = #s
-	local p1, p2, p3, pT = "", "", "", ""
-
-	return function()
-		if index <= sz then
-			for _, m in ipairs(lua_matches) do
-				local i1, i2 = string.find(s, m[1], index)
-				if i1 then
-					local tok = string.sub(s, i1, i2)
-					index = i2 + 1
-					lexer.finished = index > sz
-					--if lexer.finished then
-					--	print((os.clock()-startTime)*1000, "ms")
-					--end
-
-					local t = m[2]
-					local t2 = t
-
-					-- Process t into t2
-					if t == "var" then
-						-- Since we merge spaces into the tok, we need to remove them
-						-- in order to check the actual word it contains
-						local cleanTok = string.gsub(tok, Cleaner, "")
-
-						if lua_keyword[cleanTok] then
-							t2 = "keyword"
-						elseif lua_builtin[cleanTok] then
-							t2 = "builtin"
+	local index = 1
+	local size = #s
+	local previousContent1, previousContent2, previousContent3, previousToken = "", "", "", ""
+
+	local thread = coroutine.create(function()
+		while index <= size do
+			local matched = false
+			for tokenType, pattern in ipairs(PATTERNS) do
+				-- Find match
+				local start, finish = string.find(s, pattern, index)
+				if start == nil then continue end
+
+				-- Move head
+				index = finish + 1
+				matched = true
+
+				-- Gather results
+				local content = string.sub(s, start, finish)
+				local rawToken = TOKENS[tokenType]
+				local processedToken = rawToken
+
+				-- Process token
+				if rawToken == "var" then
+					-- Since we merge spaces into the content, we need to remove them
+					-- in order to check the actual word it contains
+					local cleanContent = string.gsub(content, Cleaner, "")
+
+					if lua_keyword[cleanContent] then
+						processedToken = "keyword"
+					elseif lua_builtin[cleanContent] then
+						processedToken = "builtin"
+					elseif string.find(previousContent1, "%.[%s%c]*$") and previousToken ~= "comment" then
+						-- The previous was a . so we need to special case indexing things
+						local parent = string.gsub(previousContent2, Cleaner, "")
+						local lib = lua_libraries[parent]
+						if lib and lib[cleanContent] and not string.find(previousContent3, "%.[%s%c]*$") then
+							-- Indexing a builtin lib with existing item, treat as a builtin
+							processedToken = "builtin"
 						else
-							t2 = "iden"
+							-- Indexing a non builtin, can't be treated as a keyword/builtin
+							processedToken = "iden"
 						end
+						-- print("indexing",parent,"with",cleanContent,"as",processedToken)
+					else
+						processedToken = "iden"
+					end
+				elseif rawToken == "string_inter" then
+					if not string.find(content, "[^\\]{") then
+						-- This interpolated string doesn't actually have any interpolations
+						processedToken = "string"
+					else
+						-- We'll do our own yields, so the main loop won't need to
+						-- Our yields will be a mix of string tokens and whatever is inside the interpolations
+						processedToken = nil
+
+						local isString = true
+						local subIndex = 1
+						local subSize = #content
+						while subIndex <= subSize do
+							-- Find next brace
+							local subStart, subFinish = string.find(content, "^.-[^\\][{}]", subIndex)
+							if subStart == nil then
+								-- No more braces, all string
+								coroutine.yield("string", string.sub(content, subIndex))
+								break
+							end
+
+							if isString then
+								-- We are currently a string
+								subIndex = subFinish + 1
+								coroutine.yield("string", string.sub(content, subStart, subFinish))
-						if string.find(p1, "%.[%s%c]*$") and pT ~= "comment" then
-							-- The previous was a . so we need to special case indexing things
-							local parent = string.gsub(p2, Cleaner, "")
-							local lib = lua_libraries[parent]
-							if lib and lib[cleanTok] and not string.find(p3, "%.[%s%c]*$") then
-								-- Indexing a builtin lib with existing item, treat as a builtin
-								t2 = "builtin"
+
+								-- This brace opens code
+								isString = false
 							else
-								-- Indexing a non builtin, can't be treated as a keyword/builtin
-								t2 = "iden"
+								-- We are currently in code
+								subIndex = subFinish
+								local subContent = string.sub(content, subStart, subFinish - 1)
+								for innerToken, innerContent in lexer.scan(subContent) do
+									coroutine.yield(innerToken, innerContent)
+								end
+
+								-- This brace opens string/closes code
+								isString = true
 							end
-							-- print("indexing",parent,"with",cleanTok,"as",t2)
 						end
 					end
+				end
 
-					-- Record last 3 tokens for the indexing context check
-					p3 = p2
-					p2 = p1
-					p1 = tok
-					pT = t2
-					return t2, tok
+				-- Record last 3 tokens for the indexing context check
+				previousContent3 = previousContent2
+				previousContent2 = previousContent1
+				previousContent1 = content
+				previousToken = processedToken or rawToken
+				if processedToken then
+					coroutine.yield(processedToken, content)
 				end
+				break
+			end
+
+			-- No matches found
+			if not matched then
+				return
 			end
-			-- No matches
-			return nil
 		end
-		-- Reached end
-		return nil
+
+		-- Completed the scan
+		return
+	end)
+
+	return function()
+		if coroutine.status(thread) == "dead" then
+			return
+		end
+
+		local success, token, content = coroutine.resume(thread)
+		if success and token then
+			return token, content
+		end
+
+		return
 	end
 end
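
With this change, scanning source that contains an interpolated string yields the
literal pieces as "string" tokens and recursively lexes whatever sits between the
braces. A rough usage sketch (the require path is illustrative, and token boundaries
are approximate since the lexer merges surrounding whitespace into tokens):

	local lexer = require(script.Parent.lexer) -- illustrative path

	for token, content in lexer.scan([[local msg = `Hello {name}!`]]) do
		print(token, content)
	end

	-- Expected stream, roughly:
	--   keyword  "local"
	--   iden     "msg"
	--   operator "="
	--   string   "`Hello {"
	--   iden     "name"     (from the recursive lexer.scan over the {...} contents)
	--   string   "}!`"
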
diff --git a/wally.toml b/wally.toml
index 97b8bd7..9f329be 100644
--- a/wally.toml
+++ b/wally.toml
@@ -1,7 +1,7 @@
 [package]
 name = "boatbomber/highlighter"
 description = "RichText highlighting Lua code with a pure Lua lexer"
-version = "0.6.2"
+version = "0.7.0"
 license = "MIT"
 authors = ["boatbomber (https://boatbomber.com)"]
 registry = "https://github.com/upliftgames/wally-index"
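
The coroutine wrapper is what makes the recursive case possible: the old closure-based
iterator could only return one token per call from its own stack frame, while a
coroutine can yield from arbitrarily deep call sites, including the nested lexer.scan
over interpolation contents. A minimal standalone sketch of the same pattern, with
hypothetical names:

	local function makeIterator(items)
		local thread = coroutine.create(function()
			for _, item in ipairs(items) do
				if type(item) == "table" then
					-- Nested structure: recurse and forward the inner yields upward,
					-- the way lexer.scan forwards tokens lexed from interpolations
					for inner in makeIterator(item) do
						coroutine.yield(inner)
					end
				else
					coroutine.yield(item)
				end
			end
		end)

		-- Expose the coroutine as a plain iterator function, as lexer.scan does
		return function()
			if coroutine.status(thread) == "dead" then
				return
			end
			local ok, value = coroutine.resume(thread)
			if ok then
				return value
			end
			return
		end
	end

	for value in makeIterator({ 1, { 2, 3 }, 4 }) do
		print(value) --> 1, 2, 3, 4 on successive lines
	end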