Basic implementation

reside-ic · Aug 25, 2023 · fdfe73a · fdfe73a
1 parent 0e1c19f
commit fdfe73a
Show file tree

Hide file tree

Showing 7 changed files with 359 additions and 0 deletions.
diff --git a/R/format.R b/R/format.R
@@ -0,0 +1,15 @@
+to_json <- function(x) {
+  if (x$type == "array") {
+    contents <- vcapply(x$value, to_json)
+    sprintf("[%s]", paste(contents, collapse = ","))
+  } else if (x$type == "object") {
+    contents <- vcapply(x$value, function(el) {
+      sprintf('"%s":%s', el$key, to_json(el$value))
+    })
+    sprintf("{%s}", paste(contents, collapse = ","))
+  } else if (x$type == "string") {
+    sprintf('"%s"', x$value)
+  } else if (x$type %in% c("literal", "number")) {
+    x$value
+  }
+}
diff --git a/R/lexer.R b/R/lexer.R
@@ -0,0 +1,76 @@
+lex_json <- function(string) {
+  chomp <- json_reader(string)
+  tokens <- list()
+  while (!is.null(t <- chomp())) {
+    tokens[[length(tokens) + 1L]] <- t
+  }
+  tokens
+}
+
+JSON_NUMBER_START <- c(as.character(0:9), ".", "-")
+JSON_NUMBER <- c(JSON_NUMBER_START, "e")
+JSON_WHITESPACE = c(" ", "\t", "\b", "\n", "\r")
+JSON_SYNTAX = c(",", ":", "[", "]", "{", "}")
+
+json_element <- function(type, value) {
+  list(type = type, value = value)
+}
+
+json_reader <- function(string) {
+  idx_quote <- c(gregexec('(?<!\\\\)"', string, perl = TRUE)[[1]])
+  chars <- strsplit(string, NULL)[[1]]
+  pos <- 1L
+  len <- length(chars)
+
+  chomp_chars <- function(type, include) {
+    i <- pos
+    while (i <= len && any(chars[[i]] == include)) {
+      i <- i + 1L
+    }
+    value <- substr(string, pos, i - 1)
+    pos <<- i
+    json_element(type, value)
+  }
+
+  chomp_string <- function() {
+    to <- idx_quote[idx_quote > pos]
+    if (length(to) == 0) {
+      stop("Expected end of string")
+    }
+    to <- to[[1L]]
+    value <- substr(string, pos + 1L, to - 1L)
+    pos <<- to + 1L
+    json_element("string", value)
+  }
+
+  chomp_literal_or_fail <- function() {
+    value <- substr(string, pos, pos + 3L)
+    if (value != "null" && value != "true") {
+      value <- substr(string, pos, pos + 4L)
+      if (value != "false") {
+        stop(sprintf("Unexpected token at pos %d", pos))
+      }
+    }
+    pos <<- pos + nchar(value)
+    json_element("literal", value)
+  }
+
+  function() {
+    if (pos > length(chars)) {
+      return(NULL)
+    }
+    cur <- chars[[pos]]
+    if (any(cur == JSON_SYNTAX)) {
+      pos <<- pos + 1L
+      json_element("syntax", cur)
+    } else if (cur == '"') {
+      chomp_string()
+    } else if (any(cur == JSON_WHITESPACE)) {
+      chomp_chars("whitespace", JSON_WHITESPACE)
+    } else if (cur %in% JSON_NUMBER_START) {
+      chomp_chars("number", JSON_NUMBER)
+    } else {
+      chomp_literal_or_fail()
+    }
+  }
+}
diff --git a/R/parser.R b/R/parser.R
@@ -0,0 +1,107 @@
+from_json <- function(string) {
+  structure(json_parse_tokens(lex_json(string)), class = "tinyjson")
+}
+
+
+json_parse_tokens <- function(tokens) {
+  tokens <- json_tokens(tokens)
+  if (tokens$is_complete()) {
+    return(list())
+  }
+  res <- json_parse_any(tokens)
+  if (!tokens$is_complete()) {
+    stop("Did not consume all tokens")
+  }
+  res
+}
+
+
+json_parse_any <- function(tokens) {
+  t <- tokens$pop("json")
+  if (t$type == "syntax" && t$value == "{") {
+    json_parse_object(tokens)
+  } else if (t$type == "syntax" && t$value == "[") {
+    json_parse_array(tokens)
+  } else {
+    t
+  }
+}
+
+
+json_tokens <- function(tokens) {
+  tokens <- tokens[vapply(tokens, "[[", "", "type") != "whitespace"]
+  pos <- 0L
+  len <- length(tokens)
+  list(
+    is_complete = function() {
+      pos == len
+    },
+    peek = function(current) {
+      if (pos == len) {
+        stop(sprintf("Ran out of tokens while parsing %s", current))
+      }
+      tokens[[pos + 1L]]
+    },
+    pop = function(current) {
+      if (pos == len) {
+        stop(sprintf("Ran out of tokens while parsing %s", current))
+      }
+      tokens[[pos <<- pos + 1L]]
+    }
+  )
+}
+
+
+json_parse_object <- function(tokens) {
+  ret <- list(type = "object",
+              value = list())
+
+  t <- tokens$peek("object")
+  if (t$type == "syntax" && t$value == "}") {
+    tokens$pop("object")
+    return(ret)
+  }
+
+  repeat {
+    key <- tokens$pop("object")
+    if (key$type != "string") {
+      stop("Expected string key")
+    }
+    t <- tokens$pop("object")
+    if (!(t$type == "syntax" && t$value == ":")) {
+      stop("Expected ':' after an object key")
+    }
+    value <- json_parse_any(tokens)
+    ret$value[[length(ret$value) + 1]] <- list(key = key$value, value = value)
+    t <- tokens$pop("object")
+    if (t$type == "syntax" && t$value == "}") {
+      return(ret)
+    }
+    if (!(t$type == "syntax" && t$value == ",")) {
+      stop("Expected a comma after object element")
+    }
+  }
+}
+
+
+json_parse_array <- function(tokens) {
+  ret <- list(type = "array",
+              value = list())
+
+  t <- tokens$peek("array")
+  if (t$type == "syntax" && t$value == "]") {
+    tokens$pop("array")
+    return(ret)
+  }
+
+  repeat {
+    ret$value[[length(ret$value) + 1L]] <- json_parse_any(tokens)
+    t <- tokens$pop("array")
+    if (t$type == "syntax" && t$value == "]") {
+      return(ret)
+    }
+    if (!(t$type == "syntax" && t$value == ",")) {
+      stop("Expected a comma after array element")
+    }
+  }
+}
diff --git a/R/util.R b/R/util.R
@@ -1,3 +1,8 @@
 `%||%` <- function(x, y) { # nolint
   if (is.null(x)) y else x
 }
+
+
+vcapply <- function(...) {
+  vapply(..., FUN.VALUE = "")
+}
diff --git a/tests/testthat/test-format.R b/tests/testthat/test-format.R
@@ -0,0 +1,17 @@
+expect_can_roundtrip <- function(x) {
+  expect_equal(to_json(from_json(x)), x,
+               label = sprintf("Roundtrip of '%s'", x))
+}
+
+test_that("can roundtrip simple json", {
+  # expect_can_roundtrip("")
+  expect_can_roundtrip("[]")
+  expect_can_roundtrip("{}")
+  expect_can_roundtrip("true")
+  expect_can_roundtrip("false")
+  expect_can_roundtrip("null")
+  expect_can_roundtrip("1")
+  expect_can_roundtrip('"something"')
+  expect_can_roundtrip("[1,2,3]")
+  expect_can_roundtrip('{"a":1,"b":2}')
+})
diff --git a/tests/testthat/test-lexer.R b/tests/testthat/test-lexer.R
@@ -0,0 +1,63 @@
+test_that("simple cases", {
+  expect_equal(
+    lex_json(""),
+    list())
+  expect_equal(
+    lex_json("1"),
+    list(json_element("number", "1")))
+  expect_equal(
+    lex_json("true"),
+    list(json_element("literal", "true")))
+  expect_equal(
+    lex_json("false"),
+    list(json_element("literal", "false")))
+  expect_equal(
+    lex_json("null"),
+    list(json_element("literal", "null")))
+  expect_equal(
+    lex_json('"something"'),
+    list(json_element("string", "something")))
+  expect_equal(
+    lex_json("[]"),
+    list(json_element("syntax", "["),
+         json_element("syntax", "]")))
+  expect_equal(
+    lex_json("{}"),
+    list(json_element("syntax", "{"),
+         json_element("syntax", "}")))
+  expect_equal(
+    lex_json("[1, 2]"),
+    list(json_element("syntax", "["),
+         json_element("number", "1"),
+         json_element("syntax", ","),
+         json_element("whitespace", " "),
+         json_element("number", "2"),
+         json_element("syntax", "]")))
+  expect_equal(
+    lex_json('["first", "second"]'),
+    list(json_element("syntax", "["),
+         json_element("string", "first"),
+         json_element("syntax", ","),
+         json_element("whitespace", " "),
+         json_element("string", "second"),
+         json_element("syntax", "]")))
+
+  expect_equal(
+    lex_json("[null, true, false]"),
+    list(json_element("syntax", "["),
+         json_element("literal", "null"),
+         json_element("syntax", ","),
+         json_element("whitespace", " "),
+         json_element("literal", "true"),
+         json_element("syntax", ","),
+         json_element("whitespace", " "),
+         json_element("literal", "false"),
+         json_element("syntax", "]")))
+})
+
+
+## We never actually expect to fail; these would need improvement.
+test_that("handle error cases with some grace", {
+  expect_error(lex_json('"unterminated'), "Expected end of string")
+  expect_error(lex_json('falser'), "Unexpected token at pos 6")
+})
diff --git a/tests/testthat/test-parser.R b/tests/testthat/test-parser.R
@@ -0,0 +1,76 @@
+test_that("can convert simple things from json", {
+  expect_equal(from_json(""),
+               structure(list(), class = "tinyjson"))
+  expect_equal(from_json("1"),
+               structure(list(type = "number", value = "1"),
+                         class = "tinyjson"))
+  expect_equal(from_json("true"),
+               structure(list(type = "literal", value = "true"),
+                         class = "tinyjson"))
+  expect_equal(from_json("false"),
+               structure(list(type = "literal", value = "false"),
+                         class = "tinyjson"))
+  expect_equal(from_json("null"),
+               structure(list(type = "literal", value = "null"),
+                         class = "tinyjson"))
+  expect_equal(from_json('"something"'),
+               structure(list(type = "string", value = "something"),
+                         class = "tinyjson"))
+  expect_equal(from_json('"some \\"thing\\" else"'),
+               structure(list(type = "string",
+                              value = "some \\\"thing\\\" else"),
+                         class = "tinyjson"))
+  expect_equal(from_json("[]"),
+               structure(list(type = "array", value = list()),
+                         class = "tinyjson"))
+  expect_equal(from_json("{}"),
+               structure(list(type = "object", value = list()),
+                         class = "tinyjson"))
+  expect_equal(
+    from_json("[1, 2]"),
+    structure(list(
+      type = "array",
+      value = list(
+        list(type = "number", value = "1"),
+        list(type = "number", value = "2"))),
+      class = "tinyjson"))
+  expect_equal(
+    from_json('{"a": 1, "b": 2}'),
+    structure(list(
+      type = "object",
+      value = list(
+        list(key = "a", value = list(type = "number", value = "1")),
+        list(key = "b", value = list(type = "number", value = "2")))),
+      class = "tinyjson"))
+})
+
+
+test_that("error if unconsumed tokens", {
+  expect_error(from_json("[1, 2]3"), "Did not consume all tokens")
+  ## but whitespace is fine
+  expect_equal(from_json("[1, 2]  "), from_json("[1, 2]"))
+})
+
+
+test_that("objects throw parse errors when invalid", {
+  expect_error(from_json('{"a": 1, 10: 2}'),
+               "Expected string key")
+  expect_error(from_json('{"a": 1, "b" 2}'),
+               "Expected ':' after an object key")
+  expect_error(from_json('{"a": 1 "b": 2}'),
+               "Expected a comma after object element")
+  expect_error(from_json('{"a": 1, "b": 2'),
+               "Ran out of tokens while parsing object")
+  expect_error(from_json('{'),
+               "Ran out of tokens while parsing object")
+})
+
+
+test_that("arrays throw parse errors when invalid", {
+  expect_error(from_json('[1, 2 3]'),
+               "Expected a comma after array element")
+  expect_error(from_json('[1, 2, 3'),
+               "Ran out of tokens while parsing array")
+  expect_error(from_json('['),
+               "Ran out of tokens while parsing array")
+})