Skip to content

Commit

Permalink
Basic implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
richfitz committed Aug 25, 2023
1 parent 0e1c19f commit fdfe73a
Show file tree
Hide file tree
Showing 7 changed files with 359 additions and 0 deletions.
15 changes: 15 additions & 0 deletions R/format.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
to_json <- function(x) {
if (x$type == "array") {
contents <- vcapply(x$value, to_json)
sprintf("[%s]", paste(contents, collapse = ","))
} else if (x$type == "object") {
contents <- vcapply(x$value, function(el) {
sprintf('"%s":%s', el$key, to_json(el$value))
})
sprintf("{%s}", paste(contents, collapse = ","))
} else if (x$type == "string") {
sprintf('"%s"', x$value)
} else if (x$type %in% c("literal", "number")) {
x$value
}
}
76 changes: 76 additions & 0 deletions R/lexer.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
lex_json <- function(string) {
chomp <- json_reader(string)
tokens <- list()
while (!is.null(t <- chomp())) {
tokens[[length(tokens) + 1L]] <- t
}
tokens
}

JSON_NUMBER_START <- c(as.character(0:9), ".", "-")
JSON_NUMBER <- c(JSON_NUMBER_START, "e")
JSON_WHITESPACE = c(" ", "\t", "\b", "\n", "\r")
JSON_SYNTAX = c(",", ":", "[", "]", "{", "}")

json_element <- function(type, value) {
list(type = type, value = value)
}

json_reader <- function(string) {
idx_quote <- c(gregexec('(?<!\\\\)"', string, perl = TRUE)[[1]])
chars <- strsplit(string, NULL)[[1]]
pos <- 1L
len <- length(chars)

chomp_chars <- function(type, include) {
i <- pos
while (i <= len && any(chars[[i]] == include)) {
i <- i + 1L
}
value <- substr(string, pos, i - 1)
pos <<- i
json_element(type, value)
}

chomp_string <- function() {
to <- idx_quote[idx_quote > pos]
if (length(to) == 0) {
stop("Expected end of string")
}
to <- to[[1L]]
value <- substr(string, pos + 1L, to - 1L)
pos <<- to + 1L
json_element("string", value)
}

chomp_literal_or_fail <- function() {
value <- substr(string, pos, pos + 3L)
if (value != "null" && value != "true") {
value <- substr(string, pos, pos + 4L)
if (value != "false") {
stop(sprintf("Unexpected token at pos %d", pos))
}
}
pos <<- pos + nchar(value)
json_element("literal", value)
}

function() {
if (pos > length(chars)) {
return(NULL)
}
cur <- chars[[pos]]
if (any(cur == JSON_SYNTAX)) {
pos <<- pos + 1L
json_element("syntax", cur)
} else if (cur == '"') {
chomp_string()
} else if (any(cur == JSON_WHITESPACE)) {
chomp_chars("whitespace", JSON_WHITESPACE)
} else if (cur %in% JSON_NUMBER_START) {
chomp_chars("number", JSON_NUMBER)
} else {
chomp_literal_or_fail()
}
}
}
107 changes: 107 additions & 0 deletions R/parser.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from_json <- function(string) {
structure(json_parse_tokens(lex_json(string)), class = "tinyjson")
}


json_parse_tokens <- function(tokens) {
tokens <- json_tokens(tokens)
if (tokens$is_complete()) {
return(list())
}
res <- json_parse_any(tokens)
if (!tokens$is_complete()) {
stop("Did not consume all tokens")
}
res
}


json_parse_any <- function(tokens) {
t <- tokens$pop("json")
if (t$type == "syntax" && t$value == "{") {
json_parse_object(tokens)
} else if (t$type == "syntax" && t$value == "[") {
json_parse_array(tokens)
} else {
t
}
}


json_tokens <- function(tokens) {
tokens <- tokens[vapply(tokens, "[[", "", "type") != "whitespace"]
pos <- 0L
len <- length(tokens)
list(
is_complete = function() {
pos == len
},
peek = function(current) {
if (pos == len) {
stop(sprintf("Ran out of tokens while parsing %s", current))
}
tokens[[pos + 1L]]
},
pop = function(current) {
if (pos == len) {
stop(sprintf("Ran out of tokens while parsing %s", current))
}
tokens[[pos <<- pos + 1L]]
}
)
}


json_parse_object <- function(tokens) {
ret <- list(type = "object",
value = list())

t <- tokens$peek("object")
if (t$type == "syntax" && t$value == "}") {
tokens$pop("object")
return(ret)
}

repeat {
key <- tokens$pop("object")
if (key$type != "string") {
stop("Expected string key")
}
t <- tokens$pop("object")
if (!(t$type == "syntax" && t$value == ":")) {
stop("Expected ':' after an object key")
}
value <- json_parse_any(tokens)
ret$value[[length(ret$value) + 1]] <- list(key = key$value, value = value)
t <- tokens$pop("object")
if (t$type == "syntax" && t$value == "}") {
return(ret)
}
if (!(t$type == "syntax" && t$value == ",")) {
stop("Expected a comma after object element")
}
}
}


json_parse_array <- function(tokens) {
ret <- list(type = "array",
value = list())

t <- tokens$peek("array")
if (t$type == "syntax" && t$value == "]") {
tokens$pop("array")
return(ret)
}

repeat {
ret$value[[length(ret$value) + 1L]] <- json_parse_any(tokens)
t <- tokens$pop("array")
if (t$type == "syntax" && t$value == "]") {
return(ret)
}
if (!(t$type == "syntax" && t$value == ",")) {
stop("Expected a comma after array element")
}
}
}
5 changes: 5 additions & 0 deletions R/util.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
`%||%` <- function(x, y) { # nolint
if (is.null(x)) y else x
}


vcapply <- function(...) {
vapply(..., FUN.VALUE = "")
}
17 changes: 17 additions & 0 deletions tests/testthat/test-format.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
expect_can_roundtrip <- function(x) {
expect_equal(to_json(from_json(x)), x,
label = sprintf("Roundtrip of '%s'", x))
}

test_that("can roundtrip simple json", {
# expect_can_roundtrip("")
expect_can_roundtrip("[]")
expect_can_roundtrip("{}")
expect_can_roundtrip("true")
expect_can_roundtrip("false")
expect_can_roundtrip("null")
expect_can_roundtrip("1")
expect_can_roundtrip('"something"')
expect_can_roundtrip("[1,2,3]")
expect_can_roundtrip('{"a":1,"b":2}')
})
63 changes: 63 additions & 0 deletions tests/testthat/test-lexer.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
test_that("simple cases", {
expect_equal(
lex_json(""),
list())
expect_equal(
lex_json("1"),
list(json_element("number", "1")))
expect_equal(
lex_json("true"),
list(json_element("literal", "true")))
expect_equal(
lex_json("false"),
list(json_element("literal", "false")))
expect_equal(
lex_json("null"),
list(json_element("literal", "null")))
expect_equal(
lex_json('"something"'),
list(json_element("string", "something")))
expect_equal(
lex_json("[]"),
list(json_element("syntax", "["),
json_element("syntax", "]")))
expect_equal(
lex_json("{}"),
list(json_element("syntax", "{"),
json_element("syntax", "}")))
expect_equal(
lex_json("[1, 2]"),
list(json_element("syntax", "["),
json_element("number", "1"),
json_element("syntax", ","),
json_element("whitespace", " "),
json_element("number", "2"),
json_element("syntax", "]")))
expect_equal(
lex_json('["first", "second"]'),
list(json_element("syntax", "["),
json_element("string", "first"),
json_element("syntax", ","),
json_element("whitespace", " "),
json_element("string", "second"),
json_element("syntax", "]")))

expect_equal(
lex_json("[null, true, false]"),
list(json_element("syntax", "["),
json_element("literal", "null"),
json_element("syntax", ","),
json_element("whitespace", " "),
json_element("literal", "true"),
json_element("syntax", ","),
json_element("whitespace", " "),
json_element("literal", "false"),
json_element("syntax", "]")))
})


## We never actually expect to fail; these would need improvement.
test_that("handle error cases with some grace", {
expect_error(lex_json('"unterminated'), "Expected end of string")
expect_error(lex_json('falser'), "Unexpected token at pos 6")
})
76 changes: 76 additions & 0 deletions tests/testthat/test-parser.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
test_that("can convert simple things from json", {
expect_equal(from_json(""),
structure(list(), class = "tinyjson"))
expect_equal(from_json("1"),
structure(list(type = "number", value = "1"),
class = "tinyjson"))
expect_equal(from_json("true"),
structure(list(type = "literal", value = "true"),
class = "tinyjson"))
expect_equal(from_json("false"),
structure(list(type = "literal", value = "false"),
class = "tinyjson"))
expect_equal(from_json("null"),
structure(list(type = "literal", value = "null"),
class = "tinyjson"))
expect_equal(from_json('"something"'),
structure(list(type = "string", value = "something"),
class = "tinyjson"))
expect_equal(from_json('"some \\"thing\\" else"'),
structure(list(type = "string",
value = "some \\\"thing\\\" else"),
class = "tinyjson"))
expect_equal(from_json("[]"),
structure(list(type = "array", value = list()),
class = "tinyjson"))
expect_equal(from_json("{}"),
structure(list(type = "object", value = list()),
class = "tinyjson"))
expect_equal(
from_json("[1, 2]"),
structure(list(
type = "array",
value = list(
list(type = "number", value = "1"),
list(type = "number", value = "2"))),
class = "tinyjson"))
expect_equal(
from_json('{"a": 1, "b": 2}'),
structure(list(
type = "object",
value = list(
list(key = "a", value = list(type = "number", value = "1")),
list(key = "b", value = list(type = "number", value = "2")))),
class = "tinyjson"))
})


test_that("error if unconsumed tokens", {
expect_error(from_json("[1, 2]3"), "Did not consume all tokens")
## but whitespace is fine
expect_equal(from_json("[1, 2] "), from_json("[1, 2]"))
})


test_that("objects throw parse errors when invalid", {
expect_error(from_json('{"a": 1, 10: 2}'),
"Expected string key")
expect_error(from_json('{"a": 1, "b" 2}'),
"Expected ':' after an object key")
expect_error(from_json('{"a": 1 "b": 2}'),
"Expected a comma after object element")
expect_error(from_json('{"a": 1, "b": 2'),
"Ran out of tokens while parsing object")
expect_error(from_json('{'),
"Ran out of tokens while parsing object")
})


test_that("arrays throw parse errors when invalid", {
expect_error(from_json('[1, 2 3]'),
"Expected a comma after array element")
expect_error(from_json('[1, 2, 3'),
"Ran out of tokens while parsing array")
expect_error(from_json('['),
"Ran out of tokens while parsing array")
})

0 comments on commit fdfe73a

Please sign in to comment.