Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial Semantic token #133

Merged
merged 6 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 71 additions & 2 deletions doc.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"path-util.rkt"
"doc-trace.rkt"
"struct.rkt"
"highlight.rkt"
racket/match
racket/class
racket/set
Expand Down Expand Up @@ -315,7 +316,73 @@
#:end (Pos #:line line #:char really-indent))
#:newText new-text)]))

(provide Doc-trace
(define (token-type-encoding token)
(index-of *semantic-token-types* (SemanticToken-type token)))

(define (token-modifier-encoding token)
(define indexes (indexes-where *semantic-token-modifiers*
(λ (m) (memq m (SemanticToken-modifiers token)))))
;; build a bit flag of the modifiers of `token`.
;;
;; equivalent to C family pseudocode
;;
;; uint32_t flag = 0
;; for index in indexes:
;; flag = flag | (1 << index)
;; return flag
;;
;; But the integer bit width is ignored here, because
;; the *semantic-token-modifiers* is very small.
(for/sum ([index indexes])
(expt 2 index)))
6cdh marked this conversation as resolved.
Show resolved Hide resolved

;; encode `token` using relative encoding
;;
;; each token is encoded as five integers (copied from lsp specificatioin 3.17):
;; * deltaLine: token line number, relative to the start of the previous token
;; * deltaStart: token start character, relative to the start of the previous token
;; (relative to 0 or the previous token’s start if they are on the same line)
;; * length: the length of the token.
;; * tokenType: will be looked up in SemanticTokensLegend.tokenTypes.
;; We currently ask that tokenType < 65536.
;; * tokenModifiers: each set bit will be looked up in SemanticTokensLegend.tokenModifiers
;;
;; for the first token, its previous token is defined as a zero length fake token which
;; has line number 0 and character position 0.
(define (token-encoding editor token prev-pos)
(match-define (list line ch) (send editor pos->line/char (SemanticToken-start token)))
(match-define (list prev-line prev-ch) (send editor pos->line/char prev-pos))
(define delta-line (- line prev-line))
(define delta-start
6cdh marked this conversation as resolved.
Show resolved Hide resolved
(if (= line prev-line)
(- ch prev-ch)
ch))
(define len (- (SemanticToken-end token) (SemanticToken-start token)))
(define type (token-type-encoding token))
(define modifier (token-modifier-encoding token))
(values delta-line delta-start len type modifier))

;; get the tokens whose range are contained in interval [pos-start, pos-end)
;; the tokens whose range intersects the given range is included.
;; the previous token of the first token in the result is defined as a zero length fake token which
;; has line number 0 and character position 0.
(define (doc-range-tokens editor uri pos-start pos-end)
(define tokens (collect-semantic-tokens editor (uri->path uri)))
(define tokens-in-range
(filter-not (λ (tok) (or (<= (SemanticToken-end tok) pos-start)
(>= (SemanticToken-start tok) pos-end)))
tokens))
(for/fold ([result '()]
[prev-pos 0]
#:result (flatten (reverse result)))
([token tokens-in-range])
(define-values (delta-line delta-start len type modifier)
(token-encoding editor token prev-pos))
(values (cons (list delta-line delta-start len type modifier) result)
(SemanticToken-start token))))

(provide Doc-text
Doc-trace
new-doc
doc-checked?
doc-update!
Expand All @@ -329,4 +396,6 @@
doc-find-containing-paren
doc-get-symbols
get-definition-by-id
format!)
format!
doc-range-tokens)

168 changes: 168 additions & 0 deletions highlight.rkt
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#lang racket/base

(require syntax/modread
drracket/check-syntax
syntax/parse
"struct.rkt"
racket/class
racket/set
racket/list
racket/bool
racket/match)

(provide collect-semantic-tokens)

;; A temporary structure to hold tokens
;; `tag` is symbol that is a tag associated with this token.
;; An identifier may correspond multiple tokens. They will be merged, then converted into
;; lsp semantic token types and modifiers.
(struct Token
(start end tag))

(define collector%
(class (annotations-mixin object%)
(define styles '())

(super-new)

(define/override (syncheck:find-source-object stx)
#f)

(define/override (syncheck:color-range src start end style)
(when (< start end)
(set! styles (cons (Token start end (string->symbol style)) styles))))

(define/override (syncheck:add-definition-target src start finish id mods)
(when (< start finish)
(set! styles (cons (Token start finish 'definition) styles))))

(define/public (get-styles)
(set->list (list->set styles)))))

; (-> lsp-editor% Path (Listof SemanticToken))
(define (collect-semantic-tokens doc-text path)
(define code-str (send doc-text get-text))
(define in (open-input-string code-str))
(port-count-lines! in)
(define-values (path-dir _1 _2) (split-path path))

(define base-ns (make-base-namespace))

(define-values (add-syntax done)
(make-traversal base-ns #f))

(define token-list '())

(define collector (new collector%))
(with-handlers ([(λ (_) #t) (λ (_) #f)])
(parameterize ([current-load-relative-directory path-dir]
[current-namespace base-ns]
[current-annotations collector])
(define stx (with-module-reading-parameterization
(lambda () (read-syntax path in))))
(set! token-list (append (walk-stx stx) token-list))

(define expanded (expand stx))
(set! token-list (append (walk-expanded-stx path expanded) token-list))
(add-syntax expanded)
(done))

(define drracket-styles (convert-drracket-color-styles (send collector get-styles)))
(set! token-list (append drracket-styles token-list)))

(let* ([tokens-no-false (filter-not false? token-list)]
[tokens-no-out-bounds (filter (λ (t) (< -1 (Token-start t) (string-length code-str)))
tokens-no-false)]
[tokens-in-order (sort tokens-no-out-bounds < #:key Token-start)]
[same-ident-token-groups (group-by Token-start tokens-in-order)]
[tokens-with-merged-tags
(for/list ([token-group same-ident-token-groups])
(define tok (first token-group))
(list (Token-start tok) (Token-end tok) (map Token-tag token-group)))]
[result-tokens
(for*/list ([t tokens-with-merged-tags]
[type (in-value (select-type (third t)))]
[modifiers (in-value (get-valid-modifiers (third t)))]
#:when (not (false? type)))
(SemanticToken (first t) (second t) type modifiers))])
result-tokens))

(define (convert-drracket-color-styles styles)
(for/list ([s styles])
(match s
[(Token start end 'drracket:check-syntax:lexically-bound)
(Token start end 'variable)]
[_ #f])))

;; `tags` might contains multiple valid types.
;; This function selects a proper type based on some rules.
(define (select-type tags)
(define valid-types (filter (λ (t) (memq t *semantic-token-types*)) tags))
(cond [(null? valid-types)
#f]
[(memq 'function valid-types)
'function]
[(memq 'variable valid-types)
'variable]
[else (first valid-types)]))

(define (get-valid-modifiers tags)
(filter (λ (t) (memq t *semantic-token-modifiers*)) tags))

(define (walk-stx stx)
(syntax-parse stx
#:datum-literals (#%module-begin)
[() (list)]
[(any1 any* ...)
(append (walk-stx #'any1)
(walk-stx #'(any* ...)))]
[#(any1 any* ...)
(append (walk-stx #'any1)
(walk-stx #'(any* ...)))]
[#%module-begin
(list)]
[atom (list (tag-of-atom-stx #'atom))]))

(define (walk-expanded-stx src stx)
(syntax-parse stx
#:datum-literals (lambda define-values)
[(lambda (args ...) expr ...)
(walk-expanded-stx src #'(expr ...))]
[(define-values (fs) (lambda _ ...))
(append (tags-of-stx-lst src #'(fs) 'function)
(walk-expanded-stx src (drop (syntax-e stx) 2)))]
[(any1 any* ...)
(append (walk-expanded-stx src #'any1)
(walk-expanded-stx src #'(any* ...)))]
[_ (list)]))

(define (tags-of-stx-lst src stx-lst tag)
(define (in-current-file? stx)
(equal? src (syntax-source stx)))

(let* ([stx-lst (syntax-e stx-lst)]
[stx-lst-in-current-file (filter in-current-file? stx-lst)]
[tag-lst (map (λ (stx) (tag-of-atom-stx stx tag)) stx-lst-in-current-file)])
tag-lst))

(define (tag-of-atom-stx atom-stx [expect-tag #f])
(define pos+1 (syntax-position atom-stx))
(define len (syntax-span atom-stx))
(if (or (not pos+1) (not len) (= len 0)
(not (syntax-original? atom-stx)))
#f
(let ([pos (sub1 pos+1)])
(Token pos (+ pos len)
(if (false? expect-tag)
(get-atom-tag (syntax-e atom-stx))
expect-tag)))))

(define (get-atom-tag atom)
(match atom
[(? number?) 'number]
[(? symbol?) 'symbol]
[(? string?) 'string]
[(? bytes?) 'string]
[(? regexp?) 'regexp]
[_ 'unknown]))

21 changes: 20 additions & 1 deletion methods.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"error-codes.rkt"
"msg-io.rkt"
"responses.rkt"
"struct.rkt"
(prefix-in text-document/ "text-document.rkt"))

;; TextDocumentSynKind enumeration
Expand All @@ -30,7 +31,15 @@
['method (? string? method)])
(define params (hash-ref msg 'params hasheq))
(define response (process-request id method params))
(display-message/flush response)]
;; the result can be a response or a procedure which returns
;; a response. If it's a procedure, then it's expected to run
;; concurrently.
(thread (λ ()
(display-message/flush
(if (procedure? response)
(response)
response))))
(void)]
;; Notification
[(hash-table ['method (? string? method)])
(define params (hash-ref msg 'params hasheq))
Expand Down Expand Up @@ -88,6 +97,10 @@
(text-document/range-formatting! id params)]
["textDocument/onTypeFormatting"
(text-document/on-type-formatting! id params)]
["textDocument/semanticTokens/full"
(text-document/full-semantic-tokens id params)]
["textDocument/semanticTokens/range"
(text-document/range-semantic-tokens id params)]
[_
(eprintf "invalid request for method ~v\n" method)
(define err (format "The method ~v was not found" method))
Expand Down Expand Up @@ -127,6 +140,11 @@
(hash-table ['prepareSupport #t])])])
(hasheq 'prepareProvider #t)]
[_ #t]))
(define semantic-provider
(hasheq 'legend (hasheq 'tokenTypes (map symbol->string *semantic-token-types*)
'tokenModifiers (map symbol->string *semantic-token-modifiers*))
'full #t
'range #t))
(define server-capabilities
(hasheq 'textDocumentSync sync-options
'hoverProvider #t
Expand All @@ -137,6 +155,7 @@
'signatureHelpProvider (hasheq 'triggerCharacters (list " " ")" "]"))
'inlayHintProvider #t
'renameProvider renameProvider
'semanticTokensProvider semantic-provider
'documentHighlightProvider #t
'documentSymbolProvider #t
'documentFormattingProvider #t
Expand Down
26 changes: 25 additions & 1 deletion struct.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,27 @@
#:trim-final-newlines (hash-ref jsexpr 'trimFinalNewlines undef-object)
#:key (hash-ref jsexpr 'key undef-object))))

(struct SemanticToken
(start end type modifiers)
#:transparent)

;; The order of this list is irrelevant.
;; The client receives this list from server ability declaration during
;; initialize handshake then use it to decode server semantic tokens messages.
;; Different order produces different encoding results of semantic tokens,
;; but does not affect client and server behavior.
;; To change the order, simply change it here, don't need to change other code.
(define *semantic-token-types*
'(variable
function
string
number
regexp))
6cdh marked this conversation as resolved.
Show resolved Hide resolved

;; The order of this list is irrelevant, similar to *semantic-token-types*.
(define *semantic-token-modifiers*
'(definition))

;; usage:
;; (jsexpr? jsexpr) ;; #t
;; (match jsexpr
Expand All @@ -147,4 +168,7 @@
(provide FormattingOptions
FormattingOptions-tab-size
FormattingOptions-trim-trailing-whitespace
as-FormattingOptions)
as-FormattingOptions
(struct-out SemanticToken)
*semantic-token-types*
*semantic-token-modifiers*)
6 changes: 5 additions & 1 deletion tests/lifecycle/init_resp.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
"inlayHintProvider": true,
"referencesProvider": true,
"renameProvider": true,
"semanticTokensProvider": {
"full": true,
"range": true
},
"signatureHelpProvider": {
"triggerCharacters": [
" ",
Expand All @@ -40,4 +44,4 @@
}
}
}
}
}
11 changes: 9 additions & 2 deletions tests/lifecycle/test-main.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
(require chk
json
racket/os
"../../msg-io.rkt")
"../../msg-io.rkt"
"../../json-util.rkt"
"../../struct.rkt")

(define init-req
(hasheq 'jsonrpc "2.0"
Expand Down Expand Up @@ -35,7 +37,11 @@
;; Initialize request
(display-message/flush init-req stdin)
(let ([resp (read-message stdout)])
(chk #:= resp (read-json (open-input-file "init_resp.json"))))
(define expected-json (read-json (open-input-file "init_resp.json")))
(define json (jsexpr-set expected-json '(result capabilities semanticTokensProvider legend)
(hasheq 'tokenModifiers (map symbol->string *semantic-token-modifiers*)
'tokenTypes (map symbol->string *semantic-token-types*))))
(chk #:= resp json))

;; Shutdown request
(display-message/flush shutdown-req stdin)
Expand All @@ -49,3 +55,4 @@
(subprocess-wait sp)
(define st (subprocess-status sp))
(chk (zero? st)))

Loading
Loading