Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Language detection via upstream API #23

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import json
import os
import glob
import pprint
import re
import subprocess
import sys
from tree_sitter import Language
Expand Down Expand Up @@ -32,13 +36,12 @@
subprocess.check_call(["git", "fetch", "--depth=1", "origin", commit], cwd=clone_directory)
subprocess.check_call(["git", "checkout", commit], cwd=clone_directory)

print()

# Choose the shared-library path that matches the host platform.
languages_filename = (
    "tree_sitter_languages\\languages.dll"
    if sys.platform == "win32"
    else "tree_sitter_languages/languages.so"
)

# Handed to Language.build_library below; its entries are written out to
# generated.pyx once the build finishes.
index = {}
print(f"{sys.argv[0]}: Building", languages_filename)
Language.build_library(
languages_filename,
Expand Down Expand Up @@ -90,5 +93,11 @@
'vendor/tree-sitter-typescript/tsx',
'vendor/tree-sitter-typescript/typescript',
'vendor/tree-sitter-yaml',
]
],
index,
)

# Persist the collected index as a module-level assignment so the package can
# import it as tree_sitter_languages.generated.
print(f"{sys.argv[0]}: Writing index entries for {len(index)} languages")
with open('tree_sitter_languages/generated.pyx', 'w') as generated:
    # pformat + newline produces the same text pprint.pprint would stream.
    generated.write('index = ' + pprint.pformat(index) + '\n')
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
author_email='[email protected]',
url='https://github.com/grantjenks/py-tree-sitter-languages',
license='Apache 2.0',
ext_modules=cythonize('tree_sitter_languages/core.pyx', language_level='3'),
ext_modules=cythonize('tree_sitter_languages/*.pyx', language_level='3'),
packages=['tree_sitter_languages'],
package_data={'tree_sitter_languages': ['languages.so', 'languages.dll']},
install_requires=['tree-sitter'],
Expand Down
15 changes: 14 additions & 1 deletion tests/test_tree_sitter_languages.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from tree_sitter_languages import get_language, get_parser
from tree_sitter_languages import get_language, get_parser, get_language_for_file
from tree_sitter_languages.generated import index

LANGUAGES = [
'bash',
Expand Down Expand Up @@ -45,6 +46,7 @@
'sqlite',
'toml',
'tsq',
'tsx',
'typescript',
'yaml',
]
Expand Down Expand Up @@ -87,3 +89,14 @@ def test_get_language():
for language in LANGUAGES:
language = get_language(language)
assert language

def test_generated():
    """Check that the generated index has a non-None entry per language.

    A language missing from ``index`` raises ``KeyError`` during the lookup,
    which fails the test as well.
    """
    assert all(index[name] is not None for name in LANGUAGES)

def test_get_language_for_file():
    """File names must resolve to the same language as a lookup by name."""
    expected_by_filename = {
        'file.sh': 'bash',
        'test.go': 'go',
    }
    for filename, lang in expected_by_filename.items():
        detected = get_language_for_file(filename)
        reference = get_language(lang)
        assert detected.name == reference.name
2 changes: 1 addition & 1 deletion tree_sitter_languages/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Tree Sitter with Languages
"""

from .core import get_language, get_parser
from .core import get_language, get_parser, get_language_for_file

__version__ = '1.7.0'
__title__ = 'tree_sitter_languages'
Expand Down
5 changes: 5 additions & 0 deletions tree_sitter_languages/core.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pathlib
import re
import sys

from .generated import index
from tree_sitter import Language, Parser


Expand All @@ -14,6 +16,9 @@ def get_language(language):
language = Language(binary_path, language)
return language

def get_language_for_file(file_name, file_contents=None):
    """Return the Language detected for a file, or None if none matches.

    The language name is resolved by ``Language.lookup_language_name_for_file``
    using the generated ``index``; ``file_contents`` is optional and is passed
    through to that lookup unchanged.
    """
    detected = Language.lookup_language_name_for_file(
        index, file_name, file_contents
    )
    if detected is None:
        return None
    return get_language(detected)

def get_parser(language):
language = get_language(language)
Expand Down
128 changes: 128 additions & 0 deletions tree_sitter_languages/generated.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Generated by build.py, which writes 'index = ' followed by the pprint output
# of the index it passes to Language.build_library.
# Maps a language name to a list of metadata entries; keys seen below include
# 'file-types', 'scope', 'injection-regex', 'first-line-regex' and 'highlights'.
# NOTE(review): erlang, gomod, markdown and yaml map to an empty dict ({})
# rather than a list — presumably no metadata was found for them; confirm that
# consumers accept both shapes.
index = {'bash': [{'file-types': ['sh', 'bash', 'zsh'], 'scope': 'source.bash'}],
'c': [{'file-types': ['c', 'h'], 'scope': 'source.c'}],
'c_sharp': [{'file-types': ['cs'], 'scope': 'source.cs'}],
'commonlisp': [{'file-types': ['lisp'], 'scope': 'source.lisp'}],
'cpp': [{'file-types': ['cc', 'cpp', 'hpp', 'h'],
'highlights': ['queries/highlights.scm',
'node_modules/tree-sitter-c/queries/highlights.scm'],
'scope': 'source.cpp'}],
'css': [{'file-types': ['css'],
'injection-regex': '^css$',
'scope': 'source.css'}],
'dockerfile': [{'file-types': ['Dockerfile',
'dockerfile',
'docker',
'Containerfile',
'container'],
'highlights': ['queries/highlights.scm']}],
'dot': [{'file-types': ['dot', 'gv'], 'scope': 'source.dot'}],
'elisp': [{'file-types': ['el'], 'scope': 'source.emacs.lisp'}],
'elixir': [{'file-types': ['ex', 'exs'],
'injection-regex': '^(ex|elixir)$',
'scope': 'source.elixir'}],
'elm': [{'file-types': ['elm'], 'scope': 'source.elm'}],
'embedded_template': [{'file-types': ['ejs'],
'injection-regex': 'ejs',
'injections': 'queries/injections-ejs.scm',
'scope': 'text.html.ejs'},
{'file-types': ['erb'],
'injection-regex': 'erb',
'injections': 'queries/injections-erb.scm',
'scope': 'text.html.erb'}],
'erlang': {},
'go': [{'file-types': ['go'], 'scope': 'source.go'}],
'gomod': {},
'hack': [{'file-types': ['hack'],
'first-line-regex': '^((<\\?hh.*)|(#!.+ hhvm))',
'scope': 'source.hack'}],
'haskell': [{'file-types': ['hs'],
'highlights': ['queries/highlights.scm'],
'injection-regex': '^(hs|haskell)$',
'scope': 'source.haskell'}],
'hcl': [{'file-types': ['hcl'], 'scope': 'source.hcl'}],
'html': [{'file-types': ['html'],
'injection-regex': 'html',
'scope': 'text.html.basic'}],
'java': [{'file-types': ['java'], 'scope': 'source.java'}],
'javascript': [{'file-types': ['js'],
'highlights': ['queries/highlights-jsx.scm',
'queries/highlights-params.scm',
'queries/highlights.scm'],
'injection-regex': '^(js|javascript)$',
'scope': 'source.js'}],
'jsdoc': [{'injection-regex': 'jsdoc', 'scope': 'text.jsdoc'}],
'json': [{'file-types': ['json'], 'scope': 'source.json'}],
'julia': [{'file-types': ['jl'], 'scope': 'source.julia'}],
'kotlin': [{'file-types': ['kt', 'kts'], 'scope': 'source.kotlin'}],
'lua': [{'file-types': ['lua'], 'scope': 'source.lua'}],
'make': [{'file-types': ['makefile',
'Makefile',
'MAKEFILE',
'GNUmakefile',
'mk',
'mak',
'dsp'],
'scope': 'source.mk'}],
'markdown': {},
'objc': [{'file-types': ['h', 'm'],
'highlights': ['queries/highlights.scm',
'node_modules/tree-sitter-c/queries/highlights.scm'],
'scope': 'source.objc'}],
'ocaml': [{'file-types': ['ml'],
'first-line-regex': '',
'injection-regex': '^(ocaml|ml)$',
'path': 'ocaml',
'scope': 'source.ocaml'}],
'perl': [{'file-types': ['pl'], 'scope': 'source.perl'}],
'php': [{'file-types': ['php'],
'highlights': 'queries/highlights.scm',
'scope': 'source.php'}],
'python': [{'file-types': ['py'], 'scope': 'source.python'}],
'ql': [{'file-types': ['ql', 'qll'], 'scope': 'source.ql'}],
'r': [{'file-types': ['R', 'r'],
'first-line-regex': '#!.*\\bRscript$',
'scope': 'source.R'}],
'regex': [{'injection-regex': '^regex$', 'scope': 'source.regex'}],
'rst': [{'file-types': ['rst'],
'injection-regex': 'rst',
'scope': 'text.rst'}],
'ruby': [{'file-types': ['rb'],
'injection-regex': 'ruby',
'scope': 'source.ruby'}],
'rust': [{'file-types': ['rs'],
'injection-regex': 'rust',
'scope': 'source.rust'}],
'scala': [{'file-types': ['scala'], 'scope': 'source.scala'}],
'sql': [{'file-types': ['sql'], 'scope': 'source.sql'}],
'sqlite': [{'file-types': ['sql'],
'highlights': 'queries/highlights.scm',
'injection-regex': '^(sql)$',
'scope': 'source.sql'}],
'toml': [{'file-types': ['toml'],
'highlights': ['queries/highlights.scm'],
'injection-regex': '^toml$',
'scope': 'source.toml'}],
'tsq': [{'file-types': ['tsq', 'scm'], 'scope': 'scope.tsq'}],
'tsx': [{'file-types': ['tsx'],
'highlights': ['queries/highlights.scm',
'node_modules/tree-sitter-javascript/queries/highlights-jsx.scm',
'node_modules/tree-sitter-javascript/queries/highlights.scm'],
'injection-regex': '^(ts|typescript)$',
'injections': 'node_modules/tree-sitter-javascript/queries/injections.scm',
'locals': 'node_modules/tree-sitter-javascript/queries/locals.scm',
'path': 'tsx',
'scope': 'source.tsx',
'tags': ['queries/tags.scm',
'node_modules/tree-sitter-javascript/queries/tags.scm']}],
'typescript': [{'file-types': ['ts'],
'highlights': ['queries/highlights.scm',
'node_modules/tree-sitter-javascript/queries/highlights.scm'],
'injection-regex': '^(ts|typescript)$',
'injections': 'node_modules/tree-sitter-javascript/queries/injections.scm',
'locals': ['queries/locals.scm',
'node_modules/tree-sitter-javascript/queries/locals.scm'],
'path': 'typescript',
'scope': 'source.ts',
'tags': ['queries/tags.scm',
'node_modules/tree-sitter-javascript/queries/tags.scm']}],
'yaml': {}}