diff --git a/.tool-versions b/.tool-versions
new file mode 100644
index 0000000..c458494
--- /dev/null
+++ b/.tool-versions
@@ -0,0 +1,3 @@
+elasticsearch 7.17.18
+java openjdk-17.0.2
+maven 3.9.0
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..bdf1a00
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+ARG ES_VERSION=8.6.2
+FROM docker.elastic.co/elasticsearch/elasticsearch:${ES_VERSION}
+
+# Re-declare: an ARG defined before FROM is only visible to the FROM line itself.
+ARG ES_VERSION
+COPY --chown=elasticsearch:elasticsearch ./target/releases/elasticsearch-analysis-lemmagen-${ES_VERSION}-plugin.zip /tmp/elasticsearch-analysis-lemmagen-${ES_VERSION}-plugin.zip
+
+USER elasticsearch
+
+RUN elasticsearch-plugin install file:///tmp/elasticsearch-analysis-lemmagen-${ES_VERSION}-plugin.zip
+
+# Fetch the English lexicon. -f makes curl exit non-zero on an HTTP error, so a
+# failed download breaks the build instead of saving the error page as en.lem.
+RUN mkdir -p /usr/share/elasticsearch/config/lemmagen && \
+    curl -fsSL https://github.com/vhyza/lemmagen-lexicons/raw/master/free/lexicons/en.lem \
+         -o /usr/share/elasticsearch/config/lemmagen/en.lem
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..43fa9cd
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,23 @@
+services:
+  elasticsearch:
+    # image: docker.elastic.co/elasticsearch/elasticsearch:8.6.2
+    build:
+      context: .
+      platforms:
+        - linux/amd64
+      args:
+        - ES_VERSION=8.6.2
+    ports:
+      - 9200:9200
+    environment:
+      - cluster.name=lemmagen-test
+      - bootstrap.memory_lock=true
+      - xpack.security.enabled=false
+      - xpack.security.http.ssl.enabled=false
+      - xpack.security.transport.ssl.enabled=false
+      - network.host=0.0.0.0
+      - discovery.type=single-node
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
diff --git a/plugin-descriptor.properties b/plugin-descriptor.properties
index 8fdbee1..c814b3c 100644
--- a/plugin-descriptor.properties
+++ b/plugin-descriptor.properties
@@ -1,6 +1,6 @@
 description=Lemmatizer token filter
-version=${elasticsearch.version}
+version=${project.version}
 name=elasticsearch-analysis-lemmagen
 classname=org.elasticsearch.plugin.analysis.lemmagen.AnalysisLemmagenPlugin
 java.version=1.8
-elasticsearch.version=${plugin.version}
+elasticsearch.version=${elasticsearch.version}
diff --git a/pom.xml b/pom.xml
index 02bd3b9..a8b1117 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,12 +7,11 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-lemmagen - 8.6.1 + 8.6.2 jar - 8.6.1 - 9.4.2 - 8.6.1 + 8.6.2 + ${elasticsearch.version} UTF-8
@@ -37,12 +36,6 @@ slf4j-simple 1.6.2 - - org.apache.lucene - lucene-test-framework - ${lucene.version} - test - org.hamcrest hamcrest-core
@@ -86,13 +79,13 @@ org.apache.logging.log4j log4j-core - [2.16.0,) + 2.23.1 test org.apache.logging.log4j log4j-api - [2.16.0,) + 2.23.1 test
@@ -133,7 +126,7 @@ org.apache.maven.plugins maven-surefire-plugin - 2.14 + 3.2.2 -Dtests.security.manager=false
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..729c8de
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -e
+
+# Remove any index left over from a previous run. No -f here: the index may not exist, and a 404 must not abort the script under set -e.
+curl -s -H "Content-Type: application/json" -X DELETE "http://localhost:9200/lemmagen-test" > /dev/null
+
+echo -e "--------------- CREATE INDEX ---------------\n"
+curl -f -H "Content-Type: application/json" -X PUT "http://localhost:9200/lemmagen-test" -d '{
+  "settings": {
+    "index": {
+      "analysis": {
+        "filter": {
+          "lemmagen_filter_en": {
+            "type": "lemmagen",
+            "lexicon": "en"
+          }
+        },
+        "analyzer": {
+          "lemmagen_en": {
+            "type": "custom",
+            "tokenizer": "uax_url_email",
+            "filter": [
+              "lemmagen_filter_en"
+            ]
+          }
+        }
+      }
+    }
+  },
+  "mappings": {
+    "properties": {
+      "text": {
+        "type": "text",
+        "analyzer": "lemmagen_en"
+      }
+    }
+  }
+}'
+
+echo -e "\n"
+echo -e "--------------- ANALYZE TEXT ---------------\n"
+curl -f -H "Content-Type: application/json" -X GET "http://localhost:9200/lemmagen-test/_analyze" -d '
+{
+  "text": "I am late.",
+  "analyzer": "lemmagen_en"
+}'
+
+echo -e "\n"
+echo -e "--------------- INDEX DOCUMENT ---------------\n"
+curl -f -H "Content-Type: application/json" -X PUT "http://localhost:9200/lemmagen-test/_doc/1?refresh=wait_for" -d '
+{
+  "user": "tester",
+  "published_at": "2013-11-15T14:12:12",
+  "text": "I am late."
+}'
+
+
+echo -e "\n"
+echo -e "--------------- SEARCH DOCUMENT ---------------\n"
+curl -f -H "Content-Type: application/json" -X GET "http://localhost:9200/lemmagen-test/_search" -d '
+{
+  "query": {
+    "match": {
+      "text": "is"
+    }
+  }
+}'
+
+echo -e "\n"
+echo -e "--------------- DELETE INDEX ---------------\n"
+curl -f -H "Content-Type: application/json" -X DELETE "http://localhost:9200/lemmagen-test"
+
+echo -e "\n"
+echo -e "👍 ALL DONE 👍\n"