diff --git a/.gitignore b/.gitignore index 4bfad84..5182409 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ build derby.log metastore_db +target/ +.DS_Store diff --git a/Makefile b/Makefile deleted file mode 100644 index 2359689..0000000 --- a/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -run: - ./gradlew shadowJar && \ - spark-submit --verbose \ - --class com.github.geoheil.geomesaGeospark.Foo \ - --master 'local[*]' \ - --driver-memory 8G \ - build/libs/geomesaGeospark-all.jar - -replSparkShell: - ./gradlew shadowJar && \ - spark-shell --master 'local[2]' \ - --jars build/libs/geomesaGeospark-all.jar \ No newline at end of file diff --git a/build.gradle b/build.gradle deleted file mode 100644 index 4ad5f9c..0000000 --- a/build.gradle +++ /dev/null @@ -1,81 +0,0 @@ -description = 'GeomesaGeospark' - -buildscript { - repositories { - maven { url "https://plugins.gradle.org/m2/" } - jcenter() - mavenCentral() - } - dependencies { - classpath "com.github.jengelman.gradle.plugins:shadow:$shadowPluginV" - } -} - -configurations.all { - exclude group: 'xerces', module: 'xercesImpl' -} - -allprojects { - group = 'com.geoheil.geomesaspark' - - ext { - scalaFullV = "${scalaMinorV}.$scalaPatchV" - sparkFullV = "$sparkOpenV.$sparkOpenVPatch.$hdpV" - - deps = [geomesaSparkSql : "org.locationtech.geomesa:geomesa-spark-sql_${scalaMinorV}:$geomesaV", - geospark : "org.datasyslab:geospark:$geosparkV", - geosparkSql : "org.datasyslab:geospark-sql_$sparkOpenV:$geosparkV", - sparkCore : "org.apache.spark:spark-core_${scalaMinorV}:$sparkFullV", - sparkSql : "org.apache.spark:spark-sql_${scalaMinorV}:$sparkFullV", - sparkHive : "org.apache.spark:spark-hive_${scalaMinorV}:$sparkFullV", - scalaLib : "org.scala-lang:scala-library:$scalaFullV" - ] - } - - repositories { - maven { url "https://repo.locationtech.org/content/groups/releases" } - maven { url "http://repo.boundlessgeo.com/main" } - maven { url "http://download.osgeo.org/webdav/geotools" } - maven { url "http://conjars.org/repo" } - jcenter() - mavenCentral() - maven { url "http://nexus-private.hortonworks.com/nexus/content/groups/public" } - } - - apply plugin: 'scala' - apply plugin: 'com.github.johnrengelman.shadow' - - - sourceCompatibility = 1.8 - targetCompatibility = 1.8 - - dependencies { - - compile deps.geomesaSparkSql - compile deps.geospark - compile deps.geosparkSql - - compileOnly deps.sparkCore - compileOnly deps.sparkSql - compileOnly deps.sparkHive - compileOnly deps.scalaLib - - } - - shadowJar { - zip64 true - dependencies { - exclude(dependency("com.chuusai:shapeless_$scalaMinorV")) - } - exclude 'META-INF/*.DSA' - exclude 'META-INF/*.RSA' - exclude 'META-INF/LICENSE' - exclude 'META-INF/LICENSE.txt' - exclude 'META-INF/NOTICE.txt' - exclude 'META-INF/NOTICE' - exclude 'NOTICE' - exclude 'LICENSE' - exclude 'LICENSE.txt' - exclude 'LICENSE-2.0.txt' - } -} \ No newline at end of file diff --git a/gradle.properties b/gradle.properties deleted file mode 100644 index 7f5c0b6..0000000 --- a/gradle.properties +++ /dev/null @@ -1,8 +0,0 @@ -geomesaV=2.0.2 -hdpV=2.6.4.9-3 -scalaMinorV=2.11 -scalaPatchV=12 -shadowPluginV=2.0.4 -sparkOpenV=2.2 -sparkOpenVPatch=0 -geosparkV=1.1.3 \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index 758de96..0000000 Binary files a/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 2d80b69..0000000 --- a/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,5 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-4.8.1-bin.zip -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew deleted file mode 100755 index cccdd3d..0000000 --- a/gradlew +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env sh - -############################################################################## -## -## Gradle start up script for UN*X -## -############################################################################## - -# Attempt to set APP_HOME -# Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi -done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null - -APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" - -warn () { - echo "$*" -} - -die () { - echo - echo "$*" - echo - exit 1 -} - -# OS specific support (must be 'true' or 'false'). -cygwin=false -msys=false -darwin=false -nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" - else - JAVACMD="$JAVA_HOME/bin/java" - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD="java" - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." -fi - -# Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? -ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi -fi - -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi - -# For Cygwin, switch paths to Windows format before running java -if $cygwin ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi - # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" - fi - i=$((i+1)) - done - case $i in - (0) set -- ;; - (1) set -- "$args0" ;; - (2) set -- "$args0" "$args1" ;; - (3) set -- "$args0" "$args1" "$args2" ;; - (4) set -- "$args0" "$args1" "$args2" "$args3" ;; - (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; - esac -fi - -# Escape application args -save () { - for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done - echo " " -} -APP_ARGS=$(save "$@") - -# Collect all arguments for the java command, following the shell quoting and substitution rules -eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" - -# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong -if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then - cd "$(dirname "$0")" -fi - -exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat deleted file mode 100644 index e95643d..0000000 --- a/gradlew.bat +++ /dev/null @@ -1,84 +0,0 @@ -@if "%DEBUG%" == "" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS= - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto init - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto init - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:init -@rem Get command-line arguments, handling Windows variants - -if not "%OS%" == "Windows_NT" goto win9xME_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% - -:end -@rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..8f16692 --- /dev/null +++ b/pom.xml @@ -0,0 +1,89 @@ + + + 4.0.0 + + com.github.geoheil + geomesaGeospark + 0.1 + + + 2.11.12 + 2.2.1 + + + + + + org.scala-lang + scala-library + ${scala.version} + + + org.apache.spark + spark-sql_2.11 + ${spark.version} + + + org.apache.spark + spark-hive_2.11 + ${spark.version} + + + org.datasyslab + geospark + 1.1.3 + + + org.datasyslab + geospark-sql_2.2 + 1.1.3 + + + org.locationtech.geomesa + geomesa-spark-sql_2.11 + 2.0.2 + + + + src/main/scala + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + + + compile + + + + + + maven-assembly-plugin + + + + com.github.geoheil.geomesaGeospark.Foo + + + ${project.artifactId} + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + \ No newline at end of file diff --git a/settings.gradle b/settings.gradle deleted file mode 100644 index c44cbe2..0000000 --- a/settings.gradle +++ /dev/null @@ -1 +0,0 @@ -rootProject.name = 'geomesaGeospark' \ No newline at end of file diff --git a/src/main/scala/com/github/geoheil/geomesaGeospark/CustomGeosparkRegistrator.scala b/src/main/scala/com/github/geoheil/geomesaGeospark/CustomGeosparkRegistrator.scala index ca2aac0..d4c3c0e 100644 --- a/src/main/scala/com/github/geoheil/geomesaGeospark/CustomGeosparkRegistrator.scala +++ b/src/main/scala/com/github/geoheil/geomesaGeospark/CustomGeosparkRegistrator.scala @@ -8,7 +8,7 @@ import org.datasyslab.geosparksql.UDF.Catalog object CustomGeosparkRegistrator { def registerAll(sparkSession: SparkSession): Unit = { - Catalog.expressions.foreach(f => FunctionRegistry.builtin.registerFunction("geospark_" + f.getClass.getSimpleName.dropRight(1), f)) + Catalog.expressions.foreach(f => sparkSession.sessionState.functionRegistry.registerFunction("geospark_" + f.getClass.getSimpleName.dropRight(1), f)) Catalog.aggregateExpressions.foreach(f => sparkSession.udf.register("geospark_" + f.getClass.getSimpleName, f)) UdtRegistratorWrapper.registerAll() } diff --git a/src/main/scala/com/github/geoheil/geomesaGeospark/Foo.scala b/src/main/scala/com/github/geoheil/geomesaGeospark/Foo.scala index bde5a21..c5ebeda 100644 --- a/src/main/scala/com/github/geoheil/geomesaGeospark/Foo.scala +++ b/src/main/scala/com/github/geoheil/geomesaGeospark/Foo.scala @@ -6,6 +6,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.DecimalType +import org.datasyslab.geospark.serde.GeoSparkKryoRegistrator import org.datasyslab.geosparksql.utils.GeoSparkSQLRegistrator import org.locationtech.geomesa.spark.jts._ @@ -20,7 +21,6 @@ object Foo extends App { .builder() .config(new SparkConf() .setAppName("geomesaGeospark") - .setMaster("local[*]") .setIfMissing("spark.serializer", classOf[KryoSerializer].getCanonicalName) .setIfMissing("spark.kryo.unsafe", "true") @@ -33,14 +33,15 @@ object Foo extends App { .getOrCreate() import spark.implicits._ - + CustomGeosparkRegistrator.registerAll(spark) // register spatial functions // now using custom namespace // register geomesa functions spark.withJTS // register geospark functions TODO WARNING function names overlap. Require custom registrator //GeoSparkSQLRegistrator.registerAll(spark) - CustomGeosparkRegistrator.registerAll(spark) + spark.sessionState.functionRegistry.listFunction.foreach(println) + val points = Seq(MyPoint(1, 30, 10), MyPoint(2, 31, 35)).toDS .withColumn("x", col("x").cast(DecimalType(38, 18)))