From a2eb6ca8076f78b3557197cd83090362ec3a3a03 Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Thu, 14 Dec 2023 08:49:36 +0200 Subject: [PATCH] Refactored the parsing of 'pandas_categorical' attribute --- NOTICE.txt | 1 + pmml-lightgbm/pom.xml | 25 ++---- .../main/java/org/jpmml/lightgbm/GBDT.java | 4 +- .../lightgbm/PandasCategoricalParser.java | 76 +++++++++++++++++++ .../lightgbm/PandasCategoricalParserTest.java | 8 +- pom.xml | 6 ++ 6 files changed, 93 insertions(+), 27 deletions(-) create mode 100644 pmml-lightgbm/src/main/java/org/jpmml/lightgbm/PandasCategoricalParser.java diff --git a/NOTICE.txt b/NOTICE.txt index 17603d5..449df04 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,4 +1,5 @@ JPMML-LightGBM includes third-party dependencies that are released under the Apache License, Version 2.0: + * Gson - https://github.com/google/gson * Guava - https://github.com/google/guava * JCommander - http://jcommander.org diff --git a/pmml-lightgbm/pom.xml b/pmml-lightgbm/pom.xml index 56622ba..a6bdaea 100644 --- a/pmml-lightgbm/pom.xml +++ b/pmml-lightgbm/pom.xml @@ -39,6 +39,11 @@ provided + + com.google.code.gson + gson + + junit junit @@ -65,28 +70,8 @@ maven-javadoc-plugin 1.8 - ${basedir}/src/main/java - - org.codehaus.mojo - javacc-maven-plugin - 3.0.1 - - - - javacc - - - - - - net.java.dev.javacc - javacc - 7.0.13 - - - diff --git a/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/GBDT.java b/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/GBDT.java index 9eacfac..ff8b1c9 100644 --- a/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/GBDT.java +++ b/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/GBDT.java @@ -68,7 +68,7 @@ public class GBDT { private Map feature_importances = Collections.emptyMap(); - private List> pandas_categorical = Collections.emptyList(); + private List> pandas_categorical = Collections.emptyList(); public void load(List
sections){ @@ -611,7 +611,7 @@ private Map loadFeatureSection(Section section){ return result; } - private List> loadPandasCategorical(Section section){ + private List> loadPandasCategorical(Section section){ String id = section.id(); try { diff --git a/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/PandasCategoricalParser.java b/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/PandasCategoricalParser.java new file mode 100644 index 0000000..17e0204 --- /dev/null +++ b/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/PandasCategoricalParser.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023 Villu Ruusmann + * + * This file is part of JPMML-LightGBM + * + * JPMML-LightGBM is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * JPMML-LightGBM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with JPMML-LightGBM. If not, see . + */ +package org.jpmml.lightgbm; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; +import com.google.gson.ToNumberPolicy; + +public class PandasCategoricalParser { + + private String string = null; + + + public PandasCategoricalParser(String string){ + setString(string); + } + + public List> parsePandasCategorical(){ + String string = getString(); + + if(!string.startsWith(PandasCategoricalParser.PREFIX)){ + throw new IllegalArgumentException(string); + } + + string = string.substring(PandasCategoricalParser.PREFIX.length()); + + JsonElement element = JsonParser.parseString(string); + + Gson gson = new GsonBuilder() + .setObjectToNumberStrategy(ToNumberPolicy.LONG_OR_DOUBLE) + .create(); + + List> result = gson.fromJson(element, ListOfLists.class); + if(result == null){ + result = Collections.emptyList(); + } + + return result; + } + + public String getString(){ + return this.string; + } + + private void setString(String string){ + this.string = string; + } + + static + private class ListOfLists extends ArrayList> { + } + + private static final String PREFIX = "pandas_categorical:"; +} \ No newline at end of file diff --git a/pmml-lightgbm/src/test/java/org/jpmml/lightgbm/PandasCategoricalParserTest.java b/pmml-lightgbm/src/test/java/org/jpmml/lightgbm/PandasCategoricalParserTest.java index 8a4045d..791d278 100644 --- a/pmml-lightgbm/src/test/java/org/jpmml/lightgbm/PandasCategoricalParserTest.java +++ b/pmml-lightgbm/src/test/java/org/jpmml/lightgbm/PandasCategoricalParserTest.java @@ -22,8 +22,6 @@ import java.util.Collections; import java.util.List; -import org.jpmml.lightgbm.PandasCategoricalParser; -import org.jpmml.lightgbm.ParseException; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -32,17 +30,17 @@ public class PandasCategoricalParserTest { @Test public void parse() throws Exception { - List> pandasCategories = parsePandasCategorical("null"); + List> pandasCategories = parsePandasCategorical("null"); assertEquals(Collections.emptyList(), pandasCategories); pandasCategories = parsePandasCategorical("[[\"null\", \"A\", \"B, B\", \"C, [C], C\"], [-2, -1, 0, 1, 2], [-2.0, -1.0, 0.0, 1.0, 2.0], [false, true]]"); - assertEquals(Arrays.asList(Arrays.asList("null", "A", "B, B", "C, [C], C"), Arrays.asList(-2, -1, 0, 1, 2), Arrays.asList(-2d, -1d, 0d, 1d, 2d), Arrays.asList(Boolean.FALSE, Boolean.TRUE)), pandasCategories); + assertEquals(Arrays.asList(Arrays.asList("null", "A", "B, B", "C, [C], C"), Arrays.asList(-2L, -1L, 0L, 1L, 2L), Arrays.asList(-2d, -1d, 0d, 1d, 2d), Arrays.asList(Boolean.FALSE, Boolean.TRUE)), pandasCategories); } static - private List> parsePandasCategorical(String value) throws ParseException { + private List> parsePandasCategorical(String value){ PandasCategoricalParser parser = new PandasCategoricalParser("pandas_categorical:" + value); return parser.parsePandasCategorical(); diff --git a/pom.xml b/pom.xml index a1b6d94..d4a721c 100644 --- a/pom.xml +++ b/pom.xml @@ -84,6 +84,12 @@ 1.72 + + com.google.code.gson + gson + [2.8.1, 2.10.1] + + junit junit