diff --git a/Model/src/main/java/org/gusdb/wdk/model/report/AbstractAttributeReporter.java b/Model/src/main/java/org/gusdb/wdk/model/report/AbstractAttributeReporter.java index 95da6f1b8..410cb67e7 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/report/AbstractAttributeReporter.java +++ b/Model/src/main/java/org/gusdb/wdk/model/report/AbstractAttributeReporter.java @@ -2,21 +2,21 @@ import java.io.OutputStream; import java.io.PrintStream; -import java.sql.ResultSet; import java.sql.SQLException; import java.util.LinkedHashMap; import java.util.Map; +import java.util.Optional; import java.util.regex.Matcher; -import javax.sql.DataSource; - +import org.gusdb.fgputil.Tuples; import org.gusdb.fgputil.db.SqlUtils; +import org.gusdb.fgputil.db.stream.ResultSetIterator; import org.gusdb.wdk.model.WdkModel; import org.gusdb.wdk.model.WdkModelException; +import org.gusdb.wdk.model.WdkRuntimeException; import org.gusdb.wdk.model.answer.AnswerValue; import org.gusdb.wdk.model.query.Column; import org.gusdb.wdk.model.query.SqlQuery; -import org.gusdb.wdk.model.record.PrimaryKeyDefinition; import org.gusdb.wdk.model.record.PrimaryKeyValue; import org.gusdb.wdk.model.record.RecordClass; import org.gusdb.wdk.model.record.attribute.AttributeField; @@ -27,6 +27,8 @@ import org.gusdb.wdk.model.record.attribute.TextAttributeField; import org.json.JSONObject; +import static org.gusdb.fgputil.functional.Functions.mapException; + public abstract class AbstractAttributeReporter extends AbstractReporter { protected static final String ATTRIBUTE_COLUMN = "wdk_attribute"; @@ -158,37 +160,32 @@ private String formatColumn(AnswerValue answerValue, AttributeField attribute, return builder.append("'").toString(); } - /** - * @return the values of the associated attribute. the key of the map is the - * primary key of a record instance. - */ - protected Map getAttributeValues(AnswerValue answerValue) - throws WdkModelException, SQLException { - WdkModel wdkModel = answerValue.getWdkModel(); - Map values = new LinkedHashMap<>(); - RecordClass recordClass = answerValue.getAnswerSpec().getQuestion().getRecordClass(); - PrimaryKeyDefinition pkDef = recordClass.getPrimaryKeyDefinition(); - String[] pkColumns = pkDef.getColumnRefs(); - String sql = getAttributeSql(answerValue); - DataSource dataSource = wdkModel.getAppDb().getDataSource(); - ResultSet resultSet = null; - try { - resultSet = SqlUtils.executeQuery(dataSource, sql, - answerValue.getAnswerSpec().getQuestion().getQuery().getFullName() - + "__attribute-plugin-combined", 5000); - while (resultSet.next()) { - Map pkValues = new LinkedHashMap<>(); - for (String pkColumn : pkColumns) { - pkValues.put(pkColumn, resultSet.getObject(pkColumn)); - } - PrimaryKeyValue pkValue = new PrimaryKeyValue(pkDef, pkValues); - Object value = resultSet.getObject(ATTRIBUTE_COLUMN); - values.put(pkValue, value); - } - } finally { - SqlUtils.closeResultSetAndStatement(resultSet, null); - } - return values; + protected ResultSetIterator> getAttributeValueStream(AnswerValue answerValue) + throws WdkModelException, SQLException { + var pkDef = answerValue.getAnswerSpec() + .getQuestion() + .getRecordClass() + .getPrimaryKeyDefinition(); + + var pkColumns = pkDef.getColumnRefs(); + + var resultSet = SqlUtils.executeQuery( + answerValue.getWdkModel().getAppDb().getDataSource(), + getAttributeSql(answerValue), + answerValue.getAnswerSpec().getQuestion().getQuery().getFullName() + "__attribute-plugin-combined", + 5000 + ); + + return new ResultSetIterator<>(resultSet, row -> { + var pkValues = new LinkedHashMap(pkColumns.length); + + for (var pkColumn : pkColumns) + pkValues.put(pkColumn, resultSet.getObject(pkColumn)); + + return Optional.of(new Tuples.TwoTuple<>( + mapException(() -> new PrimaryKeyValue(pkDef, pkValues), WdkRuntimeException::new), + resultSet.getObject(ATTRIBUTE_COLUMN) + )); + }); } - } diff --git a/Model/src/main/java/org/gusdb/wdk/model/report/reporter/WordCloudAttributeReporter.java b/Model/src/main/java/org/gusdb/wdk/model/report/reporter/WordCloudAttributeReporter.java index 4e06bd01b..4fada6ddb 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/report/reporter/WordCloudAttributeReporter.java +++ b/Model/src/main/java/org/gusdb/wdk/model/report/reporter/WordCloudAttributeReporter.java @@ -12,13 +12,12 @@ import org.apache.log4j.Logger; import org.gusdb.wdk.model.WdkModelException; import org.gusdb.wdk.model.answer.AnswerValue; -import org.gusdb.wdk.model.record.PrimaryKeyValue; import org.gusdb.wdk.model.report.AbstractAttributeReporter; import org.json.JSONArray; import org.json.JSONObject; -public class WordCloudAttributeReporter extends AbstractAttributeReporter { - +public class WordCloudAttributeReporter extends AbstractAttributeReporter { + private static final String PROP_SPLIT_PATTERN = "split-pattern"; private static final String PROP_MIN_WORD_LENGTH = "min-word-length"; private static final String PROP_EXCLUDE_NUMBERS = "exclude-numbers"; @@ -42,7 +41,7 @@ public JSONObject getJsonResult(AnswerValue answerValue) throws WdkModelExceptio List tags = loadTags(answerValue); JSONObject jsonResult = new JSONObject(); - + JSONArray jsonWordTags = new JSONArray(); for (WordTag tag : tags) { JSONObject tagJson = new JSONObject(); @@ -50,24 +49,28 @@ public JSONObject getJsonResult(AnswerValue answerValue) throws WdkModelExceptio tagJson.put("count", tag.getCount()); jsonWordTags.put(tagJson); } - + jsonResult.put(ATTR_TAGS, jsonWordTags); return jsonResult; } - + private List loadTags(AnswerValue answerValue) { - List tags = new ArrayList<>(); + List tags; resolveProperties(); - try { - Map tagMap = new HashMap(); - Map values = getAttributeValues(answerValue); - for (Object value : values.values()) { - if (value == null) + var tagMap = new HashMap(); + + try (var valueStream = getAttributeValueStream(answerValue)) { + while (valueStream.hasNext()) { + var record = valueStream.next(); + + if (record.getSecond() == null) continue; - splitWords(value.toString(), tagMap); + + splitWords(record.getSecond().toString(), tagMap); } + // the tags are sorted by count tags = processTags(tagMap); return tags; @@ -134,10 +137,10 @@ private void splitWords(String content, Map tags) { WordTag tag = tags.get(word); if (tag == null) tag = new WordTag(word, originalWord); - + else { tag.increment(); - + // In addition to incrementing the overall count for the word, we need to amend the // mixedCaseCounter map either by adding a new case sensitive version of the word or // incrementing the count for an existing case sensitive version. @@ -145,7 +148,7 @@ private void splitWords(String content, Map tags) { Integer count = mixedCaseCounter.get(originalWord); if(count == null) mixedCaseCounter.put(originalWord, 1); else mixedCaseCounter.put(originalWord, ++count); - } + } tags.put(word, tag); // logger.debug("word count: '" + word + "' = " + count); } @@ -175,7 +178,7 @@ private List processTags(Map tags) { WordTag partTag = tags.get(part); int count = tag.getCount() + partTag.getCount(); partTag.setCount(count); - + // In addition to absorbing overall plural counts for the case neutral word, we // need to absorb plural counts for the case sensitive versions of the word as // well. @@ -204,14 +207,14 @@ private List processTags(Map tags) { // weights and scores are easier. Collections.sort(list); } - + return list.stream().map(tag -> { String dominantCase = tag.getDominantCase(); tag.setWord(dominantCase); return tag; }).collect(Collectors.toList()); } - + /** * Mixed case plurals are stored in the mixed case counter map as plurals since no * determination about plurals can be easily be made. But since the ending of the