Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Simplify ownership analytics [DHIS2-18660] #19527

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,6 @@
public class JdbcOwnershipAnalyticsTableManager extends AbstractEventJdbcTableManager {
private final JdbcConfiguration jdbcConfiguration;

private static final String HISTORY_TABLE_ID = "1001-01-01";

// Must be later than the dummy HISTORY_TABLE_ID for SQL query order.
private static final String TRACKED_ENTITY_OWN_TABLE_ID = "2002-02-02";

protected static final List<AnalyticsTableColumn> FIXED_COLS =
List.of(
AnalyticsTableColumn.builder()
Expand All @@ -97,12 +92,12 @@ public class JdbcOwnershipAnalyticsTableManager extends AbstractEventJdbcTableMa
AnalyticsTableColumn.builder()
.name("startdate")
.dataType(DATE)
.selectExpression("a.startdate")
.selectExpression("null")
.build(),
AnalyticsTableColumn.builder()
.name("enddate")
.dataType(DATE)
.selectExpression("a.enddate")
.selectExpression("h.enddate")
.build(),
AnalyticsTableColumn.builder()
.name("ou")
Expand Down Expand Up @@ -213,6 +208,7 @@ private void populateOwnershipTableInternal(AnalyticsTablePartition partition, S
writer.write(getRowMap(columnNames, resultSet));
queryRowCount.getAndIncrement();
});
writer.flush();

log.info(
"OwnershipAnalytics query row count was {} for table '{}'", queryRowCount, tableName);
Expand All @@ -223,19 +219,12 @@ private void populateOwnershipTableInternal(AnalyticsTablePartition partition, S
}

/**
* Returns a SQL select query. For the from clause, for tracked entities in this program in
* programownershiphistory, get one row for each programownershiphistory row and then get a final
* row from the trackedentityprogramowner table to show the final owner.
*
* <p>The start date values are dummy so that all the history table rows will be ordered first and
* the tracked entity owner table row will come last.
* Returns a SQL select query.
*
* <p>The start date in the analytics table will be a far past date for the first row for each
* tracked entity, or the previous row's end date plus one day in subsequent rows for that tracked
* entity.
*
* <p>Rows in programownershiphistory that don't have organisationunitid will be filtered out.
*
* @param program the {@link Program}.
* @return a SQL select query.
*/
Expand All @@ -248,29 +237,15 @@ private String getInputSql(Program program) {
sql.append(
replaceQualify(
"""
\sfrom (\
select h.trackedentityid, '${historyTableId}' as startdate, h.enddate as enddate, h.organisationunitid \
from ${programownershiphistory} h \
\sfrom ${programownershiphistory} h \
inner join ${trackedentity} te on h.trackedentityid = te.trackedentityid \
inner join ${organisationunit} ou on h.organisationunitid = ou.organisationunitid \
left join analytics_rs_orgunitstructure ous on h.organisationunitid = ous.organisationunitid \
left join analytics_rs_organisationunitgroupsetstructure ougs on h.organisationunitid = ougs.organisationunitid \
where h.programid = ${programId} \
and h.organisationunitid is not null \
union distinct \
select o.trackedentityid, '${trackedEntityOwnTableId}' as startdate, null as enddate, o.organisationunitid \
from ${trackedentityprogramowner} o \
where o.programid = ${programId} \
and o.trackedentityid in (\
select distinct p.trackedentityid \
from ${programownershiphistory} p \
where p.programid = ${programId} \
and p.organisationunitid is not null)) a \
inner join ${trackedentity} te on a.trackedentityid = te.trackedentityid \
inner join ${organisationunit} ou on a.organisationunitid = ou.organisationunitid \
left join analytics_rs_orgunitstructure ous on a.organisationunitid = ous.organisationunitid \
left join analytics_rs_organisationunitgroupsetstructure ougs on a.organisationunitid = ougs.organisationunitid \
order by te.uid, a.startdate, a.enddate""",
Map.of(
"historyTableId", HISTORY_TABLE_ID,
"trackedEntityOwnTableId", TRACKED_ENTITY_OWN_TABLE_ID,
"programId", String.valueOf(program.getId()))));
order by te.uid, h.enddate""",
Map.of("programId", String.valueOf(program.getId()))));
return sql.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
*/
package org.hisp.dhis.analytics.table.writer;

import static java.util.Calendar.DECEMBER;
import static java.util.Calendar.JANUARY;
import static org.apache.commons.lang3.time.DateUtils.truncate;
import static org.hisp.dhis.util.DateUtils.addDays;
Expand All @@ -51,12 +50,15 @@
public class JdbcOwnershipWriter {
private final MappingBatchHandler batchHandler;

/** Previous row for this TRACKED ENTITY, if any. */
private Map<String, Object> prevRow = null;

/** Row of the current write, possibly modified. */
private Map<String, Object> newRow;

/** Previous row, if any. */
private Map<String, Object> prevRow;

/** Does the previous row exist? */
private boolean previousRowExists = false;

public static final String TRACKEDENTITY = "teuid";

public static final String STARTDATE = "startdate";
Expand All @@ -65,9 +67,7 @@ public class JdbcOwnershipWriter {

public static final String OU = "ou";

private static final Date FAR_PAST_DATE = new GregorianCalendar(1000, JANUARY, 1).getTime();

private static final Date FAR_FUTURE_DATE = new GregorianCalendar(9999, DECEMBER, 31).getTime();
private static Date FAR_PAST_DATE = new GregorianCalendar(1001, JANUARY, 1).getTime();

/** Gets instance by a factory method (so it can be mocked). */
public static JdbcOwnershipWriter getInstance(MappingBatchHandler batchHandler) {
Expand All @@ -76,103 +76,83 @@ public static JdbcOwnershipWriter getInstance(MappingBatchHandler batchHandler)

/**
* Write a row to an analytics_ownership staging table. Work on a copy of the row, so we do not
* change the original row. We cannot use immutable maps because the orgUnit levels contain nulls
* when the orgUnit is not at the lowest level, and immutable maps do not allow null values. Also,
* the end date is null in the last record for each TRACKED ENTITY.
* change the original row. We cannot use immutable maps because some of the orgUnit levels are
* null when the orgUnit isn't at the lowest level. Immutable maps don't allow null values.
*
* @param row map of values to write
*/
public void write(Map<String, Object> row) {
newRow = new HashMap<>(row);
adjustNewRowDates();

if (newRow.get(ENDDATE) != null) {
// Remove the time of day portion of the ENDDATE.
newRow.put(ENDDATE, truncate(newRow.get(ENDDATE), Calendar.DATE));
}

if (prevRow == null) {
startNewTrackedEntity();
} else if (sameValue(OU) || sameValue(ENDDATE)) {
combineWithPreviousRow();
if (shouldContinuePreviousRow()) {
continuePreviousRow();
} else {
writePreviousRow();
writePreviousRowIfExists();
}

prevRow = newRow;
previousRowExists = true;
}

/** Flush the last row to the output. We will have a row to flush unless we had no input. */
public void flush() {
writePreviousRowIfExists();
}

// -------------------------------------------------------------------------
// Supportive methods
// -------------------------------------------------------------------------

/**
* Process the first row for a TRACKED ENTITY. Save it as the previous row and set the start date
* for far in the past. If the previous row does not have an "enddate", we enforce a default one.
*/
private void startNewTrackedEntity() {
prevRow = newRow;

// Ensure a default "enddate" value.
prevRow.putIfAbsent(ENDDATE, FAR_FUTURE_DATE);

prevRow.put(STARTDATE, FAR_PAST_DATE);
}

/**
* Combine the current row with the previous row by updating the previous row's OU and ENDDATE. If
* the ENDDATE is the same this means there were multiple assignments during a day, and we want to
* record only the last OU assignment for the day. If the OU is the same this means there were
* successive assignments to the same OU (possibly after collapsing the same ENDDATE assignments
* such as if a TRACKED ENTITY was switched during a day to a different OU and then switched back
* again.) In this case we can combine the two successive records for the same OU into a single
* record.
* Adjust the dates in the new row.
*
* <p>If we are continuing entries for a tracked entity, then set the start date equal to the
* previous end date plus one day, otherwise we are recording values for a different tacked entity
* so set the start date equal to a far past date.
*
* <p>If this is the last (and not only) row for this TRACKED ENTITY, write it out.
* <p>Remove the time portion of the end date, so it is just the date.
*/
private void combineWithPreviousRow() {
prevRow.put(OU, newRow.get(OU));
prevRow.put(ENDDATE, newRow.get(ENDDATE));

writeRowIfLast(prevRow);
private void adjustNewRowDates() {
newRow.put(STARTDATE, sameTrackedEntity() ? startAfterPrevious() : FAR_PAST_DATE);
newRow.put(ENDDATE, truncate(newRow.get(ENDDATE), Calendar.DATE));
}

/**
* The new row is for a different ownership period of the same TRACKED ENTITY. So write out the
* old row and start the new row after the old row's end date. Then also write out the new row if
* it is the last for this TRACKED ENTITY.
*/
private void writePreviousRow() {
batchHandler.addObject(prevRow);

newRow.put(STARTDATE, addDays((Date) prevRow.get(ENDDATE), 1));

prevRow = newRow;
/** Do we have a previous row with the same tracked entity? */
private boolean sameTrackedEntity() {
return previousRowExists && sameValue(TRACKEDENTITY);
}

writeRowIfLast(prevRow);
/** Start the new row one day after the previous one ends. */
private Date startAfterPrevious() {
return addDays((Date) prevRow.get(ENDDATE), 1);
}

/**
* If the passed row is the last for this TRACKED ENTITY (no end date), then set the end date to
* far in the future and write it out. However, if this is the only row for this TRACKED ENTITY
* (from the beginning of time to the end of time), then don't write it because the ownership
* never changed and analytics queries can always use the enrollment orgUnit.
* Should we continue the previous row?
*
* <p>It should be continued if the previous row is the same tracked entity and either the OU or
* the ENDDATE has the same value.
*
* <p>If the ENDDATE is the same (now that the time of day has been removed), this means that
* there were multiple ownership changes within the same day. In this event, we want to record
* only a single ownership period that starts with the previous ownership start date and ends at
* the end of the day.
*
* <p>After, there will be no previous row for this TRACKED ENTITY.
* <p>If the OU is the same (perhaps after combining rows with the same ENDDATE), then also we
* only want to record a single ownership that starts with the previous ownership start date and
* ends with the new ownership end date.
*/
private void writeRowIfLast(Map<String, Object> row) {
if (hasNullValue(row, ENDDATE)) // If the last row
{
row.put(ENDDATE, FAR_FUTURE_DATE);

if (!FAR_PAST_DATE.equals(row.get(STARTDATE))) {
batchHandler.addObject(row);
}

prevRow = null;
}
private boolean shouldContinuePreviousRow() {
return sameTrackedEntity() && (sameValue(ENDDATE) || sameValue(OU));
}

/** Returns true if the map has a null value. */
private boolean hasNullValue(Map<String, Object> row, String colName) {
return row.get(colName) == null;
/**
* Continue the previous row by transferring the previous row's start date to the new row. All
* other properties such as OU and ENDDATE come from the new row.
*/
private void continuePreviousRow() {
newRow.put(STARTDATE, prevRow.get(STARTDATE));
}

/**
Expand All @@ -182,4 +162,11 @@ private boolean hasNullValue(Map<String, Object> row, String colName) {
private boolean sameValue(String colName) {
return Objects.equals(prevRow.get(colName), newRow.get(colName));
}

/** Write out the previous row to the batch handler. */
private void writePreviousRowIfExists() {
if (previousRowExists) {
batchHandler.addObject(prevRow);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -275,33 +275,24 @@ void testPopulateTable() throws SQLException {
"lastupdated <= 'yyyy-mm-ddThh:mm:ss'");
assertEquals(
"""
select te.uid,a.startdate,a.enddate,ou.uid from (\
select h.trackedentityid, '1001-01-01' as startdate, h.enddate as enddate, h.organisationunitid \
select te.uid,null,h.enddate,ou.uid \
from "programownershiphistory" h \
inner join "trackedentity" te on h.trackedentityid = te.trackedentityid \
inner join "organisationunit" ou on h.organisationunitid = ou.organisationunitid \
left join analytics_rs_orgunitstructure ous on h.organisationunitid = ous.organisationunitid \
left join analytics_rs_organisationunitgroupsetstructure ougs on h.organisationunitid = ougs.organisationunitid \
where h.programid = 0 \
and h.organisationunitid is not null \
union distinct \
select o.trackedentityid, '2002-02-02' as startdate, null as enddate, o.organisationunitid \
from "trackedentityprogramowner" o \
where o.programid = 0 \
and o.trackedentityid in (\
select distinct p.trackedentityid \
from "programownershiphistory" p \
where p.programid = 0 \
and p.organisationunitid is not null)) a \
inner join "trackedentity" te on a.trackedentityid = te.trackedentityid \
inner join "organisationunit" ou on a.organisationunitid = ou.organisationunitid \
left join analytics_rs_orgunitstructure ous on a.organisationunitid = ous.organisationunitid \
left join analytics_rs_organisationunitgroupsetstructure ougs on a.organisationunitid = ougs.organisationunitid \
order by te.uid, a.startdate, a.enddate""",
order by te.uid, h.enddate""",
sqlMasked);

List<Invocation> writerInvocations = getInvocations(writer);
assertEquals(3, writerInvocations.size());
assertEquals(4, writerInvocations.size());

assertEquals("write", writerInvocations.get(0).getMethod().getName());
assertEquals("write", writerInvocations.get(1).getMethod().getName());
assertEquals("write", writerInvocations.get(2).getMethod().getName());
assertEquals("flush", writerInvocations.get(3).getMethod().getName());

Map<String, Object> map0 = writerInvocations.get(0).getArgument(0);
Map<String, Object> map1 = writerInvocations.get(1).getArgument(0);
Expand All @@ -319,17 +310,18 @@ void testGetFixedColumns() {
AnalyticsTableColumn.builder()
.name("teuid")
.dataType(CHARACTER_11)
.nullable(NOT_NULL)
.selectExpression("te.uid")
.build(),
AnalyticsTableColumn.builder()
.name("startdate")
.dataType(DATE)
.selectExpression("a.startdate")
.selectExpression("null")
.build(),
AnalyticsTableColumn.builder()
.name("enddate")
.dataType(DATE)
.selectExpression("a.enddate")
.selectExpression("h.enddate")
.build(),
AnalyticsTableColumn.builder()
.name("ou")
Expand Down
Loading
Loading