Skip to content

Commit

Permalink
Check for Unused Properties on Annotated Schema Components
Browse files Browse the repository at this point in the history
- currently we check unused properties on terms only, which means unsused properties on non-term schema components can be missed. This will fix that issue, by moving the check to AnnotatedSchemaComponents instead.
- add new propagation algorithm that processes the referenced to get the referencers and pull their used properties onto the referenced
- add tests with group references
- fix bug in Union that was setting the schemaDocument as the lexical parent to the Union's local simple type definition
- add typeBaseChainMap that creates a refSpec for simpletypes that reference other simple types
- update TransitiveClosureSchemaComponents to include EnumerationDef
- use root.allComponents to call checkUnusedProperties
- fix bug with XercesSchemaFileLocation.equals assuming all incoming objects would be XercesSchemaFileLocations as well. We also get just plain SchemaFileLocations, which can break things, so instead we compare against SchemaFileLocation members
- add tests

DAFFODIL-2798
  • Loading branch information
olabusayoT committed Dec 13, 2024
1 parent 8ba9506 commit f9854f4
Show file tree
Hide file tree
Showing 14 changed files with 578 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ final class SharedFactory[SharedType] {
trait AnnotatedSchemaComponent
extends SchemaComponent
with AnnotatedMixin
with ResolvesLocalProperties
with OverlapCheckMixin {

protected override def initialize(): Unit = {
Expand Down Expand Up @@ -377,6 +378,45 @@ trait AnnotatedSchemaComponent
val res = schemaDocument.formatAnnotation.formatChain
res
}

/**
* Used to look for DFDL properties on Annotated Schema Components that
* have not been accessed and record it as a warning. This function uses the
* property cache state to determine which properties have been accessed, so
* this function must only be called after all property accesses are complete
* (e.g. schema compilation has finished) and after propagateUsedProperties
* has been called on all AnnotatedSchemaComponents, to ensure there are no false
* positives/negatives.
*
* Note: This is not a recursive walk. It identifies and issues warnings about
* unaccessed properties on just one AnnotatedSchemaComponent
*/
final lazy val checkUnusedProperties: Unit = {
// Get the properties defined on this component and what it refers to
val localProps = formatAnnotation.justThisOneProperties
val refProps = optReferredToComponent
.map { _.formatAnnotation.justThisOneProperties }
.getOrElse(Map.empty)

val usedProperties = propCache

localProps.foreach { case (prop, (value, _)) =>
if (!usedProperties.contains(prop)) {
SDW(WarnID.IgnoreDFDLProperty, "DFDL property was ignored: %s=\"%s\"", prop, value)
}
}

refProps.foreach { case (prop, (value, _)) =>
if (!usedProperties.contains(prop)) {
optReferredToComponent.get.SDW(
WarnID.IgnoreDFDLProperty,
"DFDL property was ignored: %s=\"%s\"",
prop,
value
)
}
}
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ final class Union private (val xmlArg: Node, simpleTypeDef: SimpleTypeDefBase)
private lazy val immediateTypeXMLs = xml \ "simpleType"
private lazy val immediateTypes: Seq[SimpleTypeDefBase] = immediateTypeXMLs.map { node =>
{
LocalSimpleTypeDef(node, schemaDocument)
LocalSimpleTypeDef(node, this)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ final class Root private (
m.toMap
}

/**
* For any given global simple type, tells us what other simple types contain
* references to it
*
* This is intended to be used when we're propagating used properties to ensure
* properties are properly propagates along the simple type base chain
*/
lazy val typeBaseChainMap: Map[GlobalComponent, Seq[(String, Seq[RefSpec])]] = {
val refEntries: Seq[(GlobalComponent, Seq[RefSpec])] =
typeBaseChains.groupBy { _.to }.toSeq
val m: Seq[(GlobalComponent, Seq[(String, Seq[RefSpec])])] = refEntries.map {
case (to, seq) => (to, seq.groupBy { _.from.shortSchemaComponentDesignator }.toSeq)
}
m.toMap
}

final def elementRefsTo(decl: GlobalElementDecl): Seq[ElementRef] =
refsTo(decl).asInstanceOf[Seq[ElementRef]]

Expand Down Expand Up @@ -141,6 +157,17 @@ final class Root private (
}.flatten
}

final lazy val typeBaseChains: Seq[RefSpec] = {
allComponents.collect {
case std: SimpleTypeDefBase => {
val optStd = std.optReferredToComponent.collect { case stdb: SimpleTypeDefBase =>
stdb
}
optStd.map { stdb => RefSpec(std, stdb, 1) }.toSeq
}
}.flatten
}

lazy val allERefs = allComponents
.filter {
case er: ElementRef => true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.daffodil.core.dsom

import java.io.File
import scala.collection.mutable
import scala.xml.Node

import org.apache.daffodil.core.compiler.RootSpec
Expand Down Expand Up @@ -540,8 +541,127 @@ final class SchemaSet private (
.flatMap(_.defineVariables)
.union(predefinedVars)

private lazy val checkUnusedProperties = LV('hasUnusedProperties) {
root.checkUnusedProperties
private val doneSimpleTypeDefs: mutable.Set[SimpleTypeDefBase] = mutable.Set()

/**
* Used to propagate used properties through all element/group references, element declarations
* and simple types. This is necessary because we can have elements being referenced
* by multiple element refs, where the first element gets its propCache populated with
* the properties it has used, and the second element doesn't because it shares the parsers
* of the first element.
*
* With this function, we copy used properties from element refs to elements, group refs to groups,
* from elements to their simple types, from enclosing elements to their local simple types,
* and from simple types to their base types. This function is recursive when processing
* simple type definitions that reference simple type defs whose base is a primitive type.
*
* Note: this is clobbering the property caches, making them subsequently unusable
* for their initial property lookup uses. Hence, this must be done in a pass that
* happens only after all other schema compilation involving properties is complete.
*/
def propagateUsedPropertiesForThis(asc: AnnotatedSchemaComponent): Unit = {
asc match {
case ggd: GlobalGroupDef => {
val groupRefs = root.refMap.getOrElse(ggd, Seq.empty)
groupRefs.foreach { case (_, grs) =>
grs.foreach { rs =>
val gr = rs.from.asInstanceOf[AnnotatedSchemaComponent]
gr.propCache.foreach { kv =>
if (ggd.formatAnnotation.justThisOneProperties.contains(kv._1)) {
ggd.propCache.put(kv._1, kv._2)
}
}
}
}
}
case ged: GlobalElementDecl => {
val elementRefs = root.refMap.getOrElse(ged, Seq.empty)
elementRefs.foreach { case (_, ers) =>
ers.foreach { rs =>
val er = rs.from.asInstanceOf[AnnotatedSchemaComponent]
er.propCache.foreach { kv =>
if (ged.formatAnnotation.justThisOneProperties.contains(kv._1)) {
ged.propCache.put(kv._1, kv._2)
}
}
}
}
}
case gstd: GlobalSimpleTypeDef if gstd.bases.nonEmpty => {
val elementsOfType = root.refMap.getOrElse(gstd, Seq.empty)
elementsOfType.foreach { case (_, eles) =>
eles.foreach { rs =>
val er = rs.from.asInstanceOf[AnnotatedSchemaComponent]
er.propCache.foreach { kv =>
if (gstd.formatAnnotation.justThisOneProperties.contains(kv._1)) {
gstd.propCache.put(kv._1, kv._2)
}
}
}
}
doneSimpleTypeDefs.add(gstd)
}
case lstd: LocalSimpleTypeDef => {
val enclElements = lstd.enclosingElements
enclElements.foreach { ee =>
ee.propCache.foreach { kv =>
if (lstd.formatAnnotation.justThisOneProperties.contains(kv._1)) {
lstd.propCache.put(kv._1, kv._2)
}
}
}
doneSimpleTypeDefs.add(lstd)
}
case gstd: GlobalSimpleTypeDef if gstd.bases.isEmpty => {
val othersReferencingThis =
root.typeBaseChainMap
.getOrElse(gstd, Seq.empty) ++ root.refMap.getOrElse(gstd, Seq.empty)
othersReferencingThis.foreach { case (_, refs) =>
refs.collect {
case rs if rs.from.isInstanceOf[SimpleTypeDefBase] =>
val std = rs.from.asInstanceOf[SimpleTypeDefBase]
if (!doneSimpleTypeDefs.contains(std)) {
propagateUsedPropertiesForThis(std)
}
std.propCache.foreach(kv => gstd.propCache.put(kv._1, kv._2))
case rs =>
val er = rs.from.asInstanceOf[AnnotatedSchemaComponent]
er.propCache.foreach { kv =>
if (gstd.formatAnnotation.justThisOneProperties.contains(kv._1)) {
gstd.propCache.put(kv._1, kv._2)
}
}
}
}
doneSimpleTypeDefs.add(gstd)
}
case _ => // do nothing
}
}

/**
* Propagate used properties through AnnotatedSchemaComponent.
* This must be done after compilation is complete to ensure all properties
* that are used are accounted for.
* This is part of the algorithm for identifying properties that are present, but unused.
*
* Note: This algorithm repurposes propCaches which are no longer usable
* for property lookups after this is done.
*/
private lazy val propagateUsedProperties = {
root.allComponents.collect { case asc: AnnotatedSchemaComponent =>
propagateUsedPropertiesForThis(asc)
}
}

/**
* check unused properties on annotated schema component, must be done
* after compilation is completed and used properties are propagated
*/
private lazy val checkUnusedProperties = LV('checkUnusedProperties) {
root.allComponents.collect { case asc: AnnotatedSchemaComponent =>
asc.checkUnusedProperties
}
}.value

/**
Expand All @@ -563,6 +683,15 @@ final class SchemaSet private (
else {
val hasErrors = super.isError
if (!hasErrors) {
// must be called after compilation is done
// this is a central part of the algorithm by which we identify properties that are present,
// but not used (see checkUnusedProperties).
// The overall algorithm is expressed in multiple places.
// This part handles the propagation of properties from referencer to referenced object
// when property combining occurs.
propagateUsedProperties
// The rest of the algorithm is then just traversing every AnnotatedSchemaComponent
// to see what properties are present on them that do not appear in the propCache.
// must be last, after all compilation is done.
// only check this if there are no errors.
checkUnusedProperties
Expand Down Expand Up @@ -679,8 +808,9 @@ class TransitiveClosureSchemaComponents private () extends TransitiveClosure[Sch
st.bases ++
st.optRestriction ++
st.optUnion
case r: Restriction => r.optUnion.toSeq
case r: Restriction => r.optUnion.toSeq ++ r.enumerations
case u: Union => u.unionMemberTypes
case e: EnumerationDef => Nil
case c: ComplexTypeBase => Seq(c.modelGroup)
case gd: GlobalGroupDef => gd.groupMembers
case stmt: DFDLStatement => Nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,4 +305,5 @@ final class EnumerationDef(xml: Node, parentType: SimpleTypeDefBase)
protected def isMyFormatAnnotation(a: DFDLAnnotation): Boolean =
Assert.invariantFailed("Should not be called")

override val diagnosticDebugNameImpl = "enumeration"
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import java.util.UUID

import org.apache.daffodil.core.dsom.walker.TermView
import org.apache.daffodil.core.grammar.TermGrammarMixin
import org.apache.daffodil.lib.api.WarnID
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.schema.annotation.props.Found
import org.apache.daffodil.lib.schema.annotation.props.NotFound
Expand Down Expand Up @@ -105,43 +104,6 @@ trait Term
*/
final def tci = dpathCompileInfo

/**
* Used to recursively go through Terms and look for DFDL properties that
* have not been accessed and record it as a warning. This function uses the
* property cache state to determine which properties have been access, so
* this function must only be called after all property accesses are complete
* (e.g. schema compilation has finished) to ensure there are no false
* positives.
*/
final lazy val checkUnusedProperties: Unit = {
// Get the properties defined on this term and what it refers to
val localProps = formatAnnotation.justThisOneProperties
val refProps = optReferredToComponent
.map { _.formatAnnotation.justThisOneProperties }
.getOrElse(Map.empty)

val usedProperties = propCache

localProps.foreach { case (prop, (value, _)) =>
if (!usedProperties.contains(prop)) {
SDW(WarnID.IgnoreDFDLProperty, "DFDL property was ignored: %s=\"%s\"", prop, value)
}
}

refProps.foreach { case (prop, (value, _)) =>
if (!usedProperties.contains(prop)) {
optReferredToComponent.get.SDW(
WarnID.IgnoreDFDLProperty,
"DFDL property was ignored: %s=\"%s\"",
prop,
value
)
}
}

termChildren.foreach { _.checkUnusedProperties }
}

def position: Int

def optIgnoreCase: Option[YesNo] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,14 @@ class XercesSchemaFileLocation(

// we have to override equals and hashCode because the OOlag error checks for duplicates in its error list
override def equals(obj: Any): Boolean = {
val xsflObj = obj.asInstanceOf[XercesSchemaFileLocation]
xsflObj.xercesError.getLineNumber == this.xercesError.getLineNumber &&
xsflObj.xercesError.getColumnNumber == this.xercesError.getColumnNumber &&
xsflObj.xercesError.getSystemId == this.xercesError.getSystemId &&
xsflObj.schemaFileLocation == this.schemaFileLocation
// sometimes what OOlag error/warning is comparing against isn't a XercesSchemaFileLocation, but
// is a schemaFileLocation instead, since XercesSchemaFileLocation is a SchemaFileLocation as well,
// we just use that for comparison
val sflObj = obj.asInstanceOf[SchemaFileLocation]
sflObj.lineNumber.getOrElse("") == this.lineNumber.getOrElse("") &&
sflObj.columnNumber.getOrElse("") == this.columnNumber.getOrElse("") &&
sflObj.diagnosticFile == this.diagnosticFile &&
sflObj.diagnosticDebugName == this.diagnosticDebugName
}

override def hashCode: Int = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ trait FindPropertyMixin extends PropTypes {
*/
protected def lookupProperty(pname: String): PropertyLookupResult

/**
* the PropCache plays two roles in different phases of the compilation.
* First for property lookups, second for determining which properties are present, but unused.
*
* These two uses are incompatible as the second does propagation of used properties using propCache.
* The first use must therefore be over. Then the caches are repurposed for the second algorithm.
*/
val propCache = new scala.collection.mutable.LinkedHashMap[String, PropertyLookupResult]

/**
Expand Down
Loading

0 comments on commit f9854f4

Please sign in to comment.