From 7777bf364cd26d95a08ec8f3fe8cc972b5e9360a Mon Sep 17 00:00:00 2001 From: Raffi Khatchadourian Date: Fri, 19 Jul 2024 10:50:44 -0400 Subject: [PATCH 1/3] Deal with multiple possible callables (#121) * Return `null` when there are multiple possible callables. * Add test to exercise call string imprecision. Based on the call string length. See https://github.com/wala/WALA/discussions/1417#discussioncomment-10085680. * Expect the test to fail. In the past, we could add 0's to the parameters, but since we are not enforcing the existence of the node in the CG, we can no longer do that. Still, this test should now fail if https://github.com/wala/ML/issues/207 is fixed. --- .../python/ml/test/TestTensorflow2Model.java | 35 +++++++++++++++ com.ibm.wala.cast.python.test/.pydevproject | 1 + .../data/proj66/src/__init__.py | 1 + .../data/proj66/src/tf2_test_model_call5b.py | 9 ++++ .../data/proj66/tf2_test_model_call5.py | 44 +++++++++++++++++++ .../data/proj66/tf2_test_model_call5a.py | 44 +++++++++++++++++++ ...nstanceMethodTrampolineTargetSelector.java | 24 +++++++++- 7 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 com.ibm.wala.cast.python.test/data/proj66/src/__init__.py create mode 100644 com.ibm.wala.cast.python.test/data/proj66/src/tf2_test_model_call5b.py create mode 100644 com.ibm.wala.cast.python.test/data/proj66/tf2_test_model_call5.py create mode 100644 com.ibm.wala.cast.python.test/data/proj66/tf2_test_model_call5a.py diff --git a/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflow2Model.java b/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflow2Model.java index bf9d86cb..a4f01cdb 100644 --- a/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflow2Model.java +++ b/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflow2Model.java @@ -1204,6 +1204,41 @@ public void testModelCall4() 
test("tf2_test_model_call4.py", "SequentialModel.__call__", 1, 1, 3); } + /** + * Test call string imprecision as described in + * https://github.com/wala/WALA/discussions/1417#discussioncomment-10085680. This should fail due + * to https://github.com/wala/ML/issues/207. + */ + @Test(expected = java.lang.AssertionError.class) + public void testModelCall5() + throws ClassHierarchyException, IllegalArgumentException, CancelException, IOException { + test( + new String[] { + "proj66/src/tf2_test_model_call5b.py", + "proj66/tf2_test_model_call5.py", + "proj66/tf2_test_model_call5a.py" + }, + "tf2_test_model_call5.py", + "SequentialModel.__call__", + "proj66", + 1, + 1, + 3); + + test( + new String[] { + "proj66/src/tf2_test_model_call5b.py", + "proj66/tf2_test_model_call5.py", + "proj66/tf2_test_model_call5a.py" + }, + "tf2_test_model_call5a.py", + "SequentialModel.__call__", + "proj66", + 1, + 1, + 3); + } + @Test public void testModelAttributes() throws ClassHierarchyException, IllegalArgumentException, CancelException, IOException { diff --git a/com.ibm.wala.cast.python.test/.pydevproject b/com.ibm.wala.cast.python.test/.pydevproject index bd6807b9..654655ea 100644 --- a/com.ibm.wala.cast.python.test/.pydevproject +++ b/com.ibm.wala.cast.python.test/.pydevproject @@ -19,6 +19,7 @@ /${PROJECT_DIR_NAME}/data/proj35 /${PROJECT_DIR_NAME}/data/proj37 /${PROJECT_DIR_NAME}/data/proj45 + /${PROJECT_DIR_NAME}/data/proj66 diff --git a/com.ibm.wala.cast.python.test/data/proj66/src/__init__.py b/com.ibm.wala.cast.python.test/data/proj66/src/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/proj66/src/__init__.py @@ -0,0 +1 @@ + diff --git a/com.ibm.wala.cast.python.test/data/proj66/src/tf2_test_model_call5b.py b/com.ibm.wala.cast.python.test/data/proj66/src/tf2_test_model_call5b.py new file mode 100644 index 00000000..59bea179 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/proj66/src/tf2_test_model_call5b.py 
@@ -0,0 +1,9 @@ +# Test https://github.com/wala/WALA/discussions/1417#discussioncomment-10085680. + + +def f(m, d): + return m.predict(d) + + +def g(m, d): + return f(m, d) diff --git a/com.ibm.wala.cast.python.test/data/proj66/tf2_test_model_call5.py b/com.ibm.wala.cast.python.test/data/proj66/tf2_test_model_call5.py new file mode 100644 index 00000000..64274e90 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/proj66/tf2_test_model_call5.py @@ -0,0 +1,44 @@ +# Test https://github.com/wala/WALA/discussions/1417#discussioncomment-10085680. + +import tensorflow as tf +from src.tf2_test_model_call5b import g + +# Create an override model to classify pictures + + +class SequentialModel(tf.keras.Model): + + def __init__(self, **kwargs): + super(SequentialModel, self).__init__(**kwargs) + + self.flatten = tf.keras.layers.Flatten(input_shape=(28, 28)) + + # Add a lot of small layers + num_layers = 100 + self.my_layers = [ + tf.keras.layers.Dense(64, activation="relu") for n in range(num_layers) + ] + + self.dropout = tf.keras.layers.Dropout(0.2) + self.dense_2 = tf.keras.layers.Dense(10) + + def __call__(self, x): + print("Raffi 1") + x = self.flatten(x) + + for layer in self.my_layers: + x = layer(x) + + x = self.dropout(x) + x = self.dense_2(x) + + return x + + def predict(self, x): + return self(x) + + +input_data = tf.random.uniform([20, 28, 28]) + +model = SequentialModel() +result = g(model, input_data) diff --git a/com.ibm.wala.cast.python.test/data/proj66/tf2_test_model_call5a.py b/com.ibm.wala.cast.python.test/data/proj66/tf2_test_model_call5a.py new file mode 100644 index 00000000..842edd5b --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/proj66/tf2_test_model_call5a.py @@ -0,0 +1,44 @@ +# Test https://github.com/wala/WALA/discussions/1417#discussioncomment-10085680. 
+ +import tensorflow as tf +from src.tf2_test_model_call5b import g + +# Create an override model to classify pictures + + +class SequentialModel(tf.keras.Model): + + def __init__(self, **kwargs): + super(SequentialModel, self).__init__(**kwargs) + + self.flatten = tf.keras.layers.Flatten(input_shape=(28, 28)) + + # Add a lot of small layers + num_layers = 100 + self.my_layers = [ + tf.keras.layers.Dense(64, activation="relu") for n in range(num_layers) + ] + + self.dropout = tf.keras.layers.Dropout(0.2) + self.dense_2 = tf.keras.layers.Dense(10) + + def __call__(self, x): + print("Raffi 2") + x = self.flatten(x) + + for layer in self.my_layers: + x = layer(x) + + x = self.dropout(x) + x = self.dense_2(x) + + return x + + def predict(self, x): + return self(x) + + +input_data = tf.random.uniform([20, 28, 28]) + +model = SequentialModel() +result = g(model, input_data) diff --git a/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java b/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java index 2b9e85ab..53c3695c 100644 --- a/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java +++ b/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java @@ -43,6 +43,7 @@ import com.ibm.wala.util.collections.HashMapFactory; import com.ibm.wala.util.collections.Pair; import com.ibm.wala.util.intset.OrdinalSet; +import java.util.HashMap; import java.util.Map; import java.util.logging.Logger; @@ -223,6 +224,8 @@ private IClass getCallable(CGNode caller, IClassHierarchy cha, PythonInvokeInstr PointerKey receiver = pkf.getPointerKeyForLocal(caller, call.getUse(0)); OrdinalSet objs = builder.getPointerAnalysis().getPointsToSet(receiver); + Map instanceToCallable = new HashMap<>(); + for (InstanceKey o : objs) { 
AllocationSiteInNode instanceKey = getAllocationSiteInNode(o); if (instanceKey != null) { @@ -254,10 +257,29 @@ private IClass getCallable(CGNode caller, IClassHierarchy cha, PythonInvokeInstr LOGGER.info("Applying callable workaround for https://github.com/wala/ML/issues/118."); } - if (callable != null) return callable; + if (callable != null) { + if (instanceToCallable.containsKey(instanceKey)) + throw new IllegalStateException("Exisitng mapping found for: " + instanceKey); + + IClass previousValue = instanceToCallable.put(instanceKey, callable); + assert previousValue == null : "Not expecting a previous mapping."; + } } } + // if there's only one possible option. + if (instanceToCallable.values().size() == 1) { + IClass callable = instanceToCallable.values().iterator().next(); + assert callable != null : "Callable should be non-null."; + return callable; + } + + // if we have multiple candidates. + if (instanceToCallable.values().size() > 1) + // we cannot accurately select one. + LOGGER.warning( + "Multiple (" + instanceToCallable.values().size() + ") callable targets found."); + return null; } From 853c79643932877a20a8846b16b2caf891275efe Mon Sep 17 00:00:00 2001 From: Raffi Khatchadourian Date: Sun, 21 Jul 2024 11:27:51 -0400 Subject: [PATCH 2/3] Use a set to track callable candidates. We don't need the instance key that produces the callable right now. 
--- ...nstanceMethodTrampolineTargetSelector.java | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java b/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java index 53c3695c..50fde5f7 100644 --- a/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java +++ b/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java @@ -43,8 +43,9 @@ import com.ibm.wala.util.collections.HashMapFactory; import com.ibm.wala.util.collections.Pair; import com.ibm.wala.util.intset.OrdinalSet; -import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import java.util.logging.Logger; public class PythonInstanceMethodTrampolineTargetSelector @@ -224,7 +225,8 @@ private IClass getCallable(CGNode caller, IClassHierarchy cha, PythonInvokeInstr PointerKey receiver = pkf.getPointerKeyForLocal(caller, call.getUse(0)); OrdinalSet objs = builder.getPointerAnalysis().getPointsToSet(receiver); - Map instanceToCallable = new HashMap<>(); + // The set of potential callables to be returned. 
+ Set callableSet = new HashSet<>(); for (InstanceKey o : objs) { AllocationSiteInNode instanceKey = getAllocationSiteInNode(o); @@ -257,28 +259,21 @@ private IClass getCallable(CGNode caller, IClassHierarchy cha, PythonInvokeInstr LOGGER.info("Applying callable workaround for https://github.com/wala/ML/issues/118."); } - if (callable != null) { - if (instanceToCallable.containsKey(instanceKey)) - throw new IllegalStateException("Exisitng mapping found for: " + instanceKey); - - IClass previousValue = instanceToCallable.put(instanceKey, callable); - assert previousValue == null : "Not expecting a previous mapping."; - } + callableSet.add(callable); } } // if there's only one possible option. - if (instanceToCallable.values().size() == 1) { - IClass callable = instanceToCallable.values().iterator().next(); + if (callableSet.size() == 1) { + IClass callable = callableSet.iterator().next(); assert callable != null : "Callable should be non-null."; return callable; } // if we have multiple candidates. - if (instanceToCallable.values().size() > 1) + if (callableSet.size() > 1) // we cannot accurately select one. - LOGGER.warning( - "Multiple (" + instanceToCallable.values().size() + ") callable targets found."); + LOGGER.warning("Multiple (" + callableSet.size() + ") callable targets found."); return null; } From 84e377abc3cbb346c77f27362e96551309e02e66 Mon Sep 17 00:00:00 2001 From: Raffi Khatchadourian Date: Fri, 26 Jul 2024 14:18:57 -0400 Subject: [PATCH 3/3] Add TODO comment. Per https://github.com/wala/ML/pull/208#issuecomment-2253018494. 
--- .../callgraph/PythonInstanceMethodTrampolineTargetSelector.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java b/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java index 50fde5f7..0fcc6545 100644 --- a/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java +++ b/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonInstanceMethodTrampolineTargetSelector.java @@ -89,6 +89,8 @@ protected boolean shouldProcess(CGNode caller, CallSiteReference site, IClass re @Override public IMethod getCalleeTarget(CGNode caller, CallSiteReference site, IClass receiver) { + // TODO: Callable detection may need to be moved. See https://github.com/wala/ML/issues/207. If + // it stays here, we should further document the receiver swapping process. if (isCallable(receiver)) { LOGGER.fine("Encountered callable.");