From d1ce658067139099b44793beedc3fc8f06615b67 Mon Sep 17 00:00:00 2001
From: Paul Stapor <paul.stapor@helmholtz-muenchen.de>
Date: Tue, 10 Dec 2019 20:03:47 +0100
Subject: [PATCH] Sparsify dwdp to reduce computation time for adjoints (#858)

Fix super long computation times for adjoint sensitivities introduced in AMICI 0.10.0 (Fixes #849)
---
 .github/workflows/test-large-model.yml  |   9 +-
 include/amici/abstract_model.h          |  15 ++-
 include/amici/model.h                   |  57 +++++----
 include/amici/model_dae.h               |  21 ++--
 include/amici/model_ode.h               |  59 ++++++---
 include/amici/serialization.h           |   3 +
 include/amici/sundials_matrix_wrapper.h |  46 ++++++-
 python/amici/gradient_check.py          |   2 -
 python/amici/ode_export.py              | 160 ++++++++++++++----------
 src/abstract_model.cpp                  |   4 +
 src/model.cpp                           |  94 +++++++-------
 src/model_dae.cpp                       |  44 ++++---
 src/model_header.ODE_template.h         |  38 ++++--
 src/model_ode.cpp                       | 142 ++++++++++++++++-----
 src/newton_solver.cpp                   |  40 +++++-
 src/sundials_matrix_wrapper.cpp         | 147 +++++++++++++++++++++-
 tests/performance/reference.yml         |   8 +-
 tests/performance/test.py               |  10 ++
 tests/testMisc.py                       |   2 +-
 tests/testPYSB.py                       |  11 +-
 20 files changed, 675 insertions(+), 237 deletions(-)

diff --git a/.github/workflows/test-large-model.yml b/.github/workflows/test-large-model.yml
index 07445a43df..f77c10ed81 100644
--- a/.github/workflows/test-large-model.yml
+++ b/.github/workflows/test-large-model.yml
@@ -4,7 +4,8 @@ on:
     branches:
       - develop
       - master
-      - fix_862_powsimp
+      - feature_sparse_quadratures
+
   pull_request:
     branches:
       - master
@@ -73,3 +74,9 @@ jobs:
     - name: adjoint_sensitivities
       run: |
         check_time.sh adjoint_sensitivities tests/performance/test.py adjoint_sensitivities
+    - name: forward_simulation_non_optimal_parameters
+      run: |
+        check_time.sh forward_simulation_non_optimal_parameters tests/performance/test.py forward_simulation_non_optimal_parameters
+    - name: adjoint_sensitivities_non_optimal_parameters
+      run: |
+        check_time.sh adjoint_sensitivities_non_optimal_parameters tests/performance/test.py adjoint_sensitivities_non_optimal_parameters
\ No newline at end of file
diff --git a/include/amici/abstract_model.h b/include/amici/abstract_model.h
index c6cadc3f97..3f64573829 100644
--- a/include/amici/abstract_model.h
+++ b/include/amici/abstract_model.h
@@ -108,7 +108,8 @@ class AbstractModel {
                         const AmiVector &dx) = 0;
 
     /**
-     * @brief Parameter derivative of residual function
+     * @brief Model specific sparse implementation of
+              explicit parameter derivative of right hand side
      * @param t time
      * @param x state
      * @param dx time derivative of state (DAE only)
@@ -668,6 +669,18 @@ class AbstractModel {
                        const realtype *p, const realtype *k, const realtype *h,
                        const realtype *w, const realtype *tcl,
                        const realtype *stcl);
+    
+    /**
+     * @brief Model specific implementation for dwdp, column pointers
+     * @param indexptrs column pointers
+     **/
+    virtual void fdwdp_colptrs(sunindextype *indexptrs);
+    
+    /**
+     * @brief Model specific implementation for dwdp, row values
+     * @param indexvals row values
+     **/
+    virtual void fdwdp_rowvals(sunindextype *indexvals);
 
     /**
      * @brief Model specific sensitivity implementation of dwdp
diff --git a/include/amici/model.h b/include/amici/model.h
index b08e5492cd..21733ea9bf 100644
--- a/include/amici/model.h
+++ b/include/amici/model.h
@@ -71,14 +71,18 @@ class Model : public AbstractModel {
      * @param plist indexes wrt to which sensitivities are to be computed
      * @param idlist indexes indicating algebraic components (DAE only)
      * @param z2event mapping of event outputs to events
+     * @param pythonGenerated flag indicating matlab or python wrapping
+     * @param ndxdotdp_explicit number of nonzero elements dxdotdp_explicit
+     * @param ndxdotdp_implicit number of nonzero elements dxdotdp_implicit
      */
     Model(int nx_rdata, int nxtrue_rdata, int nx_solver, int nxtrue_solver,
           int ny, int nytrue, int nz, int nztrue, int ne, int nJ, int nw,
-          int ndwdx, int ndwdp, int ndxdotdw, std::vector<int> ndJydy, int nnz,
-          int ubw, int lbw, amici::SecondOrderMode o2mode,
+          int ndwdx, int ndwdp, int ndxdotdw, std::vector<int> ndJydy,
+          int nnz, int ubw, int lbw, amici::SecondOrderMode o2mode,
           const std::vector<amici::realtype> &p, std::vector<amici::realtype> k,
           const std::vector<int> &plist, std::vector<amici::realtype> idlist,
-          std::vector<int> z2event);
+          std::vector<int> z2event, bool pythonGenerated=false,
+          int ndxdotdp_explicit=0, int ndxdotdp_implicit=0);
 
     /** destructor */
     ~Model() override = default;
@@ -130,6 +134,8 @@ class Model : public AbstractModel {
     using AbstractModel::fdsigmaydp;
     using AbstractModel::fdsigmazdp;
     using AbstractModel::fdwdp;
+    using AbstractModel::fdwdp_colptrs;
+    using AbstractModel::fdwdp_rowvals;
     using AbstractModel::fdwdx;
     using AbstractModel::fdwdx_colptrs;
     using AbstractModel::fdwdx_rowvals;
@@ -1033,12 +1039,6 @@ class Model : public AbstractModel {
      */
     bool getAlwaysCheckFinite() const;
 
-    /**
-     * @brief check whether the model was generated from python
-     * @return that
-     */
-    virtual bool wasPythonGenerated() const { return false; }
-
     /**
      * @brief Initial states
      * @param x pointer to state variables
@@ -1130,7 +1130,6 @@ class Model : public AbstractModel {
 
     /** number of nonzero entries in dxdotdw */
     int ndxdotdw{0};
-
     /** number of nonzero entries in dJydy */
     std::vector<int> ndJydy;
 
@@ -1145,7 +1144,16 @@ class Model : public AbstractModel {
 
     /** lower bandwith of the jacobian */
     int lbw{0};
-
+    
+    /** flag indicating Matlab or python based model generation */
+    bool pythonGenerated;
+
+    /** number of nonzero entries in ndxdotdp_explicit */
+    int ndxdotdp_explicit{0};
+    
+    /** number of nonzero entries in ndxdotdp_implicit */
+    int ndxdotdp_implicit{0};
+    
     /** flag indicating whether for sensi == AMICI_SENSI_ORDER_SECOND
      * directional or full second order derivative will be computed */
     SecondOrderMode o2mode{SecondOrderMode::none};
@@ -1153,10 +1161,17 @@ class Model : public AbstractModel {
     /** flag array for DAE equations */
     std::vector<realtype> idlist;
 
-    /** temporary storage of dxdotdp data across functions (dimension: nplist x
-     * nx_solver, row-major) */
+    /** temporary storage of dxdotdp data across functions, Python only
+     (dimension: nplist x nx_solver, nnz: ndxdotdp_explicit, type CSC_MAT) */
+    mutable SUNMatrixWrapper dxdotdp_explicit;
+    
+    /** temporary storage of dxdotdp_implicit data across functions, Python only
+     (dimension: nplist x * nx_solver, nnz: ndxdotdp_implicit, type CSC_MAT) */
+    mutable SUNMatrixWrapper dxdotdp_implicit;
+    
+    /** temporary storage of dxdotdp data across functions, Matlab only
+     (dimension: nplist x nx_solver, row-major) */
     AmiVectorArray dxdotdp;
-
     /** AMICI context */
     AmiciApplication *app = &defaultContext;
 
@@ -1258,7 +1273,7 @@ class Model : public AbstractModel {
     virtual void fdJydy_colptrs(sunindextype *indexptrs, int index);
 
     /**
-     * @brief Model specific implementation of fdxdotdw row vals
+     * @brief Model specific implementation of fdJydy row vals
      * @param indexptrs row val pointers
      * @param index ytrue index
      */
@@ -1560,6 +1575,9 @@ class Model : public AbstractModel {
     /** Sparse dxdotdw temporary storage (dimension: ndxdotdw) */
     mutable SUNMatrixWrapper dxdotdw;
 
+    /** Sparse dwdp temporary storage (dimension: ndwdp) */
+    mutable SUNMatrixWrapper dwdp;
+    
     /** Sparse dwdx temporary storage (dimension: ndwdx) */
     mutable SUNMatrixWrapper dwdx;
 
@@ -1573,12 +1591,12 @@ class Model : public AbstractModel {
     mutable std::vector<realtype> mz;
 
     /** Sparse observable derivative of data likelihood,
-     * only used if wasPythonGenerated()==true
+     * only used if pythonGenerated==true
      * (dimension nytrue, nJ x ny, row-major) */
     mutable std::vector<SUNMatrixWrapper> dJydy;
 
     /** observable derivative of data likelihood,
-     * only used if wasPythonGenerated()==false
+     * only used if pythonGenerated==false
      * (dimension nJ x ny x nytrue, row-major)
      */
     mutable std::vector<realtype> dJydy_matlab;
@@ -1661,11 +1679,6 @@ class Model : public AbstractModel {
     /** tempory storage of w data across functions (dimension: nw) */
     mutable std::vector<realtype> w;
 
-    /** tempory storage of sparse/dense dwdp data across functions
-     * (dimension: ndwdp)
-     */
-    mutable std::vector<realtype> dwdp;
-
     /** tempory storage for flattened sx,
      * (dimension: nx_solver x nplist, row-major)
      */
diff --git a/include/amici/model_dae.h b/include/amici/model_dae.h
index bb8ed38cd2..c0be1e2eae 100644
--- a/include/amici/model_dae.h
+++ b/include/amici/model_dae.h
@@ -58,21 +58,24 @@ class Model_DAE : public Model {
      * @param plist indexes wrt to which sensitivities are to be computed
      * @param idlist indexes indicating algebraic components (DAE only)
      * @param z2event mapping of event outputs to events
+     * @param pythonGenerated flag indicating matlab or python wrapping
+     * @param ndxdotdp_explicit number of nonzero elements dxdotdp_explicit
+     * @param ndxdotdp_implicit number of nonzero elements dxdotdp_implicit
      */
     Model_DAE(const int nx_rdata, const int nxtrue_rdata, const int nx_solver,
               const int nxtrue_solver, const int ny, const int nytrue,
               const int nz, const int nztrue, const int ne, const int nJ,
-              const int nw, const int ndwdx, const int ndwdp,
-              const int ndxdotdw, std::vector<int> ndJydy, const int nnz,
-              const int ubw, const int lbw, const SecondOrderMode o2mode,
+              const int nw, const int ndwdx, const int ndwdp, const int ndxdotdw,
+              std::vector<int> ndJydy, const int nnz, const int ubw,
+              const int lbw, const SecondOrderMode o2mode,
               std::vector<realtype> const &p, std::vector<realtype> const &k,
-              std::vector<int> const &plist,
-              std::vector<realtype> const &idlist,
-              std::vector<int> const &z2event)
+              std::vector<int> const &plist, std::vector<realtype> const &idlist,
+              std::vector<int> const &z2event, const bool pythonGenerated=false,
+              const int ndxdotdp_explicit=0, const int ndxdotdp_implicit=0)
         : Model(nx_rdata, nxtrue_rdata, nx_solver, nxtrue_solver, ny, nytrue,
-                nz, nztrue, ne, nJ, nw, ndwdx, ndwdp, ndxdotdw,
-                std::move(ndJydy), nnz, ubw, lbw, o2mode, p, k, plist, idlist,
-                z2event) {}
+                nz, nztrue, ne, nJ, nw, ndwdx, ndwdp, ndxdotdw, std::move(ndJydy),
+                nnz, ubw, lbw, o2mode, p, k, plist, idlist, z2event,
+                pythonGenerated, ndxdotdp_explicit, ndxdotdp_implicit) {}
 
     void fJ(realtype t, realtype cj, const AmiVector &x, const AmiVector &dx,
             const AmiVector &xdot, SUNMatrix J) override;
diff --git a/include/amici/model_ode.h b/include/amici/model_ode.h
index 5e223af243..c18db306f1 100644
--- a/include/amici/model_ode.h
+++ b/include/amici/model_ode.h
@@ -58,21 +58,25 @@ class Model_ODE : public Model {
      * @param plist indexes wrt to which sensitivities are to be computed
      * @param idlist indexes indicating algebraic components (DAE only)
      * @param z2event mapping of event outputs to events
+     * @param pythonGenerated flag indicating matlab or python wrapping
+     * @param ndxdotdp_explicit number of nonzero elements dxdotdp_explicit
+     * @param ndxdotdp_implicit number of nonzero elements dxdotdp_implicit
      */
     Model_ODE(const int nx_rdata, const int nxtrue_rdata, const int nx_solver,
               const int nxtrue_solver, const int ny, const int nytrue,
               const int nz, const int nztrue, const int ne, const int nJ,
               const int nw, const int ndwdx, const int ndwdp,
-              const int ndxdotdw, std::vector<int> ndJydy, const int nnz,
-              const int ubw, const int lbw, const SecondOrderMode o2mode,
-              std::vector<realtype> const &p, std::vector<realtype> const &k,
-              std::vector<int> const &plist,
+              const int ndxdotdw, std::vector<int> ndJydy,
+              const int nnz, const int ubw, const int lbw,
+              const SecondOrderMode o2mode, std::vector<realtype> const &p,
+              std::vector<realtype> const &k, std::vector<int> const &plist,
               std::vector<realtype> const &idlist,
-              std::vector<int> const &z2event)
+              std::vector<int> const &z2event, const bool pythonGenerated=false,
+              const int ndxdotdp_explicit=0, const int ndxdotdp_implicit=0)
         : Model(nx_rdata, nxtrue_rdata, nx_solver, nxtrue_solver, ny, nytrue,
-                nz, nztrue, ne, nJ, nw, ndwdx, ndwdp, ndxdotdw,
-                std::move(ndJydy), nnz, ubw, lbw, o2mode, p, k, plist, idlist,
-                z2event) {}
+                nz, nztrue, ne, nJ, nw, ndwdx, ndwdp, ndxdotdw, std::move(ndJydy),
+                nnz, ubw, lbw, o2mode, p, k, plist, idlist, z2event,
+                pythonGenerated, ndxdotdp_explicit, ndxdotdp_implicit) {}
 
     void fJ(realtype t, realtype cj, const AmiVector &x, const AmiVector &dx,
             const AmiVector &xdot, SUNMatrix J) override;
@@ -218,7 +222,7 @@ class Model_ODE : public Model {
      */
     void fdxdotdw(realtype t, const N_Vector x);
 
-    /** Sensitivity of dx/dt wrt model parameters p
+    /** Explicit sensitivity of dx/dt wrt model parameters p
      * @param t timepoint
      * @param x Vector with the states
      * @return status flag indicating successful execution
@@ -402,7 +406,7 @@ class Model_ODE : public Model {
                        const realtype *p, const realtype *k, const realtype *h,
                        const realtype *w) = 0;
 
-    /** model specific implementation of fdxdotdp, with w chainrule
+    /** model specific implementation of fdxdotdp, with w chainrule (Matlab)
      * @param dxdotdp partial derivative xdot wrt p
      * @param t timepoint
      * @param x Vector with the states
@@ -418,19 +422,39 @@ class Model_ODE : public Model {
                           const realtype *h, int ip, const realtype *w,
                           const realtype *dwdp);
 
-    /** model specific implementation of fdxdotdp, without w chainrule
-     * @param dxdotdp partial derivative xdot wrt p
+    /** model specific implementation of fdxdotdp_explicit, no w chainrule (Py)
+     * @param dxdotdp_explicit partial derivative xdot wrt p
      * @param t timepoint
      * @param x Vector with the states
      * @param p parameter vector
      * @param k constants vector
      * @param h heavyside vector
-     * @param ip parameter index
      * @param w vector with helper variables
      */
-    virtual void fdxdotdp(realtype *dxdotdp, realtype t, const realtype *x,
-                          const realtype *p, const realtype *k,
-                          const realtype *h, int ip, const realtype *w);
+    virtual void fdxdotdp_explicit(realtype *dxdotdp_explicit, realtype t,
+                                   const realtype *x, const realtype *p,
+                                   const realtype *k, const realtype *h,
+                                   const realtype *w);
+
+    /** model specific implementation of fdxdotdp_explicit, colptrs part
+     * @param indexptrs column pointers
+     */
+    virtual void fdxdotdp_explicit_colptrs(sunindextype *indexptrs);
+
+    /** model specific implementation of fdxdotdp_explicit, rowvals part
+     * @param indexvals row values
+     */
+    virtual void fdxdotdp_explicit_rowvals(sunindextype *indexvals);
+
+    /** model specific implementation of fdxdotdp_implicit, colptrs part
+     * @param indexptrs column pointers
+     */
+    virtual void fdxdotdp_implicit_colptrs(sunindextype *indexptrs);
+
+    /** model specific implementation of fdxdotdp_implicit, rowvals part
+     * @param indexvals row values
+     */
+    virtual void fdxdotdp_implicit_rowvals(sunindextype *indexvals);
 
     /** model specific implementation of fdxdotdw, data part
      * @param dxdotdw partial derivative xdot wrt w
@@ -450,12 +474,11 @@ class Model_ODE : public Model {
      */
     virtual void fdxdotdw_colptrs(sunindextype *indexptrs);
 
-    /** model specific implementation of fdxdotdw, colptrs part
+    /** model specific implementation of fdxdotdw, rowvals part
      * @param indexvals row values
      */
     virtual void fdxdotdw_rowvals(sunindextype *indexvals);
 };
-
 } // namespace amici
 
 #endif // MODEL_H
diff --git a/include/amici/serialization.h b/include/amici/serialization.h
index 6fb4779e8d..b83e2118ab 100644
--- a/include/amici/serialization.h
+++ b/include/amici/serialization.h
@@ -111,6 +111,9 @@ void serialize(Archive &ar, amici::Model &u, const unsigned int version) {
     ar &u.pscale;
     ar &u.tstart;
     ar &u.stateIsNonNegative;
+    ar &u.pythonGenerated;
+    ar &u.ndxdotdp_explicit;
+    ar &u.ndxdotdp_implicit;
 }
 
 
diff --git a/include/amici/sundials_matrix_wrapper.h b/include/amici/sundials_matrix_wrapper.h
index 59751425c5..35fd16f631 100644
--- a/include/amici/sundials_matrix_wrapper.h
+++ b/include/amici/sundials_matrix_wrapper.h
@@ -116,6 +116,12 @@ class SUNMatrixWrapper {
      */
     sunindextype columns() const;
 
+    /**
+     * @brief Get the number of non-zero elements (sparse matrices only)
+     * @return number
+     */
+    sunindextype nonzeros() const;
+
     /**
      * @brief Get the index values of a sparse matrix
      * @return index array
@@ -153,15 +159,53 @@ class SUNMatrixWrapper {
      */
     void multiply(gsl::span<realtype> c, gsl::span<const realtype> b) const;
 
+    /**
+     * @brief Perform reordered matrix vector multiplication c += A[:,cols]*b
+     * @param c output vector, may already contain values
+     * @param b multiplication vector
+     * @param cols int vector for column reordering
+     * @param transpose bool transpose A before multiplication
+     */
+    void multiply(N_Vector c,
+                  const N_Vector b,
+                  gsl::span <const int> cols,
+                  bool transpose) const;
+
+    /**
+     * @brief Perform reordered matrix vector multiplication c += A[:,cols]*b
+     * @param c output vector, may already contain values
+     * @param b multiplication vector
+     * @param cols int vector for column reordering
+     * @param transpose bool transpose A before multiplication
+     */
+    void multiply(gsl::span<realtype> c,
+                  gsl::span<const realtype> b,
+                  gsl::span <const int> cols,
+                  bool transpose) const;
+
+    /**
+     * @brief Perform matrix matrix multiplication
+              C[:, :] += A * B
+              for sparse A, B, C
+     * @param C output matrix, may already contain values
+     * @param B multiplication matrix
+     */
+    void sparse_multiply(SUNMatrixWrapper *C,
+                         SUNMatrixWrapper *B) const;
+
     /**
      * @brief Set to 0.0
      */
     void zero();
 
+    /**
+     * @brief CSC matrix to which all methods are applied
+     */
+    SUNMatrix matrix = nullptr;
+
   private:
     void update_ptrs();
 
-    SUNMatrix matrix = nullptr;
     realtype *data_ptr = nullptr;
     sunindextype *indexptrs_ptr = nullptr;
     sunindextype *indexvals_ptr = nullptr;
diff --git a/python/amici/gradient_check.py b/python/amici/gradient_check.py
index 1a6f73483a..b56fca96b7 100644
--- a/python/amici/gradient_check.py
+++ b/python/amici/gradient_check.py
@@ -64,8 +64,6 @@ def check_derivatives(model, solver, edata, assert_fun,
         rtol: relative tolerance
         epsilon: finite difference step-size
     """
-    from scipy.optimize import check_grad
-
     p = np.array(model.getParameters())
 
     rdata = runAmiciSimulation(model, solver, edata)
diff --git a/python/amici/ode_export.py b/python/amici/ode_export.py
index 552a43466f..de7833321f 100644
--- a/python/amici/ode_export.py
+++ b/python/amici/ode_export.py
@@ -58,44 +58,35 @@
             '(realtype *J, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
             'const realtype *w, const realtype *dwdx)',
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity']
     },
     'JB': {
         'signature':
             '(realtype *JB, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
             'const realtype *xB, const realtype *w, const realtype *dwdx)',
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity']
     },
     'JDiag': {
         'signature':
             '(realtype *JDiag, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
             'const realtype *w, const realtype *dwdx)',
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity']
     },
     'JSparse': {
         'signature':
             '(realtype *JSparse, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
             'const realtype *w, const realtype *dwdx)',
-        'sparse':
-            True,
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity', 'sparse']
     },
     'JSparseB': {
         'signature':
             '(realtype *JSparseB, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
             'const realtype *xB, const realtype *w, const realtype *dwdx)',
-        'sparse':
-            True,
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity', 'sparse']
     },
     'Jy': {
         'signature':
@@ -114,45 +105,43 @@
             '(realtype *dJydy, const int iy, const realtype *p, '
             'const realtype *k, const realtype *y, '
             'const realtype *sigmay, const realtype *my)',
-        'sparse':
-            True,
+        'flags': ['sparse']
     },
     'dwdp': {
         'signature':
             '(realtype *dwdp, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
-            'const realtype *w, const realtype *tcl, const realtype *dtcldp, '
-            'const int ip)',
-        'assume_pow_positivity':
-            True,
+            'const realtype *w, const realtype *tcl, const realtype *dtcldp)',
+        'flags': ['assume_pow_positivity', 'sparse']
     },
     'dwdx': {
         'signature':
             '(realtype *dwdx, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
             'const realtype *w, const realtype *tcl)',
-        'sparse':
-            True,
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity', 'sparse']
     },
     'dxdotdw': {
         'signature':
             '(realtype *dxdotdw, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
             'const realtype *w)',
-        'sparse':
-            True,
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity', 'sparse']
     },
-    'dxdotdp': {
+    'dxdotdp_explicit': {
         'signature':
-            '(realtype *dxdotdp, const realtype t, const realtype *x, '
-            'const realtype *p, const realtype *k, const realtype *h, '
-            'const int ip, const realtype *w)',
-        'assume_pow_positivity':
-            True,
+            '(realtype *dxdotdp_explicit, const realtype t, '
+            'const realtype *x, const realtype *p, '
+            'const realtype *k, const realtype *h, '
+            'const realtype *w)',
+        'flags': ['assume_pow_positivity', 'sparse']
+    },
+    'dxdotdp_implicit': {
+        'signature':
+            '(realtype *dxdotdp_implicit, const realtype t, '
+            'const realtype *x, const realtype *p, const realtype *k, '
+            'const realtype *h, const int ip, const realtype *w)',
+        'flags': ['assume_pow_positivity', 'sparse', 'dont_generate_body']
     },
     'dydx': {
         'signature':
@@ -181,8 +170,7 @@
             '(realtype *w, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, '
             'const realtype *h, const realtype *tcl)',
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity']
     },
     'x0': {
         'signature':
@@ -210,8 +198,7 @@
             '(realtype *xdot, const realtype t, const realtype *x, '
             'const realtype *p, const realtype *k, const realtype *h, '
             'const realtype *w)',
-        'assume_pow_positivity':
-            True,
+        'flags': ['assume_pow_positivity']
     },
     'y': {
         'signature':
@@ -236,8 +223,12 @@
 ## list of sparse functions
 sparse_functions = [
     function for function in functions
-    if 'sparse' in functions[function]
-    and functions[function]['sparse']
+    if 'sparse' in functions[function].get('flags', [])
+]
+## list of nobody functions
+nobody_functions = [
+    function for function in functions
+    if 'dont_generate_body' in functions[function].get('flags', [])
 ]
 ## list of sensitivity functions
 sensi_functions = [
@@ -809,6 +800,10 @@ def __init__(self, simplify: Optional[Callable] = sp.powsimp):
                 'x': 'JB',
                 'y': 'xB',
             },
+            'dxdotdp_implicit': {
+                'x': 'dxdotdw',
+                'y': 'dwdp',
+            },
         }
 
         self._lock_total_derivative = False
@@ -1188,7 +1183,12 @@ def _generateSymbol(self, name):
             return
         elif name == 'dtcldp':
             self._syms[name] = sp.Matrix([
-                sp.Symbol(f's{strip_pysb(tcl.get_id())}', real=True)
+                [
+                    sp.Symbol(f's{strip_pysb(tcl.get_id())}__'
+                              f'{strip_pysb(par.get_id())}',
+                              real=True)
+                    for par in self._parameters
+                ]
                 for tcl in self._conservationlaws
             ])
             return
@@ -1283,8 +1283,8 @@ def _generateSparseSymbol(self, name):
             base_index = 0
             for iy in range(self.ny()):
                 symbolColPtrs, symbolRowVals, sparseList, symbolList, \
-                sparseMatrix = csc_matrix(matrix[iy, :], name,
-                                          base_index=base_index)
+                    sparseMatrix = csc_matrix(matrix[iy, :], name,
+                                              base_index=base_index)
                 base_index += len(symbolList)
                 self._colptrs[name].append(symbolColPtrs)
                 self._rowvals[name].append(symbolRowVals)
@@ -1293,7 +1293,9 @@ def _generateSparseSymbol(self, name):
                 self._syms[name].append(sparseMatrix)
         else:
             symbolColPtrs, symbolRowVals, sparseList, symbolList, \
-                sparseMatrix = csc_matrix(matrix, name)
+                sparseMatrix = csc_matrix(
+                    matrix, name, pattern_only=name in nobody_functions
+                )
 
             self._colptrs[name] = symbolColPtrs
             self._rowvals[name] = symbolRowVals
@@ -1431,6 +1433,10 @@ def _compute_equation(self, name):
             # force symbols
             self._eqs[name] = self.sym(name)
 
+        elif name == 'dxdotdp_explicit':
+            # force symbols
+            self._derivative('xdot', 'p', name=name)
+
         elif match_deriv:
             self._derivative(match_deriv.group(1), match_deriv.group(2))
 
@@ -1648,11 +1654,11 @@ def _multiplication(self, name, x, y,
         else:
             xx = variables[x]
 
-        if xx.is_zero is not True and variables[y].is_zero is not True \
-                and len(xx) and len(variables[y]):
-            self._eqs[name] = sign * xx * variables[y]
-        else:
+        if not len(xx) or not len(variables[y]) or xx.is_zero is True or \
+                variables[y].is_zero is True:
             self._eqs[name] = sp.zeros(len(xx), len(variables[y]))
+        else:
+            self._eqs[name] = sign * xx * variables[y]
 
     def _equationFromComponent(self, name, component):
         """Generates the formulas of a symbolic variable from the attributes
@@ -1925,7 +1931,9 @@ def _generateCCode(self):
 
         """
         for function in self.functions.keys():
-            self._writeFunctionFile(function)
+            if 'dont_generate_body' not in \
+                    self.functions[function].get('flags', []):
+                self._writeFunctionFile(function)
             if function in sparse_functions:
                 self._write_function_index(function, 'colptrs')
                 self._write_function_index(function, 'rowvals')
@@ -2082,8 +2090,7 @@ def _writeFunctionFile(self, function):
 
         # first generate the equations to make sure we have everything we
         # need in subsequent steps
-        if 'sparse' in self.functions[function] and \
-                self.functions[function]['sparse']:
+        if function in sparse_functions:
             symbol = self.model.sparseeq(function)
         elif not self.allow_reinit_fixpar_initcond \
                 and function == 'sx0_fixedParameters':
@@ -2120,9 +2127,8 @@ def _writeFunctionFile(self, function):
 
         # function body
         body = self._getFunctionBody(function, symbol)
-        if self.assume_pow_positivity \
-                and 'assume_pow_positivity' in self.functions[function].keys()\
-                and self.functions[function]['assume_pow_positivity']:
+        if self.assume_pow_positivity and 'assume_pow_positivity' \
+                in self.functions[function].get('flags', []):
             body = [re.sub(r'(^|\W)pow\(', r'\1amici::pos_pow(', line)
                     for line in body]
             # execute this twice to catch cases where the ending ( would be the
@@ -2310,9 +2316,13 @@ def _writeModelHeader(self):
             'NEVENT': '0',
             'NOBJECTIVE': '1',
             'NW': str(len(self.model.sym('w'))),
-            'NDWDP': str(len(self.model.eq('dwdp'))),
+            'NDWDP': str(len(self.model.sparsesym('dwdp'))),
             'NDWDX': str(len(self.model.sparsesym('dwdx'))),
             'NDXDOTDW': str(len(self.model.sparsesym('dxdotdw'))),
+            'NDXDOTDP_EXPLICIT': str(len(self.model.sparsesym(
+                'dxdotdp_explicit'))),
+            'NDXDOTDP_IMPLICIT': str(len(self.model.sparsesym(
+                'dxdotdp_implicit'))),
             'NDJYDY': 'std::vector<int>{%s}'
                       % ','.join(str(len(x))
                                  for x in self.model.sparsesym('dJydy')),
@@ -2349,7 +2359,8 @@ def _writeModelHeader(self):
 
         for fun in [
             'w', 'dwdp', 'dwdx', 'x_rdata', 'x_solver', 'total_cl', 'dxdotdw',
-            'dxdotdp', 'JSparse', 'JSparseB', 'dJydy'
+            'dxdotdp_explicit', 'dxdotdp_implicit', 'JSparse', 'JSparseB',
+            'dJydy'
         ]:
             tplData[f'{fun.upper()}_DEF'] = \
                 get_function_extern_declaration(fun, self.modelName)
@@ -2425,9 +2436,10 @@ def _writeCMakeFile(self):
         Raises:
 
         """
+
         sources = [self.modelName + '_' + function + '.cpp '
                    for function in self.functions.keys()
-                   if self.functions[function]['body'] is not None]
+                   if self.functions[function].get('body', None) is not None]
 
         # add extra source files for sparse matrices
         for function in sparse_functions:
@@ -2824,7 +2836,7 @@ def getSwitchStatement(condition, cases,
     return lines
 
 
-def csc_matrix(matrix, name, base_index=0):
+def csc_matrix(matrix, name, base_index=0, pattern_only=False):
     """Generates the sparse symbolic identifiers, symbolic identifiers,
     sparse matrix, column pointers and row values for a symbolic
     variable
@@ -2833,6 +2845,7 @@ def csc_matrix(matrix, name, base_index=0):
         matrix: dense matrix to be sparsified @type sp.Matrix
         name: name of the symbolic variable @type str
         base_index: index for first symbol name, defaults to 0
+        pattern_only: flag for computing sparsity pattern without whole matrix
 
     Returns:
         symbolColPtrs, symbolRowVals, sparseList, symbolList, sparseMatrix
@@ -2841,24 +2854,37 @@ def csc_matrix(matrix, name, base_index=0):
     """
     idx = 0
     symbol_name_idx = base_index
-    sparseMatrix = sp.zeros(matrix.rows, matrix.cols)
+
+    nrows, ncols = matrix.shape
+
+    if not pattern_only:
+        sparseMatrix = sp.zeros(nrows, ncols)
     symbolList = []
     sparseList = []
     symbolColPtrs = []
     symbolRowVals = []
-    for col in range(0, matrix.cols):
+
+    for col in range(0, ncols):
         symbolColPtrs.append(idx)
-        for row in range(0, matrix.rows):
+        for row in range(0, nrows):
             if not (matrix[row, col] == 0):
-                symbolName = f'{name}{symbol_name_idx}'
-                sparseMatrix[row, col] = sp.Symbol(symbolName, real=True)
-                symbolList.append(symbolName)
-                sparseList.append(matrix[row, col])
                 symbolRowVals.append(row)
                 idx += 1
+                symbolName = f'{name}{symbol_name_idx}'
+                symbolList.append(symbolName)
                 symbol_name_idx += 1
+                if not pattern_only:
+                    sparseMatrix[row, col] = sp.Symbol(symbolName, real=True)
+                    sparseList.append(matrix[row, col])
+
+    if idx == 0:
+        symbolColPtrs = []  # avoid bad memory access for empty matrices
+    else:
+        symbolColPtrs.append(idx)
 
-    symbolColPtrs.append(idx)
-    sparseList = sp.Matrix(sparseList)
+    if not pattern_only:
+        sparseList = sp.Matrix(sparseList)
+    else:
+        sparseMatrix = None
 
     return symbolColPtrs, symbolRowVals, sparseList, symbolList, sparseMatrix
diff --git a/src/abstract_model.cpp b/src/abstract_model.cpp
index ba89b0f53a..6b1076e20a 100644
--- a/src/abstract_model.cpp
+++ b/src/abstract_model.cpp
@@ -291,6 +291,10 @@ void AbstractModel::fdwdp(realtype *dwdp, const realtype t, const realtype *x,
                           const realtype *tcl, const realtype *stcl) {
 }
 
+void AbstractModel::fdwdp_colptrs(sunindextype *indexptrs) {}
+
+void AbstractModel::fdwdp_rowvals(sunindextype *indexvals) {}
+
 void AbstractModel::fdwdp(realtype *dwdp, const realtype t, const realtype *x,
                           const realtype *p, const realtype *k,
                           const realtype *h, const realtype *w,
diff --git a/src/model.cpp b/src/model.cpp
index d668512d65..977c569372 100644
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -138,16 +138,19 @@ Model::Model(const int nx_rdata, const int nxtrue_rdata, const int nx_solver,
              const int lbw, SecondOrderMode o2mode,
              const std::vector<realtype> &p, std::vector<realtype> k,
              const std::vector<int> &plist, std::vector<realtype> idlist,
-             std::vector<int> z2event)
+             std::vector<int> z2event, const bool pythonGenerated,
+             const int ndxdotdp_explicit, const int ndxdotdp_implicit)
     : nx_rdata(nx_rdata), nxtrue_rdata(nxtrue_rdata), nx_solver(nx_solver),
       nxtrue_solver(nxtrue_solver), ny(ny), nytrue(nytrue), nz(nz),
       nztrue(nztrue), ne(ne), nw(nw), ndwdx(ndwdx), ndwdp(ndwdp),
-      ndxdotdw(ndxdotdw), ndJydy(std::move(ndJydy)), nnz(nnz), nJ(nJ), ubw(ubw),
-      lbw(lbw), o2mode(o2mode), idlist(std::move(idlist)),
+      ndxdotdw(ndxdotdw), ndJydy(std::move(ndJydy)),
+      nnz(nnz), nJ(nJ), ubw(ubw), lbw(lbw), pythonGenerated(pythonGenerated),
+      ndxdotdp_explicit(ndxdotdp_explicit), ndxdotdp_implicit(ndxdotdp_implicit),
+      o2mode(o2mode), idlist(std::move(idlist)),
       J(nx_solver, nx_solver, nnz, CSC_MAT),
-      dxdotdw(nx_solver, nw, ndxdotdw, CSC_MAT),
+      dxdotdw(nx_solver, nw, ndxdotdw, CSC_MAT), dwdp(nw, p.size(), ndwdp, CSC_MAT),
       dwdx(nw, nx_solver, ndwdx, CSC_MAT), M(nx_solver, nx_solver), w(nw),
-      dwdp(ndwdp), x_rdata(nx_rdata, 0.0), sx_rdata(nx_rdata, 0.0), h(ne, 0.0),
+      x_rdata(nx_rdata, 0.0), sx_rdata(nx_rdata, 0.0), h(ne, 0.0),
       total_cl(nx_rdata - nx_solver),
       stotal_cl((nx_rdata - nx_solver) * p.size()), x_pos_tmp(nx_solver),
       unscaledParameters(p), originalParameters(p),
@@ -155,21 +158,22 @@ Model::Model(const int nx_rdata, const int nxtrue_rdata, const int nx_solver,
       stateIsNonNegative(nx_solver, false),
       pscale(std::vector<ParameterScaling>(p.size(), ParameterScaling::none)) {
 
-    // Can't use derivedClass::wasPythonGenerated() in ctor.
-    // Guess we are using Python if ndJydy is not empty
-    if (!this->ndJydy.empty()) {
-        if (static_cast<unsigned>(nytrue) != this->ndJydy.size())
-            throw std::runtime_error(
-                "Number of elements in ndJydy is not equal "
-                " nytrue.");
+   /* If Matlab wrapped: dxdotdp is a full AmiVector,
+      if Python wrapped: dxdotdp_explicit and dxdotdp_implicit are CSC matrices */
+    if (pythonGenerated) {
+        dxdotdp_explicit = SUNMatrixWrapper(nx_solver, p.size(), ndxdotdp_explicit, CSC_MAT);
+        dxdotdp_implicit = SUNMatrixWrapper(nx_solver, p.size(), ndxdotdp_implicit, CSC_MAT);
 
+        // also dJydy depends on the way of wrapping
+        if (static_cast<unsigned>(nytrue) != this->ndJydy.size())
+            throw std::runtime_error("Number of elements in ndJydy is not equal "
+                                     " nytrue.");
+        
         for (int iytrue = 0; iytrue < nytrue; ++iytrue)
-            dJydy.emplace_back(
-                SUNMatrixWrapper(nJ, ny, this->ndJydy[iytrue], CSC_MAT));
+            dJydy.emplace_back(SUNMatrixWrapper(nJ, ny, this->ndJydy[iytrue], CSC_MAT));
     } else {
         dJydy_matlab = std::vector<realtype>(nJ * nytrue * ny, 0.0);
     }
-
     requireSensitivitiesForAllParameters();
 }
 
@@ -177,6 +181,13 @@ bool operator==(const Model &a, const Model &b) {
     if (typeid(a) != typeid(b))
         return false;
 
+    bool bool_dxdotdp = true;
+    if (a.pythonGenerated && b.pythonGenerated)
+        bool_dxdotdp = (a.ndxdotdp_explicit == b.ndxdotdp_explicit) &&
+            (a.ndxdotdp_implicit == b.ndxdotdp_implicit);
+    if (a.pythonGenerated != b.pythonGenerated)
+        bool_dxdotdp = false;
+    
     return (a.nx_rdata == b.nx_rdata) && (a.nxtrue_rdata == b.nxtrue_rdata) &&
            (a.nx_solver == b.nx_solver) &&
            (a.nxtrue_solver == b.nxtrue_solver) && (a.ny == b.ny) &&
@@ -193,7 +204,7 @@ bool operator==(const Model &a, const Model &b) {
            (a.ts == b.ts) && (a.nmaxevent == b.nmaxevent) &&
            (a.pscale == b.pscale) &&
            (a.stateIsNonNegative == b.stateIsNonNegative) &&
-           (a.tstart == b.tstart);
+           (a.tstart == b.tstart) && bool_dxdotdp;
 }
 
 void Model::initialize(AmiVector &x, AmiVector &dx, AmiVectorArray &sx,
@@ -1225,8 +1236,9 @@ void Model::checkLLHBufferSize(std::vector<realtype> &sllh,
 }
 
 void Model::initializeVectors() {
-    dxdotdp = AmiVectorArray(nx_solver, nplist());
     sx0data.clear();
+    if (!pythonGenerated)
+        dxdotdp = AmiVectorArray(nx_solver, nplist());
 }
 
 void Model::fy(const realtype t, const AmiVector &x) {
@@ -1249,20 +1261,14 @@ void Model::fdydp(const realtype t, const AmiVector &x) {
         return;
 
     dydp.assign(ny * nplist(), 0.0);
-
     fw(t, x.data());
     fdwdp(t, x.data());
-
-    // if dwdp is not dense, fdydp will expect the full sparse array
-    realtype *dwdp_tmp = dwdp.data();
-    for (int ip = 0; ip < nplist(); ip++) {
-        // get dydp slice (ny) for current time and parameter
-        if (wasPythonGenerated() && nw)
-            dwdp_tmp = &dwdp.at(nw * ip);
-
+    
+    /* get dydp slice (ny) for current time and parameter */
+    for (int ip = 0; ip < nplist(); ip++)
         fdydp(&dydp.at(ip * ny), t, x.data(), unscaledParameters.data(),
-              fixedParameters.data(), h.data(), plist(ip), w.data(), dwdp_tmp);
-    }
+              fixedParameters.data(), h.data(), plist(ip), w.data(),
+              dwdp.data());
 
     if (alwaysCheckFinite) {
         app->checkFinite(dydp, "dydp");
@@ -1360,7 +1366,7 @@ void Model::fdJydy(const int it, const AmiVector &x, const ExpData &edata) {
     fy(edata.getTimepoint(it), x);
     fsigmay(it, &edata);
 
-    if (wasPythonGenerated()) {
+    if (pythonGenerated) {
         for (int iyt = 0; iyt < nytrue; iyt++) {
             dJydy[iyt].zero();
             fdJydy_colptrs(dJydy[iyt].indexptrs(), iyt);
@@ -1432,7 +1438,7 @@ void Model::fdJydp(const int it, const AmiVector &x, const ExpData &edata) {
         if (!edata.isSetObservedData(it, iyt))
             continue;
 
-        if (wasPythonGenerated()) {
+        if (pythonGenerated) {
             // dJydp = 1.0 * dJydp +  1.0 * dJydy * dydp
             for (int iplist = 0; iplist < nplist(); ++iplist) {
                 dJydy[iyt].multiply(
@@ -1471,7 +1477,7 @@ void Model::fdJydx(const int it, const AmiVector &x, const ExpData &edata) {
         //          M  K            K  N                       M  N
         //           lda             ldb                        ldc
 
-        if (wasPythonGenerated()) {
+        if (pythonGenerated) {
             for (int ix = 0; ix < nx_solver; ++ix) {
                 dJydy[iyt].multiply(
                     gsl::span<realtype>(&dJydx.at(ix * nJ), nJ),
@@ -1796,21 +1802,18 @@ void Model::fw(const realtype t, const realtype *x) {
 
 void Model::fdwdp(const realtype t, const realtype *x) {
     fw(t, x);
-    std::fill(dwdp.begin(), dwdp.end(), 0.0);
-    if (wasPythonGenerated()) {
-        realtype *stcl = nullptr;
+    if (pythonGenerated) {
+        dwdp.reset();
 
         // avoid bad memory access when slicing
         if (!nw)
             return;
-
-        for (int ip = 0; ip < nplist(); ++ip) {
-            if (ncl())
-                stcl = &stotal_cl.at(plist(ip) * ncl());
-            fdwdp(&dwdp.at(nw * ip), t, x, unscaledParameters.data(),
-                  fixedParameters.data(), h.data(), w.data(), total_cl.data(),
-                  stcl, plist_[ip]);
-        }
+        
+        fdwdp_colptrs(dwdp.indexptrs());
+        fdwdp_rowvals(dwdp.indexvals());
+        fdwdp(dwdp.data(), t, x, unscaledParameters.data(), fixedParameters.data(),
+              h.data(), w.data(), total_cl.data(), stotal_cl.data());
+        
     } else {
         // matlab generated
         fdwdp(dwdp.data(), t, x, unscaledParameters.data(),
@@ -1819,17 +1822,18 @@ void Model::fdwdp(const realtype t, const realtype *x) {
     }
 
     if (alwaysCheckFinite) {
-        app->checkFinite(dwdp, "dwdp");
+        app->checkFinite(gsl::make_span(dwdp.get()), "dwdp");
     }
 }
 
 void Model::fdwdx(const realtype t, const realtype *x) {
     fw(t, x);
     dwdx.reset();
+    
+    fdwdx_colptrs(dwdx.indexptrs());
+    fdwdx_rowvals(dwdx.indexvals());
     fdwdx(dwdx.data(), t, x, unscaledParameters.data(), fixedParameters.data(),
           h.data(), w.data(), total_cl.data());
-    fdwdx_colptrs(dwdx.indexptrs());
-    fdwdx_rowvals(dwdx.indexptrs());
 
     if (alwaysCheckFinite) {
         app->checkFinite(gsl::make_span(dwdx.get()), "dwdx");
diff --git a/src/model_dae.cpp b/src/model_dae.cpp
index 339a802b77..d0384ae0be 100644
--- a/src/model_dae.cpp
+++ b/src/model_dae.cpp
@@ -34,14 +34,14 @@ void Model_DAE::fJSparse(realtype t, realtype cj, N_Vector x, N_Vector dx,
              fixedParameters.data(), h.data(), cj, N_VGetArrayPointer(dx),
              w.data(), dwdx.data());
 }
-    
+
 void Model_DAE::fJSparseB(SUNMatrixContent_Sparse   /*JSparseB*/,
-                       const realtype  /*t*/, const realtype * /*x*/,
-                       const double * /*p*/, const double * /*k*/,
-                       const realtype * /*h*/, const realtype   /*cj*/,
-                       const realtype * /*xB*/, const realtype * /*dx*/,
-                       const realtype * /*dxB*/, const realtype * /*w*/,
-                       const realtype * /*dwdx*/) {
+                          const realtype  /*t*/, const realtype * /*x*/,
+                          const double * /*p*/, const double * /*k*/,
+                          const realtype * /*h*/, const realtype   /*cj*/,
+                          const realtype * /*xB*/, const realtype * /*dx*/,
+                          const realtype * /*dxB*/, const realtype * /*w*/,
+                          const realtype * /*dwdx*/) {
     throw AmiException("Requested functionality is not supported as %s "
                        "is not implemented for this model!",
                        __func__);
@@ -104,12 +104,20 @@ void Model_DAE::fJDiag(const realtype t, AmiVector &JDiag,
 void Model_DAE::fdxdotdp(const realtype t, const N_Vector x,
                          const N_Vector dx) {
     auto x_pos = computeX_pos(x);
-    fdwdp(t, N_VGetArrayPointer(x_pos));
-    for (int ip = 0; ip < nplist(); ip++) {
-        N_VConst(0.0, dxdotdp.getNVector(ip));
-        fdxdotdp(dxdotdp.data(ip), t, N_VGetArrayPointer(x_pos),
-                 unscaledParameters.data(), fixedParameters.data(), h.data(),
-                 plist_[ip], N_VGetArrayPointer(dx), w.data(), dwdp.data());
+
+    if (pythonGenerated) {
+        // python generated, not yet implemented for DAEs
+        throw AmiException("Wrapping of DAEs is not yet implemented from Python");
+    } else {
+        // matlab generated
+        fdwdp(t, N_VGetArrayPointer(x_pos)); // Why is it x_pos here and x ind model_ode.cpp?
+
+        for (int ip = 0; ip < nplist(); ip++) {
+            N_VConst(0.0, dxdotdp.getNVector(ip));
+            fdxdotdp(dxdotdp.data(ip), t, N_VGetArrayPointer(x_pos),
+                     unscaledParameters.data(), fixedParameters.data(), h.data(),
+                     plist_[ip], N_VGetArrayPointer(dx), w.data(), dwdp.data());
+        }
     }
 }
 
@@ -243,7 +251,15 @@ void Model_DAE::fsxdot(realtype t, N_Vector x, N_Vector dx, int ip, N_Vector sx,
         fdxdotdp(t, x, dx);
         fJSparse(t, 0.0, x, dx, J.get());
     }
-    N_VScale(1.0, dxdotdp.getNVector(ip), sxdot);
+
+    if (pythonGenerated) {
+        // python generated, not yet implemented for DAEs
+        throw AmiException("Wrapping of DAEs is not yet implemented from Python");
+    } else {
+        /* copy dxdotdp over */
+        N_VScale(1.0, dxdotdp.getNVector(ip), sxdot);
+    }
+
     J.multiply(sxdot, sx);
     N_VScale(-1.0, sdx, sdx);
     M.multiply(sxdot, sdx);
diff --git a/src/model_header.ODE_template.h b/src/model_header.ODE_template.h
index bde8831f89..df0523811f 100644
--- a/src/model_header.ODE_template.h
+++ b/src/model_header.ODE_template.h
@@ -46,13 +46,20 @@ TPL_DJYDY_DEF
 TPL_DJYDY_COLPTRS_DEF
 TPL_DJYDY_ROWVALS_DEF
 TPL_DWDP_DEF
+TPL_DWDP_COLPTRS_DEF
+TPL_DWDP_ROWVALS_DEF
 TPL_DWDX_DEF
 TPL_DWDX_COLPTRS_DEF
 TPL_DWDX_ROWVALS_DEF
 TPL_DXDOTDW_DEF
 TPL_DXDOTDW_COLPTRS_DEF
 TPL_DXDOTDW_ROWVALS_DEF
-TPL_DXDOTDP_DEF
+TPL_DXDOTDP_EXPLICIT_DEF
+TPL_DXDOTDP_EXPLICIT_COLPTRS_DEF
+TPL_DXDOTDP_EXPLICIT_ROWVALS_DEF
+TPL_DXDOTDP_IMPLICIT_COLPTRS_DEF
+TPL_DXDOTDP_IMPLICIT_ROWVALS_DEF
+
 extern void dydx_TPL_MODELNAME(realtype *dydx, const realtype t,
                                const realtype *x, const realtype *p,
                                const realtype *k, const realtype *h,
@@ -124,7 +131,10 @@ class Model_TPL_MODELNAME : public amici::Model_ODE {
               std::vector<realtype>{TPL_FIXED_PARAMETERS}, // fixedParameters
               std::vector<int>{},                          // plist
               std::vector<realtype>(TPL_NX_SOLVER, 0.0),   // idlist
-              std::vector<int>{}                           // z2event
+              std::vector<int>{},                          // z2event
+              true,                                        // pythonGenerated
+              TPL_NDXDOTDP_EXPLICIT,                       // ndxdotdp_explicit
+              TPL_NDXDOTDP_IMPLICIT                        // ndxdotdp_implicit
           ) {}
 
     /**
@@ -444,17 +454,31 @@ class Model_TPL_MODELNAME : public amici::Model_ODE {
     TPL_DJYDY_ROWVALS_IMPL
 
     TPL_DWDP_IMPL
+    
+    TPL_DWDP_COLPTRS_IMPL
+    
+    TPL_DWDP_ROWVALS_IMPL
 
     TPL_DWDX_IMPL
 
+    TPL_DWDX_COLPTRS_IMPL
+    
+    TPL_DWDX_ROWVALS_IMPL
+    
     TPL_DXDOTDW_IMPL
-
     TPL_DXDOTDW_COLPTRS_IMPL
-
     TPL_DXDOTDW_ROWVALS_IMPL
 
-    TPL_DXDOTDP_IMPL
+    TPL_DXDOTDP_EXPLICIT_IMPL
+    
+    TPL_DXDOTDP_EXPLICIT_COLPTRS_IMPL
+    
+    TPL_DXDOTDP_EXPLICIT_ROWVALS_IMPL
 
+    TPL_DXDOTDP_IMPLICIT_COLPTRS_IMPL
+    
+    TPL_DXDOTDP_IMPLICIT_ROWVALS_IMPL
+    
     /** model specific implementation of fdydx
      * @param dydx partial derivative of observables y w.r.t. model states x
      * @param t current time
@@ -801,10 +825,6 @@ class Model_TPL_MODELNAME : public amici::Model_ODE {
     virtual const std::string getAmiciCommit() const override {
         return "TPL_AMICI_COMMIT_STRING";
     }
-
-    virtual bool wasPythonGenerated() const override {
-        return true;
-    }
 };
 
 #endif /* _amici_TPL_MODELNAME_h */
diff --git a/src/model_ode.cpp b/src/model_ode.cpp
index 4284c801d3..02389c5348 100644
--- a/src/model_ode.cpp
+++ b/src/model_ode.cpp
@@ -1,3 +1,4 @@
+#include <amici/sundials_matrix_wrapper.h>
 #include "amici/model_ode.h"
 #include "amici/solver_cvodes.h"
 
@@ -27,7 +28,7 @@ void Model_ODE::fJSparse(realtype t, N_Vector x, SUNMatrix J) {
     auto x_pos = computeX_pos(x);
     fdwdx(t, N_VGetArrayPointer(x_pos));
     SUNMatZero(J);
-    if (wasPythonGenerated()) {
+    if (pythonGenerated) {
         fJSparse(SM_DATA_S(J), t, N_VGetArrayPointer(x_pos),
                  unscaledParameters.data(), fixedParameters.data(), h.data(),
                  w.data(), dwdx.data());
@@ -87,30 +88,41 @@ void Model_ODE::fJDiag(const realtype t, AmiVector &JDiag,
 }
 
 void Model_ODE::fdxdotdw(const realtype t, const N_Vector x) {
-    dxdotdw.reset();
-    auto x_pos = computeX_pos(x);
-    fdxdotdw(dxdotdw.data(), t, N_VGetArrayPointer(x_pos),
-             unscaledParameters.data(), fixedParameters.data(), h.data(),
-             w.data());
-    fdxdotdw_colptrs(dxdotdw.indexptrs());
-    fdxdotdw_rowvals(dxdotdw.indexvals());
+    if (nw > 0 && ndxdotdw > 0) {
+        auto x_pos = computeX_pos(x);
+        dxdotdw.reset();
+        fdxdotdw_colptrs(dxdotdw.indexptrs());
+        fdxdotdw_rowvals(dxdotdw.indexvals());
+        fdxdotdw(dxdotdw.data(), t, N_VGetArrayPointer(x_pos),
+                 unscaledParameters.data(), fixedParameters.data(), h.data(),
+                 w.data());
+    }
 }
 
 void Model_ODE::fdxdotdp(const realtype t, const N_Vector x) {
     auto x_pos = computeX_pos(x);
-    fdwdp(t, N_VGetArrayPointer(x));
-    if (wasPythonGenerated()) {
+    fdwdp(t, N_VGetArrayPointer(x_pos));
+    fdxdotdw(t, x_pos);
+
+    if (pythonGenerated) {
         // python generated
-        fdxdotdw(t, x);
-        for (int ip = 0; ip < nplist(); ip++) {
-            N_VConst(0.0, dxdotdp.getNVector(ip));
-            fdxdotdp(dxdotdp.data(ip), t, N_VGetArrayPointer(x_pos),
-                     unscaledParameters.data(), fixedParameters.data(),
-                     h.data(), plist_[ip], w.data());
-            if (nw > 0)
-                dxdotdw.multiply(
-                    gsl::span<realtype>(dxdotdp.data(ip), nx_solver),
-                    gsl::span<const realtype>(&dwdp.at(nw * ip), nw));
+        if (ndxdotdp_explicit > 0) {
+            dxdotdp_explicit.reset();
+            fdxdotdp_explicit_colptrs(dxdotdp_explicit.indexptrs());
+            fdxdotdp_explicit_rowvals(dxdotdp_explicit.indexvals());
+            fdxdotdp_explicit(dxdotdp_explicit.data(),
+                              t, N_VGetArrayPointer(x_pos),
+                              unscaledParameters.data(),
+                              fixedParameters.data(),
+                              h.data(), w.data());
+        }
+        if (nw > 0 && ndxdotdp_implicit > 0) {
+            /* Sparse matrix multiplication
+             dxdotdp_implicit += dxdotdw * dwdp */
+            dxdotdp_implicit.reset();
+            fdxdotdp_implicit_colptrs(dxdotdp_implicit.indexptrs());
+            fdxdotdp_implicit_rowvals(dxdotdp_implicit.indexvals());
+            dxdotdw.sparse_multiply(&dxdotdp_implicit, &dwdp);
         }
     } else {
         // matlab generated
@@ -232,10 +244,34 @@ void Model_ODE::fdxdotdp(realtype * /*dxdotdp*/, const realtype /*t*/,
                        __func__); // not implemented
 }
 
-void Model_ODE::fdxdotdp(realtype * /*dxdotdp*/, const realtype /*t*/,
-                         const realtype * /*x*/, const realtype * /*p*/,
-                         const realtype * /*k*/, const realtype * /*h*/,
-                         const int /*ip*/, const realtype * /*w*/) {
+void Model_ODE::fdxdotdp_explicit(realtype * /*dxdotdp_explicit*/, const realtype /*t*/,
+                                  const realtype * /*x*/, const realtype * /*p*/,
+                                  const realtype * /*k*/, const realtype * /*h*/,
+                                  const realtype * /*w*/) {
+    throw AmiException("Requested functionality is not supported as %s "
+                       "is not implemented for this model!",
+                       __func__); // not implemented
+}
+
+void Model_ODE::fdxdotdp_explicit_colptrs(sunindextype * /*indexptrs*/) {
+    throw AmiException("Requested functionality is not supported as %s "
+                       "is not implemented for this model!",
+                       __func__); // not implemented
+}
+
+void Model_ODE::fdxdotdp_explicit_rowvals(sunindextype * /*indexvals*/) {
+    throw AmiException("Requested functionality is not supported as %s "
+                       "is not implemented for this model!",
+                       __func__); // not implemented
+}
+
+void Model_ODE::fdxdotdp_implicit_colptrs(sunindextype * /*indexptrs*/) {
+    throw AmiException("Requested functionality is not supported as %s "
+                       "is not implemented for this model!",
+                       __func__); // not implemented
+}
+
+void Model_ODE::fdxdotdp_implicit_rowvals(sunindextype * /*indexvals*/) {
     throw AmiException("Requested functionality is not supported as %s "
                        "is not implemented for this model!",
                        __func__); // not implemented
@@ -277,7 +313,7 @@ void Model_ODE::fJSparseB(realtype t, N_Vector x, N_Vector xB,
     auto x_pos = computeX_pos(x);
     fdwdx(t, N_VGetArrayPointer(x_pos));
     SUNMatZero(JB);
-    if (wasPythonGenerated()) {
+    if (pythonGenerated) {
         fJSparseB(SM_DATA_S(JB), t, N_VGetArrayPointer(x_pos),
                   unscaledParameters.data(), fixedParameters.data(), h.data(),
                   N_VGetArrayPointer(xB), w.data(), dwdx.data());
@@ -314,17 +350,29 @@ void Model_ODE::fxBdot(realtype t, N_Vector x, N_Vector xB, N_Vector xBdot) {
 }
 
 void Model_ODE::fqBdot(realtype t, N_Vector x, N_Vector xB, N_Vector qBdot) {
+    /* initialize with zeros */
     N_VConst(0.0, qBdot);
     fdxdotdp(t, x);
-    for (int ip = 0; ip < nplist(); ip++) {
-        for (int ix = 0; ix < nxtrue_solver; ix++)
-            NV_Ith_S(qBdot, ip * nJ) -= NV_Ith_S(xB, ix) * dxdotdp.at(ix, ip);
-        // second order part
-        for (int iJ = 1; iJ < nJ; iJ++)
+
+    if (pythonGenerated) {
+        /* call multiplication */
+        if (ndxdotdp_explicit > 0)
+            dxdotdp_explicit.multiply(qBdot, xB, plist_, true);
+        if (ndxdotdp_implicit > 0)
+            dxdotdp_implicit.multiply(qBdot, xB, plist_, true);
+        N_VScale(-1.0, qBdot, qBdot);
+    } else {
+        /* was matlab generated */
+        for (int ip = 0; ip < nplist(); ip++) {
             for (int ix = 0; ix < nxtrue_solver; ix++)
-                NV_Ith_S(qBdot, ip * nJ + iJ) -=
+                NV_Ith_S(qBdot, ip * nJ) -= NV_Ith_S(xB, ix) * dxdotdp.at(ix, ip);
+            // second order part
+            for (int iJ = 1; iJ < nJ; iJ++)
+                for (int ix = 0; ix < nxtrue_solver; ix++)
+                    NV_Ith_S(qBdot, ip * nJ + iJ) -=
                     NV_Ith_S(xB, ix) * dxdotdp.at(ix + iJ * nxtrue_solver, ip) +
                     NV_Ith_S(xB, ix + iJ * nxtrue_solver) * dxdotdp.at(ix, ip);
+        }
     }
 }
 
@@ -337,13 +385,43 @@ void Model_ODE::fsxdot(const realtype t, const AmiVector &x,
 
 void Model_ODE::fsxdot(realtype t, N_Vector x, int ip, N_Vector sx,
                        N_Vector sxdot) {
+
+    /* sxdot is just the total derivative d(xdot)dp,
+     so we just call dxdotdp and copy the stuff over */
     if (ip == 0) {
         // we only need to call this for the first parameter index will be
         // the same for all remaining
         fdxdotdp(t, x);
         fJSparse(t, x, J.get());
     }
-    N_VScale(1.0, dxdotdp.getNVector(ip), sxdot);
+    if (pythonGenerated) {
+        /* copy dxdotdp and the implicit version over */
+        // initialize
+        N_VConst(0.0, sxdot);
+        realtype *sxdot_tmp = N_VGetArrayPointer(sxdot);
+
+        // copy explicit version
+        if (ndxdotdp_explicit > 0) {
+            auto col_exp = dxdotdp_explicit.indexptrs();
+            auto row_exp = dxdotdp_explicit.indexvals();
+            auto data_exp_ptr = dxdotdp_explicit.data();
+            for (sunindextype i = col_exp[plist(ip)]; i < col_exp[plist(ip) + 1]; ++i)
+                sxdot_tmp[row_exp[i]] += data_exp_ptr[i];
+        }
+
+        // copy implicit version
+        if (ndxdotdp_implicit > 0) {
+            auto col_imp = dxdotdp_implicit.indexptrs();
+            auto row_imp = dxdotdp_implicit.indexvals();
+            auto data_imp_ptr = dxdotdp_implicit.data();
+            for (sunindextype i = col_imp[plist(ip)]; i < col_imp[plist(ip) + 1]; ++i)
+                sxdot_tmp[row_imp[i]] += data_imp_ptr[i];
+        }
+
+    } else {
+        /* copy dxdotdp over */
+        N_VScale(1.0, dxdotdp.getNVector(ip), sxdot);
+    }
     J.multiply(sxdot, sx);
 }
 
diff --git a/src/newton_solver.cpp b/src/newton_solver.cpp
index ca86694b44..198a4a1c9b 100644
--- a/src/newton_solver.cpp
+++ b/src/newton_solver.cpp
@@ -96,14 +96,41 @@ void NewtonSolver::getStep(int ntry, int nnewt, AmiVector &delta) {
 
 void NewtonSolver::computeNewtonSensis(AmiVectorArray &sx) {
     prepareLinearSystem(0, -1);
-
     model->fdxdotdp(*t, *x, dx);
-    for (int ip = 0; ip < model->nplist(); ip++) {
 
-        for (int ix = 0; ix < model->nx_solver; ix++) {
-            sx.at(ix,ip) = -model->dxdotdp.at(ix, ip);
+    if (model->pythonGenerated) {
+        for (int ip = 0; ip < model->nplist(); ip++) {
+            N_VConst(0.0, sx.getNVector(ip));
+
+            // copy explicit version
+            if (model->ndxdotdp_explicit > 0) {
+                auto col = model->dxdotdp_explicit.indexptrs();
+                auto row = model->dxdotdp_explicit.indexvals();
+                auto data_ptr = model->dxdotdp_explicit.data();
+                for (sunindextype iCol = col[model->plist(ip)];
+                     iCol < col[model->plist(ip) + 1]; ++iCol)
+                    sx.at(row[iCol], ip) -= data_ptr[iCol];
+            }
+
+            // copy implicit version
+            if (model->ndxdotdp_implicit > 0) {
+                auto col = model->dxdotdp_implicit.indexptrs();
+                auto row = model->dxdotdp_implicit.indexvals();
+                auto data_ptr = model->dxdotdp_implicit.data();
+                for (sunindextype iCol = col[model->plist(ip)];
+                     iCol < col[model->plist(ip) + 1]; ++iCol)
+                    sx.at(row[iCol], ip) -= data_ptr[iCol];
+            }
+
+            solveLinearSystem(sx[ip]);
+        }
+    } else {
+        for (int ip = 0; ip < model->nplist(); ip++) {
+            for (int ix = 0; ix < model->nx_solver; ix++)
+                sx.at(ix,ip) = -model->dxdotdp.at(ix, ip);
+
+            solveLinearSystem(sx[ip]);
         }
-        solveLinearSystem(sx[ip]);
     }
 }
 /* ------------------------------------------------------------------------- */
@@ -215,7 +242,8 @@ void NewtonSolverIterative::prepareLinearSystem(int ntry, int nnewt) {
     newton_try = ntry;
     i_newton = nnewt;
     if (nnewt == -1) {
-        throw AmiException("Linear solver SPBCG does not support sensitivity computation for steady state problems.");
+        throw AmiException("Linear solver SPBCG does not support sensitivity "
+                           "computation for steady state problems.");
     }
 }
 
diff --git a/src/sundials_matrix_wrapper.cpp b/src/sundials_matrix_wrapper.cpp
index efd17ba9da..496972f08f 100644
--- a/src/sundials_matrix_wrapper.cpp
+++ b/src/sundials_matrix_wrapper.cpp
@@ -142,6 +142,19 @@ sunindextype SUNMatrixWrapper::columns() const {
     }
 }
 
+sunindextype SUNMatrixWrapper::nonzeros() const {
+    if (!matrix)
+        return 0;
+
+    switch (SUNMatGetID(matrix)) {
+    case SUNMATRIX_SPARSE:
+        return SM_NNZ_S(matrix);
+    default:
+        throw std::domain_error("Non-zeros property only available for "
+                                "sparse matrices");
+    }
+}
+
 sunindextype *SUNMatrixWrapper::indexvals() const {
     return indexvals_ptr;
 }
@@ -166,7 +179,8 @@ void SUNMatrixWrapper::multiply(N_Vector c, const_N_Vector b) const {
              gsl::make_span<const realtype>(NV_DATA_S(b), NV_LENGTH_S(b)));
 }
 
-void SUNMatrixWrapper::multiply(gsl::span<realtype> c, gsl::span<const realtype> b) const {
+void SUNMatrixWrapper::multiply(gsl::span<realtype> c,
+                                gsl::span<const realtype> b) const {
     if (!matrix)
         return;
 
@@ -219,6 +233,135 @@ void SUNMatrixWrapper::multiply(gsl::span<realtype> c, gsl::span<const realtype>
     }
 }
 
+void SUNMatrixWrapper::multiply(N_Vector c,
+                                const N_Vector b,
+                                gsl::span <const int> cols,
+                                bool transpose) const {
+    multiply(gsl::make_span<realtype>(NV_DATA_S(c), NV_LENGTH_S(c)),
+             gsl::make_span<const realtype>(NV_DATA_S(b), NV_LENGTH_S(b)),
+             cols, transpose);
+}
+
+void SUNMatrixWrapper::multiply(gsl::span<realtype> c,
+                                gsl::span<const realtype> b,
+                                gsl::span<const int> cols,
+                                bool transpose) const {
+    if (!matrix)
+        return;
+
+    sunindextype nrows = rows();
+    sunindextype ncols = columns();
+
+    if (transpose) {
+        if (c.size() != cols.size())
+            throw std::invalid_argument("Dimension mismatch between number of cols "
+                                        "in index vector cols (" + std::to_string(ncols)
+                                        + ") and elements in c (" + std::to_string(c.size())
+                                        + " ), when using transposed A");
+
+        if (static_cast<sunindextype>(b.size()) != nrows)
+            throw std::invalid_argument("Dimension mismatch between number of rows "
+                                        "in A (" + std::to_string(nrows) + ") and "
+                                        "elements in b (" + std::to_string(b.size())
+                                        + "), when using transposed A");
+    } else {
+        if (static_cast<sunindextype>(c.size()) != nrows)
+            throw std::invalid_argument("Dimension mismatch between number of rows "
+                                        "in A (" + std::to_string(nrows) + ") and "
+                                        "elements in c (" + std::to_string(c.size())
+                                        + ")");
+
+        if (static_cast<sunindextype>(b.size()) != ncols)
+            throw std::invalid_argument("Dimension mismatch between number of cols "
+                                        "in A (" + std::to_string(ncols)
+                                        + ") and elements in b ("
+                                        + std::to_string(b.size()) + ")");
+    }
+
+    if (SUNMatGetID(matrix) != SUNMATRIX_SPARSE)
+        throw std::invalid_argument("Reordered multiply only implemented for "
+                                    "sparse matrices, but A is not sparse");
+
+    if (sparsetype() != CSC_MAT)
+        throw std::invalid_argument("Reordered multiply only implemented for "
+                                    "matrix type CSC, but A is not of type CSC");
+
+    /* Carry out actual multiplication */
+    if (transpose) {
+        for (int i = 0; i < (int)cols.size(); ++i)
+            for (sunindextype k = indexptrs_ptr[cols[i]];
+                 k < indexptrs_ptr[cols[i] + 1]; ++k)
+                c[i] += data_ptr[k] * b[indexvals_ptr[k]];
+    } else {
+        for (sunindextype i = 0; i < ncols; ++i)
+            for (sunindextype k = indexptrs_ptr[cols[i]];
+                 k < indexptrs_ptr[cols[i] + 1]; ++k)
+                c[indexvals_ptr[k]] += data_ptr[k] * b[i];
+    }
+}
+
+
+void SUNMatrixWrapper::sparse_multiply(SUNMatrixWrapper *C,
+                                       SUNMatrixWrapper *B) const {
+    if (!matrix)
+        return;
+
+    sunindextype nrows = rows();
+    sunindextype ncols = columns();
+
+    if (SUNMatGetID(matrix) != SUNMATRIX_SPARSE)
+        throw std::invalid_argument("Matrix A not sparse in sparse_multiply");
+
+    if (sparsetype() != CSC_MAT)
+        throw std::invalid_argument("Matrix A not of type CSC_MAT");
+
+    if (SUNMatGetID(B->matrix) != SUNMATRIX_SPARSE)
+        throw std::invalid_argument("Matrix B not sparse in sparse_multiply");
+
+    if (B->sparsetype() != CSC_MAT)
+        throw std::invalid_argument("Matrix B not of type CSC_MAT");
+
+    if (SUNMatGetID(C->matrix) != SUNMATRIX_SPARSE)
+        throw std::invalid_argument("Matrix C not sparse in sparse_multiply");
+
+    if (C->sparsetype() != CSC_MAT)
+        throw std::invalid_argument("Matrix C not of type CSC_MAT");
+
+    if (C->rows() != nrows)
+        throw std::invalid_argument("Dimension mismatch between number of rows "
+                                    "in A (" + std::to_string(nrows) + ") and "
+                                    "number of rows in C ("
+                                    + std::to_string((int)C->rows()) + ")");
+
+    if (B->rows() != ncols)
+        throw std::invalid_argument("Dimension mismatch between number of rows "
+                                    "in A (" + std::to_string(ncols)
+                                    + ") and number of cols in B ("
+                                    + std::to_string((int)B->rows()) + ")");
+
+    /* Carry out actual multiplication */
+    sunindextype iC_data = 0;
+    for (sunindextype iC_col = 0; iC_col < C->columns(); ++iC_col) {
+        for(sunindextype iC_row = C->indexptrs_ptr[iC_col];
+            iC_row < C->indexptrs_ptr[iC_col + 1]; ++iC_row) {
+            // Current entry in C: (C.indexvals[iC_row], iC_col)
+            for(sunindextype iB_row = B->indexptrs_ptr[iC_col];
+                iB_row < B->indexptrs_ptr[iC_col + 1]; ++iB_row) {
+                // Loop over column iC_col in B
+                sunindextype iA_col = B->indexvals_ptr[iB_row];
+                for (sunindextype iA_row = indexptrs_ptr[iA_col];
+                     iA_row < indexptrs_ptr[iA_col + 1]; ++iA_row)
+                    // loop over entries in column col_in_A
+                    // if two entries match together, carry out multiplication
+                    if (indexvals_ptr[iA_row] == C->indexvals_ptr[iC_row])
+                        C->data_ptr[iC_data] +=
+                            data_ptr[iA_row] * B->data_ptr[iB_row];
+            }
+            iC_data++;
+        }
+    }
+}
+
 void SUNMatrixWrapper::zero()
 {
     if(int res = SUNMatZero(matrix))
@@ -247,7 +390,7 @@ void SUNMatrixWrapper::update_ptrs() {
             data_ptr = SM_DATA_B(matrix);
         break;
     case SUNMATRIX_CUSTOM:
-        throw std::domain_error("Amici currently does not support"
+        throw std::domain_error("Amici currently does not support "
                                 "custom matrix types.");
     }
 }
diff --git a/tests/performance/reference.yml b/tests/performance/reference.yml
index b392b8bb6f..f6ab156b93 100644
--- a/tests/performance/reference.yml
+++ b/tests/performance/reference.yml
@@ -1,8 +1,10 @@
 # Reference wall times (seconds) with some buffer
 create_sdist: 25
-install_sdist: 120
-petab_import: 1680 # ^= 28'
+install_sdist: 140
+petab_import: 1920 # ^= 32'
 install_model: 400
 forward_simulation: 2
 forward_sensitivities: 6
-adjoint_sensitivities: 120 # TODO reduce when #849 fixed
+adjoint_sensitivities: 4
+forward_simulation_non_optimal_parameters: 2
+adjoint_sensitivities_non_optimal_parameters: 5
diff --git a/tests/performance/test.py b/tests/performance/test.py
index 8ca43c4733..535a18f727 100755
--- a/tests/performance/test.py
+++ b/tests/performance/test.py
@@ -25,6 +25,16 @@ def main():
     elif arg == 'adjoint_sensitivities':
         solver.setSensitivityMethod(amici.SensitivityMethod_adjoint)
         solver.setSensitivityOrder(amici.SensitivityOrder_first)
+    elif arg == 'forward_simulation_non_optimal_parameters':
+        tmpPar = model.getParameters()
+        model.setParameters([0.1 for iPar in tmpPar])
+        solver.setSensitivityMethod(amici.SensitivityMethod_none)
+        solver.setSensitivityOrder(amici.SensitivityOrder_none)
+    elif arg == 'adjoint_sensitivities_non_optimal_parameters':
+        tmpPar = model.getParameters()
+        model.setParameters([0.1 for iPar in tmpPar])
+        solver.setSensitivityMethod(amici.SensitivityMethod_adjoint)
+        solver.setSensitivityOrder(amici.SensitivityOrder_first)
     else:
         print("Unknown argument:", arg)
         sys.exit(1)
diff --git a/tests/testMisc.py b/tests/testMisc.py
index 76c7eecc53..7aa21e7560 100755
--- a/tests/testMisc.py
+++ b/tests/testMisc.py
@@ -56,7 +56,7 @@ def test_csc_matrix_empty(self):
         symbolColPtrs, symbolRowVals, sparseList, symbolList, sparseMatrix = \
             amici.ode_export.csc_matrix(matrix, 'a')
         print(symbolColPtrs, symbolRowVals, sparseList, symbolList, sparseMatrix)
-        assert symbolColPtrs == [0]
+        assert symbolColPtrs == []
         assert symbolRowVals == []
         assert sparseList == sp.Matrix(0, 0, [])
         assert symbolList == []
diff --git a/tests/testPYSB.py b/tests/testPYSB.py
index 1092fb2012..a60670b246 100755
--- a/tests/testPYSB.py
+++ b/tests/testPYSB.py
@@ -93,10 +93,12 @@ def test_compare_to_sbml_import(self):
         rdata_sbml = get_results(model_sbml, edata)
 
         # check if preequilibration fixed parameters are correctly applied:
-        for rdata, model in zip([rdata_sbml, rdata_pysb],
-                                [model_sbml, model_pysb]):
+        for rdata, model, importer in zip([rdata_sbml, rdata_pysb],
+                                          [model_sbml, model_pysb],
+                                          ['sbml', 'pysb']):
             # check equilibrium fixed parameters
-            with self.subTest(fixed_pars='preequilibration'):
+            with self.subTest(fixed_pars='preequilibration',
+                              importer=importer):
                 self.assertTrue(np.isclose(
                     [
                         sum(rdata["x_ss"][[1, 3]]),
@@ -111,7 +113,8 @@ def test_compare_to_sbml_import(self):
                     model.getParameterByName('PROT_0'),
                     atol=1e-6, rtol=1e-6
                 ))
-            with self.subTest(fixed_pars='simulation'):
+            with self.subTest(fixed_pars='presimulation',
+                              importer=importer):
                 # check reinitialization with fixed parameter after
                 # presimulation
                 self.assertTrue(np.isclose(