/* $Id$ */ # ifndef CPPAD_OPT_VAL_HES_INCLUDED # define CPPAD_OPT_VAL_HES_INCLUDED /* -------------------------------------------------------------------------- CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-14 Bradley M. Bell CppAD is distributed under multiple licenses. This distribution is under the terms of the GNU General Public License Version 3. A copy of this license is included in the COPYING file of this distribution. Please visit http://www.coin-or.org/CppAD/ for information on other licenses. -------------------------------------------------------------------------- */ /* $begin opt_val_hes$$ $spell hes sy Jacobian hes signdet jac Bradley const CppAD $$ $index Jacobian$$ $index Hessian$$ $index optimal value$$ $index value$$ $index opt_val_hes$$ $section Jacobian and Hessian of Optimal Values$$ $head Syntax$$ $icode%signdet% = opt_val_hes(%x%, %y%, %fun%, %jac%, %hes%)%$$ $head See Also$$ $cref BenderQuad$$ $head Reference$$ Algorithmic differentiation of implicit functions and optimal values, Bradley M. Bell and James V. Burke, Advances in Automatic Differentiation, 2008, Springer. $head Purpose$$ We are given a function $latex S : \B{R}^n \times \B{R}^m \rightarrow \B{R}^\ell$$ and we define $latex F : \B{R}^n \times \B{R}^m \rightarrow \B{R}$$ and $latex V : \B{R}^n \rightarrow \B{R} $$ by $latex \[ \begin{array}{rcl} F(x, y) & = & \sum_{k=0}^{\ell-1} S_k ( x , y) \\ V(x) & = & F [ x , Y(x) ] \\ 0 & = & \partial_y F [x , Y(x) ] \end{array} \] $$ We wish to compute the Jacobian and possibly also the Hessian, of $latex V (x)$$. $head BaseVector$$ The type $icode BaseVector$$ must be a $cref SimpleVector$$ class. We use $icode Base$$ to refer to the type of the elements of $icode BaseVector$$; i.e., $codei% %BaseVector%::value_type %$$ $head x$$ The argument $icode x$$ has prototype $codei% const %BaseVector%& %x% %$$ and its size must be equal to $icode n$$. It specifies the point at which we evaluating the Jacobian $latex V^{(1)} (x)$$ (and possibly the Hessian $latex V^{(2)} (x)$$). $head y$$ The argument $icode y$$ has prototype $codei% const %BaseVector%& %y% %$$ and its size must be equal to $icode m$$. It must be equal to $latex Y(x)$$; i.e., it must solve the implicit equation $latex \[ 0 = \partial_y F ( x , y) \] $$ $head Fun$$ The argument $icode fun$$ is an object of type $icode Fun$$ which must support the member functions listed below. CppAD will may be recording operations of the type $codei%AD<%Base%>%$$ when these member functions are called. These member functions must not stop such a recording; e.g., they must not call $cref/AD::abort_recording/abort_recording/$$. $subhead Fun::ad_vector$$ The type $icode%Fun%::ad_vector%$$ must be a $cref SimpleVector$$ class with elements of type $codei%AD<%Base%>%$$; i.e. $codei% %Fun%::ad_vector::value_type %$$ is equal to $codei%AD<%Base%>%$$. $subhead fun.ell$$ The type $icode Fun$$ must support the syntax $codei% %ell% = %fun%.ell() %$$ where $icode ell$$ has prototype $codei% size_t %ell% %$$ and is the value of $latex \ell$$; i.e., the number of terms in the summation. $pre $$ One can choose $icode ell$$ equal to one, and have $latex S(x,y)$$ the same as $latex F(x, y)$$. Each of the functions $latex S_k (x , y)$$, (in the summation defining $latex F(x, y)$$) is differentiated separately using AD. For very large problems, breaking $latex F(x, y)$$ into the sum of separate simpler functions may reduce the amount of memory necessary for algorithmic differentiation and there by speed up the process. $subhead fun.s$$ The type $icode Fun$$ must support the syntax $codei% %s_k% = %fun%.s(%k%, %x%, %y%) %$$ The $icode%fun%.s%$$ argument $icode k$$ has prototype $codei% size_t %k% %$$ and is between zero and $icode%ell% - 1%$$. The argument $icode x$$ to $icode%fun%.s%$$ has prototype $codei% const %Fun%::ad_vector& %x% %$$ and its size must be equal to $icode n$$. The argument $icode y$$ to $icode%fun%.s%$$ has prototype $codei% const %Fun%::ad_vector& %y% %$$ and its size must be equal to $icode m$$. The $icode%fun%.s%$$ result $icode s_k$$ has prototype $codei% AD<%Base%> %s_k% %$$ and its value must be given by $latex s_k = S_k ( x , y )$$. $subhead fun.sy$$ The type $icode Fun$$ must support the syntax $codei% %sy_k% = %fun%.sy(%k%, %x%, %y%) %$$ The argument $icode k$$ to $icode%fun%.sy%$$ has prototype $codei% size_t %k% %$$ The argument $icode x$$ to $icode%fun%.sy%$$ has prototype $codei% const %Fun%::ad_vector& %x% %$$ and its size must be equal to $icode n$$. The argument $icode y$$ to $icode%fun%.sy%$$ has prototype $codei% const %Fun%::ad_vector& %y% %$$ and its size must be equal to $icode m$$. The $icode%fun%.sy%$$ result $icode sy_k$$ has prototype $codei% %Fun%::ad_vector %sy_k% %$$ its size must be equal to $icode m$$, and its value must be given by $latex sy_k = \partial_y S_k ( x , y )$$. $head jac$$ The argument $icode jac$$ has prototype $codei% %BaseVector%& %jac% %$$ and has size $icode n$$ or zero. The input values of its elements do not matter. If it has size zero, it is not affected. Otherwise, on output it contains the Jacobian of $latex V (x)$$; i.e., for $latex j = 0 , \ldots , n-1$$, $latex \[ jac[ j ] = V^{(1)} (x)_j \] $$ where $icode x$$ is the first argument to $code opt_val_hes$$. $head hes$$ The argument $icode hes$$ has prototype $codei% %BaseVector%& %hes% %$$ and has size $icode%n% * %n%$$ or zero. The input values of its elements do not matter. If it has size zero, it is not affected. Otherwise, on output it contains the Hessian of $latex V (x)$$; i.e., for $latex i = 0 , \ldots , n-1$$, and $latex j = 0 , \ldots , n-1$$, $latex \[ hes[ i * n + j ] = V^{(2)} (x)_{i,j} \] $$ $head signdet$$ If $icode%hes%$$ has size zero, $icode signdet$$ is not defined. Otherwise the return value $icode signdet$$ is the sign of the determinant for $latex \partial_{yy}^2 F(x , y) $$. If it is zero, then the matrix is singular and the Hessian is not computed ($icode hes$$ is not changed). $head Example$$ $children% example/opt_val_hes.cpp %$$ The file $cref opt_val_hes.cpp$$ contains an example and test of this operation. It returns true if it succeeds and false otherwise. $end ----------------------------------------------------------------------------- */ namespace CppAD { // BEGIN_CPPAD_NAMESPACE /*! \file opt_val_hes.hpp \brief Computing Jabobians and Hessians of Optimal Values */ /*! Computing Jabobians and Hessians of Optimal Values We are given a function \f$ S : {\rm R}^n \times {\rm R}^m \rightarrow {\rm R}^\ell \f$ and we define \f$ F : {\rm R}^n \times {\rm R}^m \rightarrow {\rm R} \f$ and \f$ V : {\rm R}^n \rightarrow {\rm R} \f$ by \f[ \begin{array}{rcl} F(x, y) & = & \sum_{k=0}^{\ell-1} S_k ( x , y) \\ V(x) & = & F [ x , Y(x) ] \\ 0 & = & \partial_y F [x , Y(x) ] \end{array} \f] We wish to compute the Jacobian and possibly also the Hessian, of \f$ V (x) \f$. \tparam BaseVector The type \c BaseVector must be a SimpleVector class. We use \c Base to refer to the type of the elements of \c BaseVector; i.e., BaseVector::value_type. \param x is a vector with size \c n. It specifies the point at which we evaluating the Jacobian \f$ V^{(1)} (x) \f$ (and possibly the Hessian \f$ V^{(2)} (x) \f$). \param y is a vector with size \c m. It must be equal to \f$ Y(x) \f$; i.e., it must solve the implicit equation \f[ 0 = \partial_y F ( x , y) \f] \param fun The argument \c fun is an object of type \c Fun wich must support the member functions listed below. CppAD will may be recording operations of the type \c AD when these member functions are called. These member functions must not stop such a recording; e.g., they must not call \c AD::abort_recording. \par Fun::ad_vector The type Fun::ad_vector must be a SimpleVector class with elements of type \c AD; i.e. Fun::ad_vector::value_type is equal to \c AD. \par fun.ell the type \c Fun must support the syntax \verbatim ell = fun.ell() \endverbatim where \c ell is a \c size_t value that is set to \f$ \ell \f$; i.e., the number of terms in the summation. \par fun.s The type \c Fun must support the syntax \verbatim s_k = fun.s(k, x, y) \endverbatim The argument \c k has prototype size_t k. The argument \c x has prototype const Fun::ad_vector& x and its size must be equal to \c n. The argument \c y has prototype const Fun::ad_vector& y and its size must be equal to \c m. The return value \c s_k has prototype \c AD s_k and its value must be given by \f$ s_k = S_k ( x , y ) \f$. \par fun.sy The type \c Fun must support the syntax \verbatim sy_k = fun.sy(k, x, y) \endverbatim The argument \c k has prototype size_t k. The argument \c x has prototype const Fun::ad_vector& x and its size must be equal to \c n. The argument \c y has prototype const Fun::ad_vector& y and its size must be equal to \c m. The return value \c sy_k has prototype Fun::ad_vector& sy_k, its size is \c m and its value must be given by \f$ sy_k = \partial_y S_k ( x , y ) \f$. \param jac is a vector with size \c n or zero. The input values of its elements do not matter. If it has size zero, it is not affected. Otherwise, on output it contains the Jacobian of \f$ V (x) \f$; i.e., for \f$ j = 0 , \ldots , n-1 \f$, \f[ jac[ j ] = V^{(1)} (x)_j \f] $$ where \c x is the first argument to \c opt_val_hes. \param hes is a vector with size n * n or zero. The input values of its elements do not matter. If it has size zero, it is not affected. Otherwise, on output it contains the Hessian of \f$ V (x) \f$; i.e., for \f$ i = 0 , \ldots , n-1 \f$, and \f$ j = 0 , \ldots , n-1 \f$, \f[ hes[ i * n + j ] = V^{(2)} (x)_{i,j} \f] \return If hes.size() == 0, the return value is not defined. Otherwise, the return value is the sign of the determinant for \f$ \partial_{yy}^2 F(x , y) \f$$. If it is zero, then the matrix is singular and \c hes is not set to its specified value. */ template int opt_val_hes( const BaseVector& x , const BaseVector& y , Fun fun , BaseVector& jac , BaseVector& hes ) { // determine the base type typedef typename BaseVector::value_type Base; // check that BaseVector is a SimpleVector class with Base elements CheckSimpleVector(); // determine the AD vector type typedef typename Fun::ad_vector ad_vector; // check that ad_vector is a SimpleVector class with AD elements CheckSimpleVector< AD , ad_vector >(); // size of the x and y spaces size_t n = size_t(x.size()); size_t m = size_t(y.size()); // number of terms in the summation size_t ell = fun.ell(); // check size of return values CPPAD_ASSERT_KNOWN( size_t(jac.size()) == n || jac.size() == 0, "opt_val_hes: size of the vector jac is not equal to n or zero" ); CPPAD_ASSERT_KNOWN( size_t(hes.size()) == n * n || hes.size() == 0, "opt_val_hes: size of the vector hes is not equal to n * n or zero" ); // some temporary indices size_t i, j, k; // AD version of S_k(x, y) ad_vector s_k(1); // ADFun version of S_k(x, y) ADFun S_k; // AD version of x ad_vector a_x(n); // AD version of y ad_vector a_y(n); if( jac.size() > 0 ) { // this is the easy part, computing the V^{(1)} (x) which is equal // to \partial_x F (x, y) (see Thoerem 2 of the reference). // copy x and y to AD version for(j = 0; j < n; j++) a_x[j] = x[j]; for(j = 0; j < m; j++) a_y[j] = y[j]; // initialize summation for(j = 0; j < n; j++) jac[j] = Base(0.); // add in \partial_x S_k (x, y) for(k = 0; k < ell; k++) { // start recording Independent(a_x); // record s_k[0] = fun.s(k, a_x, a_y); // stop recording and store in S_k S_k.Dependent(a_x, s_k); // compute partial of S_k with respect to x BaseVector jac_k = S_k.Jacobian(x); // add \partial_x S_k (x, y) to jac for(j = 0; j < n; j++) jac[j] += jac_k[j]; } } // check if we are done if( hes.size() == 0 ) return 0; /* In this case, we need to compute the Hessian. Using Theorem 1 of the reference: Y^{(1)}(x) = - F_yy (x, y)^{-1} F_yx (x, y) Using Theorem 2 of the reference: V^{(2)}(x) = F_xx (x, y) + F_xy (x, y) Y^{(1)}(x) */ // Base and AD version of xy BaseVector xy(n + m); ad_vector a_xy(n + m); for(j = 0; j < n; j++) a_xy[j] = xy[j] = x[j]; for(j = 0; j < m; j++) a_xy[n+j] = xy[n+j] = y[j]; // Initialization summation for Hessian of F size_t nm_sq = (n + m) * (n + m); BaseVector F_hes(nm_sq); for(j = 0; j < nm_sq; j++) F_hes[j] = Base(0.); BaseVector hes_k(nm_sq); // add in Hessian of S_k to hes for(k = 0; k < ell; k++) { // start recording Independent(a_xy); // split out x for(j = 0; j < n; j++) a_x[j] = a_xy[j]; // split out y for(j = 0; j < m; j++) a_y[j] = a_xy[n+j]; // record s_k[0] = fun.s(k, a_x, a_y); // stop recording and store in S_k S_k.Dependent(a_xy, s_k); // when computing the Hessian it pays to optimize the tape S_k.optimize(); // compute Hessian of S_k hes_k = S_k.Hessian(xy, 0); // add \partial_x S_k (x, y) to jac for(j = 0; j < nm_sq; j++) F_hes[j] += hes_k[j]; } // Extract F_yx BaseVector F_yx(m * n); for(i = 0; i < m; i++) { for(j = 0; j < n; j++) F_yx[i * n + j] = F_hes[ (i+n)*(n+m) + j ]; } // Extract F_yy BaseVector F_yy(n * m); for(i = 0; i < m; i++) { for(j = 0; j < m; j++) F_yy[i * m + j] = F_hes[ (i+n)*(n+m) + j + n ]; } // compute - Y^{(1)}(x) = F_yy (x, y)^{-1} F_yx (x, y) BaseVector neg_Y_x(m * n); Base logdet; int signdet = CppAD::LuSolve(m, n, F_yy, F_yx, neg_Y_x, logdet); if( signdet == 0 ) return signdet; // compute hes = F_xx (x, y) + F_xy (x, y) Y^{(1)}(x) for(i = 0; i < n; i++) { for(j = 0; j < n; j++) { hes[i * n + j] = F_hes[ i*(n+m) + j ]; for(k = 0; k < m; k++) hes[i*n+j] -= F_hes[i*(n+m) + k+n] * neg_Y_x[k*n+j]; } } return signdet; } } // END_CPPAD_NAMESPACE # endif