// SPDX-License-Identifier: Apache-2.0
// 
// Copyright 2008-2016 Conrad Sanderson (https://conradsanderson.id.au)
// Copyright 2008-2016 National ICT Australia (NICTA)
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ------------------------------------------------------------------------


//! \addtogroup auxlib
//! @{



template<typename eT>
inline
bool
auxlib::inv(Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  // NOTE: given a matrix with NaN values, lapack::getrf() and lapack::getri() do not necessarily fail,
  // NOTE: and can produce matrices with NaN values.
  // NOTE: we're not checking for non-finite values to avoid breaking existing user code.
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    blas_int n     = blas_int(A.n_rows);
    blas_int lda   = blas_int(A.n_rows);
    blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n);
    blas_int info  = 0;
    
    podarray<blas_int> ipiv(A.n_rows);
    
    arma_debug_print("lapack::getrf()");
    lapack::getrf(&n, &n, A.memptr(), &lda, ipiv.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    if(n > blas_int(podarray_prealloc_n_elem::val))
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::getri()");
      lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::getri()");
    lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(A);
    arma_stop_logic_error("inv(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::inv(Mat<eT>& out, const Mat<eT>& X)
  {
  arma_debug_sigprint();
  
  out = X;
  
  return auxlib::inv(out);
  }



template<typename eT>
inline
bool
auxlib::inv_rcond(Mat<eT>& A, typename get_pod_type<eT>::result& out_rcond)
  {
  arma_debug_sigprint();
  
  typedef typename get_pod_type<eT>::result T;
  
  out_rcond = T(0);
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     norm_id  = '1';
    blas_int n        = blas_int(A.n_rows);
    blas_int lda      = blas_int(A.n_rows);
    blas_int lwork    = (std::max)(blas_int(podarray_prealloc_n_elem::val), n);
    blas_int info     = 0;
    T        norm_val = T(0);
    
    podarray<T>        junk(1);
    podarray<blas_int> ipiv(A.n_rows);
    
    arma_debug_print("lapack::lange()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr());
    
    arma_debug_print("lapack::getrf()");
    lapack::getrf(&n, &n, A.memptr(), &lda, ipiv.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    out_rcond = auxlib::lu_rcond<T>(A, norm_val);
    
    if(n > blas_int(podarray_prealloc_n_elem::val))
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::getri()");
      lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::getri()");
    lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(A);
    arma_stop_logic_error("inv_rcond(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::inv_tr(Mat<eT>& A, const uword layout)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    if(A.is_empty())  { return true; }
    
    arma_conform_assert_blas_size(A);
    
    char     uplo = (layout == 0) ? 'U' : 'L';
    char     diag = 'N';
    blas_int n    = blas_int(A.n_rows);
    blas_int info = 0;
    
    arma_debug_print("lapack::trtri()");
    lapack::trtri(&uplo, &diag, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(layout);
    arma_stop_logic_error("inv(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::inv_tr_rcond(Mat<eT>& A, typename get_pod_type<eT>::result& out_rcond, const uword layout)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename get_pod_type<eT>::result T;
    
    if(A.is_empty())  { return true; }
    
    out_rcond = auxlib::rcond_trimat(A, layout);
    
    arma_conform_assert_blas_size(A);
    
    char     uplo = (layout == 0) ? 'U' : 'L';
    char     diag = 'N';
    blas_int n    = blas_int(A.n_rows);
    blas_int info = 0;
    
    arma_debug_print("lapack::trtri()");
    lapack::trtri(&uplo, &diag, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { out_rcond = T(0); return false; }
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(out_rcond);
    arma_ignore(layout);
    arma_stop_logic_error("inv(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::inv_sym(Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     uplo  = 'L';
    blas_int n     = blas_int(A.n_rows);
    blas_int lda   = blas_int(A.n_rows);
    blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n);
    blas_int info  = 0;
    
    podarray<blas_int> ipiv(A.n_rows);
    
    if(n > blas_int(podarray_prealloc_n_elem::val))
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::sytrf()");
      lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::sytrf()");
    lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::sytri()");
    lapack::sytri(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    A = symmatl(A);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_stop_logic_error("inv_sym(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::inv_sym(Mat< std::complex<T> >& A)
  {
  arma_debug_sigprint();
  
  // NOTE: the function name is required for overloading, but is a misnomer: it processes complex hermitian matrices
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    arma_conform_assert_blas_size(A);
    
    char     uplo  = 'L';
    blas_int n     = blas_int(A.n_rows);
    blas_int lda   = blas_int(A.n_rows);
    blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n);
    blas_int info  = 0;
    
    podarray<blas_int> ipiv(A.n_rows);
    
    if(n > blas_int(podarray_prealloc_n_elem::val))
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::hetrf()");
      lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::hetrf()");
    lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::hetri()");
    lapack::hetri(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    A = symmatl(A);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_stop_logic_error("inv_sym(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::inv_sym_rcond(Mat<eT>& A, eT& out_rcond)
  {
  arma_debug_sigprint();
  
  out_rcond = eT(0);
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     norm_id   = '1';
    char     uplo      = 'L';
    blas_int n         = blas_int(A.n_rows);
    blas_int lda       = blas_int(A.n_rows);
    blas_int lwork     = (std::max)(blas_int(podarray_prealloc_n_elem::val), 2*n);  // 2*n due to lapack::sycon() requirements
    blas_int info      = 0;
    eT       norm_val  = eT(0);
    eT       tmp_rcond = eT(0);
    
    podarray<blas_int>  ipiv(A.n_rows);
    podarray<blas_int> iwork(A.n_rows);
    
    if( (2*n) > blas_int(podarray_prealloc_n_elem::val) )
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::sytrf()");
      lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::lansy()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &lda, work.memptr());
    
    arma_debug_print("lapack::sytrf()");
    lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::sycon()");
    lapack::sycon(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &norm_val, &tmp_rcond, work.memptr(), iwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    out_rcond = tmp_rcond;
    
    if(arma_isnan(out_rcond))  { return false; }
    
    arma_debug_print("lapack::sytri()");
    lapack::sytri(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    A = symmatl(A);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(out_rcond);
    arma_stop_logic_error("inv_sym_rcond(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::inv_sym_rcond(Mat< std::complex<T> >& A, T& out_rcond)
  {
  arma_debug_sigprint();
  
  // NOTE: the function name is required for overloading, but is a misnomer: it processes complex hermitian matrices
  
  out_rcond = T(0);
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    arma_conform_assert_blas_size(A);
    
    char     norm_id   = '1';
    char     uplo      = 'L';
    blas_int n         = blas_int(A.n_rows);
    blas_int lda       = blas_int(A.n_rows);
    blas_int lwork     = (std::max)(blas_int(podarray_prealloc_n_elem::val), 2*n);  // 2*n due to lapack::hecon() requirements
    blas_int info      = 0;
    T        norm_val  = T(0);
    T        tmp_rcond = T(0);
    
    podarray<blas_int>       ipiv(A.n_rows);
    podarray<T>        lanhe_work(A.n_rows);
    
    if( (2*n) > blas_int(podarray_prealloc_n_elem::val) )
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::hetrf()");
      lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::lanhe()");
    norm_val = (has_blas_float_bug<T>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &lda, lanhe_work.memptr());
    
    arma_debug_print("lapack::hetrf()");
    lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::hecon()");
    lapack::hecon(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &norm_val, &tmp_rcond, work.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    out_rcond = tmp_rcond;
    
    if(arma_isnan(out_rcond))  { return false; }
    
    arma_debug_print("lapack::hetri()");
    lapack::hetri(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    A = symmatl(A);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(out_rcond);
    arma_stop_logic_error("inv_sym_rcond(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::inv_sympd(Mat<eT>& A, bool& out_sympd_state)
  {
  arma_debug_sigprint();
  
  out_sympd_state = false;
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     uplo = 'L';
    blas_int n    = blas_int(A.n_rows);
    blas_int info = 0;
    
    // NOTE: for complex matrices, zpotrf() assumes the matrix is hermitian (not simply symmetric)
    
    arma_debug_print("lapack::potrf()");
    lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    out_sympd_state = true;
    
    arma_debug_print("lapack::potri()");
    lapack::potri(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    A = symmatl(A);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(out_sympd_state);
    arma_stop_logic_error("inv_sympd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::inv_sympd(Mat<eT>& out, const Mat<eT>& X)
  {
  arma_debug_sigprint();
  
  out = X;
  
  bool sympd_state_junk = false;
  
  return auxlib::inv_sympd(out, sympd_state_junk);
  }



template<typename eT>
inline
bool
auxlib::inv_sympd_rcond(Mat<eT>& A, eT& out_rcond)
  {
  arma_debug_sigprint();
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename get_pod_type<eT>::result T;
    
    arma_conform_assert_blas_size(A);
    
    char     norm_id  = '1';
    char     uplo     = 'L';
    blas_int n        = blas_int(A.n_rows);
    blas_int info     = 0;
    T        norm_val = T(0);
    
    podarray<T> work(A.n_rows);
    
    arma_debug_print("lapack::lansy()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
    
    arma_debug_print("lapack::potrf()");
    lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { out_rcond = eT(0); return false; }
    
    out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val);
    
    if(arma_isnan(out_rcond))  { return false; }
    
    arma_debug_print("lapack::potri()");
    lapack::potri(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    A = symmatl(A);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(out_rcond);
    arma_stop_logic_error("inv_sympd_rcond(): use LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::inv_sympd_rcond(Mat< std::complex<T> >& A, T& out_rcond)
  {
  arma_debug_sigprint();
  
  if(A.is_empty())  { return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     norm_id  = '1';
    char     uplo     = 'L';
    blas_int n        = blas_int(A.n_rows);
    blas_int info     = 0;
    T        norm_val = T(0);
    
    podarray<T> work(A.n_rows);
    
    arma_debug_print("lapack::lanhe()");
    norm_val = (has_blas_float_bug<T>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
    
    arma_debug_print("lapack::potrf()");
    lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { out_rcond = T(0); return false; }
    
    out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val);
    
    if(arma_isnan(out_rcond))  { return false; }
    
    arma_debug_print("lapack::potri()");
    lapack::potri(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    A = symmatl(A);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(out_rcond);
    arma_stop_logic_error("inv_sympd_rcond(): use LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! determinant of a matrix
template<typename eT>
inline
bool
auxlib::det(eT& out_val, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  if(A.is_empty())  { out_val = eT(1); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    podarray<blas_int> ipiv(A.n_rows);
    
    blas_int info   = 0;
    blas_int n_rows = blas_int(A.n_rows);
    blas_int n_cols = blas_int(A.n_cols);
    
    arma_debug_print("lapack::getrf()");
    lapack::getrf(&n_rows, &n_cols, A.memptr(), &n_rows, ipiv.memptr(), &info);
    
    if(info < 0)  { return false; }
    
    // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
    eT val = A.at(0,0);
    for(uword i=1; i < A.n_rows; ++i)  { val *= A.at(i,i); }
    
    blas_int sign = +1;
    for(uword i=0; i < A.n_rows; ++i)
      {
      // NOTE: adjustment of -1 is required as Fortran counts from 1
      if( blas_int(i) != (ipiv.mem[i] - 1) )  { sign *= -1; }
      }
    
    out_val = (sign < 0) ? eT(-val) : eT(val);
    
    return true;
    }
  #else
    {
    arma_ignore(out_val);
    arma_ignore(A);
    arma_stop_logic_error("det(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! log determinant of a matrix
template<typename eT>
inline
bool
auxlib::log_det(eT& out_val, typename get_pod_type<eT>::result& out_sign, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  typedef typename get_pod_type<eT>::result T;
  
  if(A.is_empty())  { out_val  = eT(0); out_sign =  T(1); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    podarray<blas_int> ipiv(A.n_rows);
    
    blas_int info   = 0;
    blas_int n_rows = blas_int(A.n_rows);
    blas_int n_cols = blas_int(A.n_cols);
    
    arma_debug_print("lapack::getrf()");
    lapack::getrf(&n_rows, &n_cols, A.memptr(), &n_rows, ipiv.memptr(), &info);
    
    if(info < 0)  { return false; }
    
    // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
    
    sword sign = (is_cx<eT>::no) ? ( (access::tmp_real( A.at(0,0) ) < T(0)) ? -1 : +1 ) : +1;
    eT    val  = (is_cx<eT>::no) ? std::log( (access::tmp_real( A.at(0,0) ) < T(0)) ? A.at(0,0)*T(-1) : A.at(0,0) ) : std::log( A.at(0,0) );
    
    for(uword i=1; i < A.n_rows; ++i)
      {
      const eT x = A.at(i,i);
      
      sign *= (is_cx<eT>::no) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1;
      val  += (is_cx<eT>::no) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x);
      }
    
    for(uword i=0; i < A.n_rows; ++i)
      {
      if( blas_int(i) != (ipiv.mem[i] - 1) )  // NOTE: adjustment of -1 is required as Fortran counts from 1
        {
        sign *= -1;
        }
      }
    
    out_val  = val;
    out_sign = T(sign);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(out_val);
    arma_ignore(out_sign);
    arma_stop_logic_error("log_det(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::log_det_sympd(typename get_pod_type<eT>::result& out_val, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  typedef typename get_pod_type<eT>::result T;
  
  if(A.is_empty())  { out_val = T(0); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     uplo = 'L';
    blas_int n    = blas_int(A.n_rows);
    blas_int info = 0;
    
    arma_debug_print("lapack::potrf()");
    lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    T val = T(0);
    
    for(uword i=0; i < A.n_rows; ++i)  { val += std::log( access::tmp_real(A.at(i,i)) ); }
    
    out_val = T(2) * val;
    
    return true;
    }
  #else
    {
    arma_ignore(out_val);
    arma_ignore(A);
    arma_stop_logic_error("log_det_sympd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! LU decomposition of a matrix
template<typename eT, typename T1>
inline
bool
auxlib::lu(Mat<eT>& L, Mat<eT>& U, podarray<blas_int>& ipiv, const Base<eT,T1>& X)
  {
  arma_debug_sigprint();
  
  U = X.get_ref();
  
  const uword U_n_rows = U.n_rows;
  const uword U_n_cols = U.n_cols;
  
  if(U.is_empty())  { L.set_size(U_n_rows, 0); U.set_size(0, U_n_cols); ipiv.reset(); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(U);
    
    ipiv.set_size( (std::min)(U_n_rows, U_n_cols) );
    
    blas_int info = 0;
    
    blas_int n_rows = blas_int(U_n_rows);
    blas_int n_cols = blas_int(U_n_cols);
    
    arma_debug_print("lapack::getrf()");
    lapack::getrf(&n_rows, &n_cols, U.memptr(), &n_rows, ipiv.memptr(), &info);
    
    if(info < 0)  { return false; }
    
    // take into account that Fortran counts from 1
    arrayops::inplace_minus(ipiv.memptr(), blas_int(1), ipiv.n_elem);
    
    L.copy_size(U);
    
    for(uword col=0; col < U_n_cols; ++col)
      {
      for(uword row=0; (row < col) && (row < U_n_rows); ++row)
        {
        L.at(row,col) = eT(0);
        }
      
      if( L.in_range(col,col) )
        {
        L.at(col,col) = eT(1);
        }
      
      for(uword row = (col+1); row < U_n_rows; ++row)
        {
        L.at(row,col) = U.at(row,col);
        U.at(row,col) = eT(0);
        }
      }
    
    return true;
    }
  #else
    {
    arma_stop_logic_error("lu(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT, typename T1>
inline
bool
auxlib::lu(Mat<eT>& L, Mat<eT>& U, Mat<eT>& P, const Base<eT,T1>& X)
  {
  arma_debug_sigprint();
  
  podarray<blas_int> ipiv1;
  const bool status = auxlib::lu(L, U, ipiv1, X);
  
  if(status == false)  { return false; }
  
  if(U.is_empty())
    {
    // L and U have been already set to the correct empty matrices
    P.eye(L.n_rows, L.n_rows);
    return true;
    }
  
  const uword n      = ipiv1.n_elem;
  const uword P_rows = U.n_rows;
  
  podarray<blas_int> ipiv2(P_rows);
  
  const blas_int* ipiv1_mem = ipiv1.memptr();
        blas_int* ipiv2_mem = ipiv2.memptr();
  
  for(uword i=0; i<P_rows; ++i)
    {
    ipiv2_mem[i] = blas_int(i);
    }
  
  for(uword i=0; i<n; ++i)
    {
    const uword k = static_cast<uword>(ipiv1_mem[i]);
    
    if( ipiv2_mem[i] != ipiv2_mem[k] )
      {
      std::swap( ipiv2_mem[i], ipiv2_mem[k] );
      }
    }
  
  P.zeros(P_rows, P_rows);
  
  for(uword row=0; row<P_rows; ++row)
    {
    P.at(row, static_cast<uword>(ipiv2_mem[row])) = eT(1);
    }
  
  if(L.n_cols > U.n_rows)
    {
    L.shed_cols(U.n_rows, L.n_cols-1);
    }
    
  if(U.n_rows > L.n_cols)
    {
    U.shed_rows(L.n_cols, U.n_rows-1);
    }
  
  return true;
  }



template<typename eT, typename T1>
inline
bool
auxlib::lu(Mat<eT>& L, Mat<eT>& U, const Base<eT,T1>& X)
  {
  arma_debug_sigprint();
  
  podarray<blas_int> ipiv1;
  const bool status = auxlib::lu(L, U, ipiv1, X);
  
  if(status == false)  { return false; }
  
  if(U.is_empty())
    {
    // L and U have been already set to the correct empty matrices
    return true;
    }
  
  const uword n      = ipiv1.n_elem;
  const uword P_rows = U.n_rows;
  
  podarray<blas_int> ipiv2(P_rows);
  
  const blas_int* ipiv1_mem = ipiv1.memptr();
        blas_int* ipiv2_mem = ipiv2.memptr();
  
  for(uword i=0; i<P_rows; ++i)
    {
    ipiv2_mem[i] = blas_int(i);
    }
  
  for(uword i=0; i<n; ++i)
    {
    const uword k = static_cast<uword>(ipiv1_mem[i]);
    
    if( ipiv2_mem[i] != ipiv2_mem[k] )
      {
      std::swap( ipiv2_mem[i], ipiv2_mem[k] );
      L.swap_rows( static_cast<uword>(ipiv2_mem[i]), static_cast<uword>(ipiv2_mem[k]) );
      }
    }
  
  if(L.n_cols > U.n_rows)
    {
    L.shed_cols(U.n_rows, L.n_cols-1);
    }
    
  if(U.n_rows > L.n_cols)
    {
    U.shed_rows(L.n_cols, U.n_rows-1);
    }
  
  return true;
  }



//! eigen decomposition of general square matrix (real)
template<typename T1>
inline
bool
auxlib::eig_gen
  (
         Mat< std::complex<typename T1::pod_type> >& vals,
         Mat< std::complex<typename T1::pod_type> >& vecs,
  const bool                                         vecs_on,
  const Base<typename T1::pod_type,T1>&              expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type T;
    
    Mat<T> X = expr.get_ref();
    
    arma_conform_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
    
    arma_conform_assert_blas_size(X);
    
    if(X.is_empty())  { vals.reset(); vecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && X.internal_has_nonfinite())  { return false; }
    
    vals.set_size(X.n_rows, 1);
    
    Mat<T> tmp(1, 1, arma_nozeros_indicator());
    
    if(vecs_on)
      {
      vecs.set_size(X.n_rows, X.n_rows);
       tmp.set_size(X.n_rows, X.n_rows);
      }
    
    podarray<T> junk(1);
    
    char     jobvl = 'N';
    char     jobvr = (vecs_on) ? 'V' : 'N';
    blas_int N     = blas_int(X.n_rows);
    T*       vl    = junk.memptr();
    T*       vr    = (vecs_on) ? tmp.memptr() : junk.memptr();
    blas_int ldvl  = blas_int(1);
    blas_int ldvr  = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1);
    blas_int lwork = 64*N;  // lwork_min = (vecs_on) ? (std::max)(blas_int(1), 4*N) : (std::max)(blas_int(1), 3*N)
    blas_int info  = 0;
    
    podarray<T> work( static_cast<uword>(lwork) );
    
    podarray<T> vals_real(X.n_rows);
    podarray<T> vals_imag(X.n_rows);
    
    arma_debug_print("lapack::geev() -- START");
    lapack::geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, &info);
    arma_debug_print("lapack::geev() -- END");
    
    if(info != 0)  { return false; }
    
    arma_debug_print("reformatting eigenvalues and eigenvectors");
    
    std::complex<T>* vals_mem = vals.memptr();
    
    for(uword i=0; i < X.n_rows; ++i)  { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); }
    
    if(vecs_on)
      {
      for(uword j=0; j < X.n_rows; ++j)
        {
        if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
          {
          for(uword i=0; i < X.n_rows; ++i)
            {
            vecs.at(i,j)   = std::complex<T>( tmp.at(i,j),  tmp.at(i,j+1) );
            vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) );
            }
          
          ++j;
          }
        else
          {
          for(uword i=0; i<X.n_rows; ++i)
            {
            vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0));
            }
          }
        }
      }
    
    return true;
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(vecs);
    arma_ignore(vecs_on);
    arma_ignore(expr);
    arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigen decomposition of general square matrix (complex)
template<typename T1>
inline
bool
auxlib::eig_gen
  (
         Mat< std::complex<typename T1::pod_type> >&     vals,
         Mat< std::complex<typename T1::pod_type> >&     vecs, 
  const bool                                             vecs_on,
  const Base< std::complex<typename T1::pod_type>, T1 >& expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    Mat<eT> X = expr.get_ref();
    
    arma_conform_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
    
    arma_conform_assert_blas_size(X);
    
    if(X.is_empty())  { vals.reset(); vecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && X.internal_has_nonfinite())  { return false; }
    
    vals.set_size(X.n_rows, 1);
    
    if(vecs_on)  { vecs.set_size(X.n_rows, X.n_rows); }
    
    podarray<eT> junk(1);
    
    char     jobvl = 'N';
    char     jobvr = (vecs_on) ? 'V' : 'N';
    blas_int N     = blas_int(X.n_rows);
    eT*      vl    = junk.memptr();
    eT*      vr    = (vecs_on) ? vecs.memptr() : junk.memptr();
    blas_int ldvl  = blas_int(1);
    blas_int ldvr  = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1), 2*N)
    blas_int info  = 0;
    
    podarray<eT>  work( static_cast<uword>(lwork) );
    podarray< T> rwork( static_cast<uword>(2*N)   );
    
    arma_debug_print("lapack::cx_geev() -- START");
    lapack::cx_geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, rwork.memptr(), &info);
    arma_debug_print("lapack::cx_geev() -- END");
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(vecs);
    arma_ignore(vecs_on);
    arma_ignore(expr);
    arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigen decomposition of general square matrix (real, balance given matrix)
template<typename T1>
inline
bool
auxlib::eig_gen_balance
  (
         Mat< std::complex<typename T1::pod_type> >& vals,
         Mat< std::complex<typename T1::pod_type> >& vecs,
  const bool                                         vecs_on,
  const Base<typename T1::pod_type,T1>&              expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type T;
    
    Mat<T> X = expr.get_ref();
    
    arma_conform_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
    
    arma_conform_assert_blas_size(X);
    
    if(X.is_empty())  { vals.reset(); vecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && X.internal_has_nonfinite())  { return false; }
    
    vals.set_size(X.n_rows, 1);
    
    Mat<T> tmp(1, 1, arma_nozeros_indicator());
    
    if(vecs_on)
      {
      vecs.set_size(X.n_rows, X.n_rows);
       tmp.set_size(X.n_rows, X.n_rows);
      }
    
    podarray<T> junk(1);
    
    char     bal   = 'B'; 
    char     jobvl = 'N';
    char     jobvr = (vecs_on) ? 'V' : 'N';
    char     sense = 'N';
    blas_int N     = blas_int(X.n_rows);
    T*       vl    = junk.memptr();
    T*       vr    = (vecs_on) ? tmp.memptr() : junk.memptr();
    blas_int ldvl  = blas_int(1);
    blas_int ldvr  = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1);
    blas_int ilo   = blas_int(0);
    blas_int ihi   = blas_int(0);
    T        abnrm = T(0);
    blas_int lwork = 64*N;  // lwork_min = (vecs_on) ? (std::max)(blas_int(1), 2*N) : (std::max)(blas_int(1), 3*N)
    blas_int info  = blas_int(0);
    
    podarray<T>  scale(X.n_rows);
    podarray<T> rconde(X.n_rows);
    podarray<T> rcondv(X.n_rows);
    
    podarray<T>         work( static_cast<uword>(lwork) );
    podarray<blas_int> iwork( uword(1) );  // iwork not used by lapack::geevx() as sense = 'N'
    
    podarray<T> vals_real(X.n_rows);
    podarray<T> vals_imag(X.n_rows);
    
    arma_debug_print("lapack::geevx() -- START");
    lapack::geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), vl, &ldvl, vr, &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, iwork.memptr(), &info);
    arma_debug_print("lapack::geevx() -- END");
    
    if(info != 0)  { return false; }
    
    arma_debug_print("reformatting eigenvalues and eigenvectors");
    
    std::complex<T>* vals_mem = vals.memptr();
    
    for(uword i=0; i < X.n_rows; ++i)  { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); }
    
    if(vecs_on)
      {
      for(uword j=0; j < X.n_rows; ++j)
        {
        if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
          {
          for(uword i=0; i < X.n_rows; ++i)
            {
            vecs.at(i,j)   = std::complex<T>( tmp.at(i,j),  tmp.at(i,j+1) );
            vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) );
            }
          
          ++j;
          }
        else
          {
          for(uword i=0; i<X.n_rows; ++i)
            {
            vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0));
            }
          }
        }
      }
    
    return true;
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(vecs);
    arma_ignore(vecs_on);
    arma_ignore(expr);
    arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigen decomposition of general square matrix (complex, balance given matrix)
template<typename T1>
inline
bool
auxlib::eig_gen_balance
  (
         Mat< std::complex<typename T1::pod_type> >&     vals,
         Mat< std::complex<typename T1::pod_type> >&     vecs, 
  const bool                                             vecs_on,
  const Base< std::complex<typename T1::pod_type>, T1 >& expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    Mat<eT> X = expr.get_ref();
    
    arma_conform_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
    
    arma_conform_assert_blas_size(X);
    
    if(X.is_empty())  { vals.reset(); vecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && X.internal_has_nonfinite())  { return false; }
    
    vals.set_size(X.n_rows, 1);
    
    if(vecs_on)  { vecs.set_size(X.n_rows, X.n_rows); }
    
    podarray<eT> junk(1);
    
    char     bal   = 'B';
    char     jobvl = 'N';
    char     jobvr = (vecs_on) ? 'V' : 'N';
    char     sense = 'N';
    blas_int N     = blas_int(X.n_rows);
    eT*      vl    = junk.memptr();
    eT*      vr    = (vecs_on) ? vecs.memptr() : junk.memptr();
    blas_int ldvl  = blas_int(1);
    blas_int ldvr  = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1);
    blas_int ilo   = blas_int(0);
    blas_int ihi   = blas_int(0);
    T        abnrm = T(0);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1), blas_int(2*N))
    blas_int info  = blas_int(0);
    
    podarray<T>  scale(X.n_rows);
    podarray<T> rconde(X.n_rows);
    podarray<T> rcondv(X.n_rows);
    
    podarray<eT>  work( static_cast<uword>(lwork) );
    podarray< T> rwork( static_cast<uword>(2*N)   );
    
    arma_debug_print("lapack::cx_geevx() -- START");
    lapack::cx_geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals.memptr(), vl, &ldvl, vr, &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
    arma_debug_print("lapack::cx_geevx() -- END");
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(vecs);
    arma_ignore(vecs_on);
    arma_ignore(expr);
    arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! two-sided eigen decomposition of general square matrix (real)
template<typename T1>
inline
bool
auxlib::eig_gen_twosided
  (
         Mat< std::complex<typename T1::pod_type> >&  vals,
         Mat< std::complex<typename T1::pod_type> >& lvecs,
         Mat< std::complex<typename T1::pod_type> >& rvecs,
  const Base<typename T1::pod_type,T1>&              expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type T;
    
    Mat<T> X = expr.get_ref();
    
    arma_conform_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
    
    arma_conform_assert_blas_size(X);
    
    if(X.is_empty())  { vals.reset(); lvecs.reset(); rvecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && X.internal_has_nonfinite())  { return false; }
    
    vals.set_size(X.n_rows, 1);
    
    lvecs.set_size(X.n_rows, X.n_rows);
    rvecs.set_size(X.n_rows, X.n_rows);
    
    Mat<T> ltmp(X.n_rows, X.n_rows, arma_nozeros_indicator());
    Mat<T> rtmp(X.n_rows, X.n_rows, arma_nozeros_indicator());
    
    char     jobvl = 'V';
    char     jobvr = 'V';
    blas_int N     = blas_int(X.n_rows);
    blas_int ldvl  = blas_int(ltmp.n_rows);
    blas_int ldvr  = blas_int(rtmp.n_rows);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1), 4*N)
    blas_int info  = 0;
    
    podarray<T> work( static_cast<uword>(lwork) );
    
    podarray<T> vals_real(X.n_rows);
    podarray<T> vals_imag(X.n_rows);
    
    arma_debug_print("lapack::geev() -- START");
    lapack::geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, work.memptr(), &lwork, &info);
    arma_debug_print("lapack::geev() -- END");
    
    if(info != 0)  { return false; }
    
    arma_debug_print("reformatting eigenvalues and eigenvectors");
    
    std::complex<T>* vals_mem = vals.memptr();
    
    for(uword i=0; i < X.n_rows; ++i)  { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); }
    
    for(uword j=0; j < X.n_rows; ++j)
      {
      if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
        {
        for(uword i=0; i < X.n_rows; ++i)
          {
          lvecs.at(i,j)   = std::complex<T>( ltmp.at(i,j),  ltmp.at(i,j+1) );
          lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) );
          rvecs.at(i,j)   = std::complex<T>( rtmp.at(i,j),  rtmp.at(i,j+1) );
          rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) );
          }
        ++j;
        }
      else
        {
        for(uword i=0; i<X.n_rows; ++i)
          {
          lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0));
          rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0));
          }
        }
      }
    
    return true;
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(lvecs);
    arma_ignore(rvecs);
    arma_ignore(expr);
    arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! two-sided eigen decomposition of general square matrix (complex)
template<typename T1>
inline
bool
auxlib::eig_gen_twosided
  (
         Mat< std::complex<typename T1::pod_type> >&      vals,
         Mat< std::complex<typename T1::pod_type> >&     lvecs,
         Mat< std::complex<typename T1::pod_type> >&     rvecs,
  const Base< std::complex<typename T1::pod_type>, T1 >& expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    Mat<eT> X = expr.get_ref();
    
    arma_conform_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
    
    arma_conform_assert_blas_size(X);
    
    if(X.is_empty())  { vals.reset(); lvecs.reset(); rvecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && X.internal_has_nonfinite())  { return false; }
    
    vals.set_size(X.n_rows, 1);
    
    lvecs.set_size(X.n_rows, X.n_rows);
    rvecs.set_size(X.n_rows, X.n_rows);
    
    char     jobvl = 'V';
    char     jobvr = 'V';
    blas_int N     = blas_int(X.n_rows);
    blas_int ldvl  = blas_int(lvecs.n_rows);
    blas_int ldvr  = blas_int(rvecs.n_rows);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1), 2*N)
    blas_int info  = 0;
    
    podarray<eT>  work( static_cast<uword>(lwork) );
    podarray< T> rwork( static_cast<uword>(2*N)   );
    
    arma_debug_print("lapack::cx_geev() -- START");
    lapack::cx_geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, work.memptr(), &lwork, rwork.memptr(), &info);
    arma_debug_print("lapack::cx_geev() -- END");
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(lvecs);
    arma_ignore(rvecs);
    arma_ignore(expr);
    arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! two-sided eigen decomposition of general square matrix (real, balance given matrix)
template<typename T1>
inline
bool
auxlib::eig_gen_twosided_balance
  (
         Mat< std::complex<typename T1::pod_type> >&  vals,
         Mat< std::complex<typename T1::pod_type> >& lvecs,
         Mat< std::complex<typename T1::pod_type> >& rvecs,
  const Base<typename T1::pod_type,T1>&              expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type T;
    
    Mat<T> X = expr.get_ref();
    
    arma_conform_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
    
    arma_conform_assert_blas_size(X);
    
    if(X.is_empty())  { vals.reset(); lvecs.reset(); rvecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && X.internal_has_nonfinite())  { return false; }
    
    vals.set_size(X.n_rows, 1);
    
    lvecs.set_size(X.n_rows, X.n_rows);
    rvecs.set_size(X.n_rows, X.n_rows);
    
    Mat<T> ltmp(X.n_rows, X.n_rows, arma_nozeros_indicator());
    Mat<T> rtmp(X.n_rows, X.n_rows, arma_nozeros_indicator());
    
    char     bal   = 'B';
    char     jobvl = 'V';
    char     jobvr = 'V';
    char     sense = 'N';
    blas_int N     = blas_int(X.n_rows);
    blas_int ldvl  = blas_int(ltmp.n_rows);
    blas_int ldvr  = blas_int(rtmp.n_rows);
    blas_int ilo   = blas_int(0);
    blas_int ihi   = blas_int(0);
    T        abnrm = T(0);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1), blas_int(3*N))
    blas_int info  = blas_int(0);
    
    podarray<T>  scale(X.n_rows);
    podarray<T> rconde(X.n_rows);
    podarray<T> rcondv(X.n_rows);
    
    podarray<T>         work( static_cast<uword>(lwork) );
    podarray<blas_int> iwork( uword(1) );  // iwork not used by lapack::geevx() as sense = 'N'
    
    podarray<T> vals_real(X.n_rows);
    podarray<T> vals_imag(X.n_rows);
    
    arma_debug_print("lapack::geevx() -- START");
    lapack::geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, iwork.memptr(), &info);
    arma_debug_print("lapack::geevx() -- END");
    
    if(info != 0)  { return false; }
    
    arma_debug_print("reformatting eigenvalues and eigenvectors");
    
    std::complex<T>* vals_mem = vals.memptr();
    
    for(uword i=0; i < X.n_rows; ++i)  { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); }
    
    for(uword j=0; j < X.n_rows; ++j)
      {
      if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
        {
        for(uword i=0; i < X.n_rows; ++i)
          {
          lvecs.at(i,j)   = std::complex<T>( ltmp.at(i,j),  ltmp.at(i,j+1) );
          lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) );
          rvecs.at(i,j)   = std::complex<T>( rtmp.at(i,j),  rtmp.at(i,j+1) );
          rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) );
          }
        ++j;
        }
      else
        {
        for(uword i=0; i<X.n_rows; ++i)
          {
          lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0));
          rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0));
          }
        }
      }
    
    return true;
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(lvecs);
    arma_ignore(rvecs);
    arma_ignore(expr);
    arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! two-sided eigen decomposition of general square matrix (complex, balance given matrix)
template<typename T1>
inline
bool
auxlib::eig_gen_twosided_balance
  (
         Mat< std::complex<typename T1::pod_type> >&      vals,
         Mat< std::complex<typename T1::pod_type> >&     lvecs,
         Mat< std::complex<typename T1::pod_type> >&     rvecs,
  const Base< std::complex<typename T1::pod_type>, T1 >& expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    Mat<eT> X = expr.get_ref();
    
    arma_conform_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
    
    arma_conform_assert_blas_size(X);
    
    if(X.is_empty())  { vals.reset(); lvecs.reset(); rvecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && X.internal_has_nonfinite())  { return false; }
    
    vals.set_size(X.n_rows, 1);
    
    lvecs.set_size(X.n_rows, X.n_rows);
    rvecs.set_size(X.n_rows, X.n_rows);
    
    char     bal   = 'B';
    char     jobvl = 'V';
    char     jobvr = 'V';
    char     sense = 'N';
    blas_int N     = blas_int(X.n_rows);
    blas_int ldvl  = blas_int(lvecs.n_rows);
    blas_int ldvr  = blas_int(rvecs.n_rows);
    blas_int ilo   = blas_int(0);
    blas_int ihi   = blas_int(0);
    T        abnrm = T(0);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1), blas_int(2*N))
    blas_int info  = blas_int(0);
    
    podarray<T>  scale(X.n_rows);
    podarray<T> rconde(X.n_rows);
    podarray<T> rcondv(X.n_rows);
    
    podarray<eT>  work( static_cast<uword>(lwork) );
    podarray< T> rwork( static_cast<uword>(2*N)   );
    
    arma_debug_print("lapack::cx_geevx() -- START");
    lapack::cx_geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
    arma_debug_print("lapack::cx_geevx() -- END");
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(lvecs);
    arma_ignore(rvecs);
    arma_ignore(expr);
    arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigendecomposition of general square matrix pair (real)
template<typename T1, typename T2>
inline
bool
auxlib::eig_pair
  (
        Mat< std::complex<typename T1::pod_type> >& vals,
        Mat< std::complex<typename T1::pod_type> >& vecs,
  const bool                                        vecs_on,
  const Base<typename T1::pod_type,T1>&             A_expr,
  const Base<typename T1::pod_type,T2>&             B_expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type  T;
    typedef std::complex<T>       eT;
    
    Mat<T> A(A_expr.get_ref());
    Mat<T> B(B_expr.get_ref());
    
    arma_conform_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" );
    
    arma_conform_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" );
    
    arma_conform_assert_blas_size(A);
    
    if(A.is_empty())  { vals.reset(); vecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    if(arma_config::check_nonfinite && B.internal_has_nonfinite())  { return false; }
    
    vals.set_size(A.n_rows, 1);
    
    Mat<T> tmp(1, 1, arma_nozeros_indicator());
    
    if(vecs_on)
      {
      vecs.set_size(A.n_rows, A.n_rows);
       tmp.set_size(A.n_rows, A.n_rows);
      }
    
    podarray<T> junk(1);
    
    char     jobvl = 'N';
    char     jobvr = (vecs_on) ? 'V' : 'N';
    blas_int N     = blas_int(A.n_rows);
    T*       vl    = junk.memptr();
    T*       vr    = (vecs_on) ? tmp.memptr() : junk.memptr();
    blas_int ldvl  = blas_int(1);
    blas_int ldvr  = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1), 8*N)
    blas_int info  = 0;
    
    podarray<T> alphar(A.n_rows);
    podarray<T> alphai(A.n_rows);
    podarray<T>   beta(A.n_rows);
    
    podarray<T> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::ggev()");
    lapack::ggev(&jobvl, &jobvr, &N, A.memptr(), &N,  B.memptr(), &N, alphar.memptr(), alphai.memptr(), beta.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("reformatting eigenvalues and eigenvectors");
    
          eT*   vals_mem =   vals.memptr();
    const  T* alphar_mem = alphar.memptr();
    const  T* alphai_mem = alphai.memptr();
    const  T*   beta_mem =   beta.memptr();
    
    bool beta_has_zero = false;
    
    for(uword j=0; j<A.n_rows; ++j)
      {
      const T alphai_val = alphai_mem[j];
      const T   beta_val =   beta_mem[j];
      
      const T re = alphar_mem[j] / beta_val;
      const T im = alphai_val    / beta_val;
      
      beta_has_zero = (beta_has_zero || (beta_val == T(0)));
      
      vals_mem[j] = std::complex<T>(re, im);
      
      if( (alphai_val > T(0)) && (j < (A.n_rows-1)) )
        {
        ++j;
        vals_mem[j] = std::complex<T>(re,-im);  // force exact conjugate
        }
      }
    
    if(beta_has_zero)  { arma_warn(1, "eig_pair(): given matrices appear ill-conditioned"); }
    
    if(vecs_on)
      {
      for(uword j=0; j<A.n_rows; ++j)
        {
        if( (j < (A.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
          {
          for(uword i=0; i<A.n_rows; ++i)
            {
            vecs.at(i,j)   = std::complex<T>( tmp.at(i,j),  tmp.at(i,j+1) );
            vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) );
            }
          
          ++j;
          }
        else
          {
          for(uword i=0; i<A.n_rows; ++i)
            {
            vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0));
            }
          }
        }
      }
    
    return true;
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(vecs);
    arma_ignore(vecs_on);
    arma_ignore(A_expr);
    arma_ignore(B_expr);
    arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigendecomposition of general square matrix pair (complex)
template<typename T1, typename T2>
inline
bool
auxlib::eig_pair
  (
        Mat< std::complex<typename T1::pod_type> >&      vals,
        Mat< std::complex<typename T1::pod_type> >&      vecs,
  const bool                                             vecs_on,
  const Base< std::complex<typename T1::pod_type>, T1 >& A_expr,
  const Base< std::complex<typename T1::pod_type>, T2 >& B_expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    Mat<eT> A(A_expr.get_ref());
    Mat<eT> B(B_expr.get_ref());
    
    arma_conform_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" );
    
    arma_conform_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" );
    
    arma_conform_assert_blas_size(A);
    
    if(A.is_empty())  { vals.reset(); vecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    if(arma_config::check_nonfinite && B.internal_has_nonfinite())  { return false; }
    
    vals.set_size(A.n_rows, 1);
    
    if(vecs_on)  { vecs.set_size(A.n_rows, A.n_rows); }
    
    podarray<eT> junk(1);
    
    char     jobvl = 'N';
    char     jobvr = (vecs_on) ? 'V' : 'N';
    blas_int N     = blas_int(A.n_rows);
    eT*      vl    = junk.memptr();
    eT*      vr    = (vecs_on) ? vecs.memptr() : junk.memptr();
    blas_int ldvl  = blas_int(1);
    blas_int ldvr  = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1),2*N)
    blas_int info  = 0;
    
    podarray<eT> alpha(A.n_rows);
    podarray<eT>  beta(A.n_rows);
    
    podarray<eT>  work( static_cast<uword>(lwork) );
    podarray<T>  rwork( static_cast<uword>(8*N)   );
    
    arma_debug_print("lapack::cx_ggev()");
    lapack::cx_ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alpha.memptr(), beta.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, rwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
          eT*   vals_mem =  vals.memptr();
    const eT*  alpha_mem = alpha.memptr();
    const eT*   beta_mem =  beta.memptr();
    
    const std::complex<T> zero(T(0), T(0));
    
    bool beta_has_zero = false;
    
    for(uword i=0; i<A.n_rows; ++i)
      {
      const eT& beta_val = beta_mem[i];
      
      vals_mem[i] = alpha_mem[i] / beta_val;
      
      beta_has_zero = (beta_has_zero || (beta_val == zero));
      }
    
    if(beta_has_zero)  { arma_warn(1, "eig_pair(): given matrices appear ill-conditioned"); }
    
    return true;
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(vecs);
    arma_ignore(vecs_on);
    arma_ignore(A_expr);
    arma_ignore(B_expr);
    arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! two-sided eigendecomposition of general square matrix pair (real)
template<typename T1, typename T2>
inline
bool
auxlib::eig_pair_twosided
  (
        Mat< std::complex<typename T1::pod_type> >&  vals,
        Mat< std::complex<typename T1::pod_type> >& lvecs,
        Mat< std::complex<typename T1::pod_type> >& rvecs,
  const Base<typename T1::pod_type,T1>&             A_expr,
  const Base<typename T1::pod_type,T2>&             B_expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type  T;
    typedef std::complex<T>       eT;
    
    Mat<T> A(A_expr.get_ref());
    Mat<T> B(B_expr.get_ref());
    
    arma_conform_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" );
    
    arma_conform_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" );
    
    arma_conform_assert_blas_size(A);
    
    if(A.is_empty())  { vals.reset(); lvecs.reset(); rvecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    if(arma_config::check_nonfinite && B.internal_has_nonfinite())  { return false; }
    
    vals.set_size(A.n_rows, 1);
    
    lvecs.set_size(A.n_rows, A.n_rows);
    rvecs.set_size(A.n_rows, A.n_rows);
    
    Mat<T> ltmp(A.n_rows, A.n_rows, arma_nozeros_indicator());
    Mat<T> rtmp(A.n_rows, A.n_rows, arma_nozeros_indicator());
    
    char     jobvl = 'V';
    char     jobvr = 'V';
    blas_int N     = blas_int(A.n_rows);
    blas_int ldvl  = blas_int(ltmp.n_rows);
    blas_int ldvr  = blas_int(rtmp.n_rows);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1), 8*N)
    blas_int info  = 0;
    
    podarray<T> alphar(A.n_rows);
    podarray<T> alphai(A.n_rows);
    podarray<T>   beta(A.n_rows);
    
    podarray<T> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::ggev()");
    lapack::ggev(&jobvl, &jobvr, &N, A.memptr(), &N,  B.memptr(), &N, alphar.memptr(), alphai.memptr(), beta.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("reformatting eigenvalues and eigenvectors");
    
          eT*   vals_mem =   vals.memptr();
    const  T* alphar_mem = alphar.memptr();
    const  T* alphai_mem = alphai.memptr();
    const  T*   beta_mem =   beta.memptr();
    
    bool beta_has_zero = false;
    
    for(uword j=0; j<A.n_rows; ++j)
      {
      const T alphai_val = alphai_mem[j];
      const T   beta_val =   beta_mem[j];
      
      const T re = alphar_mem[j] / beta_val;
      const T im = alphai_val    / beta_val;
      
      beta_has_zero = (beta_has_zero || (beta_val == T(0)));
      
      vals_mem[j] = std::complex<T>(re, im);
      
      if( (alphai_val > T(0)) && (j < (A.n_rows-1)) )
        {
        ++j;
        vals_mem[j] = std::complex<T>(re,-im);  // force exact conjugate
        }
      }
    
    if(beta_has_zero)  { arma_warn(1, "eig_pair(): given matrices appear ill-conditioned"); }
    
    for(uword j=0; j < A.n_rows; ++j)
      {
      if( (j < (A.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
        {
        for(uword i=0; i < A.n_rows; ++i)
          {
          lvecs.at(i,j)   = std::complex<T>( ltmp.at(i,j),  ltmp.at(i,j+1) );
          lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) );
          rvecs.at(i,j)   = std::complex<T>( rtmp.at(i,j),  rtmp.at(i,j+1) );
          rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) );
          }
        ++j;
        }
      else
        {
        for(uword i=0; i<A.n_rows; ++i)
          {
          lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0));
          rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0));
          }
        }
      }
    
    return true;
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(lvecs);
    arma_ignore(rvecs);
    arma_ignore(A_expr);
    arma_ignore(B_expr);
    arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! two-sided eigendecomposition of general square matrix pair (complex)
template<typename T1, typename T2>
inline
bool
auxlib::eig_pair_twosided
  (
        Mat< std::complex<typename T1::pod_type> >&       vals,
        Mat< std::complex<typename T1::pod_type> >&      lvecs,
        Mat< std::complex<typename T1::pod_type> >&      rvecs,
  const Base< std::complex<typename T1::pod_type>, T1 >& A_expr,
  const Base< std::complex<typename T1::pod_type>, T2 >& B_expr
  )
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    Mat<eT> A(A_expr.get_ref());
    Mat<eT> B(B_expr.get_ref());
    
    arma_conform_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" );
    
    arma_conform_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" );
    
    arma_conform_assert_blas_size(A);
    
    if(A.is_empty())  { vals.reset(); lvecs.reset(); rvecs.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    if(arma_config::check_nonfinite && B.internal_has_nonfinite())  { return false; }
    
    vals.set_size(A.n_rows, 1);
    
    lvecs.set_size(A.n_rows, A.n_rows);
    rvecs.set_size(A.n_rows, A.n_rows);
    
    char     jobvl = 'V';
    char     jobvr = 'V';
    blas_int N     = blas_int(A.n_rows);
    blas_int ldvl  = blas_int(lvecs.n_rows);
    blas_int ldvr  = blas_int(rvecs.n_rows);
    blas_int lwork = 64*N;  // lwork_min = (std::max)(blas_int(1),2*N)
    blas_int info  = 0;
    
    podarray<eT> alpha(A.n_rows);
    podarray<eT>  beta(A.n_rows);
    
    podarray<eT>  work( static_cast<uword>(lwork) );
    podarray<T>  rwork( static_cast<uword>(8*N)   );
    
    arma_debug_print("lapack::cx_ggev()");
    lapack::cx_ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alpha.memptr(), beta.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, work.memptr(), &lwork, rwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
          eT*   vals_mem =  vals.memptr();
    const eT*  alpha_mem = alpha.memptr();
    const eT*   beta_mem =  beta.memptr();
    
    const std::complex<T> zero(T(0), T(0));
    
    bool beta_has_zero = false;
    
    for(uword i=0; i<A.n_rows; ++i)
      {
      const eT& beta_val = beta_mem[i];
      
      vals_mem[i] = alpha_mem[i] / beta_val;
      
      beta_has_zero = (beta_has_zero || (beta_val == zero));
      }
    
    if(beta_has_zero)  { arma_warn(1, "eig_pair(): given matrices appear ill-conditioned"); }
    
    return true;
    }
  #else
    {
    arma_ignore(vals);
    arma_ignore(lvecs);
    arma_ignore(rvecs);
    arma_ignore(A_expr);
    arma_ignore(B_expr);
    arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigenvalues of a symmetric real matrix
template<typename eT>
inline
bool
auxlib::eig_sym(Col<eT>& eigval, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_check( (A.is_square() == false), "eig_sym(): given matrix must be square sized" );
    
    if(A.is_empty())  { eigval.reset(); return true; }
    
    if((arma_config::check_conform) && (auxlib::rudimentary_sym_check(A) == false))
      {
      arma_warn(1, "eig_sym(): given matrix is not symmetric");
      }
    
    if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(A))  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    eigval.set_size(A.n_rows);
    
    char jobz  = 'N';
    char uplo  = 'U';
    
    blas_int N     = blas_int(A.n_rows);
    blas_int lwork = (64+2)*N;  // lwork_min = (std::max)(blas_int(1), 3*N-1)
    blas_int info  = 0;
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::syev()");
    lapack::syev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(eigval);
    arma_ignore(A);
    arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigenvalues of a hermitian complex matrix
template<typename T>
inline
bool
auxlib::eig_sym(Col<T>& eigval, Mat< std::complex<T> >& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    arma_conform_check( (A.is_square() == false), "eig_sym(): given matrix must be square sized" );
    
    if(A.is_empty())  { eigval.reset(); return true; }
    
    if((arma_config::check_conform) && (auxlib::rudimentary_sym_check(A) == false))
      {
      arma_warn(1, "eig_sym(): given matrix is not hermitian");
      }
    
    if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(A))  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    eigval.set_size(A.n_rows);
    
    char jobz  = 'N'; 
    char uplo  = 'U';
    
    blas_int N     = blas_int(A.n_rows);
    blas_int lwork = (64+1)*N;  // lwork_min = (std::max)(blas_int(1), 2*N-1)
    blas_int info  = 0;
    
    podarray<eT>  work( static_cast<uword>(lwork) );
    podarray<T>  rwork( static_cast<uword>( (std::max)(blas_int(1), 3*N) ) );
    
    arma_debug_print("lapack::heev()");
    lapack::heev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(eigval);
    arma_ignore(A);
    arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigenvalues and eigenvectors of a symmetric real matrix
template<typename eT>
inline
bool
auxlib::eig_sym(Col<eT>& eigval, Mat<eT>& eigvec, const Mat<eT>& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" );
    
    if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X))  { return false; }
    
    eigvec = X;
    
    if(eigvec.is_empty())  { eigval.reset(); eigvec.reset(); return true; }
    
    arma_conform_assert_blas_size(eigvec);
    
    eigval.set_size(eigvec.n_rows);
    
    char jobz  = 'V';
    char uplo  = 'U';
    
    blas_int N     = blas_int(eigvec.n_rows);
    blas_int lwork = (64+2)*N;  // lwork_min = (std::max)(blas_int(1), 3*N-1)
    blas_int info  = 0;
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::syev()");
    lapack::syev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(eigval);
    arma_ignore(eigvec);
    arma_ignore(X);
    arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigenvalues and eigenvectors of a hermitian complex matrix
template<typename T>
inline
bool
auxlib::eig_sym(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Mat< std::complex<T> >& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    arma_conform_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" );
    
    if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X))  { return false; }
    
    eigvec = X;
    
    if(eigvec.is_empty())  { eigval.reset(); eigvec.reset(); return true; }
    
    arma_conform_assert_blas_size(eigvec);
    
    eigval.set_size(eigvec.n_rows);
    
    char jobz  = 'V';
    char uplo  = 'U';
    
    blas_int N     = blas_int(eigvec.n_rows);
    blas_int lwork = (64+1)*N;  // lwork_min = (std::max)(blas_int(1), 2*N-1)
    blas_int info  = 0;
    
    podarray<eT>  work( static_cast<uword>(lwork) );
    podarray<T>  rwork( static_cast<uword>((std::max)(blas_int(1), 3*N)) );
    
    arma_debug_print("lapack::heev()");
    lapack::heev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(eigval);
    arma_ignore(eigvec);
    arma_ignore(X);
    arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigenvalues and eigenvectors of a symmetric real matrix (divide and conquer algorithm)
template<typename eT>
inline
bool
auxlib::eig_sym_dc(Col<eT>& eigval, Mat<eT>& eigvec, const Mat<eT>& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" );
    
    if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X))  { return false; }
    
    eigvec = X;
    
    if(eigvec.is_empty())  { eigval.reset(); eigvec.reset(); return true; }
    
    arma_conform_assert_blas_size(eigvec);
    
    eigval.set_size(eigvec.n_rows);
    
    char jobz = 'V';
    char uplo = 'U';
    
    blas_int N          = blas_int(eigvec.n_rows);
    blas_int lwork_min  = 1 + 6*N + 2*(N*N);
    blas_int liwork_min = 3 + 5*N;
    blas_int info       = 0;
    
    blas_int  lwork_proposed = 0;
    blas_int liwork_proposed = 0;
    
    if(N >= 32)
      {
      eT        work_query[2] = {};
      blas_int iwork_query[2] = {};
      
      blas_int  lwork_query = -1;
      blas_int liwork_query = -1;
      
      arma_debug_print("lapack::syevd()");
      lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), &work_query[0], &lwork_query, &iwork_query[0], &liwork_query, &info);
      
      if(info != 0)  { return false; }
      
       lwork_proposed = static_cast<blas_int>( work_query[0] );
      liwork_proposed = iwork_query[0];
      }
    
    blas_int  lwork_final = (std::max)( lwork_proposed,  lwork_min);
    blas_int liwork_final = (std::max)(liwork_proposed, liwork_min);
    
    podarray<eT>        work( static_cast<uword>( lwork_final) );
    podarray<blas_int> iwork( static_cast<uword>(liwork_final) ); 
    
    arma_debug_print("lapack::syevd()");
    lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork_final, iwork.memptr(), &liwork_final, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(eigval);
    arma_ignore(eigvec);
    arma_ignore(X);
    arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! eigenvalues and eigenvectors of a hermitian complex matrix (divide and conquer algorithm)
template<typename T>
inline
bool
auxlib::eig_sym_dc(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Mat< std::complex<T> >& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    arma_conform_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" );
    
    if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X))  { return false; }
    
    eigvec = X;
    
    if(eigvec.is_empty())  { eigval.reset(); eigvec.reset(); return true; }
    
    arma_conform_assert_blas_size(eigvec);
    
    eigval.set_size(eigvec.n_rows);
    
    char jobz  = 'V';
    char uplo  = 'U';
    
    blas_int N          = blas_int(eigvec.n_rows);
    blas_int lwork_min  = 2*N + N*N;
    blas_int lrwork_min = 1 + 5*N + 2*(N*N);
    blas_int liwork_min = 3 + 5*N;
    blas_int info       = 0;
    
    blas_int  lwork_proposed = 0;
    blas_int lrwork_proposed = 0;
    blas_int liwork_proposed = 0;
    
    if(N >= 32)
      {
      eT        work_query[2] = {};
      T        rwork_query[2] = {};
      blas_int iwork_query[2] = {};
      
      blas_int  lwork_query = -1;
      blas_int lrwork_query = -1;
      blas_int liwork_query = -1;
      
      arma_debug_print("lapack::heevd()");
      lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), &work_query[0], &lwork_query, &rwork_query[0], &lrwork_query, &iwork_query[0], &liwork_query, &info);
      
      if(info != 0)  { return false; }
      
       lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      lrwork_proposed = static_cast<blas_int>( rwork_query[0] );
      liwork_proposed = iwork_query[0];
      }
    
    blas_int  lwork_final = (std::max)( lwork_proposed,  lwork_min);
    blas_int lrwork_final = (std::max)(lrwork_proposed, lrwork_min);
    blas_int liwork_final = (std::max)(liwork_proposed, liwork_min);
    
    podarray<eT>        work( static_cast<uword>( lwork_final) );
    podarray< T>       rwork( static_cast<uword>(lrwork_final) );
    podarray<blas_int> iwork( static_cast<uword>(liwork_final) ); 
    
    arma_debug_print("lapack::heevd()");
    lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork_final, rwork.memptr(), &lrwork_final, iwork.memptr(), &liwork_final, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(eigval);
    arma_ignore(eigvec);
    arma_ignore(X);
    arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::chol_simple(Mat<eT>& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(X);
    
    char      uplo = 'U';
    blas_int  n    = blas_int(X.n_rows);
    blas_int  info = 0;
    
    arma_debug_print("lapack::potrf()");
    lapack::potrf(&uplo, &n, X.memptr(), &n, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(X);
    
    arma_stop_logic_error("chol(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::chol(Mat<eT>& X, const uword layout)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(X);
    
    char      uplo = (layout == 0) ? 'U' : 'L';
    blas_int  n    = blas_int(X.n_rows);
    blas_int  info = 0;
    
    arma_debug_print("lapack::potrf()");
    lapack::potrf(&uplo, &n, X.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    X = (layout == 0) ? trimatu(X) : trimatl(X);  // trimatu() and trimatl() return the same type
    
    return true;
    }
  #else
    {
    arma_ignore(X);
    arma_ignore(layout);
    
    arma_stop_logic_error("chol(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::chol_band(Mat<eT>& X, const uword KD, const uword layout)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    const uword N = X.n_rows;
    
    const uword KL = (layout == 0) ? uword(0) : KD;
    const uword KU = (layout == 0) ? KD       : uword(0);
    
    Mat<eT> AB;
    band_helper::compress(AB, X, KL, KU, false);
    
    arma_conform_assert_blas_size(AB);
    
    char     uplo = (layout == 0) ? 'U' : 'L';
    blas_int n    = blas_int(N);
    blas_int kd   = blas_int(KD);
    blas_int ldab = blas_int(AB.n_rows);
    blas_int info = 0;
    
    arma_debug_print("lapack::pbtrf()");
    lapack::pbtrf(&uplo, &n, &kd, AB.memptr(), &ldab, &info);
    
    if(info != 0)  { return false; }
    
    band_helper::uncompress(X, AB, KL, KU, false);
    
    return true;
    }
  #else
    {
    arma_ignore(X);
    arma_ignore(KD);
    arma_ignore(layout);
    
    arma_stop_logic_error("chol(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::chol_pivot(Mat<eT>& X, Mat<uword>& P, const uword layout)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename get_pod_type<eT>::result T;
    
    arma_conform_assert_blas_size(X);
    
    char     uplo = (layout == 0) ? 'U' : 'L';
    blas_int n    = blas_int(X.n_rows);
    blas_int rank = 0;
    T        tol  = T(-1);
    blas_int info = 0;
    
    podarray<blas_int> ipiv(  X.n_rows);
    podarray<T>        work(2*X.n_rows);
    
    ipiv.zeros();
    
    arma_debug_print("lapack::pstrf()");
    lapack::pstrf(&uplo, &n, X.memptr(), &n, ipiv.memptr(), &rank, &tol, work.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    X = (layout == 0) ? trimatu(X) : trimatl(X);  // trimatu() and trimatl() return the same type
    
    P.set_size(X.n_rows, 1);
    
    for(uword i=0; i < X.n_rows; ++i)
      {
      P[i] = uword(ipiv[i] - 1);  // take into account that Fortran counts from 1
      }
    
    return true;
    }
  #else
    {
    arma_ignore(X);
    arma_ignore(P);
    arma_ignore(layout);
    
    arma_stop_logic_error("chol(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//
// hessenberg decomposition
template<typename eT, typename T1>
inline
bool
auxlib::hess(Mat<eT>& H, const Base<eT,T1>& X, Col<eT>& tao)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    H = X.get_ref();
    
    arma_conform_check( (H.is_square() == false), "hess(): given matrix must be square sized" );
    
    if(H.is_empty())  { return true; }
    
    arma_conform_assert_blas_size(H);
    
    if(H.n_rows > 2)
      {
      tao.set_size(H.n_rows-1);
      
      blas_int  n      = blas_int(H.n_rows);
      blas_int  ilo    = 1;
      blas_int  ihi    = blas_int(H.n_rows);
      blas_int  lda    = blas_int(H.n_rows);
      blas_int  lwork  = blas_int(H.n_rows) * 64;
      blas_int  info   = 0;
      
      podarray<eT> work(static_cast<uword>(lwork));
      
      arma_debug_print("lapack::gehrd()");
      lapack::gehrd(&n, &ilo, &ihi, H.memptr(), &lda, tao.memptr(), work.memptr(), &lwork, &info);
      
      return (info == 0);
      }
    
    return true;
    }
  #else
    {
    arma_ignore(H);
    arma_ignore(X);
    arma_ignore(tao);
    arma_stop_logic_error("hess(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT, typename T1>
inline
bool
auxlib::qr(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    R = X.get_ref();
    
    const uword R_n_rows = R.n_rows;
    const uword R_n_cols = R.n_cols;
    
    if(R.is_empty())  { Q.eye(R_n_rows, R_n_rows); return true; }
    
    arma_conform_assert_blas_size(R);
    
    blas_int m         = static_cast<blas_int>(R_n_rows);
    blas_int n         = static_cast<blas_int>(R_n_cols);
    blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n));  // take into account requirements of geqrf() _and_ orgqr()/ungqr()
    blas_int k         = (std::min)(m,n);
    blas_int info      = 0;
    
    podarray<eT> tau( static_cast<uword>(k) );
    
    eT        work_query[2] = {};
    blas_int lwork_query    = -1;
    
    arma_debug_print("lapack::geqrf()");
    lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info);
    
    if(info != 0)  { return false; }
    
    blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
    blas_int lwork_final    = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::geqrf()");
    lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
    
    if(info != 0)  { return false; }
    
    Q.set_size(R_n_rows, R_n_rows);
    
    arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) );
    
    //
    // construct R
    
    for(uword col=0; col < R_n_cols; ++col)
      {
      for(uword row=(col+1); row < R_n_rows; ++row)
        {
        R.at(row,col) = eT(0);
        }
      }
    
    
    if( (is_float<eT>::value) || (is_double<eT>::value) )
      {
      arma_debug_print("lapack::orgqr()");
      lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
      }
    else
    if( (is_cx_float<eT>::value) || (is_cx_double<eT>::value) )
      {
      arma_debug_print("lapack::ungqr()");
      lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
      }
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(Q);
    arma_ignore(R);
    arma_ignore(X);
    arma_stop_logic_error("qr(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT, typename T1>
inline
bool 
auxlib::qr_econ(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    if(is_Mat<T1>::value)
      {
      const unwrap<T1>   tmp(X.get_ref());
      const Mat<eT>& M = tmp.M;
      
      if(M.n_rows < M.n_cols)  { return auxlib::qr(Q, R, X); }
      }
    
    Q = X.get_ref();
    
    const uword Q_n_rows = Q.n_rows;
    const uword Q_n_cols = Q.n_cols;
    
    if( Q_n_rows <= Q_n_cols )  { return auxlib::qr(Q, R, Q); }
    
    if(Q.is_empty())  { Q.set_size(Q_n_rows, 0); R.set_size(0, Q_n_cols); return true; }
    
    arma_conform_assert_blas_size(Q);
    
    blas_int m         = static_cast<blas_int>(Q_n_rows);
    blas_int n         = static_cast<blas_int>(Q_n_cols);
    blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n));  // take into account requirements of geqrf() _and_ orgqr()/ungqr()
    blas_int k         = (std::min)(m,n);
    blas_int info      = 0;
    
    podarray<eT> tau( static_cast<uword>(k) );
    
    eT        work_query[2] = {};
    blas_int lwork_query    = -1;
    
    arma_debug_print("lapack::geqrf()");
    lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info);
    
    if(info != 0)  { return false; }
    
    blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
    blas_int lwork_final    = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::geqrf()");
    lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
    
    if(info != 0)  { return false; }
    
    R.zeros(Q_n_cols, Q_n_cols);
    
    //
    // construct R
    
    for(uword col=0; col < Q_n_cols; ++col)
      {
      for(uword row=0; row <= col; ++row)
        {
        R.at(row,col) = Q.at(row,col);
        }
      }
    
    if( (is_float<eT>::value) || (is_double<eT>::value) )
      {
      arma_debug_print("lapack::orgqr()");
      lapack::orgqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
      }
    else
    if( (is_cx_float<eT>::value) || (is_cx_double<eT>::value) )
      {
      arma_debug_print("lapack::ungqr()");
      lapack::ungqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
      }
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(Q);
    arma_ignore(R);
    arma_ignore(X);
    arma_stop_logic_error("qr_econ(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT, typename T1>
inline
bool
auxlib::qr_pivot(Mat<eT>& Q, Mat<eT>& R, Mat<uword>& P, const Base<eT,T1>& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    R = X.get_ref();
    
    const uword R_n_rows = R.n_rows;
    const uword R_n_cols = R.n_cols;
    
    if(R.is_empty())
      {
      Q.eye(R_n_rows, R_n_rows);
      
      P.set_size(R_n_cols, 1);
      
      for(uword col=0; col < R_n_cols; ++col)  { P.at(col) = col; }
      
      return true;
      }
    
    arma_conform_assert_blas_size(R);
    
    blas_int m         = static_cast<blas_int>(R_n_rows);
    blas_int n         = static_cast<blas_int>(R_n_cols);
    blas_int lwork_min = (std::max)(blas_int(3*n + 1), (std::max)(m,n));  // take into account requirements of geqp3() and orgqr()
    blas_int k         = (std::min)(m,n);
    blas_int info      = 0;
    
    podarray<eT>        tau( static_cast<uword>(k) );
    podarray<blas_int> jpvt( R_n_cols );
    
    jpvt.zeros();
    
    eT        work_query[2] = {};
    blas_int lwork_query    = -1;
    
    arma_debug_print("lapack::geqp3()");
    lapack::geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), &work_query[0], &lwork_query, &info);
    
    if(info != 0)  { return false; }
    
    blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
    blas_int lwork_final    = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::geqp3()");
    lapack::geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), work.memptr(), &lwork_final, &info);
    
    if(info != 0)  { return false; }
    
    Q.set_size(R_n_rows, R_n_rows);
    
    arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) );
    
    //
    // construct R and P
    
    P.set_size(R_n_cols, 1);
    
    for(uword col=0; col < R_n_cols; ++col)
      {
      for(uword row=(col+1); row < R_n_rows; ++row)  { R.at(row,col) = eT(0); }
      
      P.at(col) = jpvt[col] - 1;  // take into account that Fortran counts from 1
      }
    
    arma_debug_print("lapack::orgqr()");
    lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(Q);
    arma_ignore(R);
    arma_ignore(P);
    arma_ignore(X);
    arma_stop_logic_error("qr(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T, typename T1>
inline
bool
auxlib::qr_pivot(Mat< std::complex<T> >& Q, Mat< std::complex<T> >& R, Mat<uword>& P, const Base<std::complex<T>,T1>& X)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    R = X.get_ref();
    
    const uword R_n_rows = R.n_rows;
    const uword R_n_cols = R.n_cols;
    
    if(R.is_empty())
      {
      Q.eye(R_n_rows, R_n_rows);
      
      P.set_size(R_n_cols, 1);
      
      for(uword col=0; col < R_n_cols; ++col)  { P.at(col) = col; }
      
      return true;
      }
    
    arma_conform_assert_blas_size(R);
    
    blas_int m         = static_cast<blas_int>(R_n_rows);
    blas_int n         = static_cast<blas_int>(R_n_cols);
    blas_int lwork_min = (std::max)(blas_int(3*n + 1), (std::max)(m,n));  // take into account requirements of geqp3() and ungqr()
    blas_int k         = (std::min)(m,n);
    blas_int info      = 0;
    
    podarray<eT>         tau( static_cast<uword>(k) );
    podarray< T>       rwork( 2*R_n_cols );
    podarray<blas_int>  jpvt( R_n_cols );
    
    jpvt.zeros();
    
    eT        work_query[2] = {};
    blas_int lwork_query    = -1;
    
    arma_debug_print("lapack::geqp3()");
    lapack::cx_geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), &work_query[0], &lwork_query, rwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
    blas_int lwork_final    = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::geqp3()");
    lapack::cx_geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), work.memptr(), &lwork_final, rwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    Q.set_size(R_n_rows, R_n_rows);
    
    arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) );
    
    //
    // construct R and P
    
    P.set_size(R_n_cols, 1);
    
    for(uword col=0; col < R_n_cols; ++col)
      {
      for(uword row=(col+1); row < R_n_rows; ++row)  { R.at(row,col) = eT(0); }
      
      P.at(col) = jpvt[col] - 1;  // take into account that Fortran counts from 1
      }
    
    arma_debug_print("lapack::ungqr()");
    lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(Q);
    arma_ignore(R);
    arma_ignore(P);
    arma_ignore(X);
    arma_stop_logic_error("qr(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::svd(Col<eT>& S, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    if(A.is_empty())  { S.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    Mat<eT> U(1, 1,        arma_nozeros_indicator());
    Mat<eT> V(1, A.n_cols, arma_nozeros_indicator());
    
    char jobu  = 'N';
    char jobvt = 'N';
    
    blas_int m         = blas_int(A.n_rows);
    blas_int n         = blas_int(A.n_cols);
    blas_int min_mn    = (std::min)(m,n);
    blas_int lda       = blas_int(A.n_rows);
    blas_int ldu       = blas_int(U.n_rows);
    blas_int ldvt      = blas_int(V.n_rows);
    blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) );
    blas_int info      = 0;
    
    S.set_size( static_cast<uword>(min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 1024)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::gesvd()");
      lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( work_query[0] );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::gesvd()");
    lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(S);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::svd(Col<T>& S, Mat< std::complex<T> >& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef std::complex<T> eT;
    
    if(A.is_empty())  { S.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    Mat<eT> U(1, 1,        arma_nozeros_indicator());
    Mat<eT> V(1, A.n_cols, arma_nozeros_indicator());
    
    char jobu  = 'N';
    char jobvt = 'N';
    
    blas_int  m         = blas_int(A.n_rows);
    blas_int  n         = blas_int(A.n_cols);
    blas_int  min_mn    = (std::min)(m,n);
    blas_int  lda       = blas_int(A.n_rows);
    blas_int  ldu       = blas_int(U.n_rows);
    blas_int  ldvt      = blas_int(V.n_rows);
    blas_int  lwork_min = (std::max)( blas_int(1), 2*min_mn+(std::max)(m,n) );
    blas_int  info      = 0;
    
    S.set_size( static_cast<uword>(min_mn) );
    
    podarray<T> rwork( static_cast<uword>(5*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 256)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;  // query to find optimum size of workspace
      
      arma_debug_print("lapack::cx_gesvd()");
      lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::cx_gesvd()");
    lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(S);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::svd(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    if(A.is_empty())  { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    U.set_size(A.n_rows, A.n_rows);
    V.set_size(A.n_cols, A.n_cols);
    
    char jobu  = 'A';
    char jobvt = 'A';
    
    blas_int  m         = blas_int(A.n_rows);
    blas_int  n         = blas_int(A.n_cols);
    blas_int  min_mn    = (std::min)(m,n);
    blas_int  lda       = blas_int(A.n_rows);
    blas_int  ldu       = blas_int(U.n_rows);
    blas_int  ldvt      = blas_int(V.n_rows);
    blas_int  lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) );
    blas_int  info      = 0;
    
    S.set_size( static_cast<uword>(min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 1024)
      {
      // query to find optimum size of workspace
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::gesvd()");
      lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( work_query[0] );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::gesvd()");
    lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info);
    
    if(info != 0)  { return false; }
    
    op_strans::apply_mat_inplace(V);
    
    return true;
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(V);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::svd(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef std::complex<T> eT;
    
    if(A.is_empty())  { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    U.set_size(A.n_rows, A.n_rows);
    V.set_size(A.n_cols, A.n_cols);
    
    char jobu  = 'A';
    char jobvt = 'A';
    
    blas_int  m         = blas_int(A.n_rows);
    blas_int  n         = blas_int(A.n_cols);
    blas_int  min_mn    = (std::min)(m,n);
    blas_int  lda       = blas_int(A.n_rows);
    blas_int  ldu       = blas_int(U.n_rows);
    blas_int  ldvt      = blas_int(V.n_rows);
    blas_int  lwork_min = (std::max)( blas_int(1), 2*min_mn + (std::max)(m,n) );
    blas_int  info      = 0;
    
    S.set_size( static_cast<uword>(min_mn) );
    
    podarray<T> rwork( static_cast<uword>(5*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 256)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;  // query to find optimum size of workspace
      
      arma_debug_print("lapack::cx_gesvd()");
      lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info);
      
      if(info != 0)  { return false; }

      lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::cx_gesvd()");
    lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    op_htrans::apply_mat_inplace(V);
    
    return true;
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(V);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::svd_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A, const char mode)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    if(A.is_empty())  { U.eye(); S.reset(); V.eye(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    blas_int m      = blas_int(A.n_rows);
    blas_int n      = blas_int(A.n_cols);
    blas_int min_mn = (std::min)(m,n);
    blas_int lda    = blas_int(A.n_rows);
    
    S.set_size( static_cast<uword>(min_mn) );
    
    blas_int ldu  = 0;
    blas_int ldvt = 0;
    
    char jobu  = char(0);
    char jobvt = char(0);
    
    if(mode == 'l')
      {
      jobu  = 'S';
      jobvt = 'N';
      
      ldu  = m;
      ldvt = 1;
      
      U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) );
      V.reset();
      }
    
    if(mode == 'r')
      {
      jobu  = 'N';
      jobvt = 'S';
      
      ldu = 1;
      ldvt = (std::min)(m,n);
      
      U.reset();
      V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) );
      }
    
    if(mode == 'b')
      {
      jobu  = 'S';
      jobvt = 'S';
      
      ldu  = m;
      ldvt = (std::min)(m,n);
      
      U.set_size( static_cast<uword>(ldu),  static_cast<uword>(min_mn) );
      V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n     ) );
      }
    
    
    blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) );
    blas_int info      = 0;
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 1024)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;  // query to find optimum size of workspace
      
      arma_debug_print("lapack::gesvd()");
      lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>(work_query[0]);
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::gesvd()");
    lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info);
    
    if(info != 0)  { return false; }
    
    op_strans::apply_mat_inplace(V);
    
    return true;
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(V);
    arma_ignore(A);
    arma_ignore(mode);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::svd_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A, const char mode)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef std::complex<T> eT;
    
    if(A.is_empty())  { U.eye(); S.reset(); V.eye(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    blas_int m      = blas_int(A.n_rows);
    blas_int n      = blas_int(A.n_cols);
    blas_int min_mn = (std::min)(m,n);
    blas_int lda    = blas_int(A.n_rows);
    
    S.set_size( static_cast<uword>(min_mn) );
    
    blas_int ldu  = 0;
    blas_int ldvt = 0;
    
    char jobu  = char(0);
    char jobvt = char(0);
    
    if(mode == 'l')
      {
      jobu  = 'S';
      jobvt = 'N';
      
      ldu  = m;
      ldvt = 1;
      
      U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) );
      V.reset();
      }
    
    if(mode == 'r')
      {
      jobu  = 'N';
      jobvt = 'S';
      
      ldu  = 1;
      ldvt = (std::min)(m,n);
      
      U.reset();
      V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) );
      }
    
    if(mode == 'b')
      {
      jobu  = 'S';
      jobvt = 'S';
      
      ldu  = m;
      ldvt = (std::min)(m,n);
      
      U.set_size( static_cast<uword>(ldu),  static_cast<uword>(min_mn) );
      V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n)      );
      }
    
    blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) );
    blas_int info      = 0;
    
    podarray<T> rwork( static_cast<uword>(5*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 256)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;  // query to find optimum size of workspace
      
      arma_debug_print("lapack::cx_gesvd()");
      lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::cx_gesvd()");
    lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    op_htrans::apply_mat_inplace(V);
    
    return true;
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(V);
    arma_ignore(A);
    arma_ignore(mode);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::svd_dc(Col<eT>& S, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    if(A.is_empty())  { S.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    Mat<eT> U(1, 1, arma_nozeros_indicator());
    Mat<eT> V(1, 1, arma_nozeros_indicator());
    
    char jobz = 'N';
    
    blas_int  m         = blas_int(A.n_rows);
    blas_int  n         = blas_int(A.n_cols);
    blas_int  min_mn    = (std::min)(m,n);
    blas_int  max_mn    = (std::max)(m,n);
    blas_int  lda       = blas_int(A.n_rows);
    blas_int  ldu       = blas_int(U.n_rows);
    blas_int  ldvt      = blas_int(V.n_rows);
    blas_int  lwork_min = 3*min_mn + (std::max)( max_mn, 7*min_mn );
    blas_int  info      = 0;
    
    S.set_size( static_cast<uword>(min_mn) );
    
    podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 1024)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = blas_int(-1);
      
      arma_debug_print("lapack::gesdd()");
      lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( work_query[0] );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::gesdd()");
    lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(S);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::svd_dc(Col<T>& S, Mat< std::complex<T> >& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef std::complex<T> eT;
    
    if(A.is_empty())  { S.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    Mat<eT> U(1, 1, arma_nozeros_indicator());
    Mat<eT> V(1, 1, arma_nozeros_indicator());
    
    char jobz = 'N';
    
    blas_int  m         = blas_int(A.n_rows);
    blas_int  n         = blas_int(A.n_cols);
    blas_int  min_mn    = (std::min)(m,n);
    blas_int  max_mn    = (std::max)(m,n);
    blas_int  lda       = blas_int(A.n_rows);
    blas_int  ldu       = blas_int(U.n_rows);
    blas_int  ldvt      = blas_int(V.n_rows);
    blas_int  lwork_min = 2*min_mn + max_mn;
    blas_int  info      = 0;
    
    S.set_size( static_cast<uword>(min_mn) );
    
    podarray<T>        rwork( static_cast<uword>(7*min_mn) );  // from LAPACK 3.8 docs: LAPACK <= v3.6 needs 7*mn
    podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 256)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = blas_int(-1);
      
      arma_debug_print("lapack::cx_gesdd()");
      lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::cx_gesdd()");
    lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(S);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::svd_dc(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    if(A.is_empty())  { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    U.set_size(A.n_rows, A.n_rows);
    V.set_size(A.n_cols, A.n_cols);
    
    char jobz = 'A';
    
    blas_int  m         = blas_int(A.n_rows);
    blas_int  n         = blas_int(A.n_cols);
    blas_int  min_mn    = (std::min)(m,n);
    blas_int  max_mn    = (std::max)(m,n);
    blas_int  lda       = blas_int(A.n_rows);
    blas_int  ldu       = blas_int(U.n_rows);
    blas_int  ldvt      = blas_int(V.n_rows);
    blas_int  lwork_min = 4*min_mn*min_mn + 6*min_mn + max_mn;  // as per LAPACK 3.8 and 3.12 docs; consistent with LAPACK 3.4 docs
    blas_int  info      = 0;
    
    S.set_size( static_cast<uword>(min_mn) );
    
    podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 1024)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = blas_int(-1);
      
      arma_debug_print("lapack::gesdd()");
      lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>(work_query[0]);
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::gesdd()");
    lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    op_strans::apply_mat_inplace(V);
    
    return true;
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(V);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::svd_dc(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef std::complex<T> eT;
    
    if(A.is_empty())  { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    U.set_size(A.n_rows, A.n_rows);
    V.set_size(A.n_cols, A.n_cols);
    
    char jobz = 'A';
    
    blas_int m         = blas_int(A.n_rows);
    blas_int n         = blas_int(A.n_cols);
    blas_int min_mn    = (std::min)(m,n);
    blas_int max_mn    = (std::max)(m,n);
    blas_int lda       = blas_int(A.n_rows);
    blas_int ldu       = blas_int(U.n_rows);
    blas_int ldvt      = blas_int(V.n_rows);
    blas_int lwork_min = min_mn*min_mn + 2*min_mn + max_mn;  // as per LAPACK 3.2, 3.4, 3.8, 3.12 docs
    blas_int lrwork    = min_mn * ((std::max)(5*min_mn+5, 2*max_mn + 2*min_mn+1));   // as per LAPACK 3.8 and 3.12 docs
    blas_int info      = 0;
    
    S.set_size( static_cast<uword>(min_mn) );
    
    podarray<T>        rwork( static_cast<uword>(lrwork  ) );
    podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 256)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = blas_int(-1);
      
      arma_debug_print("lapack::cx_gesdd()");
      lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::cx_gesdd()");
    lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    op_htrans::apply_mat_inplace(V);
    
    return true;
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(V);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::svd_dc_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    char jobz = 'S';
    
    blas_int m         = blas_int(A.n_rows);
    blas_int n         = blas_int(A.n_cols);
    blas_int min_mn    = (std::min)(m,n);
    blas_int lda       = blas_int(A.n_rows);
    blas_int ldu       = m;
    blas_int ldvt      = min_mn;
    blas_int lwork_min = 4*min_mn*min_mn + 7*min_mn;  // as per LAPACK 3.8 and 3.12 docs
    blas_int info      = 0;
    
    if(A.is_empty())
      {
      U.eye();
      S.reset();
      V.eye( static_cast<uword>(n), static_cast<uword>(min_mn) );
      return true;
      }
    
    S.set_size( static_cast<uword>(min_mn) );
    
    U.set_size( static_cast<uword>(m), static_cast<uword>(min_mn) );
    
    V.set_size( static_cast<uword>(min_mn), static_cast<uword>(n) );
    
    podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 1024)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = blas_int(-1);
      
      arma_debug_print("lapack::gesdd()");
      lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>(work_query[0]);
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::gesdd()");
    lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    op_strans::apply_mat_inplace(V);
    
    return true;
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(V);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T>
inline
bool
auxlib::svd_dc_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef std::complex<T> eT;
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    char jobz = 'S';
    
    blas_int m         = blas_int(A.n_rows);
    blas_int n         = blas_int(A.n_cols);
    blas_int min_mn    = (std::min)(m,n);
    blas_int max_mn    = (std::max)(m,n);
    blas_int lda       = blas_int(A.n_rows);
    blas_int ldu       = m;
    blas_int ldvt      = min_mn;
    blas_int lwork_min = min_mn*min_mn + 3*min_mn;  // as per LAPACK 3.12 docs
    blas_int lrwork    = min_mn * ((std::max)(5*min_mn+5, 2*max_mn + 2*min_mn+1));  // as per LAPACK 3.8 and 3.12 docs
    blas_int info      = 0;
    
    if(A.is_empty())
      {
      U.eye();
      S.reset();
      V.eye( static_cast<uword>(n), static_cast<uword>(min_mn) );
      return true;
      }
    
    S.set_size( static_cast<uword>(min_mn) );
    
    U.set_size( static_cast<uword>(m), static_cast<uword>(min_mn) );
    
    V.set_size( static_cast<uword>(min_mn), static_cast<uword>(n) );
    
    podarray<T>        rwork( static_cast<uword>(lrwork  ) );
    podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= 256)
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = blas_int(-1);
      
      arma_debug_print("lapack::cx_gesdd()");
      lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info);
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::cx_gesdd()");
    lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    op_htrans::apply_mat_inplace(V);
    
    return true;
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(V);
    arma_ignore(A);
    arma_stop_logic_error("svd(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via LU decomposition
template<typename T1>
inline
bool
auxlib::solve_square_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  out = B_expr.get_ref();
  
  const uword B_n_rows = out.n_rows;
  const uword B_n_cols = out.n_cols;
  
  arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
  
  if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    
    arma_conform_assert_blas_size(A);
    
    blas_int n    = blas_int(A.n_rows);  // assuming A is square
    blas_int lda  = blas_int(A.n_rows);
    blas_int ldb  = blas_int(B_n_rows);
    blas_int nrhs = blas_int(B_n_cols);
    blas_int info = blas_int(0);
    
    podarray<blas_int> ipiv(A.n_rows + 2);  // +2 for paranoia: some versions of Lapack might be trashing memory
    
    arma_debug_print("lapack::gesv()");
    lapack::gesv<eT>(&n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via LU decomposition with rcond estimate
template<typename T1>
inline
bool
auxlib::solve_square_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    typedef typename T1::pod_type   T;
    
    out_rcond = T(0);
    
    out = B_expr.get_ref();
    
    const uword B_n_rows = out.n_rows;
    const uword B_n_cols = out.n_cols;
    
    arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
    
    if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
    
    arma_conform_assert_blas_size(A);
    
    char     norm_id  = '1';
    char     trans    = 'N';
    blas_int n        = blas_int(A.n_rows);  // assuming A is square
    blas_int lda      = blas_int(A.n_rows);
    blas_int ldb      = blas_int(B_n_rows);
    blas_int nrhs     = blas_int(B_n_cols);
    blas_int info     = blas_int(0);
    T        norm_val = T(0);
    
    podarray<T>        junk(1);
    podarray<blas_int> ipiv(A.n_rows + 2);  // +2 for paranoia
    
    arma_debug_print("lapack::lange()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr());
    
    arma_debug_print("lapack::getrf()");
    lapack::getrf<eT>(&n, &n, A.memptr(), &n, ipiv.memptr(), &info);
    
    if(info != blas_int(0))  { return false; }
    
    arma_debug_print("lapack::getrs()");
    lapack::getrs<eT>(&trans, &n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
    
    if(info != blas_int(0))  { return false; }
    
    out_rcond = auxlib::lu_rcond<T>(A, norm_val);
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via LU decomposition with refinement (real matrices)
template<typename T1>
inline
bool
auxlib::solve_square_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type eT;
    
    // Mat<eT> B = B_expr.get_ref();  // B is overwritten by lapack::gesvx() if equilibrate is enabled
    
    quasi_unwrap<T1> UB(B_expr.get_ref());  // deliberately not declaring as const
    
    const Mat<eT>& UB_M_as_Mat = UB.M;  // so we don't confuse the ?: operator below
    
    const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out));
    
    Mat<eT> B_tmp;  if(use_copy)  { B_tmp = UB_M_as_Mat; }
    
    const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat;
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
      
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_rows, B.n_cols); return true; }
    
    arma_conform_assert_blas_size(A,B);
    
    out.set_size(A.n_rows, B.n_cols);
    
    char     fact  = (equilibrate) ? 'E' : 'N'; 
    char     trans = 'N';
    char     equed = char(0);
    blas_int n     = blas_int(A.n_rows);
    blas_int nrhs  = blas_int(B.n_cols);
    blas_int lda   = blas_int(A.n_rows);
    blas_int ldaf  = blas_int(A.n_rows);
    blas_int ldb   = blas_int(A.n_rows);
    blas_int ldx   = blas_int(A.n_rows);
    blas_int info  = blas_int(0);
    eT       rcond = eT(0);
    
    Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator());
    
    podarray<blas_int>  IPIV(  A.n_rows);
    podarray<eT>           R(  A.n_rows);
    podarray<eT>           C(  A.n_rows);
    podarray<eT>        FERR(  B.n_cols);
    podarray<eT>        BERR(  B.n_cols);
    podarray<eT>        WORK(4*A.n_rows);
    podarray<blas_int> IWORK(  A.n_rows);
    
    arma_debug_print("lapack::gesvx()");
    lapack::gesvx
      (
      &fact, &trans, &n, &nrhs,
      A.memptr(), &lda,
      AF.memptr(), &ldaf,
      IPIV.memptr(),
      &equed,
      R.memptr(),
      C.memptr(),
      const_cast<eT*>(B.memptr()), &ldb,
      out.memptr(), &ldx,
      &rcond,
      FERR.memptr(),
      BERR.memptr(),
      WORK.memptr(),
      IWORK.memptr(),
      &info
      );
    
    // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration;
    // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr
    
    out_rcond = rcond;
    
    return ((info == 0) || (info == (n+1)));
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_ignore(equilibrate);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via LU decomposition with refinement (complex matrices)
template<typename T1>
inline
bool
auxlib::solve_square_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    // Mat<eT> B = B_expr.get_ref();  // B is overwritten by lapack::cx_gesvx() if equilibrate is enabled
    
    quasi_unwrap<T1> UB(B_expr.get_ref());  // deliberately not declaring as const
    
    const Mat<eT>& UB_M_as_Mat = UB.M;  // so we don't confuse the ?: operator below
    
    const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out));
    
    Mat<eT> B_tmp;  if(use_copy)  { B_tmp = UB_M_as_Mat; }
    
    const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat;
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
      
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_rows, B.n_cols); return true; }
    
    arma_conform_assert_blas_size(A,B);
    
    out.set_size(A.n_rows, B.n_cols);
    
    char     fact  = (equilibrate) ? 'E' : 'N'; 
    char     trans = 'N';
    char     equed = char(0);
    blas_int n     = blas_int(A.n_rows);
    blas_int nrhs  = blas_int(B.n_cols);
    blas_int lda   = blas_int(A.n_rows);
    blas_int ldaf  = blas_int(A.n_rows);
    blas_int ldb   = blas_int(A.n_rows);
    blas_int ldx   = blas_int(A.n_rows);
    blas_int info  = blas_int(0);
    T        rcond = T(0);
    
    Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator());
    
    podarray<blas_int>  IPIV(  A.n_rows);
    podarray< T>           R(  A.n_rows);
    podarray< T>           C(  A.n_rows);
    podarray< T>        FERR(  B.n_cols);
    podarray< T>        BERR(  B.n_cols);
    podarray<eT>        WORK(2*A.n_rows);
    podarray< T>       RWORK(2*A.n_rows);
    
    arma_debug_print("lapack::cx_gesvx()");
    lapack::cx_gesvx
      (
      &fact, &trans, &n, &nrhs,
      A.memptr(), &lda,
      AF.memptr(), &ldaf,
      IPIV.memptr(),
      &equed,
      R.memptr(),
      C.memptr(),
      const_cast<eT*>(B.memptr()), &ldb,
      out.memptr(), &ldx,
      &rcond,
      FERR.memptr(),
      BERR.memptr(),
      WORK.memptr(),
      RWORK.memptr(),
      &info
      );
    
    // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration;
    // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr
    
    out_rcond = rcond;
    
    return ((info == 0) || (info == (n+1)));
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_ignore(equilibrate);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_sym_fast(Mat<typename T1::pod_type>& out, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  out = B_expr.get_ref();
  
  const uword B_n_rows = out.n_rows;
  const uword B_n_cols = out.n_cols;
  
  arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
  
  if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type eT;
    
    arma_conform_assert_blas_size(A,out);
    
    char     uplo  = 'L';
    blas_int n     = blas_int(A.n_rows);
    blas_int lda   = blas_int(A.n_rows);
    blas_int ldb   = blas_int(out.n_rows);
    blas_int nrhs  = blas_int(out.n_cols);
    blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n);
    blas_int info  = 0;
    
    podarray<blas_int> ipiv(A.n_rows);
    
    if(n > blas_int(podarray_prealloc_n_elem::val))
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::sytrf()");
      lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::sytrf()");
    lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::sytrs()");
    lapack::sytrs(&uplo, &n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_sym_fast(Mat< std::complex<typename T1::pod_type> >& out, Mat< std::complex<typename T1::pod_type> >& A, const Base< std::complex<typename T1::pod_type>, T1 >& B_expr)
  {
  arma_debug_sigprint();
  
  out = B_expr.get_ref();
  
  const uword B_n_rows = out.n_rows;
  const uword B_n_cols = out.n_cols;
  
  arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
  
  if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type  T;
    typedef std::complex<T>       eT;
    
    arma_conform_assert_blas_size(A,out);
    
    char     uplo  = 'L';
    blas_int n     = blas_int(A.n_rows);
    blas_int lda   = blas_int(A.n_rows);
    blas_int ldb   = blas_int(out.n_rows);
    blas_int nrhs  = blas_int(out.n_cols);
    blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n);
    blas_int info  = 0;
    
    podarray<blas_int> ipiv(A.n_rows);
    
    if(n > blas_int(podarray_prealloc_n_elem::val))
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::hetrf()");
      lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::hetrf()");
    lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::hetrs()");
    lapack::hetrs(&uplo, &n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_sym_rcond(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  out = B_expr.get_ref();
  
  const uword B_n_rows = out.n_rows;
  const uword B_n_cols = out.n_cols;
  
  arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
  
  if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type eT;
    
    out_rcond = eT(0);
    
    arma_conform_assert_blas_size(A,out);
    
    char     norm_id   = '1';
    char     uplo      = 'L';
    blas_int n         = blas_int(A.n_rows);
    blas_int lda       = blas_int(A.n_rows);
    blas_int ldb       = blas_int(out.n_rows);
    blas_int nrhs      = blas_int(out.n_cols);
    blas_int lwork     = (std::max)(blas_int(podarray_prealloc_n_elem::val), 2*n);  // 2*n due to lapack::sycon() requirements
    blas_int info      = 0;
    eT       norm_val  = eT(0);
    eT       tmp_rcond = eT(0);
    
    podarray<blas_int>  ipiv(A.n_rows);
    podarray<blas_int> iwork(A.n_rows);
    
    if( (2*n) > blas_int(podarray_prealloc_n_elem::val) )
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::sytrf()");
      lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::lansy()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
    
    arma_debug_print("lapack::sytrf()");
    lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::sytrs()");
    lapack::sytrs(&uplo, &n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::sycon()");
    lapack::sycon(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &norm_val, &tmp_rcond, work.memptr(), iwork.memptr(), &info);
    
    out_rcond = tmp_rcond;
    
    return (info == 0);
    }
  #else
    {
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_sym_rcond(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base< std::complex<typename T1::pod_type>,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  out = B_expr.get_ref();
  
  const uword B_n_rows = out.n_rows;
  const uword B_n_cols = out.n_cols;
  
  arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
  
  if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    out_rcond = T(0);
    
    arma_conform_assert_blas_size(A,out);
    
    char     norm_id   = '1';
    char     uplo      = 'L';
    blas_int n         = blas_int(A.n_rows);
    blas_int lda       = blas_int(A.n_rows);
    blas_int ldb       = blas_int(out.n_rows);
    blas_int nrhs      = blas_int(out.n_cols);
    blas_int lwork     = (std::max)(blas_int(podarray_prealloc_n_elem::val), 2*n);  // 2*n due to lapack::hecon() requirements
    blas_int info      = 0;
    T        norm_val  = T(0);
    T        tmp_rcond = T(0);
    
    podarray<blas_int>       ipiv(A.n_rows);
    podarray<T>        lanhe_work(A.n_rows);
    
    if( (2*n) > blas_int(podarray_prealloc_n_elem::val) )
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::hetrf()");
      lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return false; }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::lanhe()");
    norm_val = (has_blas_float_bug<T>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &lda, lanhe_work.memptr());
    
    arma_debug_print("lapack::hetrf()");
    lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::hetrs()");
    lapack::hetrs(&uplo, &n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::hecon()");
    lapack::hecon(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &norm_val, &tmp_rcond, work.memptr(), &info);
    
    out_rcond = tmp_rcond;
    
    return (info == 0);
    }
  #else
    {
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_sympd_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  out = B_expr.get_ref();
  
  const uword B_n_rows = out.n_rows;
  const uword B_n_cols = out.n_cols;
  
  arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
  
  if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    
    arma_conform_assert_blas_size(A, out);
    
    char     uplo = 'L';
    blas_int n    = blas_int(A.n_rows);  // assuming A is square
    blas_int nrhs = blas_int(B_n_cols);
    blas_int lda  = blas_int(A.n_rows);
    blas_int ldb  = blas_int(B_n_rows);
    blas_int info = blas_int(0);
    
    arma_debug_print("lapack::posv()");
    lapack::posv<eT>(&uplo, &n, &nrhs, A.memptr(), &lda, out.memptr(), &ldb, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via Cholesky decomposition with rcond estimate (real matrices)
template<typename T1>
inline
bool
auxlib::solve_sympd_rcond(Mat<typename T1::pod_type>& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    typedef typename T1::pod_type   T;
    
    out_sympd_state = false;
    out_rcond       = T(0);
    
    out = B_expr.get_ref();
    
    const uword B_n_rows = out.n_rows;
    const uword B_n_cols = out.n_cols;
    
    arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
    
    if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
    
    arma_conform_assert_blas_size(A, out);
    
    char     norm_id  = '1';
    char     uplo     = 'L';
    blas_int n        = blas_int(A.n_rows);  // assuming A is square
    blas_int nrhs     = blas_int(B_n_cols);
    blas_int info     = blas_int(0);
    T        norm_val = T(0);
    
    podarray<T> work(A.n_rows);
    
    arma_debug_print("lapack::lansy()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
    
    arma_debug_print("lapack::potrf()");
    lapack::potrf<eT>(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    out_sympd_state = true;
    
    arma_debug_print("lapack::potrs()");
    lapack::potrs<eT>(&uplo, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val);
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_sympd_state);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via Cholesky decomposition with rcond estimate (complex matrices)
template<typename T1>
inline
bool
auxlib::solve_sympd_rcond(Mat< std::complex<typename T1::pod_type> >& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base< std::complex<typename T1::pod_type>,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    out_sympd_state = false;
    out_rcond       = T(0);
    
    out = B_expr.get_ref();
    
    const uword B_n_rows = out.n_rows;
    const uword B_n_cols = out.n_cols;
    
    arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
    
    if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
    
    arma_conform_assert_blas_size(A, out);
    
    char     norm_id  = '1';
    char     uplo     = 'L';
    blas_int n        = blas_int(A.n_rows);  // assuming A is square
    blas_int nrhs     = blas_int(B_n_cols);
    blas_int info     = blas_int(0);
    T        norm_val = T(0);
    
    podarray<T> work(A.n_rows);
    
    arma_debug_print("lapack::lanhe()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
    
    arma_debug_print("lapack::potrf()");
    lapack::potrf<eT>(&uplo, &n, A.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    out_sympd_state = true;
    
    arma_debug_print("lapack::potrs()");
    lapack::potrs<eT>(&uplo, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val);
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_sympd_state);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via Cholesky decomposition with refinement (real matrices)
template<typename T1>
inline
bool
auxlib::solve_sympd_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type eT;
    
    // Mat<eT> B = B_expr.get_ref();  // B is overwritten by lapack::posvx() if equilibrate is enabled
    
    quasi_unwrap<T1> UB(B_expr.get_ref());  // deliberately not declaring as const
    
    const Mat<eT>& UB_M_as_Mat = UB.M;  // so we don't confuse the ?: operator below
    
    const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out));
    
    Mat<eT> B_tmp;  if(use_copy)  { B_tmp = UB_M_as_Mat; }
    
    const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat;
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
    
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_rows, B.n_cols); return true; }
    
    arma_conform_assert_blas_size(A,B);
    
    out.set_size(A.n_rows, B.n_cols);
    
    char     fact  = (equilibrate) ? 'E' : 'N'; 
    char     uplo  = 'L';
    char     equed = char(0);
    blas_int n     = blas_int(A.n_rows);
    blas_int nrhs  = blas_int(B.n_cols);
    blas_int lda   = blas_int(A.n_rows);
    blas_int ldaf  = blas_int(A.n_rows);
    blas_int ldb   = blas_int(A.n_rows);
    blas_int ldx   = blas_int(A.n_rows);
    blas_int info  = blas_int(0);
    eT       rcond = eT(0);
    
    Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator());
    
    podarray<eT>           S(  A.n_rows);
    podarray<eT>        FERR(  B.n_cols);
    podarray<eT>        BERR(  B.n_cols);
    podarray<eT>        WORK(3*A.n_rows);
    podarray<blas_int> IWORK(  A.n_rows);
    
    arma_debug_print("lapack::posvx()");
    lapack::posvx(&fact, &uplo, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, &equed, S.memptr(), const_cast<eT*>(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), IWORK.memptr(), &info);
    
    // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration;
    // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr
    
    // NOTE: lapack::posvx() sets rcond to zero if A is not sympd
    out_rcond = rcond;
    
    return ((info == 0) || (info == (n+1)));
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_ignore(equilibrate);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via Cholesky decomposition with refinement (complex matrices)
template<typename T1>
inline
bool
auxlib::solve_sympd_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    // Mat<eT> B = B_expr.get_ref();  // B is overwritten by lapack::cx_posvx() if equilibrate is enabled
    
    quasi_unwrap<T1> UB(B_expr.get_ref());  // deliberately not declaring as const
    
    const Mat<eT>& UB_M_as_Mat = UB.M;  // so we don't confuse the ?: operator below
    
    const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out));
    
    Mat<eT> B_tmp;  if(use_copy)  { B_tmp = UB_M_as_Mat; }
    
    const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat;
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
      
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_rows, B.n_cols); return true; }
    
    arma_conform_assert_blas_size(A,B);
    
    out.set_size(A.n_rows, B.n_cols);
    
    char     fact  = (equilibrate) ? 'E' : 'N'; 
    char     uplo  = 'L';
    char     equed = char(0);
    blas_int n     = blas_int(A.n_rows);
    blas_int nrhs  = blas_int(B.n_cols);
    blas_int lda   = blas_int(A.n_rows);
    blas_int ldaf  = blas_int(A.n_rows);
    blas_int ldb   = blas_int(A.n_rows);
    blas_int ldx   = blas_int(A.n_rows);
    blas_int info  = blas_int(0);
    T        rcond = T(0);
    
    Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator());
    
    podarray< T>           S(  A.n_rows);
    podarray< T>        FERR(  B.n_cols);
    podarray< T>        BERR(  B.n_cols);
    podarray<eT>        WORK(2*A.n_rows);
    podarray< T>       RWORK(  A.n_rows);
    
    arma_debug_print("lapack::cx_posvx()");
    lapack::cx_posvx(&fact, &uplo, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, &equed, S.memptr(), const_cast<eT*>(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), RWORK.memptr(), &info);
    
    // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration;
    // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr
    
    // NOTE: lapack::cx_posvx() sets rcond to zero if A is not sympd
    out_rcond = rcond;
    
    return ((info == 0) || (info == (n+1)));
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_ignore(equilibrate);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a non-square full-rank system via QR or LQ decomposition
template<typename T1>
inline
bool
auxlib::solve_rect_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    
    const unwrap<T1>   U(B_expr.get_ref());
    const Mat<eT>& B = U.M;
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
    
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_cols, B.n_cols); return true; }
    
    arma_conform_assert_blas_size(A,B);
    
    Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() );
    
    if(arma::size(tmp) == arma::size(B))
      {
      tmp = B;
      }
    else
      {
      tmp.zeros();
      tmp(0,0, arma::size(B)) = B;
      }
    
    char      trans     = 'N';
    blas_int  m         = blas_int(A.n_rows);
    blas_int  n         = blas_int(A.n_cols);
    blas_int  lda       = blas_int(A.n_rows);
    blas_int  ldb       = blas_int(tmp.n_rows);
    blas_int  nrhs      = blas_int(B.n_cols);
    blas_int  min_mn    = (std::min)(m,n);
    blas_int  lwork_min = (std::max)(blas_int(1), min_mn + (std::max)(min_mn, nrhs));
    blas_int  info      = 0;
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= ((is_cx<eT>::yes) ? uword(256) : uword(1024)))
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::gels()");
      lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, &work_query[0], &lwork_query, &info );
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::gels()");
    lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork_final, &info );
    
    if(info != 0)  { return false; }
    
    if(tmp.n_rows == A.n_cols)
      {
      out.steal_mem(tmp);
      }
    else
      {
      out = tmp.head_rows(A.n_cols);
      }
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a non-square full-rank system via QR or LQ decomposition with rcond estimate (experimental)
template<typename T1>
inline
bool
auxlib::solve_rect_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    typedef typename T1::pod_type   T;
    
    out_rcond = T(0);
    
    const unwrap<T1>   U(B_expr.get_ref());
    const Mat<eT>& B = U.M;
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
    
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_cols, B.n_cols); return true; }
    
    arma_conform_assert_blas_size(A,B);
    
    Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() );
    
    if(arma::size(tmp) == arma::size(B))
      {
      tmp = B;
      }
    else
      {
      tmp.zeros();
      tmp(0,0, arma::size(B)) = B;
      }
    
    char      trans     = 'N';
    blas_int  m         = blas_int(A.n_rows);
    blas_int  n         = blas_int(A.n_cols);
    blas_int  lda       = blas_int(A.n_rows);
    blas_int  ldb       = blas_int(tmp.n_rows);
    blas_int  nrhs      = blas_int(B.n_cols);
    blas_int  min_mn    = (std::min)(m,n);
    blas_int  lwork_min = (std::max)(blas_int(1), min_mn + (std::max)(min_mn, nrhs));
    blas_int  info      = 0;
    
    blas_int lwork_proposed = 0;
    
    if(A.n_elem >= ((is_cx<eT>::yes) ? uword(256) : uword(1024)))
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::gels()");
      lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, &work_query[0], &lwork_query, &info );
      
      if(info != 0)  { return false; }
      
      lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      }
    
    blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
    
    podarray<eT> work( static_cast<uword>(lwork_final) );
    
    arma_debug_print("lapack::gels()");
    lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork_final, &info );
    
    if(info != 0)  { return false; }
    
    if(A.n_rows >= A.n_cols)
      {
      arma_debug_print("estimating rcond via R");
      
      // xGELS  docs: for M >= N, A contains details of its QR decomposition as returned by xGEQRF
      // xGEQRF docs: elements on and above the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R
      
      Mat<eT> R(A.n_cols, A.n_cols, arma_zeros_indicator());
      
      for(uword col=0; col < A.n_cols; ++col)
        {
        for(uword row=0; row <= col; ++row)
          {
          R.at(row,col) = A.at(row,col);
          }
        }
      
      // determine quality of solution
      out_rcond = auxlib::rcond_trimat(R, 0);   // 0: upper triangular; 1: lower triangular
      }
    else
    if(A.n_rows < A.n_cols)
      {
      arma_debug_print("estimating rcond via L");
      
      // xGELS  docs: for M < N, A contains details of its LQ decomposition as returned by xGELQF
      // xGELQF docs: elements on and below the diagonal contain the M-by-min(M,N) lower trapezoidal matrix L
      
      Mat<eT> L(A.n_rows, A.n_rows, arma_zeros_indicator());
      
      for(uword col=0; col < A.n_rows; ++col)
        {
        for(uword row=col; row < A.n_rows; ++row)
          {
          L.at(row,col) = A.at(row,col);
          }
        }
      
      // determine quality of solution
      out_rcond = auxlib::rcond_trimat(L, 1);   // 0: upper triangular; 1: lower triangular
      }
    
    if(tmp.n_rows == A.n_cols)
      {
      out.steal_mem(tmp);
      }
    else
      {
      out = tmp.head_rows(A.n_cols);
      }
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_approx_svd(Mat<typename T1::pod_type>& out, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type eT;
    
    const unwrap<T1>   U(B_expr.get_ref());
    const Mat<eT>& B = U.M;
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
    
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_cols, B.n_cols); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    if(arma_config::check_nonfinite && B.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A,B);
    
    Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() );
    
    if(arma::size(tmp) == arma::size(B))
      {
      tmp = B;
      }
    else
      {
      tmp.zeros();
      tmp(0,0, arma::size(B)) = B;
      }
    
    blas_int m      = blas_int(A.n_rows);
    blas_int n      = blas_int(A.n_cols);
    blas_int min_mn = (std::min)(m, n);
    blas_int nrhs   = blas_int(B.n_cols);
    blas_int lda    = blas_int(A.n_rows);
    blas_int ldb    = blas_int(tmp.n_rows);
  //eT       rcond  = eT(-1);  // -1 means "use machine precision"
    eT       rcond  = (std::max)(A.n_rows, A.n_cols) * std::numeric_limits<eT>::epsilon();
    blas_int rank   = blas_int(0);
    blas_int info   = blas_int(0);
    
    podarray<eT> S( static_cast<uword>(min_mn) );
    
    // NOTE: assuming LAPACK 3.8+, where the workspace query also obtains liwork in addition to lwork
    
    blas_int ispec = blas_int(9);
    
    const char* const_name = (is_float<eT>::value) ? "SGELSD" : "DGELSD";
    const char* const_opts = " ";
    
    char* name = const_cast<char*>(const_name);
    char* opts = const_cast<char*>(const_opts);
    
    blas_int n1 = m;
    blas_int n2 = n;
    blas_int n3 = nrhs;
    blas_int n4 = lda;
    
    blas_int laenv_result = (arma_config::hidden_args) ? blas_int(lapack::laenv(&ispec, name, opts, &n1, &n2, &n3, &n4, 6, 1)) : blas_int(0);
    
    blas_int smlsiz    = (std::max)( blas_int(25), laenv_result );
    blas_int smlsiz_p1 = blas_int(1) + smlsiz;
    
    blas_int nlvl = (std::max)( blas_int(0), blas_int(1) + blas_int( std::log2( double(min_mn)/double(smlsiz_p1) ) ) );
    
    blas_int  lwork_min = blas_int(12)*min_mn + blas_int(2)*min_mn*smlsiz + blas_int(8)*min_mn*nlvl + min_mn*nrhs + smlsiz_p1*smlsiz_p1;
    blas_int liwork_min = (std::max)( blas_int(1), (blas_int(3)*min_mn*nlvl + blas_int(11)*min_mn) );
    
    eT        work_query[2] = {};
    blas_int iwork_query[2] = {};
    
    blas_int lwork_query = blas_int(-1);
    
    arma_debug_print("lapack::gelsd()");
    lapack::gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, &work_query[0], &lwork_query, &iwork_query[0], &info);
    
    if(info != 0)  { return false; }
    
    blas_int  lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
    blas_int liwork_proposed = iwork_query[0];
    
    blas_int  lwork_final = (std::max)( lwork_proposed,  lwork_min);
    blas_int liwork_final = (std::max)(liwork_proposed, liwork_min);
    
    podarray<eT>        work( static_cast<uword>( lwork_final) );
    podarray<blas_int> iwork( static_cast<uword>(liwork_final) );
    
    arma_debug_print("lapack::gelsd()");
    lapack::gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, work.memptr(), &lwork_final, iwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    if(tmp.n_rows == A.n_cols)
      {
      out.steal_mem(tmp);
      }
    else
      {
      out = tmp.head_rows(A.n_cols);
      }
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_approx_svd(Mat< std::complex<typename T1::pod_type> >& out, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    const unwrap<T1>   U(B_expr.get_ref());
    const Mat<eT>& B = U.M;
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
    
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_cols, B.n_cols); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    if(arma_config::check_nonfinite && B.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A,B);
    
    Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() );
    
    if(arma::size(tmp) == arma::size(B))
      {
      tmp = B;
      }
    else
      {
      tmp.zeros();
      tmp(0,0, arma::size(B)) = B;
      }
    
    blas_int m      = blas_int(A.n_rows);
    blas_int n      = blas_int(A.n_cols);
    blas_int min_mn = (std::min)(m, n);
    blas_int nrhs   = blas_int(B.n_cols);
    blas_int lda    = blas_int(A.n_rows);
    blas_int ldb    = blas_int(tmp.n_rows);
  //T        rcond  = T(-1);  // -1 means "use machine precision"
    T        rcond  = (std::max)(A.n_rows, A.n_cols) * std::numeric_limits<T>::epsilon();
    blas_int rank   = blas_int(0);
    blas_int info   = blas_int(0);
    
    podarray<T> S( static_cast<uword>(min_mn) );
    
    // NOTE: assuming LAPACK 3.8+, where the workspace query also obtains lrwork and liwork in addition to lwork
    
    blas_int ispec = blas_int(9);
    
    const char* const_name = (is_float<T>::value) ? "CGELSD" : "ZGELSD";
    const char* const_opts = " ";
    
    char* name = const_cast<char*>(const_name);
    char* opts = const_cast<char*>(const_opts);
    
    blas_int n1 = m;
    blas_int n2 = n;
    blas_int n3 = nrhs;
    blas_int n4 = lda;
    
    blas_int laenv_result = (arma_config::hidden_args) ? blas_int(lapack::laenv(&ispec, name, opts, &n1, &n2, &n3, &n4, 6, 1)) : blas_int(0);
    
    blas_int smlsiz    = (std::max)( blas_int(25), laenv_result );
    blas_int smlsiz_p1 = blas_int(1) + smlsiz;
    
    blas_int nlvl = (std::max)( blas_int(0), blas_int(1) + blas_int( std::log2( double(min_mn)/double(smlsiz_p1) ) ) );
    
    blas_int  lwork_min = 2*min_mn + min_mn*nrhs;
    
    blas_int lrwork_min = (m >= n)
      ? blas_int(10)*n + blas_int(2)*n*smlsiz + blas_int(8)*n*nlvl + blas_int(3)*smlsiz*nrhs + (std::max)( (smlsiz_p1)*(smlsiz_p1), n*(blas_int(1)+nrhs) + blas_int(2)*nrhs )
      : blas_int(10)*m + blas_int(2)*m*smlsiz + blas_int(8)*m*nlvl + blas_int(3)*smlsiz*nrhs + (std::max)( (smlsiz_p1)*(smlsiz_p1), n*(blas_int(1)+nrhs) + blas_int(2)*nrhs );
    
    blas_int liwork_min = (std::max)( blas_int(1), (blas_int(3)*blas_int(min_mn)*nlvl + blas_int(11)*blas_int(min_mn)) );
    
    eT        work_query[2] = {};
    T        rwork_query[2] = {};
    blas_int iwork_query[2] = {};
    
    blas_int lwork_query = blas_int(-1);
    
    arma_debug_print("lapack::cx_gelsd()");
    lapack::cx_gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, &work_query[0], &lwork_query, &rwork_query[0], &iwork_query[0], &info);
    
    if(info != 0)  { return false; }
    
    blas_int  lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
    blas_int lrwork_proposed = static_cast<blas_int>( rwork_query[0] );
    blas_int liwork_proposed = iwork_query[0];
    
    blas_int  lwork_final = (std::max)( lwork_proposed,  lwork_min);
    blas_int lrwork_final = (std::max)(lrwork_proposed, lrwork_min);
    blas_int liwork_final = (std::max)(liwork_proposed, liwork_min);
    
    podarray<eT>        work( static_cast<uword>( lwork_final) );
    podarray<T>        rwork( static_cast<uword>(lrwork_final) );
    podarray<blas_int> iwork( static_cast<uword>(liwork_final) );
    
    arma_debug_print("lapack::cx_gelsd()");
    lapack::cx_gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    if(tmp.n_rows == A.n_cols)
      {
      out.steal_mem(tmp);
      }
    else
      {
      out = tmp.head_rows(A.n_cols);
      }
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_trimat_fast(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr, const uword layout)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    out = B_expr.get_ref();
    
    const uword B_n_rows = out.n_rows;
    const uword B_n_cols = out.n_cols;
    
    arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
    
    if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
    
    arma_conform_assert_blas_size(A,out);
    
    char     uplo  = (layout == 0) ? 'U' : 'L';
    char     trans = 'N';
    char     diag  = 'N';
    blas_int n     = blas_int(A.n_rows);
    blas_int nrhs  = blas_int(B_n_cols);
    blas_int info  = 0;
    
    arma_debug_print("lapack::trtrs()");
    lapack::trtrs(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_ignore(layout);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T1>
inline
bool
auxlib::solve_trimat_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr, const uword layout)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type T;
    
    out_rcond = T(0);
    
    out = B_expr.get_ref();
    
    const uword B_n_rows = out.n_rows;
    const uword B_n_cols = out.n_cols;
    
    arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
    
    if(A.is_empty() || out.is_empty())  { out.zeros(A.n_cols, B_n_cols); return true; }
    
    arma_conform_assert_blas_size(A,out);
    
    char     uplo  = (layout == 0) ? 'U' : 'L';
    char     trans = 'N';
    char     diag  = 'N';
    blas_int n     = blas_int(A.n_rows);
    blas_int nrhs  = blas_int(B_n_cols);
    blas_int info  = 0;
    
    arma_debug_print("lapack::trtrs()");
    lapack::trtrs(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info);
    
    if(info != 0)  { return false; }
    
    // determine quality of solution
    out_rcond = auxlib::rcond_trimat(A, layout);
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_ignore(layout);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via LU decomposition (band matrix)
template<typename T1>
inline
bool
auxlib::solve_band_fast(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const uword KL, const uword KU, const Base<typename T1::elem_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    
    out = B_expr.get_ref();
    
    const uword B_n_rows = out.n_rows;
    const uword B_n_cols = out.n_cols;
    
    arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
    
    if(A.is_empty() || out.is_empty())  { out.zeros(A.n_rows, B_n_cols); return true; }
    
    // for gbsv, matrix AB size: 2*KL+KU+1 x N; band representation of A stored in rows KL+1 to 2*KL+KU+1  (note: fortran counts from 1)
    
    Mat<eT> AB;
    band_helper::compress(AB, A, KL, KU, true);
    
    const uword N = AB.n_cols;  // order of the original square matrix A
    
    arma_conform_assert_blas_size(AB,out);
    
    blas_int n    = blas_int(N);
    blas_int kl   = blas_int(KL);
    blas_int ku   = blas_int(KU);
    blas_int nrhs = blas_int(B_n_cols);
    blas_int ldab = blas_int(AB.n_rows);
    blas_int ldb  = blas_int(B_n_rows);
    blas_int info = blas_int(0);
    
    podarray<blas_int> ipiv(N + 2);  // +2 for paranoia
    
    // NOTE: AB is overwritten
    
    arma_debug_print("lapack::gbsv()");
    lapack::gbsv<eT>(&n, &kl, &ku, &nrhs, AB.memptr(), &ldab, ipiv.memptr(), out.memptr(), &ldb, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(A);
    arma_ignore(KL);
    arma_ignore(KU);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via LU decomposition (band matrix)
template<typename T1>
inline
bool
auxlib::solve_band_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, const Mat<typename T1::elem_type>& A, const uword KL, const uword KU, const Base<typename T1::elem_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    typedef typename T1::pod_type   T;
    
    out_rcond = T(0);
    
    out = B_expr.get_ref();
    
    const uword B_n_rows = out.n_rows;
    const uword B_n_cols = out.n_cols;
    
    arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
    
    if(A.is_empty() || out.is_empty())  { out.zeros(A.n_rows, B_n_cols); return true; }
    
    // for gbtrf, matrix AB size: 2*KL+KU+1 x N; band representation of A stored in rows KL+1 to 2*KL+KU+1  (note: fortran counts from 1)
    
    Mat<eT> AB;
    band_helper::compress(AB, A, KL, KU, true);
    
    const uword N = AB.n_cols;  // order of the original square matrix A
    
    arma_conform_assert_blas_size(AB,out);
    
  //char     norm_id  = '1';
    char     trans    = 'N';
    blas_int n        = blas_int(N);  // assuming square matrix
    blas_int kl       = blas_int(KL);
    blas_int ku       = blas_int(KU);
    blas_int nrhs     = blas_int(B_n_cols);
    blas_int ldab     = blas_int(AB.n_rows);
    blas_int ldb      = blas_int(B_n_rows);
    blas_int info     = blas_int(0);
    T        norm_val = T(0);
    
  //podarray<T>        junk(1);
    podarray<blas_int> ipiv(N + 2);  // +2 for paranoia
    
    // // NOTE: lapack::langb() and lapack::gbtrf() use incompatible storage formats for the band matrix
    // arma_debug_print("lapack::langb()");
    // norm_val = lapack::langb<eT>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, junk.memptr());
    
    norm_val = auxlib::norm1_band(A,KL,KU);
    
    arma_debug_print("lapack::gbtrf()");
    lapack::gbtrf<eT>(&n, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &info);
    
    if(info != 0)  { return false; }
    
    arma_debug_print("lapack::gbtrs()");
    lapack::gbtrs<eT>(&trans, &n, &kl, &ku, &nrhs, AB.memptr(), &ldab, ipiv.memptr(), out.memptr(), &ldb, &info);
    
    if(info != 0)  { return false; }
    
    out_rcond = auxlib::lu_rcond_band<T>(AB, KL, KU, ipiv, norm_val);
    
    return true;
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(KL);
    arma_ignore(KU);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via LU decomposition with refinement (real band matrices)
template<typename T1>
inline
bool
auxlib::solve_band_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const uword KL, const uword KU, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type eT;
    
    Mat<eT> B = B_expr.get_ref();  // B is overwritten
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
      
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_rows, B.n_cols); return true; }
    
    // for gbsvx, matrix AB size: KL+KU+1 x N; band representation of A stored in rows 1 to KL+KU+1  (note: fortran counts from 1)
    
    Mat<eT> AB;
    band_helper::compress(AB, A, KL, KU, false);
    
    const uword N = AB.n_cols;
    
    arma_conform_assert_blas_size(AB,B);
    
    out.set_size(N, B.n_cols);
    
    Mat<eT> AFB(2*KL+KU+1, N, arma_nozeros_indicator());
    
    char     fact  = (equilibrate) ? 'E' : 'N'; 
    char     trans = 'N';
    char     equed = char(0);
    blas_int n     = blas_int(N);
    blas_int kl    = blas_int(KL);
    blas_int ku    = blas_int(KU);
    blas_int nrhs  = blas_int(B.n_cols);
    blas_int ldab  = blas_int(AB.n_rows);
    blas_int ldafb = blas_int(AFB.n_rows);
    blas_int ldb   = blas_int(B.n_rows);
    blas_int ldx   = blas_int(N);
    blas_int info  = blas_int(0);
    eT       rcond = eT(0);
    
    podarray<blas_int>  IPIV(  N);
    podarray<eT>           R(  N);
    podarray<eT>           C(  N);
    podarray<eT>        FERR(  B.n_cols);
    podarray<eT>        BERR(  B.n_cols);
    podarray<eT>        WORK(3*N);
    podarray<blas_int> IWORK(  N);
    
    arma_debug_print("lapack::gbsvx()");
    lapack::gbsvx
      (
      &fact, &trans, &n, &kl, &ku, &nrhs, 
      AB.memptr(), &ldab,
      AFB.memptr(), &ldafb,
      IPIV.memptr(),
      &equed,
      R.memptr(),
      C.memptr(),
      B.memptr(), &ldb,
      out.memptr(), &ldx,
      &rcond,
      FERR.memptr(),
      BERR.memptr(),
      WORK.memptr(),
      IWORK.memptr(),
      &info
      );
    
    out_rcond = rcond;
    
    return ((info == 0) || (info == (n+1)));
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(KL);
    arma_ignore(KU);
    arma_ignore(B_expr);
    arma_ignore(equilibrate);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via LU decomposition with refinement (complex band matrices)
template<typename T1>
inline
bool
auxlib::solve_band_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const uword KL, const uword KU, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::pod_type     T;
    typedef typename std::complex<T> eT;
    
    Mat<eT> B = B_expr.get_ref();  // B is overwritten
    
    arma_conform_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" );
      
    if(A.is_empty() || B.is_empty())  { out.zeros(A.n_rows, B.n_cols); return true; }
    
    // for gbsvx, matrix AB size: KL+KU+1 x N; band representation of A stored in rows 1 to KL+KU+1  (note: fortran counts from 1)
    
    Mat<eT> AB;
    band_helper::compress(AB, A, KL, KU, false);
    
    const uword N = AB.n_cols;
    
    arma_conform_assert_blas_size(AB,B);
    
    out.set_size(N, B.n_cols);
    
    Mat<eT> AFB(2*KL+KU+1, N, arma_nozeros_indicator());
    
    char     fact  = (equilibrate) ? 'E' : 'N'; 
    char     trans = 'N';
    char     equed = char(0);
    blas_int n     = blas_int(N);
    blas_int kl    = blas_int(KL);
    blas_int ku    = blas_int(KU);
    blas_int nrhs  = blas_int(B.n_cols);
    blas_int ldab  = blas_int(AB.n_rows);
    blas_int ldafb = blas_int(AFB.n_rows);
    blas_int ldb   = blas_int(B.n_rows);
    blas_int ldx   = blas_int(N);
    blas_int info  = blas_int(0);
    T        rcond = T(0);
    
    podarray<blas_int>  IPIV(  N);
    podarray< T>           R(  N);
    podarray< T>           C(  N);
    podarray< T>        FERR(  B.n_cols);
    podarray< T>        BERR(  B.n_cols);
    podarray<eT>        WORK(2*N);
    podarray< T>       RWORK(  N);  // NOTE: according to lapack 3.6.1 docs, the size of RWORK in zgbsvx is different to RWORK in dgesvx 
    
    arma_debug_print("lapack::cx_gbsvx()");
    lapack::cx_gbsvx
      (
      &fact, &trans, &n, &kl, &ku, &nrhs,
      AB.memptr(), &ldab,
      AFB.memptr(), &ldafb,
      IPIV.memptr(),
      &equed,
      R.memptr(),
      C.memptr(),
      B.memptr(), &ldb,
      out.memptr(), &ldx,
      &rcond,
      FERR.memptr(),
      BERR.memptr(),
      WORK.memptr(),
      RWORK.memptr(),
      &info
      );
    
    out_rcond = rcond;
    
    return ((info == 0) || (info == (n+1)));
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(out_rcond);
    arma_ignore(A);
    arma_ignore(KL);
    arma_ignore(KU);
    arma_ignore(B_expr);
    arma_ignore(equilibrate);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//! solve a system of linear equations via Gaussian elimination with partial pivoting (tridiagonal band matrix)
template<typename T1>
inline
bool
auxlib::solve_tridiag_fast(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename T1::elem_type eT;
    
    out = B_expr.get_ref();
    
    const uword B_n_rows = out.n_rows;
    const uword B_n_cols = out.n_cols;
    
    arma_conform_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } );
    
    if(A.is_empty() || out.is_empty())  { out.zeros(A.n_rows, B_n_cols); return true; }
    
    Mat<eT> tridiag;
    band_helper::extract_tridiag(tridiag, A);
    
    arma_conform_assert_blas_size(tridiag, out);
    
    blas_int n    = blas_int(A.n_rows);
    blas_int nrhs = blas_int(B_n_cols);
    blas_int ldb  = blas_int(B_n_rows);
    blas_int info = blas_int(0);
    
    arma_debug_print("lapack::gtsv()");
    lapack::gtsv<eT>(&n, &nrhs, tridiag.colptr(0), tridiag.colptr(1), tridiag.colptr(2), out.memptr(), &ldb, &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(out);
    arma_ignore(A);
    arma_ignore(B_expr);
    arma_stop_logic_error("solve(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//
// Schur decomposition

template<typename eT, typename T1>
inline
bool
auxlib::schur(Mat<eT>& U, Mat<eT>& S, const Base<eT,T1>& X, const bool calc_U)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    S = X.get_ref();
    
    arma_conform_check( (S.is_square() == false), "schur(): given matrix must be square sized" );
    
    if(S.is_empty())  { U.reset(); S.reset(); return true; }
    
    arma_conform_assert_blas_size(S);
    
    const uword S_n_rows = S.n_rows;
    
    if(calc_U) { U.set_size(S_n_rows, S_n_rows); } else { U.set_size(1,1); }
    
    char      jobvs  = calc_U ? 'V' : 'N';
    char      sort   = 'N';
    void*     select = 0;
    blas_int  n      = blas_int(S_n_rows);
    blas_int  sdim   = 0;
    blas_int  ldvs   = calc_U ? n : blas_int(1);
    blas_int  lwork  = 64*n;  // lwork_min = (std::max)(blas_int(1), 3*n)
    blas_int  info   = 0;
    
    podarray<eT> wr(S_n_rows);
    podarray<eT> wi(S_n_rows);
    
    podarray<eT>        work( static_cast<uword>(lwork) );
    podarray<blas_int> bwork(S_n_rows);
    
    arma_debug_print("lapack::gees()");
    lapack::gees(&jobvs, &sort, select, &n, S.memptr(), &n, &sdim, wr.memptr(), wi.memptr(), U.memptr(), &ldvs, work.memptr(), &lwork, bwork.memptr(), &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(X);
    arma_ignore(calc_U);
    arma_stop_logic_error("schur(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename T, typename T1>
inline
bool
auxlib::schur(Mat< std::complex<T> >& U, Mat< std::complex<T> >& S, const Base<std::complex<T>,T1>& X, const bool calc_U)
  {
  arma_debug_sigprint();
  
  S = X.get_ref();
  
  arma_conform_check( (S.is_square() == false), "schur(): given matrix must be square sized" );
  
  return auxlib::schur(U,S,calc_U);
  }



template<typename T>
inline
bool
auxlib::schur(Mat< std::complex<T> >& U, Mat< std::complex<T> >& S, const bool calc_U)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef std::complex<T> eT;
    
    if(S.is_empty())  { U.reset(); S.reset(); return true; }
    
    arma_conform_assert_blas_size(S);
    
    const uword S_n_rows = S.n_rows;
    
    if(calc_U) { U.set_size(S_n_rows, S_n_rows); } else { U.set_size(1,1); }
    
    char      jobvs  = calc_U ? 'V' : 'N';
    char      sort   = 'N';
    void*     select = 0;
    blas_int  n      = blas_int(S_n_rows);
    blas_int  sdim   = 0;
    blas_int  ldvs   = calc_U ? n : blas_int(1);
    blas_int  lwork  = 64*n;  // lwork_min = (std::max)(blas_int(1), 2*n)
    blas_int  info   = 0;
    
    podarray<eT>           w(S_n_rows);
    podarray<eT>        work( static_cast<uword>(lwork) );
    podarray< T>       rwork(S_n_rows);
    podarray<blas_int> bwork(S_n_rows);
    
    arma_debug_print("lapack::cx_gees()");
    lapack::cx_gees(&jobvs, &sort, select, &n, S.memptr(), &n, &sdim, w.memptr(), U.memptr(), &ldvs, work.memptr(), &lwork, rwork.memptr(), bwork.memptr(), &info);
    
    return (info == 0);
    }
  #else
    {
    arma_ignore(U);
    arma_ignore(S);
    arma_ignore(calc_U);
    arma_stop_logic_error("schur(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//
// solve the Sylvester equation AX + XB = C

template<typename eT>
inline
bool
auxlib::sylvester(Mat<eT>& X, const Mat<eT>& A, const Mat<eT>& B, const Mat<eT>& C)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_check( (A.is_square() == false) || (B.is_square() == false), "sylvester(): given matrices must be square sized" );
      
    arma_conform_check( (C.n_rows != A.n_rows) || (C.n_cols != B.n_cols), "sylvester(): matrices are not conformant" );
    
    if(A.is_empty() || B.is_empty() || C.is_empty())  { X.reset(); return true; }
    
    Mat<eT> Z1, Z2, T1, T2;
    
    const bool status_sd1 = auxlib::schur(Z1, T1, A);
    const bool status_sd2 = auxlib::schur(Z2, T2, B);
    
    if( (status_sd1 == false) || (status_sd2 == false) )  { return false; }
    
    char     trana = 'N';
    char     tranb = 'N';
    blas_int  isgn = +1;
    blas_int     m = blas_int(T1.n_rows);
    blas_int     n = blas_int(T2.n_cols);
    
    eT       scale = eT(0);
    blas_int  info = 0;
    
    Mat<eT> Y = trans(Z1) * C * Z2;
    
    arma_debug_print("lapack::trsyl()");
    lapack::trsyl<eT>(&trana, &tranb, &isgn, &m, &n, T1.memptr(), &m, T2.memptr(), &n, Y.memptr(), &m, &scale, &info);
    
    if(info < 0)  { return false; }
    
    //Y /= scale;
    Y /= (-scale);
    
    X = Z1 * Y * trans(Z2);
    
    return true;
    }
  #else
    {
    arma_ignore(X);
    arma_ignore(A);
    arma_ignore(B);
    arma_ignore(C);
    arma_stop_logic_error("sylvester(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }
  
  

//
// QZ decomposition of general square real matrix pair

template<typename T, typename T1, typename T2>
inline
bool
auxlib::qz(Mat<T>& A, Mat<T>& B, Mat<T>& vsl, Mat<T>& vsr, const Base<T,T1>& X_expr, const Base<T,T2>& Y_expr, const char mode)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    A = X_expr.get_ref();
    B = Y_expr.get_ref();
    
    arma_conform_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized", [&](){ A.soft_reset(); B.soft_reset(); } );
    
    arma_conform_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" );
    
    if(A.is_empty())  { A.reset();  B.reset();  vsl.reset(); vsr.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    if(arma_config::check_nonfinite && B.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    vsl.set_size(A.n_rows, A.n_rows);
    vsr.set_size(A.n_rows, A.n_rows);
    
    char     jobvsl  = 'V';
    char     jobvsr  = 'V';
    char     eigsort = 'N';
    void*    selctg  = 0;
    blas_int N       = blas_int(A.n_rows);
    blas_int sdim    = 0;
    blas_int lwork   = 64*N+16;  // lwork_min = (std::max)(blas_int(1),8*N+16)
    blas_int info    = 0;
    
         if(mode == 'l')  { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_lhp<T>)); }
    else if(mode == 'r')  { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_rhp<T>)); }
    else if(mode == 'i')  { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_iuc<T>)); }
    else if(mode == 'o')  { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_ouc<T>)); }
    
    podarray<T> alphar(A.n_rows);
    podarray<T> alphai(A.n_rows);
    podarray<T>   beta(A.n_rows);
    
    podarray<T>         work( static_cast<uword>(lwork) );
    podarray<blas_int> bwork( static_cast<uword>(N)     );
    
    arma_debug_print("lapack::gges()");
    
    lapack::gges
      (
      &jobvsl, &jobvsr, &eigsort, selctg, &N,
      A.memptr(), &N, B.memptr(), &N, &sdim,
      alphar.memptr(), alphai.memptr(), beta.memptr(),
      vsl.memptr(), &N, vsr.memptr(), &N,
      work.memptr(), &lwork, bwork.memptr(),
      &info
      );
    
    if(info != 0)  { return false; }
    
    op_strans::apply_mat_inplace(vsl);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(B);
    arma_ignore(vsl);
    arma_ignore(vsr);
    arma_ignore(X_expr);
    arma_ignore(Y_expr);
    arma_ignore(mode);
    arma_stop_logic_error("qz(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



//
// QZ decomposition of general square complex matrix pair

template<typename T, typename T1, typename T2>
inline
bool
auxlib::qz(Mat< std::complex<T> >& A, Mat< std::complex<T> >& B, Mat< std::complex<T> >& vsl, Mat< std::complex<T> >& vsr, const Base< std::complex<T>, T1 >& X_expr, const Base< std::complex<T>, T2 >& Y_expr, const char mode)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    A = X_expr.get_ref();
    B = Y_expr.get_ref();
    
    arma_conform_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized", [&](){ A.soft_reset(); B.soft_reset(); } );
    
    arma_conform_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" );
    
    if(A.is_empty())  { A.reset(); B.reset(); vsl.reset(); vsr.reset(); return true; }
    
    if(arma_config::check_nonfinite && A.internal_has_nonfinite())  { return false; }
    if(arma_config::check_nonfinite && B.internal_has_nonfinite())  { return false; }
    
    arma_conform_assert_blas_size(A);
    
    vsl.set_size(A.n_rows, A.n_rows);
    vsr.set_size(A.n_rows, A.n_rows);
    
    char     jobvsl  = 'V';
    char     jobvsr  = 'V';
    char     eigsort = 'N';
    void*    selctg  = 0;
    blas_int N       = blas_int(A.n_rows);
    blas_int sdim    = 0;
    blas_int lwork   = 64*N;  // lwork_min = (std::max)(blas_int(1),2*N)
    blas_int info    = 0;
    
         if(mode == 'l')  { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_lhp<T>)); }
    else if(mode == 'r')  { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_rhp<T>)); }
    else if(mode == 'i')  { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_iuc<T>)); }
    else if(mode == 'o')  { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_ouc<T>)); }
    
    podarray<eT> alpha(A.n_rows);
    podarray<eT>  beta(A.n_rows);
    
    podarray<eT>        work( static_cast<uword>(lwork) );
    podarray< T>       rwork( static_cast<uword>(8*N)   );
    podarray<blas_int> bwork( static_cast<uword>(N)     );
    
    arma_debug_print("lapack::cx_gges()");
    
    lapack::cx_gges
      (
      &jobvsl, &jobvsr, &eigsort, selctg, &N,
      A.memptr(), &N, B.memptr(), &N, &sdim,
      alpha.memptr(), beta.memptr(),
      vsl.memptr(), &N, vsr.memptr(), &N,
      work.memptr(), &lwork, rwork.memptr(), bwork.memptr(),
      &info
      );
    
    if(info != 0)  { return false; }
    
    op_htrans::apply_mat_inplace(vsl);
    
    return true;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(B);
    arma_ignore(vsl);
    arma_ignore(vsr);
    arma_ignore(X_expr);
    arma_ignore(Y_expr);
    arma_ignore(mode);
    arma_stop_logic_error("qz(): use of LAPACK must be enabled");
    return false;
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::balance(Col<typename get_pod_type<eT>::result>& S, Col<uword>& P, Mat<eT>& A, const bool calc_SP, const bool do_scal, const bool do_perm)
  {
  arma_debug_sigprint();
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename get_pod_type<eT>::result T;
    
    // assuming given matrix is square-sized
    
    if(A.n_elem == 0)  { S.reset(); P.reset(); return true; }
    
    const char job = (do_scal && do_perm) ? 'B' : ((do_scal) ? 'S' : ((do_perm) ? 'P' : 'N'));
    
    blas_int n    = blas_int(A.n_rows);
    blas_int lda  = blas_int(A.n_rows);
    blas_int ilo  = blas_int(0);
    blas_int ihi  = blas_int(0);
    blas_int info = blas_int(0);
    
    podarray<T> scale(A.n_rows);  scale.zeros();
    
    arma_debug_print("lapack::gebal()");
    lapack::gebal(&job, &n, A.memptr(), &lda, &ilo, &ihi, scale.memptr(), &info);
    
    if(info != blas_int(0))  { return false; }
    
    if(calc_SP == false)  { return true; }
    
    const uword N = A.n_rows;
    
    // sanity check
    if( (ilo < 1) || (uword(ihi) > N) )  { arma_debug_print("ilo and/or ihi out of bounds"); return false; }
    
    S.zeros(N);
    P.zeros(N);
    
        T* S_mem = S.memptr();
    uword* P_mem = P.memptr();
    
    const T* scale_mem = scale.memptr();
    
    for(uword i = 0;            i < uword(ilo)-1; ++i)  { S_mem[i] = T(1);         }
    for(uword i = uword(ilo)-1; i < uword(ihi);   ++i)  { S_mem[i] = scale_mem[i]; }
    for(uword i = uword(ihi);   i < N;            ++i)  { S_mem[i] = T(1);         }
    
    for(uword i=0; i < N; ++i)  { P_mem[i] = i; }
    
    for(uword i=N-1; i >= uword(ihi)  ; --i)  { const uword j = uword(scale_mem[i]) - 1; std::swap(P_mem[i], P_mem[j]); }
    for(uword i=0;   i <  uword(ilo)-1; ++i)  { const uword j = uword(scale_mem[i]) - 1; std::swap(P_mem[i], P_mem[j]); }
    
    return true;
    }
  #else
    {
    arma_ignore(S);
    arma_ignore(P);
    arma_ignore(A);
    arma_ignore(do_scal);
    arma_ignore(do_perm);
    return false;
    }
  #endif
  }



template<typename eT>
inline
eT
auxlib::rcond(Mat<eT>& A)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     norm_id  = '1';
    blas_int m        = blas_int(A.n_rows);
    blas_int n        = blas_int(A.n_rows);  // assuming square matrix
    blas_int lda      = blas_int(A.n_rows);
    eT       norm_val = eT(0);
    eT       rcond    = eT(0);
    blas_int info     = blas_int(0);
    
    podarray<eT>        work(4*A.n_rows);
    podarray<blas_int> iwork(  A.n_rows);
    podarray<blas_int> ipiv( (std::min)(A.n_rows, A.n_cols) );
    
    arma_debug_print("lapack::lange()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, work.memptr());
    
    arma_debug_print("lapack::getrf()");
    lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info);
    
    if(info != blas_int(0))  { return eT(0); }
    
    arma_debug_print("lapack::gecon()");
    lapack::gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
    
    if(info != blas_int(0))  { return eT(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
    return eT(0);
    }
  #endif
  }



template<typename T>
inline
T
auxlib::rcond(Mat< std::complex<T> >& A)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    arma_conform_assert_blas_size(A);
    
    char     norm_id  = '1';
    blas_int m        = blas_int(A.n_rows);
    blas_int n        = blas_int(A.n_rows);  // assuming square matrix
    blas_int lda      = blas_int(A.n_rows);
    T        norm_val = T(0);
    T        rcond    = T(0);
    blas_int info     = blas_int(0);
    
    podarray< T>        junk(1);
    podarray<eT>        work(2*A.n_rows);
    podarray< T>       rwork(2*A.n_rows);
    podarray<blas_int> ipiv( (std::min)(A.n_rows, A.n_cols) );
    
    arma_debug_print("lapack::lange()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, junk.memptr());
    
    arma_debug_print("lapack::getrf()");
    lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info);
    
    if(info != blas_int(0))  { return T(0); }
    
    arma_debug_print("lapack::cx_gecon()");
    lapack::cx_gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
    
    if(info != blas_int(0))  { return T(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
    return T(0);
    }
  #endif
  }



template<typename eT>
inline
eT
auxlib::rcond_sym(Mat<eT>& A)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     norm_id   = '1';
    char     uplo      = 'L';
    blas_int n         = blas_int(A.n_rows);
    blas_int lda       = blas_int(A.n_rows);
    blas_int lwork     = (std::max)(blas_int(podarray_prealloc_n_elem::val), 2*n);  // 2*n due to lapack::sycon() requirements
    blas_int info      = 0;
    eT       norm_val  = eT(0);
    eT       out_rcond = eT(0);
    
    podarray<blas_int>  ipiv(A.n_rows);
    podarray<blas_int> iwork(A.n_rows);
    
    if( (2*n) > blas_int(podarray_prealloc_n_elem::val) )
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::sytrf()");
      lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return eT(0); }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::lansy()");
    norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &lda, work.memptr());
    
    arma_debug_print("lapack::sytrf()");
    lapack::sytrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return eT(0); }
    
    arma_debug_print("lapack::sycon()");
    lapack::sycon(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &norm_val, &out_rcond, work.memptr(), iwork.memptr(), &info);
    
    if(info != 0)  { return eT(0); }
    
    return out_rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_stop_logic_error("rcond_sym(): use of LAPACK must be enabled");
    return eT(0);
    }
  #endif
  }



template<typename T>
inline
T
auxlib::rcond_sym(Mat< std::complex<T> >& A)
  {
  // NOTE: the function name is required for overloading, but is a misnomer: it processes complex hermitian matrices
  
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    arma_conform_assert_blas_size(A);
    
    char     norm_id   = '1';
    char     uplo      = 'L';
    blas_int n         = blas_int(A.n_rows);
    blas_int lda       = blas_int(A.n_rows);
    blas_int lwork     = (std::max)(blas_int(podarray_prealloc_n_elem::val), 2*n);  // 2*n due to lapack::hecon() requirements
    blas_int info      = 0;
    T        norm_val  = T(0);
    T        out_rcond = T(0);
    
    podarray<blas_int>       ipiv(A.n_rows);
    podarray<T>        lanhe_work(A.n_rows);
    
    if( (2*n) > blas_int(podarray_prealloc_n_elem::val) )
      {
      eT        work_query[2] = {};
      blas_int lwork_query    = -1;
      
      arma_debug_print("lapack::hetrf()");
      lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
      
      if(info != 0)  { return T(0); }
      
      blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
      
      lwork = (std::max)(lwork_proposed, lwork);
      }
    
    podarray<eT> work( static_cast<uword>(lwork) );
    
    arma_debug_print("lapack::lanhe()");
    norm_val = (has_blas_float_bug<T>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &lda, lanhe_work.memptr());
    
    arma_debug_print("lapack::hetrf()");
    lapack::hetrf(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
    
    if(info != 0)  { return T(0); }
    
    arma_debug_print("lapack::hecon()");
    lapack::hecon(&uplo, &n, A.memptr(), &lda, ipiv.memptr(), &norm_val, &out_rcond, work.memptr(), &info);
    
    if(info != 0)  { return T(0); }
    
    return out_rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_stop_logic_error("rcond_sym(): use of LAPACK must be enabled");
    return T(0);
    }
  #endif
  }



template<typename eT>
inline
eT
auxlib::rcond_trimat(const Mat<eT>& A, const uword layout)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    arma_conform_assert_blas_size(A);
    
    char     norm_id = '1';
    char     uplo    = (layout == 0) ? 'U' : 'L';
    char     diag    = 'N';
    blas_int n       = blas_int(A.n_rows);  // assuming square matrix
    eT       rcond   = eT(0);
    blas_int info    = blas_int(0);
    
    podarray<eT>        work(3*A.n_rows);
    podarray<blas_int> iwork(  A.n_rows);
    
    arma_debug_print("lapack::trcon()");
    lapack::trcon(&norm_id, &uplo, &diag, &n, A.memptr(), &n, &rcond, work.memptr(), iwork.memptr(), &info);
    
    if(info != blas_int(0))  { return eT(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(layout);
    arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
    return eT(0);
    }
  #endif
  }



template<typename T>
inline
T
auxlib::rcond_trimat(const Mat< std::complex<T> >& A, const uword layout)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    arma_conform_assert_blas_size(A);
    
    char     norm_id = '1';
    char     uplo    = (layout == 0) ? 'U' : 'L';
    char     diag    = 'N';
    blas_int n       = blas_int(A.n_rows);  // assuming square matrix
    T        rcond   = T(0);
    blas_int info    = blas_int(0);
    
    podarray<eT>  work(2*A.n_rows);
    podarray< T> rwork(  A.n_rows);
    
    arma_debug_print("lapack::cx_trcon()");
    lapack::cx_trcon(&norm_id, &uplo, &diag, &n, A.memptr(), &n, &rcond, work.memptr(), rwork.memptr(), &info);
    
    if(info != blas_int(0))  { return T(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(layout);
    arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
    return T(0);
    }
  #endif
  }



template<typename eT>
inline
eT
auxlib::lu_rcond(const Mat<eT>& A, const eT norm_val)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    char     norm_id = '1';
    blas_int n       = blas_int(A.n_rows);  // assuming square matrix
    blas_int lda     = blas_int(A.n_rows);
    eT       rcond   = eT(0);
    blas_int info    = blas_int(0);
    
    podarray<eT>        work(4*A.n_rows);
    podarray<blas_int> iwork(  A.n_rows);
    
    arma_debug_print("lapack::gecon()");
    lapack::gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
    
    if(info != blas_int(0))  { return eT(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(norm_val);
    return eT(0);
    }
  #endif
  }



template<typename T>
inline
T
auxlib::lu_rcond(const Mat< std::complex<T> >& A, const T norm_val)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    char     norm_id = '1';
    blas_int n       = blas_int(A.n_rows);  // assuming square matrix
    blas_int lda     = blas_int(A.n_rows);
    T        rcond   = T(0);
    blas_int info    = blas_int(0);
    
    podarray<eT>  work(2*A.n_rows);
    podarray< T> rwork(2*A.n_rows);
    
    arma_debug_print("lapack::cx_gecon()");
    lapack::cx_gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
    
    if(info != blas_int(0))  { return T(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(norm_val);
    return T(0);
    }
  #endif
  }



template<typename eT>
inline
eT
auxlib::lu_rcond_sympd(const Mat<eT>& A, const eT norm_val)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    char     uplo  = 'L';
    blas_int n     = blas_int(A.n_rows);  // assuming square matrix
    eT       rcond = eT(0);
    blas_int info  = blas_int(0);
    
    podarray<eT>        work(3*A.n_rows);
    podarray<blas_int> iwork(  A.n_rows);
    
    arma_debug_print("lapack::pocon()");
    lapack::pocon(&uplo, &n, A.memptr(), &n, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
    
    if(info != blas_int(0))  { return eT(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(norm_val);
    return eT(0);
    }
  #endif
  }



template<typename T>
inline
T
auxlib::lu_rcond_sympd(const Mat< std::complex<T> >& A, const T norm_val)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    char     uplo  = 'L';
    blas_int n     = blas_int(A.n_rows);  // assuming square matrix
    T        rcond = T(0);
    blas_int info  = blas_int(0);
    
    podarray<eT>  work(2*A.n_rows);
    podarray< T> rwork(  A.n_rows);
    
    arma_debug_print("lapack::cx_pocon()");
    lapack::cx_pocon(&uplo, &n, A.memptr(), &n, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
    
    if(info != blas_int(0))  { return T(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(A);
    arma_ignore(norm_val);
    return T(0);
    }
  #endif
  }



template<typename eT>
inline
eT
auxlib::lu_rcond_band(const Mat<eT>& AB, const uword KL, const uword KU, const podarray<blas_int>& ipiv, const eT norm_val)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    const uword N = AB.n_cols;  // order of the original square matrix A
    
    char     norm_id = '1';
    blas_int n       = blas_int(N);
    blas_int kl      = blas_int(KL);
    blas_int ku      = blas_int(KU);
    blas_int ldab    = blas_int(AB.n_rows);
    eT       rcond   = eT(0);
    blas_int info    = blas_int(0);
    
    podarray<eT>        work(3*N);
    podarray<blas_int> iwork(  N);
    
    arma_debug_print("lapack::gbcon()");
    lapack::gbcon<eT>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
    
    if(info != blas_int(0))  { return eT(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(AB);
    arma_ignore(KL);
    arma_ignore(KU);
    arma_ignore(ipiv);
    arma_ignore(norm_val);
    return eT(0);
    }
  #endif
  }



template<typename T>
inline
T
auxlib::lu_rcond_band(const Mat< std::complex<T> >& AB, const uword KL, const uword KU, const podarray<blas_int>& ipiv, const T norm_val)
  {
  #if defined(ARMA_USE_LAPACK)
    {
    typedef typename std::complex<T> eT;
    
    const uword N = AB.n_cols;  // order of the original square matrix A
    
    char     norm_id = '1';
    blas_int n       = blas_int(N);
    blas_int kl      = blas_int(KL);
    blas_int ku      = blas_int(KU);
    blas_int ldab    = blas_int(AB.n_rows);
    T        rcond   = T(0);
    blas_int info    = blas_int(0);
    
    podarray<eT>  work(2*N);
    podarray< T> rwork(  N);
    
    arma_debug_print("lapack::cx_gbcon()");
    lapack::cx_gbcon<T>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
    
    if(info != blas_int(0))  { return T(0); }
    
    return rcond;
    }
  #else
    {
    arma_ignore(AB);
    arma_ignore(KL);
    arma_ignore(KU);
    arma_ignore(ipiv);
    arma_ignore(norm_val);
    return T(0);
    }
  #endif
  }



template<typename eT>
inline
bool
auxlib::rudimentary_sym_check(const Mat<eT>& X)
  {
  arma_debug_sigprint();
  
  const uword N   = X.n_rows;
  const uword Nm2 = N-2;
  
  if(N != X.n_cols)  { return false; }
  if(N <= uword(1))  { return true;  }
  
  const eT* X_mem = X.memptr();
  
  const eT* X_offsetA = &(X_mem[Nm2  ]);
  const eT* X_offsetB = &(X_mem[Nm2*N]);
  
  const eT A1 = *(X_offsetA  ); 
  const eT A2 = *(X_offsetA+1);  // bottom-left corner (ie. last value in first column)
  const eT B1 = *(X_offsetB  );  
  const eT B2 = *(X_offsetB+N);  // top-right   corner (ie. first value in last column)
  
  const eT C1 = (std::max)(std::abs(A1), std::abs(B1));
  const eT C2 = (std::max)(std::abs(A2), std::abs(B2));
  
  const eT delta1 = std::abs(A1 - B1);
  const eT delta2 = std::abs(A2 - B2);
  
  const eT tol = eT(10000)*std::numeric_limits<eT>::epsilon();  // allow some leeway
  
  const bool okay1 = ( (delta1 <= tol) || (delta1 <= (C1 * tol)) );
  const bool okay2 = ( (delta2 <= tol) || (delta2 <= (C2 * tol)) );
  
  return (okay1 && okay2);
  }



template<typename T>
inline
bool
auxlib::rudimentary_sym_check(const Mat< std::complex<T> >& X)
  {
  arma_debug_sigprint();
  
  // NOTE: the function name is a misnomer, as it checks for hermitian complex matrices;
  // NOTE: for simplicity of use, the function name is the same as for real matrices
  
  typedef typename std::complex<T> eT;
  
  const uword N   = X.n_rows;
  const uword Nm1 = N-1;
  
  if(N != X.n_cols)  { return false; }
  if(N == uword(0))  { return true;  }
  
  const eT* X_mem = X.memptr();
  
  const T tol = T(10000)*std::numeric_limits<T>::epsilon();  // allow some leeway
  
  if(std::abs(X_mem[0         ].imag()) > tol)  { return false; }  // check top-left
  if(std::abs(X_mem[X.n_elem-1].imag()) > tol)  { return false; }  // check bottom-right
  
  const eT& A = X_mem[Nm1  ];  // bottom-left corner (ie. last value in first column)
  const eT& B = X_mem[Nm1*N];  // top-right   corner (ie. first value in last column)
  
  const T C_real = (std::max)(std::abs(A.real()), std::abs(B.real()));
  const T C_imag = (std::max)(std::abs(A.imag()), std::abs(B.imag()));
  
  const T delta_real = std::abs(A.real() - B.real());
  const T delta_imag = std::abs(A.imag() + B.imag());  // take into account the conjugate
  
  const bool okay_real = ( (delta_real <= tol) || (delta_real <= (C_real * tol)) );
  const bool okay_imag = ( (delta_imag <= tol) || (delta_imag <= (C_imag * tol)) );
  
  return (okay_real && okay_imag);
  }



template<typename eT>
inline
typename get_pod_type<eT>::result
auxlib::norm1_gen(const Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  typedef typename get_pod_type<eT>::result T;
  
  if(A.n_elem == 0)  { return T(0); }
  
  const uword n_rows = A.n_rows;
  const uword n_cols = A.n_cols;
  
  T max_val = T(0);
  
  for(uword c=0; c < n_cols; ++c)
    {
    const eT* colmem  = A.colptr(c);
           T  acc_val = T(0);
    
    for(uword r=0; r < n_rows; ++r)  { acc_val += std::abs(colmem[r]); }
    
    max_val = (acc_val > max_val) ? acc_val : max_val;
    }
  
  return max_val;
  }



template<typename eT>
inline
typename get_pod_type<eT>::result
auxlib::norm1_sym(const Mat<eT>& A)
  {
  arma_debug_sigprint();
  
  typedef typename get_pod_type<eT>::result T;
  
  if(A.n_elem == 0)  { return T(0); }
  
  const uword N = (std::min)(A.n_rows, A.n_cols);
  
  T max_val = T(0);
  
  for(uword col=0; col < N; ++col)
    {
    const eT* colmem  = A.colptr(col);
           T  acc_val = T(0);
    
    for(uword c=0; c < col; ++c)  { acc_val += std::abs(A.at(col,c)); }
    
    for(uword r=col; r < N; ++r)  { acc_val += std::abs(colmem[r]);   }
    
    max_val = (acc_val > max_val) ? acc_val : max_val;
    }
  
  return max_val;
  }



template<typename eT>
inline
typename get_pod_type<eT>::result
auxlib::norm1_band(const Mat<eT>& A, const uword KL, const uword KU)
  {
  arma_debug_sigprint();
  
  typedef typename get_pod_type<eT>::result T;
  
  if(A.n_elem == 0)  { return T(0); }
  
  const uword n_rows = A.n_rows;
  const uword n_cols = A.n_cols;
  
  T max_val = T(0);
  
  for(uword c=0; c < n_cols; ++c)
    {
    const eT* colmem  = A.colptr(c);
           T  acc_val = T(0);
    
    // use values only from main diagonal + KU upper diagonals + KL lower diagonals
    
    const uword start = ( c       > KU    ) ? (c - KU) : 0;
    const uword   end = ((c + KL) < n_rows) ? (c + KL) : (n_rows-1);
    
    for(uword r=start; r <= end; ++r)  { acc_val += std::abs(colmem[r]); }
    
    max_val = (acc_val > max_val) ? acc_val : max_val;
    }
  
  return max_val;
  }



//



namespace qz_helper
{

// sgges() and dgges() require an external function with three arguments:
// select(alpha_real, alpha_imag, beta)
// where the eigenvalue is defined as complex(alpha_real, alpha_imag) / beta

template<typename T>
inline
blas_int
select_lhp(const T* x_ptr, const T* y_ptr, const T* z_ptr)
  {
  arma_debug_sigprint();
  
  // cout << "select_lhp(): (*x_ptr) = " << (*x_ptr) << endl;
  // cout << "select_lhp(): (*y_ptr) = " << (*y_ptr) << endl;
  // cout << "select_lhp(): (*z_ptr) = " << (*z_ptr) << endl;
  
  arma_ignore(y_ptr);  // ignore imaginary part
  
  const T x = (*x_ptr);
  const T z = (*z_ptr);
  
  if(z == T(0))  { return blas_int(0); }  // consider an infinite eig value not to lie in either lhp or rhp
  
  return ((x/z) < T(0)) ? blas_int(1) : blas_int(0);
  }



template<typename T>
inline
blas_int
select_rhp(const T* x_ptr, const T* y_ptr, const T* z_ptr)
  {
  arma_debug_sigprint();
  
  // cout << "select_rhp(): (*x_ptr) = " << (*x_ptr) << endl;
  // cout << "select_rhp(): (*y_ptr) = " << (*y_ptr) << endl;
  // cout << "select_rhp(): (*z_ptr) = " << (*z_ptr) << endl;
  
  arma_ignore(y_ptr);  // ignore imaginary part
  
  const T x = (*x_ptr);
  const T z = (*z_ptr);
  
  if(z == T(0))  { return blas_int(0); }  // consider an infinite eig value not to lie in either lhp or rhp
  
  return ((x/z) > T(0)) ? blas_int(1) : blas_int(0);
  }



template<typename T>
inline
blas_int
select_iuc(const T* x_ptr, const T* y_ptr, const T* z_ptr)
  {
  arma_debug_sigprint();
  
  // cout << "select_iuc(): (*x_ptr) = " << (*x_ptr) << endl;
  // cout << "select_iuc(): (*y_ptr) = " << (*y_ptr) << endl;
  // cout << "select_iuc(): (*z_ptr) = " << (*z_ptr) << endl;
  
  const T x = (*x_ptr);
  const T y = (*y_ptr);
  const T z = (*z_ptr);
  
  if(z == T(0))  { return blas_int(0); }  // consider an infinite eig value to be outside of the unit circle 
  
  //return (std::abs(std::complex<T>(x,y) / z) < T(1)) ? blas_int(1) : blas_int(0);
  return (std::sqrt(x*x + y*y) < std::abs(z)) ? blas_int(1) : blas_int(0);
  }



template<typename T>
inline
blas_int
select_ouc(const T* x_ptr, const T* y_ptr, const T* z_ptr)
  {
  arma_debug_sigprint();
  
  // cout << "select_ouc(): (*x_ptr) = " << (*x_ptr) << endl;
  // cout << "select_ouc(): (*y_ptr) = " << (*y_ptr) << endl;
  // cout << "select_ouc(): (*z_ptr) = " << (*z_ptr) << endl;
  
  const T x = (*x_ptr);
  const T y = (*y_ptr);
  const T z = (*z_ptr);
  
  if(z == T(0))
    {
    return (x == T(0)) ? blas_int(0) : blas_int(1);  // consider an infinite eig value to be outside of the unit circle 
    }
  
  //return (std::abs(std::complex<T>(x,y) / z) > T(1)) ? blas_int(1) : blas_int(0);
  return (std::sqrt(x*x + y*y) > std::abs(z)) ? blas_int(1) : blas_int(0);
  }



// cgges() and zgges() require an external function with two arguments:
// select(alpha, beta)
// where the complex eigenvalue is defined as (alpha / beta)

template<typename T>
inline
blas_int
cx_select_lhp(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr)
  {
  arma_debug_sigprint();
  
  // cout << "cx_select_lhp(): (*x_ptr) = " << (*x_ptr) << endl;
  // cout << "cx_select_lhp(): (*y_ptr) = " << (*y_ptr) << endl;
  
  const std::complex<T>& x = (*x_ptr);
  const std::complex<T>& y = (*y_ptr);
  
  if( (y.real() == T(0)) && (y.imag() == T(0)) )  { return blas_int(0); }  // consider an infinite eig value not to lie in either lhp or rhp
  
  return (std::real(x / y) < T(0)) ? blas_int(1) : blas_int(0);
  }



template<typename T>
inline
blas_int
cx_select_rhp(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr)
  {
  arma_debug_sigprint();
  
  // cout << "cx_select_rhp(): (*x_ptr) = " << (*x_ptr) << endl;
  // cout << "cx_select_rhp(): (*y_ptr) = " << (*y_ptr) << endl;
  
  const std::complex<T>& x = (*x_ptr);
  const std::complex<T>& y = (*y_ptr);
  
  if( (y.real() == T(0)) && (y.imag() == T(0)) )  { return blas_int(0); }  // consider an infinite eig value not to lie in either lhp or rhp
  
  return (std::real(x / y) > T(0)) ? blas_int(1) : blas_int(0);
  }



template<typename T>
inline
blas_int
cx_select_iuc(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr)
  {
  arma_debug_sigprint();
  
  // cout << "cx_select_iuc(): (*x_ptr) = " << (*x_ptr) << endl;
  // cout << "cx_select_iuc(): (*y_ptr) = " << (*y_ptr) << endl;
  
  const std::complex<T>& x = (*x_ptr);
  const std::complex<T>& y = (*y_ptr);
  
  if( (y.real() == T(0)) && (y.imag() == T(0)) )  { return blas_int(0); }  // consider an infinite eig value to be outside of the unit circle
  
  return (std::abs(x / y) < T(1)) ? blas_int(1) : blas_int(0);
  }



template<typename T>
inline
blas_int
cx_select_ouc(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr)
  {
  arma_debug_sigprint();
  
  // cout << "cx_select_ouc(): (*x_ptr) = " << (*x_ptr) << endl;
  // cout << "cx_select_ouc(): (*y_ptr) = " << (*y_ptr) << endl;
  
  const std::complex<T>& x = (*x_ptr);
  const std::complex<T>& y = (*y_ptr);
  
  if( (y.real() == T(0)) && (y.imag() == T(0)) )
    {
    return ((x.real() == T(0)) && (x.imag() == T(0))) ? blas_int(0) : blas_int(1);  // consider an infinite eig value to be outside of the unit circle
    }
  
  return (std::abs(x / y) > T(1)) ? blas_int(1) : blas_int(0);
  }



// need to do shenanigans with pointers due to:
// - we're using LAPACK ?gges() defined to expect pointer-to-function to be passed as pointer-to-object
// - explicit casting between pointer-to-function and pointer-to-object is a non-standard extension in C
// - the extension is essentially mandatory on POSIX systems
// - some compilers will complain about the extension in pedantic mode

template<typename T>
inline
void_ptr
ptr_cast(blas_int (*function)(const T*, const T*, const T*))
  {
  // TODO: investigate replacement of union-based conversion
  
  union converter
    {
    blas_int (*fn)(const T*, const T*, const T*);
    void_ptr obj;
    };
  
  converter tmp;
  
  tmp.obj = 0;
  tmp.fn  = function;
  
  return tmp.obj;
  }



template<typename T>
inline
void_ptr
ptr_cast(blas_int (*function)(const std::complex<T>*, const std::complex<T>*))
  {
  // TODO: investigate replacement of union-based conversion
  
  union converter
    {
    blas_int (*fn)(const std::complex<T>*, const std::complex<T>*);
    void_ptr obj;
    };
  
  converter tmp;
  
  tmp.obj = 0;
  tmp.fn  = function;
  
  return tmp.obj;
  }



}  // end of namespace qz_helper


//! @}
