From abf7bcb9db93ed33220cd0f6d8c088fe0047b1a5 Mon Sep 17 00:00:00 2001 From: koldo Date: Mon, 23 Oct 2017 07:47:29 +0000 Subject: [PATCH] Eigen: Updated to version 3.3.4 git-svn-id: svn://ultimatepp.org/upp/trunk@11406 f0d560ea-af0d-0410-9eb7-867de7ffcac7 --- uppsrc/plugin/Eigen/Copying | 5 - uppsrc/plugin/Eigen/Eigen/Array | 11 - uppsrc/plugin/Eigen/Eigen/Core | 25 +- uppsrc/plugin/Eigen/Eigen/Eigen2Support | 95 - uppsrc/plugin/Eigen/Eigen/LeastSquares | 32 - uppsrc/plugin/Eigen/Eigen/QtAlignedMalloc | 4 +- uppsrc/plugin/Eigen/Eigen/Sparse | 2 + uppsrc/plugin/Eigen/Eigen/StdDeque | 2 +- uppsrc/plugin/Eigen/Eigen/StdList | 2 +- uppsrc/plugin/Eigen/Eigen/StdVector | 2 +- .../plugin/Eigen/Eigen/src/Cholesky/LLT_MKL.h | 102 - .../Eigen/src/CholmodSupport/CholmodSupport.h | 71 +- uppsrc/plugin/Eigen/Eigen/src/Core/Array.h | 8 +- .../plugin/Eigen/Eigen/src/Core/ArrayBase.h | 8 +- .../Eigen/Eigen/src/Core/ArrayWrapper.h | 6 +- .../Eigen/Eigen/src/Core/AssignEvaluator.h | 29 +- .../Eigen/Eigen/src/Core/BooleanRedux.h | 38 +- .../Eigen/Eigen/src/Core/CoreEvaluators.h | 207 +- .../Eigen/Eigen/src/Core/CwiseBinaryOp.h | 2 +- .../Eigen/Eigen/src/Core/CwiseNullaryOp.h | 80 +- .../plugin/Eigen/Eigen/src/Core/DenseBase.h | 20 +- .../Eigen/Eigen/src/Core/DenseStorage.h | 29 +- uppsrc/plugin/Eigen/Eigen/src/Core/Diagonal.h | 2 +- uppsrc/plugin/Eigen/Eigen/src/Core/Dot.h | 3 +- .../plugin/Eigen/Eigen/src/Core/EigenBase.h | 4 + uppsrc/plugin/Eigen/Eigen/src/Core/Flagged.h | 140 - uppsrc/plugin/Eigen/Eigen/src/Core/Functors.h | 1026 ------ .../Eigen/Eigen/src/Core/GenericPacketMath.h | 11 +- .../Eigen/Eigen/src/Core/GlobalFunctions.h | 1 - uppsrc/plugin/Eigen/Eigen/src/Core/IO.h | 14 - .../Eigen/Eigen/src/Core/MathFunctions.h | 272 +- .../plugin/Eigen/Eigen/src/Core/MatrixBase.h | 4 +- .../plugin/Eigen/Eigen/src/Core/NumTraits.h | 4 +- .../Eigen/Eigen/src/Core/PlainObjectBase.h | 57 +- .../plugin/Eigen/Eigen/src/Core/ProductBase.h | 290 -- .../Eigen/Eigen/src/Core/ProductEvaluators.h 
| 6 + .../Eigen/Eigen/src/Core/SelfAdjointView.h | 10 + .../Eigen/Eigen/src/Core/SelfCwiseBinaryOp.h | 8 +- uppsrc/plugin/Eigen/Eigen/src/Core/Solve.h | 4 +- .../Eigen/Eigen/src/Core/SolveTriangular.h | 2 + .../plugin/Eigen/Eigen/src/Core/StableNorm.h | 3 +- .../Eigen/Eigen/src/Core/TriangularMatrix.h | 15 +- uppsrc/plugin/Eigen/Eigen/src/Core/Visitor.h | 6 +- .../Eigen/src/Core/arch/AVX512/PacketMath.h | 188 +- .../Eigen/src/Core/arch/AltiVec/Complex.h | 2 +- .../Eigen/src/Core/arch/AltiVec/PacketMath.h | 14 +- .../Eigen/Eigen/src/Core/arch/CUDA/Half.h | 79 +- .../Eigen/src/Core/arch/CUDA/MathFunctions.h | 12 - .../Eigen/src/Core/arch/CUDA/PacketMath.h | 2 +- .../Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 9 - .../Eigen/src/Core/arch/NEON/PacketMath.h | 67 +- .../Eigen/src/Core/arch/ZVector/PacketMath.h | 2 +- .../src/Core/functors/AssignmentFunctors.h | 2 +- .../Eigen/src/Core/functors/NullaryFunctors.h | 15 +- .../Eigen/src/Core/functors/StlFunctors.h | 2 +- .../Eigen/src/Core/functors/UnaryFunctors.h | 44 +- .../src/Core/products/CoeffBasedProduct.h | 476 --- .../src/Core/products/GeneralMatrixMatrix.h | 8 +- .../products/GeneralMatrixMatrixTriangular.h | 29 +- .../GeneralMatrixMatrixTriangular_BLAS.h | 2 +- .../GeneralMatrixMatrixTriangular_MKL.h | 146 - .../Core/products/GeneralMatrixMatrix_MKL.h | 118 - .../src/Core/products/GeneralMatrixVector.h | 704 ++-- .../Core/products/GeneralMatrixVector_MKL.h | 131 - .../Eigen/src/Core/products/Parallelizer.h | 9 +- .../products/SelfadjointMatrixMatrix_MKL.h | 295 -- .../Core/products/SelfadjointMatrixVector.h | 14 +- .../products/SelfadjointMatrixVector_MKL.h | 114 - .../Core/products/TriangularMatrixMatrix.h | 4 +- .../products/TriangularMatrixMatrix_MKL.h | 309 -- .../products/TriangularMatrixVector_MKL.h | 247 -- .../products/TriangularSolverMatrix_MKL.h | 155 - .../Eigen/Eigen/src/Core/util/BlasUtil.h | 5 - .../src/Core/util/DisableStupidWarnings.h | 6 +- .../plugin/Eigen/Eigen/src/Core/util/Macros.h | 30 +- 
.../plugin/Eigen/Eigen/src/Core/util/Memory.h | 54 +- .../Eigen/Eigen/src/Core/util/XprHelper.h | 11 +- .../Eigen/Eigen/src/Eigen2Support/Block.h | 126 - .../Eigen/Eigen/src/Eigen2Support/Cwise.h | 192 - .../Eigen/src/Eigen2Support/CwiseOperators.h | 298 -- .../src/Eigen2Support/Geometry/AlignedBox.h | 159 - .../Eigen/src/Eigen2Support/Geometry/All.h | 115 - .../src/Eigen2Support/Geometry/AngleAxis.h | 214 -- .../src/Eigen2Support/Geometry/Hyperplane.h | 254 -- .../Eigen2Support/Geometry/ParametrizedLine.h | 141 - .../src/Eigen2Support/Geometry/Quaternion.h | 495 --- .../src/Eigen2Support/Geometry/Rotation2D.h | 145 - .../src/Eigen2Support/Geometry/RotationBase.h | 123 - .../src/Eigen2Support/Geometry/Scaling.h | 167 - .../src/Eigen2Support/Geometry/Transform.h | 786 ----- .../src/Eigen2Support/Geometry/Translation.h | 184 - .../plugin/Eigen/Eigen/src/Eigen2Support/LU.h | 120 - .../Eigen/Eigen/src/Eigen2Support/Lazy.h | 71 - .../Eigen/src/Eigen2Support/LeastSquares.h | 169 - .../Eigen/Eigen/src/Eigen2Support/Macros.h | 20 - .../Eigen/src/Eigen2Support/MathFunctions.h | 57 - .../Eigen/Eigen/src/Eigen2Support/Memory.h | 45 - .../Eigen/Eigen/src/Eigen2Support/Meta.h | 75 - .../Eigen/Eigen/src/Eigen2Support/Minor.h | 117 - .../plugin/Eigen/Eigen/src/Eigen2Support/QR.h | 67 - .../Eigen/Eigen/src/Eigen2Support/SVD.h | 637 ---- .../src/Eigen2Support/TriangularSolver.h | 42 - .../Eigen/src/Eigen2Support/VectorBlock.h | 94 - .../src/Eigenvalues/ComplexEigenSolver.h | 6 +- .../Eigen/src/Eigenvalues/ComplexSchur_MKL.h | 94 - .../Eigen/Eigen/src/Eigenvalues/RealSchur.h | 12 + .../Eigen/src/Eigenvalues/RealSchur_MKL.h | 83 - .../src/Eigenvalues/SelfAdjointEigenSolver.h | 3 +- .../Eigenvalues/SelfAdjointEigenSolver_MKL.h | 92 - .../Eigen/Eigen/src/Geometry/AlignedBox.h | 2 +- .../Eigen/src/Geometry/ParametrizedLine.h | 39 +- .../Eigen/Eigen/src/Geometry/Quaternion.h | 10 +- .../Eigen/Eigen/src/Geometry/Transform.h | 14 +- .../Eigen/src/Geometry/arch/Geometry_SSE.h | 60 +- 
.../Eigen/src/Householder/BlockHouseholder.h | 3 +- .../BasicPreconditioners.h | 27 +- uppsrc/plugin/Eigen/Eigen/src/Jacobi/Jacobi.h | 42 +- uppsrc/plugin/Eigen/Eigen/src/LU/Inverse.h | 400 --- .../Eigen/Eigen/src/LU/PartialPivLU_MKL.h | 85 - .../Eigen/src/OrderingMethods/Eigen_Colamd.h | 2 +- .../Eigen/Eigen/src/QR/ColPivHouseholderQR.h | 12 +- .../Eigen/src/QR/ColPivHouseholderQR_MKL.h | 99 - .../src/QR/CompleteOrthogonalDecomposition.h | 2 +- .../Eigen/Eigen/src/QR/HouseholderQR_MKL.h | 71 - uppsrc/plugin/Eigen/Eigen/src/SVD/BDCSVD.h | 81 +- uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD.h | 12 +- .../Eigen/Eigen/src/SVD/JacobiSVD_MKL.h | 92 - .../Eigen/src/SVD/UpperBidiagonalization.h | 4 +- .../Eigen/Eigen/src/SparseCore/AmbiVector.h | 8 +- .../Eigen/Eigen/src/SparseCore/SparseAssign.h | 5 +- .../src/SparseCore/SparseCompressedBase.h | 38 +- .../src/SparseCore/SparseCwiseBinaryOp.h | 18 +- .../src/SparseCore/SparseDiagonalProduct.h | 4 + .../Eigen/Eigen/src/SparseCore/SparseMatrix.h | 82 +- .../Eigen/src/SparseCore/SparseMatrixBase.h | 4 + .../src/SparseCore/SparseSelfAdjointView.h | 3 +- .../src/SparseCore/SparseTriangularView.h | 3 + .../Eigen/Eigen/src/SparseCore/SparseView.h | 33 +- .../Eigen/src/SparseCore/TriangularSolver.h | 5 + .../Eigen/Eigen/src/StlSupport/details.h | 14 +- .../Eigen/src/SuperLUSupport/SuperLUSupport.h | 3 + .../Eigen/src/UmfPackSupport/UmfPackSupport.h | 8 +- uppsrc/plugin/Eigen/Eigen/src/misc/Solve.h | 76 - .../plugin/Eigen/Eigen/src/misc/SparseSolve.h | 128 - .../Eigen/src/plugins/ArrayCwiseUnaryOps.h | 17 +- .../Eigen/Eigen/src/plugins/BlockMethods.h | 12 +- uppsrc/plugin/Eigen/README.md | 3 + .../plugin/Eigen/srcdoc.tpp/Eigen$en-us.tpp | 2 +- .../Eigen/unsupported/Eigen/CXX11/Tensor | 9 +- .../Eigen/unsupported/Eigen/CXX11/ThreadPool | 13 - .../Eigen/CXX11/src/Tensor/README.md | 14 +- .../Eigen/CXX11/src/Tensor/TensorBase.h | 6 - .../CXX11/src/Tensor/TensorConcatenation.h | 6 - .../CXX11/src/Tensor/TensorContraction.h | 290 +- 
.../src/Tensor/TensorContractionBlocking.h | 134 - .../src/Tensor/TensorContractionMapper.h | 74 +- .../CXX11/src/Tensor/TensorContractionSycl.h | 402 --- .../src/Tensor/TensorContractionThreadPool.h | 208 +- .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 2 +- .../CXX11/src/Tensor/TensorDeviceDefault.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 323 +- .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 13 +- .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 12 +- .../Eigen/CXX11/src/Tensor/TensorEvalTo.h | 5 +- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 11 +- .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 4 +- .../src/Tensor/TensorForwardDeclarations.h | 12 - .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorIntDiv.h | 10 +- .../Eigen/CXX11/src/Tensor/TensorMacros.h | 8 - .../Eigen/CXX11/src/Tensor/TensorMeta.h | 5 +- .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 34 +- .../Eigen/CXX11/src/Tensor/TensorPadding.h | 7 - .../Eigen/CXX11/src/Tensor/TensorReduction.h | 7 - .../CXX11/src/Tensor/TensorReductionSycl.h | 156 +- .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 10 +- .../Eigen/CXX11/src/Tensor/TensorStorage.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorSycl.h | 9 - .../TensorSyclConvertToDeviceExpression.h | 28 +- .../src/Tensor/TensorSyclExprConstructor.h | 121 +- .../src/Tensor/TensorSyclExtractAccessor.h | 230 +- .../src/Tensor/TensorSyclExtractFunctors.h | 285 +- .../CXX11/src/Tensor/TensorSyclFunctors.h | 147 - .../CXX11/src/Tensor/TensorSyclLeafCount.h | 112 +- .../src/Tensor/TensorSyclPlaceHolderExpr.h | 31 +- .../Eigen/CXX11/src/Tensor/TensorSyclRun.h | 58 +- .../Eigen/CXX11/src/Tensor/TensorSyclTuple.h | 2 - .../Eigen/CXX11/src/Tensor/TensorTraits.h | 6 - .../src/ThreadPool/NonBlockingThreadPool.h | 50 +- .../Eigen/CXX11/src/ThreadPool/RunQueue.h | 7 - .../CXX11/src/ThreadPool/SimpleThreadPool.h | 8 - .../Eigen/CXX11/src/ThreadPool/ThreadCancel.h | 23 - .../CXX11/src/ThreadPool/ThreadEnvironment.h | 2 - 
.../src/ThreadPool/ThreadPoolInterface.h | 6 - .../Eigen/CXX11/src/util/CXX11Meta.h | 6 +- uppsrc/plugin/Eigen/unsupported/Eigen/SVD | 39 - .../Eigen/src/AutoDiff/AutoDiffScalar.h | 11 +- .../Eigen/src/EulerAngles/EulerAngles.h | 261 +- .../Eigen/src/EulerAngles/EulerSystem.h | 184 +- .../src/IterativeSolvers/IncompleteCholesky.h | 278 -- .../src/MatrixFunctions/MatrixExponential.h | 48 +- .../src/MatrixFunctions/MatrixFunction.h | 11 +- .../MatrixFunctions/MatrixFunctionAtomic.h | 131 - .../src/MatrixFunctions/MatrixLogarithm.h | 2 +- .../Eigen/src/Polynomials/Companion.h | 50 +- .../Eigen/src/Polynomials/PolynomialSolver.h | 18 +- .../Eigen/unsupported/Eigen/src/SVD/BDCSVD.h | 748 ---- .../unsupported/Eigen/src/SVD/JacobiSVD.h | 782 ----- .../Eigen/unsupported/Eigen/src/SVD/SVDBase.h | 236 -- .../unsupported/Eigen/src/SVD/TODOBdcsvd.txt | 29 - .../Eigen/src/SVD/doneInBDCSVD.txt | 21 - .../Eigen/src/SparseExtra/MarketIO.h | 89 +- .../SpecialFunctions/SpecialFunctionsImpl.h | 8 +- .../Eigen/unsupported/bench/bench_svd.cpp | 123 - .../plugin/Eigen/unsupported/doc/Overview.dox | 28 - .../unsupported/doc/eigendoxy_layout.xml.in | 177 - .../unsupported/doc/examples/BVH_Example.cpp | 50 - .../unsupported/doc/examples/EulerAngles.cpp | 46 - .../Eigen/unsupported/doc/examples/FFT.cpp | 118 - .../doc/examples/MatrixExponential.cpp | 16 - .../doc/examples/MatrixFunction.cpp | 23 - .../doc/examples/MatrixLogarithm.cpp | 15 - .../unsupported/doc/examples/MatrixPower.cpp | 16 - .../doc/examples/MatrixPower_optimal.cpp | 17 - .../unsupported/doc/examples/MatrixSine.cpp | 20 - .../unsupported/doc/examples/MatrixSinh.cpp | 20 - .../doc/examples/MatrixSquareRoot.cpp | 16 - .../doc/examples/PolynomialSolver1.cpp | 53 - .../doc/examples/PolynomialUtils1.cpp | 20 - uppsrc/plugin/Eigen/unsupported/test/BVH.cpp | 222 -- .../Eigen/unsupported/test/EulerAngles.cpp | 292 -- uppsrc/plugin/Eigen/unsupported/test/FFT.cpp | 2 - uppsrc/plugin/Eigen/unsupported/test/FFTW.cpp | 262 -- 
.../test/NonLinearOptimization.cpp | 1878 ---------- .../Eigen/unsupported/test/NumericalDiff.cpp | 114 - .../Eigen/unsupported/test/alignedvector3.cpp | 84 - .../Eigen/unsupported/test/autodiff.cpp | 367 -- .../unsupported/test/autodiff_scalar.cpp | 83 - .../plugin/Eigen/unsupported/test/bdcsvd.cpp | 213 -- .../unsupported/test/cxx11_eventcount.cpp | 142 - .../Eigen/unsupported/test/cxx11_meta.cpp | 357 -- .../test/cxx11_non_blocking_thread_pool.cpp | 124 - .../Eigen/unsupported/test/cxx11_runqueue.cpp | 235 -- .../unsupported/test/cxx11_tensor_argmax.cpp | 294 -- .../test/cxx11_tensor_argmax_cuda.cu | 254 -- .../unsupported/test/cxx11_tensor_assign.cpp | 370 -- .../test/cxx11_tensor_broadcast_sycl.cpp | 149 - .../test/cxx11_tensor_broadcasting.cpp | 194 -- .../test/cxx11_tensor_builtins_sycl.cpp | 264 -- .../test/cxx11_tensor_cast_float16_cuda.cu | 82 - .../unsupported/test/cxx11_tensor_casts.cpp | 115 - .../test/cxx11_tensor_chipping.cpp | 425 --- .../test/cxx11_tensor_comparisons.cpp | 84 - .../test/cxx11_tensor_complex_cuda.cu | 153 - .../cxx11_tensor_complex_cwise_ops_cuda.cu | 97 - .../test/cxx11_tensor_concatenation.cpp | 137 - .../test/cxx11_tensor_concatenation_sycl.cpp | 180 - .../unsupported/test/cxx11_tensor_const.cpp | 62 - .../test/cxx11_tensor_contract_cuda.cu | 216 -- .../test/cxx11_tensor_contract_sycl.cpp | 218 -- .../test/cxx11_tensor_contraction.cpp | 545 --- .../test/cxx11_tensor_convolution.cpp | 149 - .../unsupported/test/cxx11_tensor_cuda.cu | 1287 ------- .../test/cxx11_tensor_custom_index.cpp | 100 - .../test/cxx11_tensor_custom_op.cpp | 111 - .../unsupported/test/cxx11_tensor_device.cu | 390 --- .../test/cxx11_tensor_device_sycl.cpp | 77 - .../test/cxx11_tensor_dimension.cpp | 69 - .../unsupported/test/cxx11_tensor_empty.cpp | 40 - .../unsupported/test/cxx11_tensor_expr.cpp | 314 -- .../unsupported/test/cxx11_tensor_fft.cpp | 273 -- .../test/cxx11_tensor_fixed_size.cpp | 261 -- .../test/cxx11_tensor_forced_eval.cpp | 79 - 
.../test/cxx11_tensor_forced_eval_sycl.cpp | 76 - .../test/cxx11_tensor_generator.cpp | 91 - .../unsupported/test/cxx11_tensor_ifft.cpp | 154 - .../test/cxx11_tensor_image_patch.cpp | 757 ---- .../test/cxx11_tensor_index_list.cpp | 386 -- .../test/cxx11_tensor_inflation.cpp | 81 - .../unsupported/test/cxx11_tensor_intdiv.cpp | 147 - .../unsupported/test/cxx11_tensor_io.cpp | 136 - .../test/cxx11_tensor_layout_swap.cpp | 61 - .../unsupported/test/cxx11_tensor_lvalue.cpp | 42 - .../unsupported/test/cxx11_tensor_map.cpp | 277 -- .../unsupported/test/cxx11_tensor_math.cpp | 46 - .../test/cxx11_tensor_mixed_indices.cpp | 53 - .../test/cxx11_tensor_morphing.cpp | 485 --- .../test/cxx11_tensor_morphing_sycl.cpp | 247 -- .../test/cxx11_tensor_notification.cpp | 72 - .../test/cxx11_tensor_of_complex.cpp | 103 - .../test/cxx11_tensor_of_const_values.cpp | 105 - .../test/cxx11_tensor_of_float16_cuda.cu | 500 --- .../test/cxx11_tensor_of_strings.cpp | 152 - .../unsupported/test/cxx11_tensor_padding.cpp | 93 - .../test/cxx11_tensor_padding_sycl.cpp | 161 - .../unsupported/test/cxx11_tensor_patch.cpp | 172 - .../unsupported/test/cxx11_tensor_random.cpp | 78 - .../test/cxx11_tensor_random_cuda.cu | 88 - .../test/cxx11_tensor_reduction.cpp | 508 --- .../test/cxx11_tensor_reduction_cuda.cu | 157 - .../test/cxx11_tensor_reduction_sycl.cpp | 147 - .../unsupported/test/cxx11_tensor_ref.cpp | 248 -- .../unsupported/test/cxx11_tensor_reverse.cpp | 190 - .../test/cxx11_tensor_roundings.cpp | 62 - .../unsupported/test/cxx11_tensor_scan.cpp | 110 - .../test/cxx11_tensor_scan_cuda.cu | 79 - .../test/cxx11_tensor_shuffling.cpp | 228 -- .../test/cxx11_tensor_shuffling_sycl.cpp | 122 - .../unsupported/test/cxx11_tensor_simple.cpp | 327 -- .../test/cxx11_tensor_striding.cpp | 119 - .../unsupported/test/cxx11_tensor_sugar.cpp | 81 - .../unsupported/test/cxx11_tensor_sycl.cpp | 247 -- .../test/cxx11_tensor_symmetry.cpp | 818 ----- .../test/cxx11_tensor_thread_pool.cpp | 373 -- 
.../unsupported/test/cxx11_tensor_uint128.cpp | 160 - .../test/cxx11_tensor_volume_patch.cpp | 112 - .../plugin/Eigen/unsupported/test/dgmres.cpp | 31 - .../Eigen/unsupported/test/forward_adolc.cpp | 141 - .../plugin/Eigen/unsupported/test/gmres.cpp | 31 - .../Eigen/unsupported/test/jacobisvd.cpp | 198 -- .../unsupported/test/kronecker_product.cpp | 252 -- .../unsupported/test/levenberg_marquardt.cpp | 1477 -------- .../unsupported/test/matrix_exponential.cpp | 141 - .../unsupported/test/matrix_function.cpp | 193 - .../Eigen/unsupported/test/matrix_functions.h | 67 - .../Eigen/unsupported/test/matrix_power.cpp | 204 -- .../unsupported/test/matrix_square_root.cpp | 31 - .../plugin/Eigen/unsupported/test/minres.cpp | 44 - .../Eigen/unsupported/test/mpreal/mpreal.h | 3104 ----------------- .../Eigen/unsupported/test/mpreal_support.cpp | 65 - .../Eigen/unsupported/test/openglsupport.cpp | 337 -- .../unsupported/test/polynomialsolver.cpp | 216 -- .../unsupported/test/polynomialutils.cpp | 113 - .../Eigen/unsupported/test/sparse_extra.cpp | 170 - .../unsupported/test/special_functions.cpp | 345 -- .../plugin/Eigen/unsupported/test/splines.cpp | 281 -- .../Eigen/unsupported/test/svd_common.h | 261 -- 337 files changed, 2367 insertions(+), 44663 deletions(-) delete mode 100644 uppsrc/plugin/Eigen/Copying delete mode 100644 uppsrc/plugin/Eigen/Eigen/Array delete mode 100644 uppsrc/plugin/Eigen/Eigen/Eigen2Support delete mode 100644 uppsrc/plugin/Eigen/Eigen/LeastSquares delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Cholesky/LLT_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/Flagged.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/Functors.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/ProductBase.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/products/CoeffBasedProduct.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h delete mode 100644 
uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixVector_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixVector_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Block.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Cwise.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/CwiseOperators.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/AlignedBox.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/All.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/AngleAxis.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Hyperplane.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Quaternion.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Rotation2D.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/RotationBase.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Scaling.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Transform.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Translation.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/LU.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Lazy.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/LeastSquares.h delete 
mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Macros.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/MathFunctions.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Memory.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Meta.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Minor.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/QR.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/SVD.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/TriangularSolver.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/VectorBlock.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/ComplexSchur_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/RealSchur_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/LU/Inverse.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/LU/PartialPivLU_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/QR/ColPivHouseholderQR_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/QR/HouseholderQR_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD_MKL.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/misc/Solve.h delete mode 100644 uppsrc/plugin/Eigen/Eigen/src/misc/SparseSolve.h create mode 100644 uppsrc/plugin/Eigen/README.md delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/SVD delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h delete mode 100644 
uppsrc/plugin/Eigen/unsupported/Eigen/src/SVD/BDCSVD.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/src/SVD/JacobiSVD.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/src/SVD/SVDBase.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/src/SVD/TODOBdcsvd.txt delete mode 100644 uppsrc/plugin/Eigen/unsupported/Eigen/src/SVD/doneInBDCSVD.txt delete mode 100644 uppsrc/plugin/Eigen/unsupported/bench/bench_svd.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/Overview.dox delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/eigendoxy_layout.xml.in delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/BVH_Example.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/EulerAngles.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/FFT.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/MatrixExponential.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/MatrixFunction.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/MatrixLogarithm.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/MatrixPower.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/MatrixPower_optimal.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/MatrixSine.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/MatrixSinh.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/MatrixSquareRoot.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/PolynomialSolver1.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/doc/examples/PolynomialUtils1.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/BVH.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/EulerAngles.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/FFT.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/FFTW.cpp delete mode 100644 
uppsrc/plugin/Eigen/unsupported/test/NonLinearOptimization.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/NumericalDiff.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/alignedvector3.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/autodiff.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/autodiff_scalar.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/bdcsvd.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_eventcount.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_meta.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_runqueue.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_argmax.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_assign.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_broadcasting.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_casts.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_chipping.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_comparisons.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_complex_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_concatenation.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp delete mode 100644 
uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_const.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_contract_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_contraction.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_convolution.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_custom_index.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_custom_op.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_device.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_device_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_dimension.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_empty.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_expr.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_fft.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_fixed_size.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_forced_eval.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_generator.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_ifft.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_image_patch.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_index_list.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_inflation.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_intdiv.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_io.cpp delete mode 100644 
uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_layout_swap.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_lvalue.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_map.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_math.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_mixed_indices.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_morphing.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_notification.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_of_complex.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_of_const_values.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_of_strings.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_padding.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_patch.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_random.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_random_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_reduction.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_ref.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_reverse.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_roundings.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_scan.cpp delete mode 100644 
uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_scan_cuda.cu delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_shuffling.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_simple.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_striding.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_sugar.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_sycl.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_symmetry.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_thread_pool.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_uint128.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/cxx11_tensor_volume_patch.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/dgmres.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/forward_adolc.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/gmres.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/jacobisvd.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/kronecker_product.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/levenberg_marquardt.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/matrix_exponential.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/matrix_function.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/matrix_functions.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/matrix_power.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/matrix_square_root.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/minres.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/mpreal/mpreal.h delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/mpreal_support.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/openglsupport.cpp delete 
mode 100644 uppsrc/plugin/Eigen/unsupported/test/polynomialsolver.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/polynomialutils.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/sparse_extra.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/special_functions.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/splines.cpp delete mode 100644 uppsrc/plugin/Eigen/unsupported/test/svd_common.h diff --git a/uppsrc/plugin/Eigen/Copying b/uppsrc/plugin/Eigen/Copying deleted file mode 100644 index a653f1fc0..000000000 --- a/uppsrc/plugin/Eigen/Copying +++ /dev/null @@ -1,5 +0,0 @@ -Copyright (C) 2008 Gael Guennebaud - -This Source Code Form is subject to the terms of the Mozilla -Public License v. 2.0. If a copy of the MPL was not distributed -with this file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/uppsrc/plugin/Eigen/Eigen/Array b/uppsrc/plugin/Eigen/Eigen/Array deleted file mode 100644 index 3d004fb69..000000000 --- a/uppsrc/plugin/Eigen/Eigen/Array +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef EIGEN_ARRAY_MODULE_H -#define EIGEN_ARRAY_MODULE_H - -// include Core first to handle Eigen2 support macros -#include "Core" - -#ifndef EIGEN2_SUPPORT - #error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core. 
-#endif - -#endif // EIGEN_ARRAY_MODULE_H diff --git a/uppsrc/plugin/Eigen/Eigen/Core b/uppsrc/plugin/Eigen/Eigen/Core index 16be82ac2..0f7fa630d 100644 --- a/uppsrc/plugin/Eigen/Eigen/Core +++ b/uppsrc/plugin/Eigen/Eigen/Core @@ -43,8 +43,10 @@ #else #define EIGEN_DEVICE_FUNC #endif + #else #define EIGEN_DEVICE_FUNC + #endif // When compiling CUDA device code with NVCC, pull in math functions from the @@ -145,15 +147,11 @@ #ifdef __FMA__ #define EIGEN_VECTORIZE_FMA #endif - #if defined(__AVX512F__) + #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512) #define EIGEN_VECTORIZE_AVX512 #define EIGEN_VECTORIZE_AVX2 #define EIGEN_VECTORIZE_AVX #define EIGEN_VECTORIZE_FMA - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 #ifdef __AVX512DQ__ #define EIGEN_VECTORIZE_AVX512DQ #endif @@ -285,15 +283,6 @@ #include #endif -#if defined(__SYCL_DEVICE_ONLY__) - #undef min - #undef max - #undef isnan - #undef isinf - #undef isfinite - #include -#endif - /** \brief Namespace containing all symbols from the %Eigen library. */ namespace Eigen { @@ -332,12 +321,16 @@ inline static const char *SimdInstructionSetsInUse(void) { #error Eigen2-support is only available up to version 3.2. 
Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information #endif +namespace Eigen { + // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to // ensure QNX/QCC support using std::size_t; // gcc 4.6.0 wants std:: for ptrdiff_t using std::ptrdiff_t; +} + /** \defgroup Core_Module Core module * This is the main module of Eigen providing dense matrix and vector support * (both fixed and dynamic size) with all the features corresponding to a BLAS library @@ -364,8 +357,6 @@ using std::ptrdiff_t; #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX512/PacketMath.h" - #include "src/Core/arch/SSE/MathFunctions.h" - #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX512/MathFunctions.h" #elif defined EIGEN_VECTORIZE_AVX // Use AVX for floats and doubles, SSE for integers @@ -418,6 +409,7 @@ using std::ptrdiff_t; // on CUDA devices #include "src/Core/arch/CUDA/Complex.h" +#include "src/Core/IO.h" #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" #include "src/Core/MatrixBase.h" @@ -465,7 +457,6 @@ using std::ptrdiff_t; #include "src/Core/Redux.h" #include "src/Core/Visitor.h" #include "src/Core/Fuzzy.h" -#include "src/Core/IO.h" #include "src/Core/Swap.h" #include "src/Core/CommaInitializer.h" #include "src/Core/GeneralProduct.h" diff --git a/uppsrc/plugin/Eigen/Eigen/Eigen2Support b/uppsrc/plugin/Eigen/Eigen/Eigen2Support deleted file mode 100644 index 6aa009d20..000000000 --- a/uppsrc/plugin/Eigen/Eigen/Eigen2Support +++ /dev/null @@ -1,95 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2SUPPORT_H -#define EIGEN2SUPPORT_H - -#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H)) -#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header -#endif - -#ifndef EIGEN_NO_EIGEN2_DEPRECATED_WARNING - -#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) -#warning "Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)" -#else -#pragma message ("Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)") -#endif - -#endif // EIGEN_NO_EIGEN2_DEPRECATED_WARNING - -#include "src/Core/util/DisableStupidWarnings.h" - -/** \ingroup Support_modules - * \defgroup Eigen2Support_Module Eigen2 support module - * - * \warning Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. - * - * This module provides a couple of deprecated functions improving the compatibility with Eigen2. 
- * - * To use it, define EIGEN2_SUPPORT before including any Eigen header - * \code - * #define EIGEN2_SUPPORT - * \endcode - * - */ - -#include "src/Eigen2Support/Macros.h" -#include "src/Eigen2Support/Memory.h" -#include "src/Eigen2Support/Meta.h" -#include "src/Eigen2Support/Lazy.h" -#include "src/Eigen2Support/Cwise.h" -#include "src/Eigen2Support/CwiseOperators.h" -#include "src/Eigen2Support/TriangularSolver.h" -#include "src/Eigen2Support/Block.h" -#include "src/Eigen2Support/VectorBlock.h" -#include "src/Eigen2Support/Minor.h" -#include "src/Eigen2Support/MathFunctions.h" - - -#include "src/Core/util/ReenableStupidWarnings.h" - -// Eigen2 used to include iostream -#include - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ -using Eigen::Matrix##SizeSuffix##TypeSuffix; \ -using Eigen::Vector##SizeSuffix##TypeSuffix; \ -using Eigen::RowVector##SizeSuffix##TypeSuffix; - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ - -#define EIGEN_USING_MATRIX_TYPEDEFS \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd) - -#define USING_PART_OF_NAMESPACE_EIGEN \ -EIGEN_USING_MATRIX_TYPEDEFS \ -using Eigen::Matrix; \ -using Eigen::MatrixBase; \ -using Eigen::ei_random; \ -using Eigen::ei_real; \ -using Eigen::ei_imag; \ -using Eigen::ei_conj; \ -using Eigen::ei_abs; \ -using Eigen::ei_abs2; \ -using Eigen::ei_sqrt; \ -using Eigen::ei_exp; \ -using Eigen::ei_log; \ -using Eigen::ei_sin; \ -using Eigen::ei_cos; - -#endif // EIGEN2SUPPORT_H diff --git a/uppsrc/plugin/Eigen/Eigen/LeastSquares 
b/uppsrc/plugin/Eigen/Eigen/LeastSquares deleted file mode 100644 index 35137c25d..000000000 --- a/uppsrc/plugin/Eigen/Eigen/LeastSquares +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef EIGEN_REGRESSION_MODULE_H -#define EIGEN_REGRESSION_MODULE_H - -#ifndef EIGEN2_SUPPORT -#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT) -#endif - -// exclude from normal eigen3-only documentation -#ifdef EIGEN2_SUPPORT - -#include "Core" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include "Eigenvalues" -#include "Geometry" - -/** \defgroup LeastSquares_Module LeastSquares module - * This module provides linear regression and related features. - * - * \code - * #include - * \endcode - */ - -#include "src/Eigen2Support/LeastSquares.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN2_SUPPORT - -#endif // EIGEN_REGRESSION_MODULE_H diff --git a/uppsrc/plugin/Eigen/Eigen/QtAlignedMalloc b/uppsrc/plugin/Eigen/Eigen/QtAlignedMalloc index 4044d5ac5..c6571f129 100644 --- a/uppsrc/plugin/Eigen/Eigen/QtAlignedMalloc +++ b/uppsrc/plugin/Eigen/Eigen/QtAlignedMalloc @@ -14,7 +14,7 @@ #include "src/Core/util/DisableStupidWarnings.h" -void *qMalloc(size_t size) +void *qMalloc(std::size_t size) { return Eigen::internal::aligned_malloc(size); } @@ -24,7 +24,7 @@ void qFree(void *ptr) Eigen::internal::aligned_free(ptr); } -void *qRealloc(void *ptr, size_t size) +void *qRealloc(void *ptr, std::size_t size) { void* newPtr = Eigen::internal::aligned_malloc(size); memcpy(newPtr, ptr, size); diff --git a/uppsrc/plugin/Eigen/Eigen/Sparse b/uppsrc/plugin/Eigen/Eigen/Sparse index a2ef7a665..136e681a1 100644 --- a/uppsrc/plugin/Eigen/Eigen/Sparse +++ b/uppsrc/plugin/Eigen/Eigen/Sparse @@ -25,7 +25,9 @@ #include "SparseCore" #include "OrderingMethods" +#ifndef EIGEN_MPL2_ONLY #include "SparseCholesky" +#endif #include "SparseLU" #include "SparseQR" #include "IterativeLinearSolvers" diff --git a/uppsrc/plugin/Eigen/Eigen/StdDeque 
b/uppsrc/plugin/Eigen/Eigen/StdDeque index be3a7f82b..bc68397be 100644 --- a/uppsrc/plugin/Eigen/Eigen/StdDeque +++ b/uppsrc/plugin/Eigen/Eigen/StdDeque @@ -14,7 +14,7 @@ #include "Core" #include -#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) diff --git a/uppsrc/plugin/Eigen/Eigen/StdList b/uppsrc/plugin/Eigen/Eigen/StdList index 07ba1297b..4c6262c08 100644 --- a/uppsrc/plugin/Eigen/Eigen/StdList +++ b/uppsrc/plugin/Eigen/Eigen/StdList @@ -13,7 +13,7 @@ #include "Core" #include -#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) diff --git a/uppsrc/plugin/Eigen/Eigen/StdVector b/uppsrc/plugin/Eigen/Eigen/StdVector index fdfc37766..0c4697ad5 100644 --- a/uppsrc/plugin/Eigen/Eigen/StdVector +++ b/uppsrc/plugin/Eigen/Eigen/StdVector @@ -14,7 +14,7 @@ #include "Core" #include -#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ #define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) diff --git a/uppsrc/plugin/Eigen/Eigen/src/Cholesky/LLT_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/Cholesky/LLT_MKL.h deleted file mode 100644 index 66675d747..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Cholesky/LLT_MKL.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. 
- - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * LLt decomposition based on LAPACKE_?potrf function. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_LLT_MKL_H -#define EIGEN_LLT_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" -#include - -namespace Eigen { - -namespace internal { - -template struct mkl_llt; - -#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template<> struct mkl_llt \ -{ \ - template \ - static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \ - { \ - lapack_int matrix_order; \ - lapack_int size, lda, info, StorageOrder; \ - EIGTYPE* a; \ - eigen_assert(m.rows()==m.cols()); \ - /* Set up parameters for ?potrf */ \ - size = m.rows(); \ - StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ - matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - a = &(m.coeffRef(0,0)); \ - lda = m.outerStride(); \ -\ - info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \ - info = (info==0) ? -1 : info>0 ? info-1 : size; \ - return info; \ - } \ -}; \ -template<> struct llt_inplace \ -{ \ - template \ - static typename MatrixType::Index blocked(MatrixType& m) \ - { \ - return mkl_llt::potrf(m, 'L'); \ - } \ - template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ - { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ -}; \ -template<> struct llt_inplace \ -{ \ - template \ - static typename MatrixType::Index blocked(MatrixType& m) \ - { \ - return mkl_llt::potrf(m, 'U'); \ - } \ - template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ - { \ - Transpose matt(mat); \ - return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); \ - } \ -}; - -EIGEN_MKL_LLT(double, double, d) -EIGEN_MKL_LLT(float, float, s) -EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LLT(scomplex, MKL_Complex8, c) - -} // end namespace 
internal - -} // end namespace Eigen - -#endif // EIGEN_LLT_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/CholmodSupport/CholmodSupport.h b/uppsrc/plugin/Eigen/Eigen/src/CholmodSupport/CholmodSupport.h index a07efb1d5..571972023 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/uppsrc/plugin/Eigen/Eigen/src/CholmodSupport/CholmodSupport.h @@ -32,7 +32,7 @@ template<> struct cholmod_configure_matrix > { } }; -// Other scalar types are not yet supported by Cholmod +// Other scalar types are not yet suppotred by Cholmod // template<> struct cholmod_configure_matrix { // template // static void run(CholmodType& mat) { @@ -124,9 +124,6 @@ cholmod_sparse viewAsCholmod(const SparseSelfAdjointView::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - if(_Options & RowMajorBit) res.stype *=-1; return res; } @@ -162,44 +159,6 @@ MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); } -namespace internal { - -// template specializations for int and long that call the correct cholmod method - -#define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ - template ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ - template<> ret cm_ ## name (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } - -#define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ - template ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ - template<> ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } - -EIGEN_CHOLMOD_SPECIALIZE0(int, start) -EIGEN_CHOLMOD_SPECIALIZE0(int, finish) - -EIGEN_CHOLMOD_SPECIALIZE1(int, free_factor, cholmod_factor*, L) -EIGEN_CHOLMOD_SPECIALIZE1(int, free_dense, cholmod_dense*, X) -EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A) - -EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) - -template cholmod_dense* cm_solve (int 
sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } -template<> cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } - -template cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); } -template<> cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } - -template -int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } -template<> -int cm_factorize_p (cholmod_sparse* A, double beta[2], long* fset, size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } - -#undef EIGEN_CHOLMOD_SPECIALIZE0 -#undef EIGEN_CHOLMOD_SPECIALIZE1 - -} // namespace internal - - enum CholmodMode { CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt }; @@ -236,7 +195,7 @@ class CholmodBase : public SparseSolverBase { EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - internal::cm_start(m_cholmod); + cholmod_start(&m_cholmod); } explicit CholmodBase(const MatrixType& matrix) @@ -244,15 +203,15 @@ class CholmodBase : public SparseSolverBase { EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - internal::cm_start(m_cholmod); + cholmod_start(&m_cholmod); compute(matrix); } ~CholmodBase() { if(m_cholmodFactor) - internal::cm_free_factor(m_cholmodFactor, m_cholmod); - internal::cm_finish(m_cholmod); + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + 
cholmod_finish(&m_cholmod); } inline StorageIndex cols() const { return internal::convert_index(m_cholmodFactor->n); } @@ -260,7 +219,7 @@ class CholmodBase : public SparseSolverBase /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was successful, + * \returns \c Success if computation was succesful, * \c NumericalIssue if the matrix.appears to be negative. */ ComputationInfo info() const @@ -287,11 +246,11 @@ class CholmodBase : public SparseSolverBase { if(m_cholmodFactor) { - internal::cm_free_factor(m_cholmodFactor, m_cholmod); + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); m_cholmodFactor = 0; } cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); - m_cholmodFactor = internal::cm_analyze(A, m_cholmod); + m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); this->m_isInitialized = true; this->m_info = Success; @@ -309,7 +268,7 @@ class CholmodBase : public SparseSolverBase { eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); - internal::cm_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod); + cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); // If the factorization failed, minor is the column at which it did. On success minor == n. this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); @@ -330,20 +289,19 @@ class CholmodBase : public SparseSolverBase EIGEN_UNUSED_VARIABLE(size); eigen_assert(size==b.rows()); - // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref. + // Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref. 
Ref > b_ref(b.derived()); cholmod_dense b_cd = viewAsCholmod(b_ref); - cholmod_dense* x_cd = internal::cm_solve(CHOLMOD_A, *m_cholmodFactor, b_cd, m_cholmod); + cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); if(!x_cd) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); - internal::cm_free_dense(x_cd, m_cholmod); + cholmod_free_dense(&x_cd, &m_cholmod); } /** \internal */ @@ -358,16 +316,15 @@ class CholmodBase : public SparseSolverBase // note: cs stands for Cholmod Sparse Ref > b_ref(b.const_cast_derived()); cholmod_sparse b_cs = viewAsCholmod(b_ref); - cholmod_sparse* x_cs = internal::cm_spsolve(CHOLMOD_A, *m_cholmodFactor, b_cs, m_cholmod); + cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); if(!x_cs) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) 
- // NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's sparse solver) dest.derived() = viewAsEigen(*x_cs); - internal::cm_free_sparse(x_cs, m_cholmod); + cholmod_free_sparse(&x_cs, &m_cholmod); } #endif // EIGEN_PARSED_BY_DOXYGEN diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/Array.h b/uppsrc/plugin/Eigen/Eigen/src/Core/Array.h index 0d34269fd..e10020d4f 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/Array.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/Array.h @@ -231,10 +231,16 @@ class Array : Base(other) { } + private: + struct PrivateType {}; + public: + /** \sa MatrixBase::operator=(const EigenBase&) */ template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const EigenBase &other) + EIGEN_STRONG_INLINE Array(const EigenBase &other, + typename internal::enable_if::value, + PrivateType>::type = PrivateType()) : Base(other.derived()) { } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/ArrayBase.h b/uppsrc/plugin/Eigen/Eigen/src/Core/ArrayBase.h index f0232f65e..3dbc7084c 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/ArrayBase.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/ArrayBase.h @@ -175,7 +175,7 @@ template class ArrayBase */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op()); @@ -188,7 +188,7 @@ ArrayBase::operator-=(const ArrayBase &other) */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase::operator+=(const ArrayBase& other) { call_assignment(derived(), other.derived(), internal::add_assign_op()); @@ -201,7 +201,7 @@ ArrayBase::operator+=(const ArrayBase& other) */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase::operator*=(const ArrayBase& other) { call_assignment(derived(), other.derived(), internal::mul_assign_op()); 
@@ -214,7 +214,7 @@ ArrayBase::operator*=(const ArrayBase& other) */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase::operator/=(const ArrayBase& other) { call_assignment(derived(), other.derived(), internal::div_assign_op()); diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/ArrayWrapper.h b/uppsrc/plugin/Eigen/Eigen/src/Core/ArrayWrapper.h index a04521a16..688aadd62 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/ArrayWrapper.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/ArrayWrapper.h @@ -32,7 +32,8 @@ struct traits > // Let's remove NestByRefBit enum { Flags0 = traits::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -129,7 +130,8 @@ struct traits > // Let's remove NestByRefBit enum { Flags0 = traits::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/AssignEvaluator.h b/uppsrc/plugin/Eigen/Eigen/src/Core/AssignEvaluator.h index 14400d246..b0ec7b7ca 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/AssignEvaluator.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/AssignEvaluator.h @@ -515,7 +515,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::Scalar Scalar; typedef typename Kernel::PacketType PacketType; @@ -563,7 +563,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; typedef typename Kernel::PacketType PacketType; @@ -701,6 
+701,26 @@ protected: * Part 5 : Entry point for dense rectangular assignment ***************************************************************************/ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/) +{ + EIGEN_ONLY_USED_FOR_DEBUG(dst); + EIGEN_ONLY_USED_FOR_DEBUG(src); + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op &/*func*/) +{ + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols))) + dst.resize(dstRows, dstCols); + eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); +} + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) { @@ -711,10 +731,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, // we need to resize the destination after the source evaluator has been created. 
- Index dstRows = src.rows(); - Index dstCols = src.cols(); - if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) - dst.resize(dstRows, dstCols); + resize_if_allowed(dst, src, func); DstEvaluatorType dstEvaluator(dst); diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/BooleanRedux.h b/uppsrc/plugin/Eigen/Eigen/src/Core/BooleanRedux.h index ed607d5d8..8409d8749 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/BooleanRedux.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/BooleanRedux.h @@ -14,54 +14,56 @@ namespace Eigen { namespace internal { -template +template struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return all_unroller::run(mat) && mat.coeff(row, col); + return all_unroller::run(mat) && mat.coeff(row, col); } }; -template -struct all_unroller +template +struct all_unroller { static inline bool run(const Derived &/*mat*/) { return true; } }; -template -struct all_unroller +template +struct all_unroller { static inline bool run(const Derived &) { return false; } }; -template +template struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return any_unroller::run(mat) || mat.coeff(row, col); + return any_unroller::run(mat) || mat.coeff(row, col); } }; -template -struct any_unroller +template +struct any_unroller { static inline bool run(const Derived & /*mat*/) { return false; } }; -template -struct any_unroller +template +struct any_unroller { static inline bool run(const Derived &) { return false; } }; @@ -85,7 +87,7 @@ inline bool DenseBase::all() const }; Evaluator 
evaluator(derived()); if(unroll) - return internal::all_unroller::RowsAtCompileTime>::run(evaluator); + return internal::all_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -109,7 +111,7 @@ inline bool DenseBase::any() const }; Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller::RowsAtCompileTime>::run(evaluator); + return internal::any_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/CoreEvaluators.h b/uppsrc/plugin/Eigen/Eigen/src/Core/CoreEvaluators.h index 24fc7835b..f7c1effca 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/CoreEvaluators.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/CoreEvaluators.h @@ -106,7 +106,7 @@ struct evaluator // ---------- base class for all evaluators ---------- template -struct evaluator_base +struct evaluator_base : public noncopyable { // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits ExpressionTraits; @@ -114,14 +114,6 @@ struct evaluator_base enum { Alignment = 0 }; - // noncopyable: - // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) - // and make complex evaluator much larger than then should do. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {} -private: - EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); - EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); }; // -------------------- Matrix and Array -------------------- @@ -131,27 +123,6 @@ private: // Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, // so no need for more sophisticated dispatching. -// this helper permits to completely eliminate m_outerStride if it is known at compiletime. 
-template class plainobjectbase_evaluator_data { -public: - plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) - { - EIGEN_ONLY_USED_FOR_DEBUG(outerStride); - eigen_internal_assert(outerStride==OuterStride); - } - Index outerStride() const { return OuterStride; } - const Scalar *data; -}; - -template class plainobjectbase_evaluator_data { -public: - plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} - Index outerStride() const { return m_outerStride; } - const Scalar *data; -protected: - Index m_outerStride; -}; - template struct evaluator > : evaluator_base @@ -170,21 +141,18 @@ struct evaluator > Flags = traits::EvaluatorFlags, Alignment = traits::Alignment }; - enum { - // We do not need to know the outer stride for vectors - OuterStrideAtCompileTime = IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime - }; - + EIGEN_DEVICE_FUNC evaluator() - : m_d(0,OuterStrideAtCompileTime) + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) - : m_d(m.data(),IsVectorAtCompileTime ? 0 : m.outerStride()) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 
0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -193,30 +161,30 @@ struct evaluator > CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) - return m_d.data[row * m_d.outerStride() + col]; + return m_data[row * m_outerStride.value() + col]; else - return m_d.data[row + col * m_d.outerStride()]; + return m_data[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.data[index]; + return m_data[index]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) - return const_cast(m_d.data)[row * m_d.outerStride() + col]; + return const_cast(m_data)[row * m_outerStride.value() + col]; else - return const_cast(m_d.data)[row + col * m_d.outerStride()]; + return const_cast(m_data)[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return const_cast(m_d.data)[index]; + return const_cast(m_data)[index]; } template @@ -224,16 +192,16 @@ struct evaluator > PacketType packet(Index row, Index col) const { if (IsRowMajor) - return ploadt(m_d.data + row * m_d.outerStride() + col); + return ploadt(m_data + row * m_outerStride.value() + col); else - return ploadt(m_d.data + row + col * m_d.outerStride()); + return ploadt(m_data + row + col * m_outerStride.value()); } template EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return ploadt(m_d.data + index); + return ploadt(m_data + index); } template @@ -242,22 +210,26 @@ struct evaluator > { if (IsRowMajor) return pstoret - (const_cast(m_d.data) + row * m_d.outerStride() + col, x); + (const_cast(m_data) + row * m_outerStride.value() + col, x); else return pstoret - (const_cast(m_d.data) + row + col * m_d.outerStride(), x); + (const_cast(m_data) + row + col * m_outerStride.value(), x); } template EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return pstoret(const_cast(m_d.data) + 
index, x); + return pstoret(const_cast(m_data) + index, x); } protected: + const Scalar *m_data; - plainobjectbase_evaluator_data m_d; + // We do not need to know the outer stride for vectors + variable_if_dynamic m_outerStride; }; template @@ -555,7 +527,9 @@ struct unary_evaluator, IndexBased > }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit unary_evaluator(const XprType& op) : m_d(op) + explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -566,43 +540,32 @@ struct unary_evaluator, IndexBased > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_functor(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_functor(m_argImpl.coeff(index)); } template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.argImpl.template packet(row, col)); + return m_functor.packetOp(m_argImpl.template packet(row, col)); } template EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.argImpl.template packet(index)); + return m_functor.packetOp(m_argImpl.template packet(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - class Data : private UnaryOp - { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast(*this); } - evaluator argImpl; - }; - - Data m_d; + const UnaryOp m_functor; + evaluator m_argImpl; }; // -------------------- CwiseTernaryOp -------------------- @@ -646,7 +609,11 @@ struct 
ternary_evaluator, IndexBased evaluator::Alignment) }; - EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -657,47 +624,38 @@ struct ternary_evaluator, IndexBased EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); } template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet(row, col), - m_d.arg2Impl.template packet(row, col), - m_d.arg3Impl.template packet(row, col)); + return m_functor.packetOp(m_arg1Impl.template packet(row, col), + m_arg2Impl.template packet(row, col), + m_arg3Impl.template packet(row, col)); } template EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet(index), - m_d.arg2Impl.template packet(index), - m_d.arg3Impl.template packet(index)); + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); } protected: - // this helper permits to completely eliminate the functor if it is empty - struct Data : private TernaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : 
TernaryOp(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TernaryOp& func() const { return static_cast(*this); } - evaluator arg1Impl; - evaluator arg2Impl; - evaluator arg3Impl; - }; - - Data m_d; + const TernaryOp m_functor; + evaluator m_arg1Impl; + evaluator m_arg2Impl; + evaluator m_arg3Impl; }; // -------------------- CwiseBinaryOp -------------------- @@ -738,7 +696,10 @@ struct binary_evaluator, IndexBased, IndexBase Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) }; - EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -749,45 +710,35 @@ struct binary_evaluator, IndexBased, IndexBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index)); + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet(row, col), - m_d.rhsImpl.template packet(row, col)); + return m_functor.packetOp(m_lhsImpl.template packet(row, col), + m_rhsImpl.template packet(row, col)); } template EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet(index), - m_d.rhsImpl.template packet(index)); + return m_functor.packetOp(m_lhsImpl.template packet(index), + 
m_rhsImpl.template packet(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private BinaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : BinaryOp(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const BinaryOp& func() const { return static_cast(*this); } - evaluator lhsImpl; - evaluator rhsImpl; - }; - - Data m_d; + const BinaryOp m_functor; + evaluator m_lhsImpl; + evaluator m_rhsImpl; }; // -------------------- CwiseUnaryView -------------------- @@ -806,7 +757,9 @@ struct unary_evaluator, IndexBased> Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -818,40 +771,30 @@ struct unary_evaluator, IndexBased> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_unaryOp(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_unaryOp(m_argImpl.coeff(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_d.func()(m_d.argImpl.coeffRef(row, col)); + return m_unaryOp(m_argImpl.coeffRef(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_d.func()(m_d.argImpl.coeffRef(index)); + return m_unaryOp(m_argImpl.coeffRef(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private UnaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const 
XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast(*this); } - evaluator argImpl; - }; - - Data m_d; + const UnaryOp m_unaryOp; + evaluator m_argImpl; }; // -------------------- Map -------------------- @@ -1613,9 +1556,7 @@ struct evaluator > { } typedef typename XprType::Scalar Scalar; - // FIXME having to check whether ArgType is sparse here i not very nice. - typedef typename internal::conditional::value, - typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; + typedef typename XprType::CoeffReturnType CoeffReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index) const diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/CwiseBinaryOp.h b/uppsrc/plugin/Eigen/Eigen/src/Core/CwiseBinaryOp.h index 9ddbfe286..a36765e39 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/CwiseBinaryOp.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -46,7 +46,7 @@ struct traits > typedef typename remove_reference::type _LhsNested; typedef typename remove_reference::type _RhsNested; enum { - Flags = _LhsNested::Flags & RowMajorBit + Flags = cwise_promote_storage_order::StorageKind,typename traits::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value }; }; } // end namespace internal diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/CwiseNullaryOp.h b/uppsrc/plugin/Eigen/Eigen/src/Core/CwiseNullaryOp.h index dd498f758..ddd607e38 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/CwiseNullaryOp.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp template -EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) { return CwiseNullaryOp(rows, cols, func); @@ 
-150,7 +150,7 @@ DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(const CustomNullaryOp& func) { return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); @@ -192,7 +192,7 @@ DenseBase::Constant(Index rows, Index cols, const Scalar& value) * \sa class CwiseNullaryOp */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Constant(Index size, const Scalar& value) { return DenseBase::NullaryExpr(size, internal::scalar_constant_op(value)); @@ -208,7 +208,7 @@ DenseBase::Constant(Index size, const Scalar& value) * \sa class CwiseNullaryOp */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Constant(const Scalar& value) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) @@ -220,7 +220,7 @@ DenseBase::Constant(const Scalar& value) * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&) */ template -EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -232,7 +232,7 @@ DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const * \sa LinSpaced(Scalar,Scalar) */ template -EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ 
-264,7 +264,7 @@ DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp */ template -EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -276,7 +276,7 @@ DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) * Special version for fixed size types which does not require the size parameter. */ template -EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -286,7 +286,7 @@ DenseBase::LinSpaced(const Scalar& low, const Scalar& high) /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ template -bool DenseBase::isApproxToConstant +EIGEN_DEVICE_FUNC bool DenseBase::isApproxToConstant (const Scalar& val, const RealScalar& prec) const { typename internal::nested_eval::type self(derived()); @@ -301,7 +301,7 @@ bool DenseBase::isApproxToConstant * * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ template -bool DenseBase::isConstant +EIGEN_DEVICE_FUNC bool DenseBase::isConstant (const Scalar& val, const RealScalar& prec) const { return isApproxToConstant(val, prec); @@ -312,7 +312,7 @@ bool DenseBase::isConstant * \sa setConstant(), Constant(), class CwiseNullaryOp */ template -EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar& val) { setConstant(val); } @@ -322,7 +322,7 @@ EIGEN_STRONG_INLINE void 
DenseBase::fill(const Scalar& val) * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) { return derived() = Constant(rows(), cols(), val); } @@ -337,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) */ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setConstant(Index size, const Scalar& val) { resize(size); @@ -356,7 +356,7 @@ PlainObjectBase::setConstant(Index size, const Scalar& val) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) */ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setConstant(Index rows, Index cols, const Scalar& val) { resize(rows, cols); @@ -380,7 +380,7 @@ PlainObjectBase::setConstant(Index rows, Index cols, const Scalar& val) * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); @@ -400,7 +400,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, con * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp */ template 
-EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, const Scalar& high) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return setLinSpaced(size(), low, high); @@ -423,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, * \sa Zero(), Zero(Index) */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero(Index rows, Index cols) { return Constant(rows, cols, Scalar(0)); @@ -446,7 +446,7 @@ DenseBase::Zero(Index rows, Index cols) * \sa Zero(), Zero(Index,Index) */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero(Index size) { return Constant(size, Scalar(0)); @@ -463,7 +463,7 @@ DenseBase::Zero(Index size) * \sa Zero(Index), Zero(Index,Index) */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero() { return Constant(Scalar(0)); @@ -478,7 +478,7 @@ DenseBase::Zero() * \sa class CwiseNullaryOp, Zero() */ template -bool DenseBase::isZero(const RealScalar& prec) const +EIGEN_DEVICE_FUNC bool DenseBase::isZero(const RealScalar& prec) const { typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) @@ -496,7 +496,7 @@ bool DenseBase::isZero(const RealScalar& prec) const * \sa class CwiseNullaryOp, Zero() */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setZero() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setZero() { return setConstant(Scalar(0)); } @@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setZero() * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() 
*/ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index newSize) { resize(newSize); @@ -529,7 +529,7 @@ PlainObjectBase::setZero(Index newSize) * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() */ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index rows, Index cols) { resize(rows, cols); @@ -553,7 +553,7 @@ PlainObjectBase::setZero(Index rows, Index cols) * \sa Ones(), Ones(Index), isOnes(), class Ones */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Ones(Index rows, Index cols) { return Constant(rows, cols, Scalar(1)); @@ -576,7 +576,7 @@ DenseBase::Ones(Index rows, Index cols) * \sa Ones(), Ones(Index,Index), isOnes(), class Ones */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Ones(Index newSize) { return Constant(newSize, Scalar(1)); @@ -593,7 +593,7 @@ DenseBase::Ones(Index newSize) * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Ones() { return Constant(Scalar(1)); @@ -608,7 +608,7 @@ DenseBase::Ones() * \sa class CwiseNullaryOp, Ones() */ template -bool DenseBase::isOnes +EIGEN_DEVICE_FUNC bool DenseBase::isOnes (const RealScalar& prec) const { return isApproxToConstant(Scalar(1), prec); @@ -622,7 +622,7 @@ bool DenseBase::isOnes * \sa class CwiseNullaryOp, Ones() */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() { return setConstant(Scalar(1)); } @@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE 
Derived& DenseBase::setOnes() * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() */ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setOnes(Index newSize) { resize(newSize); @@ -655,7 +655,7 @@ PlainObjectBase::setOnes(Index newSize) * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() */ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setOnes(Index rows, Index cols) { resize(rows, cols); @@ -679,7 +679,7 @@ PlainObjectBase::setOnes(Index rows, Index cols) * \sa Identity(), setIdentity(), isIdentity() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType MatrixBase::Identity(Index rows, Index cols) { return DenseBase::NullaryExpr(rows, cols, internal::scalar_identity_op()); @@ -696,7 +696,7 @@ MatrixBase::Identity(Index rows, Index cols) * \sa Identity(Index,Index), setIdentity(), isIdentity() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType MatrixBase::Identity() { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) @@ -771,7 +771,7 @@ struct setIdentity_impl * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() */ template -EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() { return internal::setIdentity_impl::run(derived()); } @@ -787,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() */ template -EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index cols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index cols) { 
derived().resize(rows, cols); return setIdentity(); @@ -800,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index newSize, Index i) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index newSize, Index i) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i); @@ -815,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index i) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index i) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return BasisReturnType(SquareMatrixType::Identity(),i); @@ -828,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitX() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitX() { return Derived::Unit(0); } /** \returns an expression of the Y axis unit vector (0,1{,0}^*) @@ -838,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitY() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const 
typename MatrixBase::BasisReturnType MatrixBase::UnitY() { return Derived::Unit(1); } /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) @@ -848,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitZ() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitZ() { return Derived::Unit(2); } /** \returns an expression of the W axis unit vector (0,0,0,1) @@ -858,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitW() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitW() { return Derived::Unit(3); } } // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/DenseBase.h b/uppsrc/plugin/Eigen/Eigen/src/Core/DenseBase.h index bd74e8a13..90066ae73 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/DenseBase.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/DenseBase.h @@ -296,7 +296,7 @@ template class DenseBase EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& func); - /** \ínternal + /** \internal * Copies \a other into *this without evaluating other. \returns a reference to *this. * \deprecated */ template @@ -463,7 +463,17 @@ template class DenseBase EIGEN_DEVICE_FUNC void visit(Visitor& func) const; - inline const WithFormat format(const IOFormat& fmt) const; + /** \returns a WithFormat proxy object allowing to print a matrix the with given + * format \a fmt. + * + * See class IOFormat for some examples. 
+ * + * \sa class IOFormat, class WithFormat + */ + inline const WithFormat format(const IOFormat& fmt) const + { + return WithFormat(derived(), fmt); + } /** \returns the unique coefficient of a 1x1 expression */ EIGEN_DEVICE_FUNC @@ -474,9 +484,9 @@ template class DenseBase return derived().coeff(0,0); } - bool all() const; - bool any() const; - Index count() const; + EIGEN_DEVICE_FUNC bool all() const; + EIGEN_DEVICE_FUNC bool any() const; + EIGEN_DEVICE_FUNC Index count() const; typedef VectorwiseOp RowwiseReturnType; typedef const VectorwiseOp ConstRowwiseReturnType; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/DenseStorage.h b/uppsrc/plugin/Eigen/Eigen/src/Core/DenseStorage.h index 82201d96a..7958feeb9 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/DenseStorage.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/DenseStorage.h @@ -13,9 +13,9 @@ #define EIGEN_MATRIXSTORAGE_H #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN; + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN; #else - #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) #endif namespace Eigen { @@ -184,12 +184,16 @@ template class DenseSt { internal::plain_array m_data; public: - EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC DenseStorage() { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()) {} EIGEN_DEVICE_FUNC - DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} + DenseStorage(const DenseStorage& other) : m_data(other.m_data) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { @@ -197,7 +201,7 @@ template class DenseSt return *this; } EIGEN_DEVICE_FUNC 
DenseStorage(Index size, Index rows, Index cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols); EIGEN_UNUSED_VARIABLE(size); EIGEN_UNUSED_VARIABLE(rows); @@ -343,7 +347,7 @@ template class DenseStorage(size)), m_rows(rows), m_cols(cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); } EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) @@ -351,6 +355,7 @@ template class DenseStorage class DenseStorage(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_rows = rows; m_cols = cols; @@ -422,7 +427,7 @@ template class DenseStorage(size)), m_cols(cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); EIGEN_UNUSED_VARIABLE(rows); } @@ -430,6 +435,7 @@ template class DenseStorage(_Rows*other.m_cols)) , m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows) internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) @@ -477,7 +483,7 @@ template class DenseStorage(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_cols = cols; } @@ -495,7 +501,7 @@ template class DenseStorage(size)), m_rows(rows) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); EIGEN_UNUSED_VARIABLE(cols); } @@ -503,6 +509,7 @@ template class DenseStorage(other.m_rows*_Cols)) , m_rows(other.m_rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols) internal::smart_copy(other.m_data, 
other.m_data+other.m_rows*_Cols, m_data); } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) @@ -550,7 +557,7 @@ template class DenseStorage(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_rows = rows; } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/Diagonal.h b/uppsrc/plugin/Eigen/Eigen/src/Core/Diagonal.h index bfea0584b..49e711257 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/Diagonal.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/Diagonal.h @@ -21,7 +21,7 @@ namespace Eigen { * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal. * A positive value means a superdiagonal, a negative value means a subdiagonal. - * You can also use Dynamic so the index can be set at runtime. + * You can also use DynamicIndex so the index can be set at runtime. * * The matrix is not required to be square. * diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/Dot.h b/uppsrc/plugin/Eigen/Eigen/src/Core/Dot.h index f4fb4db7e..06ef18b8b 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/Dot.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/Dot.h @@ -51,7 +51,8 @@ struct dot_nocheck } // end namespace internal -/** \returns the dot product of *this with other. +/** \fn MatrixBase::dot + * \returns the dot product of *this with other. * * \only_for_vectors * diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/EigenBase.h b/uppsrc/plugin/Eigen/Eigen/src/Core/EigenBase.h index f76995af9..b195506a9 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/EigenBase.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/EigenBase.h @@ -14,6 +14,7 @@ namespace Eigen { /** \class EigenBase + * \ingroup Core_Module * * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). 
* @@ -128,6 +129,7 @@ template struct EigenBase */ template template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator=(const EigenBase &other) { call_assignment(derived(), other.derived()); @@ -136,6 +138,7 @@ Derived& DenseBase::operator=(const EigenBase &other) template template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator+=(const EigenBase &other) { call_assignment(derived(), other.derived(), internal::add_assign_op()); @@ -144,6 +147,7 @@ Derived& DenseBase::operator+=(const EigenBase &other) template template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator-=(const EigenBase &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op()); diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/Flagged.h b/uppsrc/plugin/Eigen/Eigen/src/Core/Flagged.h deleted file mode 100644 index 1f2955fc1..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/Flagged.h +++ /dev/null @@ -1,140 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_FLAGGED_H -#define EIGEN_FLAGGED_H - -namespace Eigen { - -/** \class Flagged - * \ingroup Core_Module - * - * \brief Expression with modified flags - * - * \param ExpressionType the type of the object of which we are modifying the flags - * \param Added the flags added to the expression - * \param Removed the flags removed from the expression (has priority over Added). - * - * This class represents an expression whose flags have been modified. - * It is the return type of MatrixBase::flagged() - * and most of the time this is the only way it is used. 
- * - * \sa MatrixBase::flagged() - */ - -namespace internal { -template -struct traits > : traits -{ - enum { Flags = (ExpressionType::Flags | Added) & ~Removed }; -}; -} - -template class Flagged - : public MatrixBase > -{ - public: - - typedef MatrixBase Base; - - EIGEN_DENSE_PUBLIC_INTERFACE(Flagged) - typedef typename internal::conditional::ret, - ExpressionType, const ExpressionType&>::type ExpressionTypeNested; - typedef typename ExpressionType::InnerIterator InnerIterator; - - inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - inline Index outerStride() const { return m_matrix.outerStride(); } - inline Index innerStride() const { return m_matrix.innerStride(); } - - inline CoeffReturnType coeff(Index row, Index col) const - { - return m_matrix.coeff(row, col); - } - - inline CoeffReturnType coeff(Index index) const - { - return m_matrix.coeff(index); - } - - inline const Scalar& coeffRef(Index row, Index col) const - { - return m_matrix.const_cast_derived().coeffRef(row, col); - } - - inline const Scalar& coeffRef(Index index) const - { - return m_matrix.const_cast_derived().coeffRef(index); - } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(row, col); - } - - inline Scalar& coeffRef(Index index) - { - return m_matrix.const_cast_derived().coeffRef(index); - } - - template - inline const PacketScalar packet(Index row, Index col) const - { - return m_matrix.template packet(row, col); - } - - template - inline void writePacket(Index row, Index col, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(row, col, x); - } - - template - inline const PacketScalar packet(Index index) const - { - return m_matrix.template packet(index); - } - - template - inline void writePacket(Index index, const PacketScalar& x) - { - m_matrix.const_cast_derived().template 
writePacket(index, x); - } - - const ExpressionType& _expression() const { return m_matrix; } - - template - typename ExpressionType::PlainObject solveTriangular(const MatrixBase& other) const; - - template - void solveTriangularInPlace(const MatrixBase& other) const; - - protected: - ExpressionTypeNested m_matrix; -}; - -/** \returns an expression of *this with added and removed flags - * - * This is mostly for internal use. - * - * \sa class Flagged - */ -template -template -inline const Flagged -DenseBase::flagged() const -{ - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_FLAGGED_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/Functors.h b/uppsrc/plugin/Eigen/Eigen/src/Core/Functors.h deleted file mode 100644 index 5f14c6587..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/Functors.h +++ /dev/null @@ -1,1026 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_FUNCTORS_H -#define EIGEN_FUNCTORS_H - -namespace Eigen { - -namespace internal { - -// associative functors: - -/** \internal - * \brief Template functor to compute the sum of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum() - */ -template struct scalar_sum_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::padd(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd - }; -}; - -/** \internal - * \brief Template functor to compute the product of two scalars - * - * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() - */ -template struct scalar_product_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul - }; - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmul(a,b); } - template - EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const - { return internal::predux_mul(a); } -}; -template -struct functor_traits > { - enum { - Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! 
- PacketAccess = scalar_product_op::Vectorizable - }; -}; - -/** \internal - * \brief Template functor to compute the conjugate product of two scalars - * - * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) - */ -template struct scalar_conj_product_op { - - enum { - Conj = NumTraits::IsComplex - }; - - typedef typename scalar_product_traits::ReturnType result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const - { return conj_helper().pmul(a,b); } - - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return conj_helper().pmul(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::MulCost, - PacketAccess = internal::is_same::value && packet_traits::HasMul - }; -}; - -/** \internal - * \brief Template functor to compute the min of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() - */ -template struct scalar_min_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmin(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux_min(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMin - }; -}; - -/** \internal - * \brief Template functor to compute the max of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() - */ -template struct scalar_max_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, 
const Scalar& b) const { using std::max; return (max)(a, b); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmax(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux_max(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMax - }; -}; - -/** \internal - * \brief Template functor to compute the hypot of two scalars - * - * \sa MatrixBase::stableNorm(), class Redux - */ -template struct scalar_hypot_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) -// typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const - { - using std::max; - using std::min; - using std::sqrt; - Scalar p = (max)(_x, _y); - Scalar q = (min)(_x, _y); - Scalar qp = q/p; - return p * sqrt(Scalar(1) + qp*qp); - } -}; -template -struct functor_traits > { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess=0 }; -}; - -/** \internal - * \brief Template functor to compute the pow of two scalars - */ -template struct scalar_binary_pow_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) - inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } -}; -template -struct functor_traits > { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; -}; - -// other binary functors: - -/** \internal - * \brief Template functor to compute the difference of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::operator- - */ -template struct scalar_difference_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::psub(a,b); } -}; -template -struct functor_traits > { - enum { - 
Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasSub - }; -}; - -/** \internal - * \brief Template functor to compute the quotient of two scalars - * - * \sa class CwiseBinaryOp, Cwise::operator/() - */ -template struct scalar_quotient_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv - }; - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pdiv(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = (NumTraits::MulCost + NumTraits::MulCost), // rough estimate! - PacketAccess = scalar_quotient_op::Vectorizable - }; -}; - - - -/** \internal - * \brief Template functor to compute the and of two booleans - * - * \sa class CwiseBinaryOp, ArrayBase::operator&& - */ -struct scalar_boolean_and_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op) - EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; } -}; -template<> struct functor_traits { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -/** \internal - * \brief Template functor to compute the or of two booleans - * - * \sa class CwiseBinaryOp, ArrayBase::operator|| - */ -struct scalar_boolean_or_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op) - EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; } -}; -template<> struct functor_traits { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -/** \internal - * \brief Template functors for comparison of two scalars - * \todo Implement packet-comparisons - */ -template struct scalar_cmp_op; - -template -struct functor_traits > { - enum { - Cost = 
NumTraits::AddCost, - PacketAccess = false - }; -}; - -template -struct result_of(Scalar,Scalar)> { - typedef bool type; -}; - - -template struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} -}; -template struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} -}; -template struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} -}; -template struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} -}; - -// unary functors: - -/** \internal - * \brief Template functor to compute the opposite of a scalar - * - * \sa class CwiseUnaryOp, MatrixBase::operator- - */ -template struct scalar_opposite_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pnegate(a); } -}; -template -struct functor_traits > -{ enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasNegate }; -}; - -/** \internal - * \brief Template functor to compute the absolute value of a scalar - * - * \sa class CwiseUnaryOp, Cwise::abs - */ -template struct scalar_abs_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { 
return internal::pabs(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAbs - }; -}; - -/** \internal - * \brief Template functor to compute the squared absolute value of a scalar - * - * \sa class CwiseUnaryOp, Cwise::abs2 - */ -template struct scalar_abs2_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a,a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasAbs2 }; }; - -/** \internal - * \brief Template functor to compute the conjugate of a complex value - * - * \sa class CwiseUnaryOp, MatrixBase::conjugate() - */ -template struct scalar_conjugate_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = NumTraits::IsComplex ? NumTraits::AddCost : 0, - PacketAccess = packet_traits::HasConj - }; -}; - -/** \internal - * \brief Template functor to cast a scalar to another type - * - * \sa class CwiseUnaryOp, MatrixBase::cast() - */ -template -struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) - typedef NewType result_type; - EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast(a); } -}; -template -struct functor_traits > -{ enum { Cost = is_same::value ? 
0 : NumTraits::AddCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the real part of a complex - * - * \sa class CwiseUnaryOp, MatrixBase::real() - */ -template -struct scalar_real_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the imaginary part of a complex - * - * \sa class CwiseUnaryOp, MatrixBase::imag() - */ -template -struct scalar_imag_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the real part of a complex as a reference - * - * \sa class CwiseUnaryOp, MatrixBase::real() - */ -template -struct scalar_real_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast(&a)); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the imaginary part of a complex as a reference - * - * \sa class CwiseUnaryOp, MatrixBase::imag() - */ -template -struct scalar_imag_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast(&a)); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * - * \brief Template functor to compute the 
exponential of a scalar - * - * \sa class CwiseUnaryOp, Cwise::exp() - */ -template struct scalar_exp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) - inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasExp }; }; - -/** \internal - * - * \brief Template functor to compute the logarithm of a scalar - * - * \sa class CwiseUnaryOp, Cwise::log() - */ -template struct scalar_log_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) - inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::plog(a); } -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasLog }; }; - -/** \internal - * \brief Template functor to multiply a scalar by a fixed other one - * - * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ - */ -/* NOTE why doing the pset1() in packetOp *is* an optimization ? - * indeed it seems better to declare m_other as a Packet and do the pset1() once - * in the constructor. 
However, in practice: - * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() outside the loop :) - * - simpler code ;) - * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) - */ -template -struct scalar_multiple_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -template -struct scalar_multiple2_op { - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } - EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to divide a scalar by a fixed other one - * - * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not necessarily a floating point type. 
- * - * \sa class CwiseUnaryOp, MatrixBase::operator/ - */ -template -struct scalar_quotient1_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pdiv(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - -// nullary functors - -template -struct scalar_constant_op { - typedef typename packet_traits::type Packet; - EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { } - template - EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } - template - EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1(m_other); } - const Scalar m_other; -}; -template -struct functor_traits > -// FIXME replace this packet test by a safe one -{ enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; - -template struct scalar_identity_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) - template - EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? 
Scalar(1) : Scalar(0); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false, IsRepeatable = true }; }; - -template struct linspaced_op_impl; - -// linear access for packet ops: -// 1) initialization -// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0]) -// 2) each step (where size is 1 for coeff access or PacketSize for packet access) -// base += [size*step, ..., size*step] -// -// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp) -// in order to avoid the padd() in operator() ? -template -struct linspaced_op_impl -{ - typedef typename packet_traits::type Packet; - - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_packetStep(pset1(packet_traits::size*step)), - m_base(padd(pset1(low), pmul(pset1(step),plset(-packet_traits::size)))) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const - { - m_base = padd(m_base, pset1(m_step)); - return m_low+Scalar(i)*m_step; - } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } - - const Scalar m_low; - const Scalar m_step; - const Packet m_packetStep; - mutable Packet m_base; -}; - -// random access for packet ops: -// 1) each step -// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) -template -struct linspaced_op_impl -{ - typedef typename packet_traits::type Packet; - - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const - { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(Scalar(i)),m_interPacket))); } - - const Scalar m_low; - const Scalar m_step; - const Packet 
m_lowPacket; - const Packet m_stepPacket; - const Packet m_interPacket; -}; - -// ----- Linspace functor ---------------------------------------------------------------- - -// Forward declaration (we default to random access which does not really give -// us a speed gain when using packet access but it allows to use the functor in -// nested expressions). -template struct linspaced_op; -template struct functor_traits< linspaced_op > -{ enum { Cost = 1, PacketAccess = packet_traits::HasSetLinear, IsRepeatable = true }; }; -template struct linspaced_op -{ - typedef typename packet_traits::type Packet; - linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl(col + row); - } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl.packetOp(col + row); - } - - // This proxy object handles the actual required temporaries, the different - // implementations (random vs. sequential access) as well as the - // correct piping to size 2/4 packet operations. - const linspaced_op_impl impl; -}; - -// all functors allow linear access, except scalar_identity_op. 
So we fix here a quick meta -// to indicate whether a functor allows linear access, just always answering 'yes' except for -// scalar_identity_op. -// FIXME move this to functor_traits adding a functor_default -template struct functor_has_linear_access { enum { ret = 1 }; }; -template struct functor_has_linear_access > { enum { ret = 0 }; }; - -// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication -// where the mixing of different types is handled by scalar_product_traits -// In particular, real * complex is allowed. -// FIXME move this to functor_traits adding a functor_default -template struct functor_is_product_like { enum { ret = 0 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; - - -/** \internal - * \brief Template functor to add a scalar to a fixed other one - * \sa class CwiseUnaryOp, Array::operator+ - */ -/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ -template -struct scalar_add_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } - inline scalar_add_op(const Scalar& other) : m_other(other) { } - inline Scalar operator() (const Scalar& a) const { return a + m_other; } - inline const Packet packetOp(const Packet& a) const - { return internal::padd(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to compute the square root of a scalar - * \sa class CwiseUnaryOp, Cwise::sqrt() - */ -template struct scalar_sqrt_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) - inline const 
Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } -}; -template -struct functor_traits > -{ enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasSqrt - }; -}; - -/** \internal - * \brief Template functor to compute the cosine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::cos() - */ -template struct scalar_cos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) - inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasCos - }; -}; - -/** \internal - * \brief Template functor to compute the sine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::sin() - */ -template struct scalar_sin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) - inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::psin(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasSin - }; -}; - - -/** \internal - * \brief Template functor to compute the tan of a scalar - * \sa class CwiseUnaryOp, ArrayBase::tan() - */ -template struct scalar_tan_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) - inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasTan - }; -}; - -/** \internal 
- * \brief Template functor to compute the arc cosine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::acos() - */ -template struct scalar_acos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) - inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasACos - }; -}; - -/** \internal - * \brief Template functor to compute the arc sine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::asin() - */ -template struct scalar_asin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) - inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasASin - }; -}; - -/** \internal - * \brief Template functor to raise a scalar to a power - * \sa class CwiseUnaryOp, Cwise::pow - */ -template -struct scalar_pow_op { - // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} - inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } - const Scalar m_exponent; -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to compute the quotient between a scalar and array entries. 
- * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_mult_op { - scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} - inline Scalar operator() (const Scalar& a) const { return m_other / a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(m_other),a); } - Scalar m_other; -}; - -/** \internal - * \brief Template functor to compute the inverse of a scalar - * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op) - inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(Scalar(1)),a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - -/** \internal - * \brief Template functor to compute the square of a scalar - * \sa class CwiseUnaryOp, Cwise::square() - */ -template -struct scalar_square_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) - inline Scalar operator() (const Scalar& a) const { return a*a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pmul(a,a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -/** \internal - * \brief Template functor to compute the cube of a scalar - * \sa class CwiseUnaryOp, Cwise::cube() - */ -template -struct scalar_cube_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) - inline Scalar operator() (const Scalar& a) const { return a*a*a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pmul(a,pmul(a,a)); } -}; -template -struct functor_traits > -{ enum { Cost = 2*NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -// default functor traits for STL functors: - -template -struct functor_traits > -{ enum { Cost = 
NumTraits::MulCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; - -#ifdef EIGEN_STDEXT_SUPPORT - -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > > -{ enum { Cost = 0, PacketAccess = false }; }; - 
-template -struct functor_traits > -{ enum { Cost = functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost + functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; - -#endif // EIGEN_STDEXT_SUPPORT - -// allow to add new functors and specializations of functor_traits from outside Eigen. -// this macro is really needed because functor_traits must be specialized after it is declared but before it is used... -#ifdef EIGEN_FUNCTORS_PLUGIN -#include EIGEN_FUNCTORS_PLUGIN -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_FUNCTORS_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/GenericPacketMath.h b/uppsrc/plugin/Eigen/Eigen/src/Core/GenericPacketMath.h index ac5552d3e..029f8ac36 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/GenericPacketMath.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/GenericPacketMath.h @@ -61,7 +61,6 @@ struct default_packet_traits HasSqrt = 0, HasRsqrt = 0, HasExp = 0, - HasExpm1 = 0, HasLog = 0, HasLog1p = 0, HasLog10 = 0, @@ -231,7 +230,7 @@ pload1(const typename unpacket_traits::type *a) { return pset1( * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} * Currently, this function is only used for scalar * complex products. */ -template EIGEN_DEVICE_FUNC inline Packet +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits::type* from) { return *from; } /** \internal \returns a packet with elements of \a *from quadrupled. @@ -279,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits::type *a, } /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). 
*/ -template inline Packet +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits::type& a) { return a; } /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ @@ -402,10 +401,6 @@ Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } -/** \internal \returns the expm1 of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pexpm1(const Packet& a) { return numext::expm1(a); } - /** \internal \returns the log of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } @@ -487,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro * by the current computation. */ template -inline Packet ploadt_ro(const typename unpacket_traits::type* from) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits::type* from) { return ploadt(from); } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/GlobalFunctions.h b/uppsrc/plugin/Eigen/Eigen/src/Core/GlobalFunctions.h index 12828a7c3..769dc255c 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/GlobalFunctions.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/GlobalFunctions.h @@ -71,7 +71,6 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) 
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/IO.h b/uppsrc/plugin/Eigen/Eigen/src/Core/IO.h index 644228c3f..da7fd6cce 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/IO.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/IO.h @@ -109,20 +109,6 @@ class WithFormat IOFormat m_format; }; -/** \returns a WithFormat proxy object allowing to print a matrix the with given - * format \a fmt. - * - * See class IOFormat for some examples. - * - * \sa class IOFormat, class WithFormat - */ -template -inline const WithFormat -DenseBase::format(const IOFormat& fmt) const -{ - return WithFormat(derived(), fmt); -} - namespace internal { // NOTE: This helper is kept for backward compatibility with previous code specializing diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/MathFunctions.h b/uppsrc/plugin/Eigen/Eigen/src/Core/MathFunctions.h index 7a6b999af..a648aa0fa 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/MathFunctions.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/MathFunctions.h @@ -14,6 +14,7 @@ // TODO this should better be moved to NumTraits #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + namespace Eigen { // On WINCE, std::abs is defined for int only, so let's defined our own overloads: @@ -412,7 +413,7 @@ inline NewType cast(const OldType& x) static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) - EIGEN_USING_STD_MATH(round); + using std::round; return round(x); } }; @@ -481,55 +482,6 @@ struct arg_retval typedef typename NumTraits::Real type; }; -/**************************************************************************** -* Implementation of expm1 * -****************************************************************************/ - -// This 
implementation is based on GSL Math's expm1. -namespace std_fallback { - // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar, - // or that there is no suitable std::expm1 function available. Implementation - // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php. - template - EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - typedef typename NumTraits::Real RealScalar; - - EIGEN_USING_STD_MATH(exp); - Scalar u = exp(x); - if (u == Scalar(1)) { - return x; - } - Scalar um1 = u - RealScalar(1); - if (um1 == Scalar(-1)) { - return RealScalar(-1); - } - - EIGEN_USING_STD_MATH(log); - return (u - RealScalar(1)) * x / log(u); - } -} - -template -struct expm1_impl { - static inline Scalar run(const Scalar& x) - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if EIGEN_HAS_CXX11_MATH - using std::expm1; - #endif - using std_fallback::expm1; - return expm1(x); - } -}; - - -template -struct expm1_retval -{ - typedef Scalar type; -}; - /**************************************************************************** * Implementation of log1p * ****************************************************************************/ @@ -688,7 +640,7 @@ template struct random_default_impl { static inline Scalar run(const Scalar& x, const Scalar& y) - { + { typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; if(y T generic_fast_tanh_float(const T& a_x); namespace numext { -#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) +#ifndef __CUDA_ARCH__ template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) @@ -890,84 +842,6 @@ EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) EIGEN_USING_STD_MATH(max); return max EIGEN_NOT_A_MACRO (x,y); } - - -#elif defined(__SYCL_DEVICE_ONLY__) -template -EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) -{ - - return y < x ? 
y : x; -} - -template -EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) -{ - - return x < y ? y : x; -} - -EIGEN_ALWAYS_INLINE int mini(const int& x, const int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE int maxi(const int& x, const int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int mini(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int maxi(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE long mini(const long & x, const long & y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE long maxi(const long & x, const long & y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long mini(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long maxi(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::max(x,y); -} - - -EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) -{ - return cl::sycl::fmax(x,y); -} - -EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) -{ - return cl::sycl::fmax(x,y); -} - #else template EIGEN_DEVICE_FUNC @@ -1080,11 +954,6 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log1p(float x) { return cl::sycl::log1p(x); } -EIGEN_ALWAYS_INLINE double log1p(double x) { return cl::sycl::log1p(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log1p(const float &x) { return ::log1pf(x); } @@ -1100,24 +969,10 @@ inline typename internal::pow_impl::result_type 
pow(const Scala return internal::pow_impl::run(x, y); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float pow(float x, float y) { return cl::sycl::pow(x, y); } -EIGEN_ALWAYS_INLINE double pow(double x, double y) { return cl::sycl::pow(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float isnan(float x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE double isnan(double x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE float isinf(float x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE double isinf(double x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE float isfinite(float x) { return cl::sycl::isfinite(x); } -EIGEN_ALWAYS_INLINE double isfinite(double x) { return cl::sycl::isfinite(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) @@ -1125,11 +980,6 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float round(float x) { return cl::sycl::round(x); } -EIGEN_ALWAYS_INLINE double round(double x) { return cl::sycl::round(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template EIGEN_DEVICE_FUNC T (floor)(const T& x) @@ -1138,11 +988,6 @@ T (floor)(const T& x) return floor(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float floor(float x) { return cl::sycl::floor(x); } -EIGEN_ALWAYS_INLINE double floor(double x) { return cl::sycl::floor(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float &x) 
{ return ::floorf(x); } @@ -1159,11 +1004,6 @@ T (ceil)(const T& x) return ceil(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float ceil(float x) { return cl::sycl::ceil(x); } -EIGEN_ALWAYS_INLINE double ceil(double x) { return cl::sycl::ceil(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float &x) { return ::ceilf(x); } @@ -1204,11 +1044,6 @@ T sqrt(const T &x) return sqrt(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sqrt(float x) { return cl::sycl::sqrt(x); } -EIGEN_ALWAYS_INLINE double sqrt(double x) { return cl::sycl::sqrt(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T &x) { @@ -1216,12 +1051,6 @@ T log(const T &x) { return log(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log(float x) { return cl::sycl::log(x); } -EIGEN_ALWAYS_INLINE double log(double x) { return cl::sycl::log(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log(const float &x) { return ::logf(x); } @@ -1232,11 +1061,19 @@ double log(const double &x) { return ::log(x); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename NumTraits::Real abs(const T &x) { +typename internal::enable_if::IsSigned || NumTraits::IsComplex,typename NumTraits::Real>::type +abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if::IsSigned || NumTraits::IsComplex),typename NumTraits::Real>::type +abs(const T &x) { + return x; +} + #if defined(__SYCL_DEVICE_ONLY__) EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } @@ -1267,11 +1104,6 @@ T exp(const T &x) { return exp(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float exp(float x) { return 
cl::sycl::exp(x); } -EIGEN_ALWAYS_INLINE double exp(double x) { return cl::sycl::exp(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp(const float &x) { return ::expf(x); } @@ -1280,26 +1112,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double &x) { return ::exp(x); } #endif -template -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) -{ - return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); -} - -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float expm1(float x) { return cl::sycl::expm1(x); } -EIGEN_ALWAYS_INLINE double expm1(double x) { return cl::sycl::expm1(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - -#ifdef __CUDACC__ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float expm1(const float &x) { return ::expm1f(x); } - -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -double expm1(const double &x) { return ::expm1(x); } -#endif - template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T &x) { @@ -1307,11 +1119,6 @@ T cos(const T &x) { return cos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cos(float x) { return cl::sycl::cos(x); } -EIGEN_ALWAYS_INLINE double cos(double x) { return cl::sycl::cos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cos(const float &x) { return ::cosf(x); } @@ -1327,11 +1134,6 @@ T sin(const T &x) { return sin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sin(float x) { return cl::sycl::sin(x); } -EIGEN_ALWAYS_INLINE double sin(double x) { return cl::sycl::sin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sin(const float &x) { return ::sinf(x); } @@ -1347,11 +1149,6 @@ T tan(const T &x) { return tan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tan(float x) { return 
cl::sycl::tan(x); } -EIGEN_ALWAYS_INLINE double tan(double x) { return cl::sycl::tan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tan(const float &x) { return ::tanf(x); } @@ -1367,11 +1164,6 @@ T acos(const T &x) { return acos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float acos(float x) { return cl::sycl::acos(x); } -EIGEN_ALWAYS_INLINE double acos(double x) { return cl::sycl::acos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float acos(const float &x) { return ::acosf(x); } @@ -1387,11 +1179,6 @@ T asin(const T &x) { return asin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float asin(float x) { return cl::sycl::asin(x); } -EIGEN_ALWAYS_INLINE double asin(double x) { return cl::sycl::asin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float asin(const float &x) { return ::asinf(x); } @@ -1407,11 +1194,6 @@ T atan(const T &x) { return atan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float atan(float x) { return cl::sycl::atan(x); } -EIGEN_ALWAYS_INLINE double atan(double x) { return cl::sycl::atan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float atan(const float &x) { return ::atanf(x); } @@ -1428,11 +1210,6 @@ T cosh(const T &x) { return cosh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cosh(float x) { return cl::sycl::cosh(x); } -EIGEN_ALWAYS_INLINE double cosh(double x) { return cl::sycl::cosh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cosh(const float &x) { return ::coshf(x); } @@ -1448,11 +1225,6 @@ T sinh(const T &x) { return sinh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sinh(float x) { return 
cl::sycl::sinh(x); } -EIGEN_ALWAYS_INLINE double sinh(double x) { return cl::sycl::sinh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sinh(const float &x) { return ::sinhf(x); } @@ -1468,10 +1240,7 @@ T tanh(const T &x) { return tanh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tanh(float x) { return cl::sycl::tanh(x); } -EIGEN_ALWAYS_INLINE double tanh(double x) { return cl::sycl::tanh(x); } -#elif (!defined(__CUDACC__)) && EIGEN_FAST_MATH +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } #endif @@ -1491,11 +1260,6 @@ T fmod(const T& a, const T& b) { return fmod(a, b); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float fmod(float x, float y) { return cl::sycl::fmod(x, y); } -EIGEN_ALWAYS_INLINE double fmod(double x, double y) { return cl::sycl::fmod(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1638,13 +1402,13 @@ template<> struct random_impl template<> struct scalar_fuzzy_impl { typedef bool RealScalar; - + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } - + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { @@ -1656,10 +1420,10 @@ template<> struct scalar_fuzzy_impl { return (!x) || y; } - + }; - + } // end namespace internal } // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/MatrixBase.h b/uppsrc/plugin/Eigen/Eigen/src/Core/MatrixBase.h index f7cf04cde..ce412180a 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/MatrixBase.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/MatrixBase.h @@ -294,7 +294,7 @@ template class MatrixBase * fuzzy comparison such as isApprox() * \sa isApprox(), operator!= */ template - inline bool operator==(const MatrixBase& other) const + 
EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase& other) const { return cwiseEqual(other).all(); } /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. @@ -302,7 +302,7 @@ template class MatrixBase * fuzzy comparison such as isApprox() * \sa isApprox(), operator== */ template - inline bool operator!=(const MatrixBase& other) const + EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase& other) const { return cwiseNotEqual(other).any(); } NoAlias noalias(); diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/NumTraits.h b/uppsrc/plugin/Eigen/Eigen/src/Core/NumTraits.h index aebc0c259..daf489878 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/NumTraits.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/NumTraits.h @@ -71,7 +71,7 @@ struct default_digits10_impl // Integer * and to \c 0 otherwise. * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed * to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. - * Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost. + * Stay vague here. No need to do architecture-specific stuff. * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. 
@@ -215,6 +215,8 @@ struct NumTraits > static inline RealScalar epsilon() { return NumTraits::epsilon(); } EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits::dummy_precision(); } + + static inline int digits10() { return NumTraits::digits10(); } }; template<> struct NumTraits diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/PlainObjectBase.h b/uppsrc/plugin/Eigen/Eigen/src/Core/PlainObjectBase.h index 0c04f8250..77f4f6066 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/PlainObjectBase.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/PlainObjectBase.h @@ -41,7 +41,7 @@ template<> struct check_rows_cols_for_overflow { { // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 // we assume Index is signed - Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed + Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed bool error = (rows == 0 || cols == 0) ? false : (rows > max_index / cols); if (error) @@ -58,6 +58,28 @@ template struct m } // end namespace internal +#ifdef EIGEN_PARSED_BY_DOXYGEN +namespace doxygen { + +// This is a workaround to doxygen not being able to understand the inheritance logic +// when it is hidden by the dense_xpr_base helper struct. +// Moreover, doxygen fails to include members that are not documented in the declaration body of +// MatrixBase if we inherits MatrixBase >, +// this is why we simply inherits MatrixBase, though this does not make sense. + +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template struct dense_xpr_base_dispatcher; +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template +struct dense_xpr_base_dispatcher > + : public MatrixBase {}; +/** This class is just a workaround for Doxygen and it does not not actually exist. 
*/ +template +struct dense_xpr_base_dispatcher > + : public ArrayBase {}; + +} // namespace doxygen + /** \class PlainObjectBase * \ingroup Core_Module * \brief %Dense storage base class for matrices and arrays. @@ -65,26 +87,10 @@ template struct m * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. * + * \tparam Derived is the derived type, e.g., a Matrix or Array + * * \sa \ref TopicClassHierarchy */ -#ifdef EIGEN_PARSED_BY_DOXYGEN -namespace doxygen { - -// this is a workaround to doxygen not being able to understand the inheritance logic -// when it is hidden by the dense_xpr_base helper struct. -/** This class is just a workaround for Doxygen and it does not not actually exist. */ -template struct dense_xpr_base_dispatcher; -/** This class is just a workaround for Doxygen and it does not not actually exist. */ -template -struct dense_xpr_base_dispatcher > - : public MatrixBase > {}; -/** This class is just a workaround for Doxygen and it does not not actually exist. */ -template -struct dense_xpr_base_dispatcher > - : public ArrayBase > {}; - -} // namespace doxygen - template class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher #else @@ -554,7 +560,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type public: - /** \copydoc DenseBase::operator=(const EigenBase&) + /** \brief Copies the generic expression \a other into *this. + * \copydetails DenseBase::operator=(const EigenBase &other) */ template EIGEN_DEVICE_FUNC @@ -805,6 +812,13 @@ class PlainObjectBase : public internal::dense_xpr_base::type this->_set_noalias(other); } + // Initialize an arbitrary matrix from an object convertible to the Derived type. 
+ template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Derived& other){ + this->_set_noalias(other); + } + // Initialize an arbitrary matrix from a generic Eigen expression template EIGEN_DEVICE_FUNC @@ -827,7 +841,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type this->derived() = r; } - // For fixed -size arrays: + // For fixed-size Array template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar& val0, @@ -839,6 +853,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type Base::setConstant(val0); } + // For fixed-size Array template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Index& val0, diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/ProductBase.h b/uppsrc/plugin/Eigen/Eigen/src/Core/ProductBase.h deleted file mode 100644 index cf74470a9..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/ProductBase.h +++ /dev/null @@ -1,290 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_PRODUCTBASE_H -#define EIGEN_PRODUCTBASE_H - -namespace Eigen { - -/** \class ProductBase - * \ingroup Core_Module - * - */ - -namespace internal { -template -struct traits > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<_Lhs>::type Lhs; - typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; - enum { - RowsAtCompileTime = traits::RowsAtCompileTime, - ColsAtCompileTime = traits::ColsAtCompileTime, - MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = traits::MaxColsAtCompileTime, - Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) - | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, - // Note that EvalBeforeNestingBit and NestByRefBit - // are not used in practice because nested is overloaded for products - CoeffReadCost = 0 // FIXME why is it needed ? 
- }; -}; -} - -#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \ - typedef ProductBase Base; \ - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Base::LhsNested LhsNested; \ - typedef typename Base::_LhsNested _LhsNested; \ - typedef typename Base::LhsBlasTraits LhsBlasTraits; \ - typedef typename Base::ActualLhsType ActualLhsType; \ - typedef typename Base::_ActualLhsType _ActualLhsType; \ - typedef typename Base::RhsNested RhsNested; \ - typedef typename Base::_RhsNested _RhsNested; \ - typedef typename Base::RhsBlasTraits RhsBlasTraits; \ - typedef typename Base::ActualRhsType ActualRhsType; \ - typedef typename Base::_ActualRhsType _ActualRhsType; \ - using Base::m_lhs; \ - using Base::m_rhs; - -template -class ProductBase : public MatrixBase -{ - public: - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase) - - typedef typename Lhs::Nested LhsNested; - typedef typename internal::remove_all::type _LhsNested; - typedef internal::blas_traits<_LhsNested> LhsBlasTraits; - typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all::type _ActualLhsType; - typedef typename internal::traits::Scalar LhsScalar; - - typedef typename Rhs::Nested RhsNested; - typedef typename internal::remove_all::type _RhsNested; - typedef internal::blas_traits<_RhsNested> RhsBlasTraits; - typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type _ActualRhsType; - typedef typename internal::traits::Scalar RhsScalar; - - // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once - typedef CoeffBasedProduct FullyLazyCoeffBaseProductType; - - public: - -#ifndef EIGEN_NO_MALLOC - typedef typename Base::PlainObject BasePlainObject; - typedef Matrix DynPlainObject; - typedef typename internal::conditional<(BasePlainObject::SizeAtCompileTime==Dynamic) || (BasePlainObject::SizeAtCompileTime*int(sizeof(Scalar)) < 
int(EIGEN_STACK_ALLOCATION_LIMIT)), - BasePlainObject, DynPlainObject>::type PlainObject; -#else - typedef typename Base::PlainObject PlainObject; -#endif - - ProductBase(const Lhs& a_lhs, const Rhs& a_rhs) - : m_lhs(a_lhs), m_rhs(a_rhs) - { - eigen_assert(a_lhs.cols() == a_rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); - } - - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); } - - const _LhsNested& lhs() const { return m_lhs; } - const _RhsNested& rhs() const { return m_rhs; } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - operator const PlainObject& () const - { - m_result.resize(m_lhs.rows(), m_rhs.cols()); - derived().evalTo(m_result); - return m_result; - } - - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - template - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - const Diagonal diagonal(Index index) const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); } - - // restrict coeff accessors to 1x1 expressions. 
No need to care about mutators here since this isnt a Lvalue expression - typename Base::CoeffReturnType coeff(Index row, Index col) const - { -#ifdef EIGEN2_SUPPORT - return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum(); -#else - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(row,col); -#endif - } - - typename Base::CoeffReturnType coeff(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(i); - } - - const Scalar& coeffRef(Index row, Index col) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(row,col); - } - - const Scalar& coeffRef(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(i); - } - - protected: - - LhsNested m_lhs; - RhsNested m_rhs; - - mutable PlainObject m_result; -}; - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -namespace internal { -template -struct nested, N, PlainObject> -{ - typedef typename GeneralProduct::PlainObject const& type; -}; -template -struct nested, N, PlainObject> -{ - typedef typename GeneralProduct::PlainObject const& type; -}; -} - -template -class ScaledProduct; - -// Note that these two operator* functions are not defined as member -// functions of ProductBase, because, otherwise we would have to -// define all overloads defined in MatrixBase. Furthermore, Using -// "using Base::operator*" would not work with MSVC. 
-// -// Also note that here we accept any compatible scalar types -template -const ScaledProduct -operator*(const ProductBase& prod, const typename Derived::Scalar& x) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const ProductBase& prod, const typename Derived::RealScalar& x) -{ return ScaledProduct(prod.derived(), x); } - - -template -const ScaledProduct -operator*(const typename Derived::Scalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const typename Derived::RealScalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -namespace internal { -template -struct traits > - : traits, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> > -{ - typedef typename traits::StorageKind StorageKind; -}; -} - -template -class ScaledProduct - : public ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> -{ - public: - typedef ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::PlainObject PlainObject; -// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct) - - ScaledProduct(const NestedProduct& prod, const Scalar& x) - : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {} - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); } - - const Scalar& alpha() const { return m_alpha; } - - protected: - const NestedProduct& m_prod; - 
Scalar m_alpha; -}; - -/** \internal - * Overloaded to perform an efficient C = (A*B).lazy() */ -template -template -Derived& MatrixBase::lazyAssign(const ProductBase& other) -{ - other.derived().evalTo(derived()); - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_PRODUCTBASE_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/ProductEvaluators.h b/uppsrc/plugin/Eigen/Eigen/src/Core/ProductEvaluators.h index 583b7f59e..c42725dbd 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/ProductEvaluators.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/ProductEvaluators.h @@ -207,6 +207,12 @@ struct evaluator_assume_aliasing +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, + const Product >, DenseShape > { + static const bool value = true; +}; + template struct assignment_from_xpr_op_product { diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/SelfAdjointView.h b/uppsrc/plugin/Eigen/Eigen/src/Core/SelfAdjointView.h index 06484ab30..504c98f0e 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/SelfAdjointView.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/SelfAdjointView.h @@ -319,6 +319,7 @@ public: * Implementation of MatrixBase methods ***************************************************************************/ +/** This is the const version of MatrixBase::selfadjointView() */ template template typename MatrixBase::template ConstSelfAdjointViewReturnType::Type @@ -327,6 +328,15 @@ MatrixBase::selfadjointView() const return typename ConstSelfAdjointViewReturnType::Type(derived()); } +/** \returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix + * + * The parameter \a UpLo can be either \c #Upper or \c #Lower + * + * Example: \include MatrixBase_selfadjointView.cpp + * Output: \verbinclude MatrixBase_selfadjointView.out + * + * \sa class SelfAdjointView + */ template template typename MatrixBase::template SelfAdjointViewReturnType::Type diff --git 
a/uppsrc/plugin/Eigen/Eigen/src/Core/SelfCwiseBinaryOp.h b/uppsrc/plugin/Eigen/Eigen/src/Core/SelfCwiseBinaryOp.h index 719ed72a5..50099df82 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -15,7 +15,7 @@ namespace Eigen { // TODO generalize the scalar type of 'other' template -EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); @@ -23,7 +23,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) } template -EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); @@ -31,7 +31,7 @@ EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) } template -EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); @@ -39,7 +39,7 @@ EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) } template -EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); diff 
--git a/uppsrc/plugin/Eigen/Eigen/src/Core/Solve.h b/uppsrc/plugin/Eigen/Eigen/src/Core/Solve.h index 960a58597..a8daea511 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/Solve.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/Solve.h @@ -34,12 +34,12 @@ template struct s template struct solve_traits { - typedef Matrix PlainObject; + RhsType::MaxColsAtCompileTime>::type PlainObject; }; template diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/SolveTriangular.h b/uppsrc/plugin/Eigen/Eigen/src/Core/SolveTriangular.h index 96d3dde50..049890b25 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/SolveTriangular.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/SolveTriangular.h @@ -161,6 +161,7 @@ struct triangular_solver_selector { * TriangularView methods ***************************************************************************/ +#ifndef EIGEN_PARSED_BY_DOXYGEN template template void TriangularViewImpl::solveInPlace(const MatrixBase& _other) const @@ -188,6 +189,7 @@ TriangularViewImpl::solve(const MatrixBase& other) co { return internal::triangular_solve_retval(derived(), other.derived()); } +#endif namespace internal { diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/StableNorm.h b/uppsrc/plugin/Eigen/Eigen/src/Core/StableNorm.h index d2fe1e199..be04ed44d 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/StableNorm.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/StableNorm.h @@ -170,7 +170,8 @@ MatrixBase::stableNorm() const enum { CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit) || (int(internal::evaluator::Alignment)>0) // FIXME Alignment)>0 might not be enough - ) && (blockSize*sizeof(Scalar)*20) // if we cannot allocate on the stack, then let's not bother about this optimization }; typedef typename internal::conditional, internal::evaluator::Alignment>, typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/TriangularMatrix.h b/uppsrc/plugin/Eigen/Eigen/src/Core/TriangularMatrix.h index 641c20417..667ef09dc 
100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/TriangularMatrix.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/TriangularMatrix.h @@ -470,6 +470,8 @@ template class TriangularViewImpl<_Mat * \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if * \a Side==OnTheRight. * + * Note that the template parameter \c Side can be omitted, in which case \c Side==OnTheLeft + * * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this * is an upper (resp. lower) triangular matrix. @@ -495,6 +497,8 @@ template class TriangularViewImpl<_Mat * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. * This function will const_cast it, so constness isn't honored here. * + * Note that the template parameter \c Side can be omitted, in which case \c Side==OnTheLeft + * * See TriangularView:solve() for the details. 
*/ template @@ -539,13 +543,14 @@ template class TriangularViewImpl<_Mat template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha); + EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta); }; /*************************************************************************** * Implementation of triangular evaluation/assignment ***************************************************************************/ +#ifndef EIGEN_PARSED_BY_DOXYGEN // FIXME should we keep that possibility template template @@ -583,6 +588,7 @@ void TriangularViewImpl::lazyAssign(const TriangularBas eigen_assert(Mode == int(OtherDerived::Mode)); internal::call_assignment_no_alias(derived(), other.derived()); } +#endif /*************************************************************************** * Implementation of TriangularBase methods @@ -944,8 +950,7 @@ struct Assignment, internal::assign_ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) dst.resize(dstRows, dstCols); - dst.setZero(); - dst._assignProduct(src, 1); + dst._assignProduct(src, 1, 0); } }; @@ -956,7 +961,7 @@ struct Assignment, internal::add_ass typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { - dst._assignProduct(src, 1); + dst._assignProduct(src, 1, 1); } }; @@ -967,7 +972,7 @@ struct Assignment, internal::sub_ass typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { - dst._assignProduct(src, -1); + dst._assignProduct(src, -1, 1); } }; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/Visitor.h b/uppsrc/plugin/Eigen/Eigen/src/Core/Visitor.h index d71dfc968..54c1883d9 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/Visitor.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/Visitor.h @@ -194,7 +194,8 @@ struct functor_traits > { } // end namespace internal -/** \returns the minimum of all 
coefficients of *this and puts in *row and *col its location. +/** \fn DenseBase::minCoeff(IndexType* rowId, IndexType* colId) const + * \returns the minimum of all coefficients of *this and puts in *row and *col its location. * \warning the result is undefined if \c *this contains NaN. * * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff() @@ -230,7 +231,8 @@ DenseBase::minCoeff(IndexType* index) const return minVisitor.res; } -/** \returns the maximum of all coefficients of *this and puts in *row and *col its location. +/** \fn DenseBase::maxCoeff(IndexType* rowId, IndexType* colId) const + * \returns the maximum of all coefficients of *this and puts in *row and *col its location. * \warning the result is undefined if \c *this contains NaN. * * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff() diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AVX512/PacketMath.h index e46a60472..f6500a16e 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -59,8 +59,8 @@ template<> struct packet_traits : default_packet_traits HasLog = 1, #endif HasExp = 1, - HasSqrt = EIGEN_FAST_MATH, - HasRsqrt = EIGEN_FAST_MATH, + HasSqrt = 1, + HasRsqrt = 1, #endif HasDiv = 1 }; @@ -75,7 +75,7 @@ template<> struct packet_traits : default_packet_traits size = 8, HasHalfPacket = 1, #if EIGEN_GNUC_AT_LEAST(5, 3) - HasSqrt = EIGEN_FAST_MATH, + HasSqrt = 1, HasRsqrt = EIGEN_FAST_MATH, #endif HasDiv = 1 @@ -461,21 +461,53 @@ EIGEN_STRONG_INLINE Packet16i ploadu(const int* from) { // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> EIGEN_STRONG_INLINE Packet16f ploaddup(const float* from) { - __m256i low_half = _mm256_load_si256(reinterpret_cast(from)); - __m512 even_elements = 
_mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); - __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); - return pairs; + Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane0 = _mm256_blend_ps( + lane0, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2)); + + Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4)); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane1 = _mm256_blend_ps( + lane1, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2)); + +#ifdef EIGEN_VECTORIZE_AVX512DQ + Packet16f res = _mm512_undefined_ps(); + res = _mm512_insertf32x8(res, lane0, 0); + res = _mm512_insertf32x8(res, lane1, 1); + return res; +#else + Packet16f res = _mm512_undefined_ps(); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3); + return res; +#endif } // Loads 4 doubles from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, // a3} template <> EIGEN_STRONG_INLINE Packet8d ploaddup(const double* from) { - __m512d x = _mm512_setzero_pd(); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2); - x = _mm512_insertf64x2(x, 
_mm_loaddup_pd(&from[3]), 3); - return x; + Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from); + lane0 = _mm256_permute_pd(lane0, 3 << 2); + + Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2)); + lane1 = _mm256_permute_pd(lane1, 3 << 2); + + Packet8d res = _mm512_undefined_pd(); + res = _mm512_insertf64x4(res, lane0, 0); + return _mm512_insertf64x4(res, lane1, 1); } // Loads 4 floats from memory a returns the packet @@ -493,11 +525,11 @@ EIGEN_STRONG_INLINE Packet16f ploadquad(const float* from) { // {a0, a0 a0, a0, a1, a1, a1, a1} template <> EIGEN_STRONG_INLINE Packet8d ploadquad(const double* from) { - __m128d tmp0 = _mm_load_pd1(from); - __m256d lane0 = _mm256_broadcastsd_pd(tmp0); - __m128d tmp1 = _mm_load_pd1(from + 1); - __m256d lane1 = _mm256_broadcastsd_pd(tmp1); - __m512d tmp = _mm512_undefined_pd(); + Packet8d tmp = _mm512_undefined_pd(); + Packet2d tmp0 = _mm_load_pd1(from); + Packet2d tmp1 = _mm_load_pd1(from + 1); + Packet4d lane0 = _mm256_broadcastsd_pd(tmp0); + Packet4d lane1 = _mm256_broadcastsd_pd(tmp1); tmp = _mm512_insertf64x4(tmp, lane0, 0); return _mm512_insertf64x4(tmp, lane1, 1); } @@ -628,8 +660,8 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ // AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512 #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ - __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \ - __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1) + __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \ + __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1) #else #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ __m256 OUTPUT##_0 = _mm256_insertf128_ps( \ @@ -719,7 +751,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final = _mm256_add_ps(final, _mm256_blend_ps(blend1, blend2, 0xf0)); + final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0)); hsum1 = 
_mm256_hadd_ps(vecs8_0, vecs9_0); hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0); @@ -769,7 +801,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final_1 = _mm256_add_ps(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); + final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); __m512 final_output; @@ -819,7 +851,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_0 = _mm256_add_pd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0); tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1)); @@ -835,7 +867,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_1 = _mm256_add_pd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); __m512d final_output = _mm512_insertf64x4(final_output, final_0, 0); @@ -844,52 +876,55 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp(const Packet8d* vecs) template <> EIGEN_STRONG_INLINE float predux(const Packet16f& a) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - Packet8f x = _mm256_add_ps(lane0, lane1); - return predux(x); + //#ifdef EIGEN_VECTORIZE_AVX512DQ +#if 0 + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + Packet8f sum = padd(lane0, lane1); + Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1)); + tmp0 = _mm256_hadd_ps(tmp0, tmp0); + return pfirst(_mm256_hadd_ps(tmp0, tmp0)); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = 
_mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3)); sum = _mm_hadd_ps(sum, sum); sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1)); - return _mm_cvtss_f32(sum); + return pfirst(sum); #endif } template <> EIGEN_STRONG_INLINE double predux(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d sum = _mm256_add_pd(lane0, lane1); - __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); - return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0))); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d sum = padd(lane0, lane1); + Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); + return pfirst(_mm256_hadd_pd(tmp0, tmp0)); } template <> EIGEN_STRONG_INLINE Packet8f predux_downto4(const Packet16f& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - return _mm256_add_ps(lane0, lane1); + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + return padd(lane0, lane1); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum0 = _mm_add_ps(lane0, lane2); - __m128 sum1 = _mm_add_ps(lane1, lane3); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = 
_mm512_extractf32x4_ps(a, 3); + Packet4f sum0 = padd(lane0, lane2); + Packet4f sum1 = padd(lane1, lane3); return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1); #endif } template <> EIGEN_STRONG_INLINE Packet4d predux_downto4(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_add_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = padd(lane0, lane1); return res; } @@ -904,59 +939,58 @@ EIGEN_STRONG_INLINE float predux_mul(const Packet16f& a) { res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #endif } template <> EIGEN_STRONG_INLINE double predux_mul(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = pmul(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = pmul(lane0, lane1); res = pmul(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_min(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - 
__m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } template <> EIGEN_STRONG_INLINE double predux_min(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_min_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_min_pd(lane0, lane1); res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_max(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } - template <> EIGEN_STRONG_INLINE double predux_max(const Packet8d& a) { - __m256d lane0 = 
_mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_max_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_max_pd(lane0, lane1); res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1))); } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AltiVec/Complex.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AltiVec/Complex.h index 59367ba29..67db2f8ee 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AltiVec/Complex.h @@ -65,7 +65,7 @@ template<> struct unpacket_traits { typedef std::complex type; template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { Packet2cf res; - if((ptrdiff_t(&from) % 16) == 0) + if((std::ptrdiff_t(&from) % 16) == 0) res.v = pload((const float *)&from); else res.v = ploadu((const float *)&from); diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h index e7d4f4d8d..b3f1ea199 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -90,7 +90,7 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; #define _EIGEN_MASK_ALIGNMENT 0xfffffff0 #endif -#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) +#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) // Handle endianness properly while loading constants // Define global static constants: @@ -450,15 +450,15 @@ template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) { Packet4f p; - if((ptrdiff_t(from) % 16) == 0) p = pload(from); - else p = ploadu(from); + if((std::ptrdiff_t(from) % 16) == 0) p = 
pload(from); + else p = ploadu(from); return vec_perm(p, p, p16uc_DUPLICATE32_HI); } template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) { Packet4i p; - if((ptrdiff_t(from) % 16) == 0) p = pload(from); - else p = ploadu(from); + if((std::ptrdiff_t(from) % 16) == 0) p = pload(from); + else p = ploadu(from); return vec_perm(p, p, p16uc_DUPLICATE32_HI); } @@ -935,8 +935,8 @@ template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) template<> EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) { Packet2d p; - if((ptrdiff_t(from) % 16) == 0) p = pload(from); - else p = ploadu(from); + if((std::ptrdiff_t(from) % 16) == 0) p = pload(from); + else p = ploadu(from); return vec_splat_dbl<0>(p); } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/Half.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/Half.h index db9878796..294c517ea 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/Half.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/Half.h @@ -13,7 +13,7 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, @@ -53,7 +53,7 @@ namespace half_impl { // Make our own __half definition that is similar to CUDA's. 
struct __half { - EIGEN_DEVICE_FUNC __half() : x(0) {} + EIGEN_DEVICE_FUNC __half() {} explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {} unsigned short x; }; @@ -386,18 +386,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hexp(a)); -#else - return half(::expf(float(a))); -#endif -} -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { - return half(numext::expm1(float(a))); + return half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return half(::hlog(a)); + return Eigen::half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -409,11 +402,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hsqrt(a)); -#else - return half(::sqrtf(float(a))); -#endif + return half(::sqrtf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -431,18 +420,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hfloor(a)); -#else return half(::floorf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ 
&& __CUDA_ARCH__ >= 300 - return half(hceil(a)); -#else return half(::ceilf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { @@ -493,9 +474,59 @@ template<> struct is_arithmetic { enum { value = true }; }; } // end namespace internal +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 2; + //static const int max_digits10 = ; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return 
Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; +} + +namespace Eigen { + template<> struct NumTraits : GenericNumTraits { + enum { + IsSigned = true, + IsInteger = false, + IsComplex = false, + RequireInitialization = false + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h index 987a5291c..0348b41db 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -56,18 +56,6 @@ double2 pexp(const double2& a) return make_double2(exp(a.x), exp(a.y)); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pexpm1(const float4& a) -{ - return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pexpm1(const double2& a) -{ - return make_double2(expm1(a.x), expm1(a.y)); -} - template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt(const float4& a) { diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/PacketMath.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/PacketMath.h index ad66399e0..4dda63188 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs(const double2& a) { EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { - double tmp = kernel.packet[0].y; + float tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; kernel.packet[1].x = tmp; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index b9a125b42..ae54225f8 100644 --- 
a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -34,7 +34,6 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasExp = 1, - HasExpm1 = 1, HasLog = 1, HasLog1p = 1 }; @@ -276,14 +275,6 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { return __floats2half2_rn(r1, r2); } -template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1(const half2& a) { - float a1 = __low2float(a); - float a2 = __high2float(a); - float r1 = expm1f(a1); - float r2 = expm1f(a2); - return __floats2half2_rn(r1, r2); -} - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 template<> __device__ EIGEN_STRONG_INLINE diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/NEON/PacketMath.h index d392bf3ff..836fbc0dd 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/NEON/PacketMath.h @@ -46,19 +46,22 @@ typedef uint32x4_t Packet4ui; const Packet4f p4f_##NAME = pset1(X) #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1(X)) + const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1(X)) #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1(X) -// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function -// which available on LLVM and GCC (at least) -#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC +#if EIGEN_ARCH_ARM64 + // __builtin_prefetch tends to do nothing on ARM64 compilers because the + // prefetch instructions there are too detailed for __builtin_prefetch to map + // meaningfully to them. 
+ #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : ); +#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); #elif defined __pld #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) -#elif !EIGEN_ARCH_ARM64 - #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#elif EIGEN_ARCH_ARM32 + #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : ); #else // by default no explicit prefetching #define EIGEN_ARM_PREFETCH(ADDR) @@ -83,7 +86,7 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 0 }; }; -template<> struct packet_traits : default_packet_traits +template<> struct packet_traits : default_packet_traits { typedef Packet4i type; typedef Packet4i half; // Packet2i intrinsics not implemented yet @@ -105,19 +108,19 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } #endif -template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; -template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; +template<> struct unpacket_traits { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return vdupq_n_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return vdupq_n_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pset1(const int32_t& from) { return vdupq_n_s32(from); } template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { - const float32_t f[] = {0, 1, 2, 3}; 
+ const float f[] = {0, 1, 2, 3}; Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1(a), countdown); } -template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) +template<> EIGEN_STRONG_INLINE Packet4i plset(const int32_t& a) { const int32_t i[] = {0, 1, 2, 3}; Packet4i countdown = vld1q_s32(i); @@ -240,20 +243,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, con } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pload(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) +template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) { float32x2_t lo, hi; lo = vld1_dup_f32(from); hi = vld1_dup_f32(from+1); return vcombine_f32(lo, hi); } -template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) +template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int32_t* from) { int32x2_t lo, hi; lo = vld1_dup_s32(from); @@ -261,11 +264,11 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) return 
vcombine_s32(lo, hi); } -template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, Index stride) { @@ -276,7 +279,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const floa res = vsetq_lane_f32(from[3*stride], res, 3); return res; } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, Index stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int32_t* from, Index stride) { Packet4i res = pset1(0); res = vsetq_lane_s32(from[0*stride], res, 0); @@ -293,7 +296,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, co to[stride*2] = vgetq_lane_f32(from, 2); to[stride*3] = vgetq_lane_f32(from, 3); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, Index stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter(int32_t* to, const Packet4i& from, Index stride) { to[stride*0] = vgetq_lane_s32(from, 0); to[stride*1] = vgetq_lane_s32(from, 1); @@ -301,12 
+304,12 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const to[stride*3] = vgetq_lane_s32(from, 3); } -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { EIGEN_ARM_PREFETCH(addr); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch (const float* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); } // FIXME only store the 2 first elements ? -template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } -template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE int32_t pfirst(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { float32x2_t a_lo, a_hi; @@ -361,7 +364,7 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) return sum; } -template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux(const Packet4i& a) { int32x2_t a_lo, a_hi, sum; @@ -408,7 +411,7 @@ template<> EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) return vget_lane_f32(prod, 0); } -template<> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_mul(const Packet4i& a) { int32x2_t a_lo, a_hi, prod; @@ -436,7 +439,7 @@ template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) return vget_lane_f32(min, 0); } -template<> EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_min(const Packet4i& a) { int32x2_t a_lo, a_hi, min; @@ -461,7 +464,7 @@ template<> 
EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) return vget_lane_f32(max, 0); } -template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_max(const Packet4i& a) { int32x2_t a_lo, a_hi, max; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/ZVector/PacketMath.h b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/ZVector/PacketMath.h index e2deb25c8..57b01fc63 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -100,7 +100,7 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; // Mask alignment #define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 -#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) +#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) // Handle endianness properly while loading constants // Define global static constants: diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/functors/AssignmentFunctors.h b/uppsrc/plugin/Eigen/Eigen/src/Core/functors/AssignmentFunctors.h index 9b373c783..4153b877c 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/functors/AssignmentFunctors.h @@ -28,7 +28,7 @@ template struct assign_op { { internal::pstoret(a,b); } }; -// Empty overload for void type (used by PermutationMatrix +// Empty overload for void type (used by PermutationMatrix) template struct assign_op {}; template diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/functors/NullaryFunctors.h b/uppsrc/plugin/Eigen/Eigen/src/Core/functors/NullaryFunctors.h index 0311d9035..b03be0269 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/functors/NullaryFunctors.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/functors/NullaryFunctors.h @@ -44,16 +44,16 @@ struct linspaced_op_impl { linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : m_low(low), m_high(high), 
m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), - m_interPacket(plset(0)), m_flip(numext::abs(high) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { + typedef typename NumTraits::Real RealScalar; if(m_flip) - return (i==0)? m_low : (m_high - (m_size1-i)*m_step); + return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step); else - return (i==m_size1)? m_high : (m_low + i*m_step); + return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step); } template @@ -63,7 +63,7 @@ struct linspaced_op_impl // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) if(m_flip) { - Packet pi = padd(pset1(Scalar(i-m_size1)),m_interPacket); + Packet pi = plset(Scalar(i-m_size1)); Packet res = padd(pset1(m_high), pmul(pset1(m_step), pi)); if(i==0) res = pinsertfirst(res, m_low); @@ -71,7 +71,7 @@ struct linspaced_op_impl } else { - Packet pi = padd(pset1(Scalar(i)),m_interPacket); + Packet pi = plset(Scalar(i)); Packet res = padd(pset1(m_low), pmul(pset1(m_step), pi)); if(i==m_size1-unpacket_traits::size+1) res = pinsertlast(res, m_high); @@ -83,7 +83,6 @@ struct linspaced_op_impl const Scalar m_high; const Index m_size1; const Scalar m_step; - const Packet m_interPacket; const bool m_flip; }; @@ -93,8 +92,8 @@ struct linspaced_op_impl linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : m_low(low), m_multiplier((high-low)/convert_index(num_steps<=1 ? 
1 : num_steps-1)), - m_divisor(convert_index(num_steps+high-low)/(high-low+1)), - m_use_divisor((high+1)<(low+num_steps)) + m_divisor(convert_index((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))), + m_use_divisor(num_steps>1 && (numext::abs(high-low)+1) diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/functors/StlFunctors.h b/uppsrc/plugin/Eigen/Eigen/src/Core/functors/StlFunctors.h index 0b4e5a29d..6df3fa501 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/functors/StlFunctors.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/functors/StlFunctors.h @@ -72,7 +72,7 @@ template struct functor_traits > { enum { Cost = 1, PacketAccess = false }; }; -#if(__cplusplus < 201103L) +#if (__cplusplus < 201103L) && (EIGEN_COMP_MSVC <= 1900) // std::binder* are deprecated since c++11 and will be removed in c++17 template struct functor_traits > diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/functors/UnaryFunctors.h b/uppsrc/plugin/Eigen/Eigen/src/Core/functors/UnaryFunctors.h index bfc046556..2e6a00ffd 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/functors/UnaryFunctors.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/functors/UnaryFunctors.h @@ -262,26 +262,6 @@ struct functor_traits > { }; }; -/** \internal - * - * \brief Template functor to compute the exponential of a scalar - 1. 
- * - * \sa class CwiseUnaryOp, ArrayBase::expm1() - */ -template struct scalar_expm1_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_expm1_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::expm1(a); } - template - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexpm1(a); } -}; -template -struct functor_traits > { - enum { - PacketAccess = packet_traits::HasExpm1, - Cost = functor_traits >::Cost // TODO measure cost of expm1 - }; -}; - /** \internal * * \brief Template functor to compute the logarithm of a scalar @@ -698,13 +678,7 @@ struct functor_traits > template struct scalar_isnan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isnan(a); -#else - return (numext::isnan)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); } }; template struct functor_traits > @@ -722,13 +696,7 @@ struct functor_traits > template struct scalar_isinf_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isinf(a); -#else - return (numext::isinf)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); } }; template struct functor_traits > @@ -746,13 +714,7 @@ struct functor_traits > template struct scalar_isfinite_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isfinite(a); -#else - return (numext::isfinite)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type 
operator() (const Scalar& a) const { return (numext::isfinite)(a); } }; template struct functor_traits > diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/CoeffBasedProduct.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/CoeffBasedProduct.h deleted file mode 100644 index 2a9d65b94..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/CoeffBasedProduct.h +++ /dev/null @@ -1,476 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2008 Benoit Jacob -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_COEFFBASED_PRODUCT_H -#define EIGEN_COEFFBASED_PRODUCT_H - -namespace Eigen { - -namespace internal { - -/********************************************************************************* -* Coefficient based product implementation. -* It is designed for the following use cases: -* - small fixed sizes -* - lazy products -*********************************************************************************/ - -/* Since the all the dimensions of the product are small, here we can rely - * on the generic Assign mechanism to evaluate the product per coeff (or packet). - * - * Note that here the inner-loops should always be unrolled. 
- */ - -template -struct product_coeff_impl; - -template -struct product_packet_impl; - -template -struct traits > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all::type _LhsNested; - typedef typename remove_all::type _RhsNested; - typedef typename scalar_product_traits::ReturnType Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits<_RhsNested>::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits<_RhsNested>::Index>::type Index; - - enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - - RowsAtCompileTime = _LhsNested::RowsAtCompileTime, - ColsAtCompileTime = _RhsNested::ColsAtCompileTime, - InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), - - MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, - - LhsRowMajor = LhsFlags & RowMajorBit, - RhsRowMajor = RhsFlags & RowMajorBit, - - SameType = is_same::value, - - CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic - || ( (ColsAtCompileTime % packet_traits::size) == 0 - && (RhsFlags&AlignedBit) - ) - ), - - CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic - || ( (RowsAtCompileTime % packet_traits::size) == 0 - && (LhsFlags&AlignedBit) - ) - ), - - EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 - : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : (RhsRowMajor && !CanVectorizeLhs), - - Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) - | (EvalToRowMajor ? RowMajorBit : 0) - | NestingFlags - | (LhsFlags & RhsFlags & AlignedBit) - // TODO enable vectorization for mixed types - | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? 
PacketAccessBit : 0), - - CoeffReadCost = InnerSize == Dynamic ? Dynamic - : InnerSize == 0 ? 0 - : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) - + (InnerSize - 1) * NumTraits::AddCost, - - /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside - * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner - * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect - * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. - */ - CanVectorizeInner = SameType - && LhsRowMajor - && (!RhsRowMajor) - && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (LhsFlags & RhsFlags & AlignedBit) - && (InnerSize % packet_traits::size == 0) - }; -}; - -} // end namespace internal - -template -class CoeffBasedProduct - : internal::no_assignment_operator, - public MatrixBase > -{ - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct) - typedef typename Base::PlainObject PlainObject; - - private: - - typedef typename internal::traits::_LhsNested _LhsNested; - typedef typename internal::traits::_RhsNested _RhsNested; - - enum { - PacketSize = internal::packet_traits::size, - InnerSize = internal::traits::InnerSize, - Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = internal::traits::CanVectorizeInner - }; - - typedef internal::product_coeff_impl ScalarCoeffImpl; - - typedef CoeffBasedProduct LazyCoeffBasedProductType; - - public: - - inline CoeffBasedProduct(const CoeffBasedProduct& other) - : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs) - {} - - template - inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. 
- // We still allow to mix T and complex. - EIGEN_STATIC_ASSERT((internal::scalar_product_traits::Defined), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - eigen_assert(lhs.cols() == rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } - - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const - { - Scalar res; - ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); - return res; - } - - /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, - * which is why we don't set the LinearAccessBit. - */ - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - Scalar res; - const Index row = RowsAtCompileTime == 1 ? 0 : index; - const Index col = RowsAtCompileTime == 1 ? 
index : 0; - ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); - return res; - } - - template - EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const - { - PacketScalar res; - internal::product_packet_impl - ::run(row, col, m_lhs, m_rhs, res); - return res; - } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - EIGEN_STRONG_INLINE operator const PlainObject& () const - { - m_result.lazyAssign(*this); - return m_result; - } - - const _LhsNested& lhs() const { return m_lhs; } - const _RhsNested& rhs() const { return m_rhs; } - - const Diagonal diagonal() const - { return reinterpret_cast(*this); } - - template - const Diagonal diagonal() const - { return reinterpret_cast(*this); } - - const Diagonal diagonal(Index index) const - { return reinterpret_cast(*this).diagonal(index); } - - protected: - typename internal::add_const_on_value_type::type m_lhs; - typename internal::add_const_on_value_type::type m_rhs; - - mutable PlainObject m_result; -}; - -namespace internal { - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -template -struct nested, N, PlainObject> -{ - typedef PlainObject const& type; -}; - -/*************************************************************************** -* Normal product .coeff() implementation (with meta-unrolling) -***************************************************************************/ - -/************************************** -*** Scalar path - no vectorization *** -**************************************/ - -template -struct product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - product_coeff_impl::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col); - } -}; - -template -struct product_coeff_impl -{ - typedef typename Lhs::Index 
Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } -}; - -template -struct product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res) - { - res = RetScalar(0); - } -}; - -template -struct product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) - { - res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum(); - } -}; - -/******************************************* -*** Scalar path with inner vectorization *** -*******************************************/ - -template -struct product_coeff_vectorized_unroller -{ - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) - { - product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); - pres = padd(pres, pmul( lhs.template packet(row, UnrollingIndex) , rhs.template packet(UnrollingIndex, col) )); - } -}; - -template -struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) - { - pres = pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); - } -}; - -template -struct product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res) - { - res = 0; - } -}; - -template -struct product_coeff_impl -{ - typedef typename Lhs::PacketScalar Packet; - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits::size 
}; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - Packet pres; - product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); - res = predux(pres); - } -}; - -template -struct product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower -// NOTE maybe they are now useless since we have a specialization for Block -template -struct product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -template -struct product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); - } -}; - -template -struct product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs).sum(); - } -}; - -template -struct product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, res); - } -}; - -/******************* -*** Packet path *** -*******************/ - -template -struct product_packet_impl -{ - typedef typename Lhs::Index Index; - 
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - product_packet_impl::run(row, col, lhs, rhs, res); - res = pmadd(pset1(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet(UnrollingIndex-1, col), res); - } -}; - -template -struct product_packet_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - product_packet_impl::run(row, col, lhs, rhs, res); - res = pmadd(lhs.template packet(row, UnrollingIndex-1), pset1(rhs.coeff(UnrollingIndex-1, col)), res); - } -}; - -template -struct product_packet_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); - } -}; - -template -struct product_packet_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); - } -}; - -template -struct product_packet_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res) - { - res = pset1(0); - } -}; - -template -struct product_packet_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res) - { - res = pset1(0); - } -}; - -template -struct product_packet_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) - { - res = pset1(0); - for(Index i = 0; i < lhs.cols(); ++i) - res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); - } -}; - -template -struct product_packet_impl -{ - 
typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) - { - res = pset1(0); - for(Index i = 0; i < lhs.cols(); ++i) - res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_COEFFBASED_PRODUCT_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h index 61df3be57..6440e1d09 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -83,8 +83,8 @@ static void run(Index rows, Index cols, Index depth, if(info) { // this is the parallel version! - Index tid = omp_get_thread_num(); - Index threads = omp_get_num_threads(); + int tid = omp_get_thread_num(); + int threads = omp_get_num_threads(); LhsScalar* blockA = blocking.blockA(); eigen_internal_assert(blockA!=0); @@ -116,9 +116,9 @@ static void run(Index rows, Index cols, Index depth, info[tid].sync = k; // Computes C_i += A' * B' per A'_i - for(Index shift=0; shift gebp_kernel; - Matrix buffer; + Matrix buffer((internal::constructor_without_unaligned_array_assert())); // let's process the block per panel of actual_mc x BlockSize, // again, each is split into three parts, etc. 
@@ -199,7 +199,7 @@ struct general_product_to_triangular_selector; template struct general_product_to_triangular_selector { - static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) + static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) { typedef typename MatrixType::Scalar Scalar; @@ -217,6 +217,9 @@ struct general_product_to_triangular_selector Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); + if(!beta) + mat.template triangularView().setZero(); + enum { StorageOrder = (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, @@ -244,7 +247,7 @@ struct general_product_to_triangular_selector template struct general_product_to_triangular_selector { - static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) + static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) { typedef typename internal::remove_all::type Lhs; typedef internal::blas_traits LhsBlasTraits; @@ -260,13 +263,19 @@ struct general_product_to_triangular_selector typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); + if(!beta) + mat.template triangularView().setZero(); + enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, - RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, + SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; Index size = mat.cols(); + if(SkipDiag) + size--; Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space internal::general_matrix_matrix_triangular_product + IsRowMajor ? 
RowMajor : ColMajor, UpLo&(Lower|Upper)> ::run(size, depth, - &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), + &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); } }; template template -TriangularView& TriangularViewImpl::_assignProduct(const ProductType& prod, const Scalar& alpha) +TriangularView& TriangularViewImpl::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { + EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); - general_product_to_triangular_selector::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha); + general_product_to_triangular_selector::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); return derived(); } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 5b7c15cca..41e18ff07 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product& blocking) \ { \ - if (lhs==rhs) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \ general_matrix_matrix_rankupdate \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ diff --git 
a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h deleted file mode 100644 index 3deed068e..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Level 3 BLAS SYRK/HERK implementation. - ******************************************************************************** -*/ - -#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H -#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H - -namespace Eigen { - -namespace internal { - -template -struct general_matrix_matrix_rankupdate : - general_matrix_matrix_triangular_product< - Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {}; - - -// try to go to BLAS specialization -#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \ -template \ -struct general_matrix_matrix_triangular_product { \ - static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \ - { \ - if (lhs==rhs) { \ - general_matrix_matrix_rankupdate \ - ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ - } else { \ - general_matrix_matrix_triangular_product \ - ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ - } \ - } \ -}; - -EIGEN_MKL_RANKUPDATE_SPECIALIZE(double) -//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex) -EIGEN_MKL_RANKUPDATE_SPECIALIZE(float) -//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex) - -// SYRK for float/double -#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \ -template \ -struct general_matrix_matrix_rankupdate { \ - enum { \ - IsLower = (UpLo&Lower) == Lower, \ - LowUp = IsLower ? Lower : Upper, \ - conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 
1 : 0 \ - }; \ - static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ - const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ - { \ - /* typedef Matrix MatrixRhs;*/ \ -\ - MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ - char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \ - MKLTYPE alpha_, beta_; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ - MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \ - } \ -}; - -// HERK for complex data -#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \ -template \ -struct general_matrix_matrix_rankupdate { \ - enum { \ - IsLower = (UpLo&Lower) == Lower, \ - LowUp = IsLower ? Lower : Upper, \ - conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \ - }; \ - static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ - const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ - { \ - typedef Matrix MatrixType; \ -\ - MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ - char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 
'C':'N'; \ - RTYPE alpha_, beta_; \ - const EIGTYPE* a_ptr; \ -\ -/* Set alpha_ & beta_ */ \ -/* assign_scalar_eig2mkl(alpha_, alpha); */\ -/* assign_scalar_eig2mkl(beta_, EIGTYPE(1));*/ \ - alpha_ = alpha.real(); \ - beta_ = 1.0; \ -/* Copy with conjugation in some cases*/ \ - MatrixType a; \ - if (conjA) { \ - Map > mapA(lhs,n,k,OuterStride<>(lhsStride)); \ - a = mapA.conjugate(); \ - lda = a.outerStride(); \ - a_ptr = a.data(); \ - } else a_ptr=lhs; \ - MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \ - } \ -}; - - -EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk) -EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk) - -//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk) -//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk) - - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h deleted file mode 100644 index 060af328e..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * General matrix-matrix product functionality based on ?GEMM. - ******************************************************************************** -*/ - -#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H -#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H - -namespace Eigen { - -namespace internal { - -/********************************************************************** -* This file implements general matrix-matrix multiplication using BLAS -* gemm function via partial specialization of -* general_matrix_matrix_product::run(..) 
method for float, double, -* std::complex and std::complex types -**********************************************************************/ - -// gemm specialization - -#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \ -template< \ - typename Index, \ - int LhsStorageOrder, bool ConjugateLhs, \ - int RhsStorageOrder, bool ConjugateRhs> \ -struct general_matrix_matrix_product \ -{ \ -static void run(Index rows, Index cols, Index depth, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha, \ - level3_blocking& /*blocking*/, \ - GemmParallelInfo* /*info = 0*/) \ -{ \ - using std::conj; \ -\ - char transa, transb; \ - MKL_INT m, n, k, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX a_tmp, b_tmp; \ - EIGTYPE myone(1);\ -\ -/* Set transpose options */ \ - transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ - transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 
'C' : 'T') : 'N'; \ -\ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ - k = (MKL_INT)depth; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \ - Map > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \ - a_tmp = lhs.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else a = _lhs; \ -\ - if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \ - Map > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \ - b_tmp = rhs.conjugate(); \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } else b = _rhs; \ -\ - MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ -}}; - -GEMM_SPECIALIZATION(double, d, double, d) -GEMM_SPECIALIZATION(float, f, float, s) -GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z) -GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c) - -} // end namespase internal - -} // end namespace Eigen - -#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixVector.h index 41d8242e1..3c1a7fc40 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/products/GeneralMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2016 Gael Guennebaud +// Copyright (C) 2008-2009 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -15,8 +15,10 @@ namespace Eigen { namespace internal { /* Optimized col-major matrix * vector product: - * This algorithm processes the matrix per vertical panels, - * which are then processed horizontaly per chunck of 8*PacketSize x 1 vertical segments. + * This algorithm processes 4 columns at onces that allows to both reduce + * the number of load/stores of the result by a factor 4 and to reduce + * the instruction dependency. Moreover, we know that all bands have the + * same alignment pattern. * * Mixing type logic: C += alpha * A * B * | A | B |alpha| comments @@ -25,7 +27,33 @@ namespace internal { * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |real | optimal case, vectorization possible via real-cplx mul * + * Accesses to the matrix coefficients follow the following logic: + * + * - if all columns have the same alignment then + * - if the columns have the same alignment as the result vector, then easy! (-> AllAligned case) + * - otherwise perform unaligned loads only (-> NoneAligned case) + * - otherwise + * - if even columns have the same alignment then + * // odd columns are guaranteed to have the same alignment too + * - if even or odd columns have the same alignment as the result, then + * // for a register size of 2 scalars, this is guarantee to be the case (e.g., SSE with double) + * - perform half aligned and half unaligned loads (-> EvenAligned case) + * - otherwise perform unaligned loads only (-> NoneAligned case) + * - otherwise, if the register size is 4 scalars (e.g., SSE with float) then + * - one over 4 consecutive columns is guaranteed to be aligned with the result vector, + * perform simple aligned loads for this column and aligned loads plus re-alignment for the other. 
(-> FirstAligned case) + * // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h + * - otherwise, + * // if we get here, this means the register size is greater than 4 (e.g., AVX with floats), + * // we currently fall back to the NoneAligned case + * * The same reasoning apply for the transposed case. + * + * The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet... + * One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment + * strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on a 8 byte boundary are not too slow + * compared to unaligned loads on a 4 byte boundary. + * */ template struct general_matrix_vector_product @@ -59,145 +87,238 @@ EIGEN_DONT_INLINE static void run( template EIGEN_DONT_INLINE void general_matrix_vector_product::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, RhsScalar alpha) { EIGEN_UNUSED_VARIABLE(resIncr); eigen_internal_assert(resIncr==1); + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \ + pstore(&res[j], \ + padd(pload(&res[j]), \ + padd( \ + padd(pcj.pmul(lhs0.template load(j), ptmp0), \ + pcj.pmul(lhs1.template load(j), ptmp1)), \ + padd(pcj.pmul(lhs2.template load(j), ptmp2), \ + pcj.pmul(lhs3.template load(j), ptmp3)) ))) - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code. 
- LhsMapper lhs(alhs); + typedef typename LhsMapper::VectorMapper LhsScalars; conj_helper cj; conj_helper pcj; + if(ConjugateRhs) + alpha = numext::conj(alpha); + + enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; + const Index columnsAtOnce = 4; + const Index peels = 2; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index ResPacketAlignedMask = ResPacketSize-1; +// const Index PeelAlignedMask = ResPacketSize*peels-1; + const Index size = rows; + const Index lhsStride = lhs.stride(); - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; - const Index n8 = rows-8*ResPacketSize+1; - const Index n4 = rows-4*ResPacketSize+1; - const Index n3 = rows-3*ResPacketSize+1; - const Index n2 = rows-2*ResPacketSize+1; - const Index n1 = rows-1*ResPacketSize+1; + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type. + Index alignedStart = internal::first_default_aligned(res,size); + Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; - // TODO: improve the following heuristic: - const Index block_cols = cols<128 ? cols : (lhsStride*sizeof(LhsScalar)<32000?16:4); - ResPacket palpha = pset1(alpha); + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? 
EvenAligned + : FirstAligned; - for(Index j2=0; j2(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)), - c3 = pset1(ResScalar(0)), - c4 = pset1(ResScalar(0)), - c5 = pset1(ResScalar(0)), - c6 = pset1(ResScalar(0)), - c7 = pset1(ResScalar(0)); + alignedSize = 0; + alignedStart = 0; + alignmentPattern = NoneAligned; + } + else if(LhsPacketSize > 4) + { + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. + // Currently, it seems to be better to perform unaligned loads anyway + alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load(i+LhsPacketSize*3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load(i+LhsPacketSize*4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load(i+LhsPacketSize*5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load(i+LhsPacketSize*6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load(i+LhsPacketSize*7,j),b0,c7); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu(res+i+ResPacketSize*3))); - pstoreu(res+i+ResPacketSize*4, pmadd(c4,palpha,ploadu(res+i+ResPacketSize*4))); - pstoreu(res+i+ResPacketSize*5, pmadd(c5,palpha,ploadu(res+i+ResPacketSize*5))); - pstoreu(res+i+ResPacketSize*6, pmadd(c6,palpha,ploadu(res+i+ResPacketSize*6))); - pstoreu(res+i+ResPacketSize*7, pmadd(c7,palpha,ploadu(res+i+ResPacketSize*7))); + while (skipColumns(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)), - c3 = 
pset1(ResScalar(0)); - - for(Index j=j2; j(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load(i+LhsPacketSize*3,j),b0,c3); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu(res+i+ResPacketSize*3))); - - i+=ResPacketSize*4; + skipColumns = (std::min)(skipColumns,cols); + // note that the skiped columns are processed later. } - if(i(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)); - for(Index j=j2; j(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+LhsPacketSize*2,j),b0,c2); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu(res+i+ResPacketSize*2))); + /* eigen_internal_assert( (alignmentPattern==NoneAligned) + || (skipColumns + columnsAtOnce >= cols) + || LhsPacketSize > size + || (size_t(firstLhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);*/ + } + else if(Vectorizable) + { + alignedStart = 0; + alignedSize = size; + alignmentPattern = AllAligned; + } - i+=ResPacketSize*3; - } - if(i(ResScalar(0)), - c1 = pset1(ResScalar(0)); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - for(Index j=j2; j(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template 
load(i+LhsPacketSize*1,j),b0,c1); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu(res+i+ResPacketSize*1))); - i+=ResPacketSize*2; - } - if(i(alpha*rhs(i, 0)), + ptmp1 = pset1(alpha*rhs(i+offset1, 0)), + ptmp2 = pset1(alpha*rhs(i+2, 0)), + ptmp3 = pset1(alpha*rhs(i+offset3, 0)); + + // this helps a lot generating better binary code + const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0), lhs1 = lhs.getVectorMapper(0, i+offset1), + lhs2 = lhs.getVectorMapper(0, i+2), lhs3 = lhs.getVectorMapper(0, i+offset3); + + if (Vectorizable) { - ResPacket c0 = pset1(ResScalar(0)); - for(Index j=j2; j(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+0,j),b0,c0); + res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]); } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - i+=ResPacketSize; - } - for(;ialignedStart) + { + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j1) + { + LhsPacket A00, A01, A02, A03, A10, A11, A12, A13; + ResPacket T0, T1; + + A01 = lhs1.template load(alignedStart-1); + A02 = lhs2.template load(alignedStart-2); + A03 = lhs3.template load(alignedStart-3); + + for (; j(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load(j-3+LhsPacketSize); palign<3>(A03,A13); + + A00 = lhs0.template load(j); + A10 = lhs0.template load(j+LhsPacketSize); + T0 = pcj.pmadd(A00, ptmp0, pload(&res[j])); + T1 = pcj.pmadd(A10, ptmp0, pload(&res[j+ResPacketSize])); + + T0 = pcj.pmadd(A01, ptmp1, T0); + A01 = lhs1.template load(j-1+2*LhsPacketSize); palign<1>(A11,A01); + T0 = pcj.pmadd(A02, ptmp2, T0); + A02 = lhs2.template load(j-2+2*LhsPacketSize); palign<2>(A12,A02); + T0 = pcj.pmadd(A03, ptmp3, 
T0); + pstore(&res[j],T0); + A03 = lhs3.template load(j-3+2*LhsPacketSize); palign<3>(A13,A03); + T1 = pcj.pmadd(A11, ptmp1, T1); + T1 = pcj.pmadd(A12, ptmp2, T1); + T1 = pcj.pmadd(A13, ptmp3, T1); + pstore(&res[j+ResPacketSize],T1); + } + } + for (; j(alpha*rhs(k, 0)); + const LhsScalars lhs0 = lhs.getVectorMapper(0, k); + + if (Vectorizable) + { + /* explicit vectorization */ + // process first unaligned result's coeffs + for (Index j=0; j(alignedStart)) + for (Index i = alignedStart;i(i), ptmp0, pload(&res[i]))); + else + for (Index i = alignedStart;i(i), ptmp0, pload(&res[i]))); + } + + // process remaining scalars (or all if no explicit vectorization) + for (Index i=alignedSize; i EIGEN_DONT_INLINE void general_matrix_vector_product::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, ResScalar alpha) { - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code. - LhsMapper lhs(alhs); - eigen_internal_assert(rhs.stride()==1); + + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\ + RhsPacket b = rhs.getVectorMapper(j, 0).template load(0); \ + ptmp0 = pcj.pmadd(lhs0.template load(j), b, ptmp0); \ + ptmp1 = pcj.pmadd(lhs1.template load(j), b, ptmp1); \ + ptmp2 = pcj.pmadd(lhs2.template load(j), b, ptmp2); \ + ptmp3 = pcj.pmadd(lhs3.template load(j), b, ptmp3); } + conj_helper cj; conj_helper pcj; - // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large, - // processing 8 rows at once might be counter productive wrt cache. - const Index n8 = lhs.stride()*sizeof(LhsScalar)>32000 ? 
0 : rows-7; - const Index n4 = rows-3; - const Index n2 = rows-1; + typedef typename LhsMapper::VectorMapper LhsScalars; - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; + enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; + const Index rowsAtOnce = 4; + const Index peels = 2; + const Index RhsPacketAlignedMask = RhsPacketSize-1; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index depth = cols; + const Index lhsStride = lhs.stride(); - Index i=0; - for(; i1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; + + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(depth); + const Index rhsAlignmentOffset = rhs.firstAligned(rows); + + // find how many rows do we have to skip to be aligned with rhs (if possible) + Index skipRows = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. 
for floats) + if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || + (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) || + (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) ) { - ResPacket c0 = pset1(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)), - c3 = pset1(ResScalar(0)), - c4 = pset1(ResScalar(0)), - c5 = pset1(ResScalar(0)), - c6 = pset1(ResScalar(0)), - c7 = pset1(ResScalar(0)); - - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load(j,0); - - c0 = pcj.pmadd(lhs.template load(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load(i+3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load(i+4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load(i+5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load(i+6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load(i+7,j),b0,c7); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - ResScalar cc4 = predux(c4); - ResScalar cc5 = predux(c5); - ResScalar cc6 = predux(c6); - ResScalar cc7 = predux(c7); - for(; j 4) { - ResPacket c0 = pset1(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)), - c3 = pset1(ResScalar(0)); - - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load(j,0); - - c0 = pcj.pmadd(lhs.template load(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load(i+3,j),b0,c3); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - for(; j 4. 
+ alignmentPattern = NoneAligned; } - for(; i1) { - ResPacket c0 = pset1(ResScalar(0)), - c1 = pset1(ResScalar(0)); + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth(j,0); - - c0 = pcj.pmadd(lhs.template load(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+1,j),b0,c1); + // nothing can be aligned, no need to skip any column + alignmentPattern = NoneAligned; + skipRows = 0; } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - for(; j= rows) + || LhsPacketSize > depth + || (size_t(firstLhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);*/ } - for(; i(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load(j,0); - c0 = pcj.pmadd(lhs.template load(i,j),b0,c0); - } - ResScalar cc0 = predux(c0); - for(; j(ResScalar(0)), ptmp1 = pset1(ResScalar(0)), + ptmp2 = pset1(ResScalar(0)), ptmp3 = pset1(ResScalar(0)); + + // process initial unaligned coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; jalignedStart) + { + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j1) + { + /* Here we proccess 4 rows with with two peeled iterations to hide + * the overhead of unaligned loads. Moreover unaligned loads are handled + * using special shift/move operations between the two aligned packets + * overlaping the desired unaligned packet. This is *much* more efficient + * than basic unaligned loads. 
+ */ + LhsPacket A01, A02, A03, A11, A12, A13; + A01 = lhs1.template load(alignedStart-1); + A02 = lhs2.template load(alignedStart-2); + A03 = lhs3.template load(alignedStart-3); + + for (; j(0); + A11 = lhs1.template load(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load(j-3+LhsPacketSize); palign<3>(A03,A13); + + ptmp0 = pcj.pmadd(lhs0.template load(j), b, ptmp0); + ptmp1 = pcj.pmadd(A01, b, ptmp1); + A01 = lhs1.template load(j-1+2*LhsPacketSize); palign<1>(A11,A01); + ptmp2 = pcj.pmadd(A02, b, ptmp2); + A02 = lhs2.template load(j-2+2*LhsPacketSize); palign<2>(A12,A02); + ptmp3 = pcj.pmadd(A03, b, ptmp3); + A03 = lhs3.template load(j-3+2*LhsPacketSize); palign<3>(A13,A03); + + b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load(0); + ptmp0 = pcj.pmadd(lhs0.template load(j+LhsPacketSize), b, ptmp0); + ptmp1 = pcj.pmadd(A11, b, ptmp1); + ptmp2 = pcj.pmadd(A12, b, ptmp2); + ptmp3 = pcj.pmadd(A13, b, ptmp3); + } + } + for (; j(tmp0); + const LhsScalars lhs0 = lhs.getVectorMapper(i, 0); + // process first unaligned result's coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; jalignedStart) + { + // process aligned rhs coeffs + if (lhs0.template aligned(alignedStart)) + for (Index j = alignedStart;j(j), rhs.getVectorMapper(j, 0).template load(0), ptmp0); + else + for (Index j = alignedStart;j(j), rhs.getVectorMapper(j, 0).template load(0), ptmp0); + tmp0 += predux(ptmp0); + } + + // process remaining scalars + // FIXME this loop get vectorized by the compiler ! 
+ for (Index j=alignedSize; j and std::complex types -**********************************************************************/ - -// gemv specialization - -template -struct general_matrix_vector_product_gemv : - general_matrix_vector_product {}; - -#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ -template \ -struct general_matrix_vector_product { \ -static void run( \ - Index rows, Index cols, \ - const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsIncr, \ - Scalar* res, Index resIncr, Scalar alpha) \ -{ \ - if (ConjugateLhs) { \ - general_matrix_vector_product::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ - } else { \ - general_matrix_vector_product_gemv::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ - } \ -} \ -}; \ -template \ -struct general_matrix_vector_product { \ -static void run( \ - Index rows, Index cols, \ - const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsIncr, \ - Scalar* res, Index resIncr, Scalar alpha) \ -{ \ - general_matrix_vector_product_gemv::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ -} \ -}; \ - -EIGEN_MKL_GEMV_SPECIALIZE(double) -EIGEN_MKL_GEMV_SPECIALIZE(float) -EIGEN_MKL_GEMV_SPECIALIZE(dcomplex) -EIGEN_MKL_GEMV_SPECIALIZE(scomplex) - -#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \ -template \ -struct general_matrix_vector_product_gemv \ -{ \ -typedef Matrix GEMVVector;\ -\ -static void run( \ - Index rows, Index cols, \ - const EIGTYPE* lhs, Index lhsStride, \ - const EIGTYPE* rhs, Index rhsIncr, \ - EIGTYPE* res, Index resIncr, EIGTYPE alpha) \ -{ \ - MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \ - MKLTYPE alpha_, beta_; \ - const EIGTYPE *x_ptr, myone(1); \ - char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 
'C' : 'T'; \ - if (LhsStorageOrder==RowMajor) { \ - m=cols; \ - n=rows; \ - }\ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ - GEMVVector x_tmp; \ - if (ConjugateRhs) { \ - Map > map_x(rhs,cols,1,InnerStride<>(incx)); \ - x_tmp=map_x.conjugate(); \ - x_ptr=x_tmp.data(); \ - incx=1; \ - } else x_ptr=rhs; \ - MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ -}\ -}; - -EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d) -EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s) -EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z) -EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c) - -} // end namespase internal - -} // end namespace Eigen - -#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/Parallelizer.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/Parallelizer.h index 2a31e4cbe..c2f084c82 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/Parallelizer.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/products/Parallelizer.h @@ -75,7 +75,7 @@ template struct GemmParallelInfo { GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {} - int volatile sync; + Index volatile sync; int volatile users; Index lhs_start; @@ -104,13 +104,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, // - the sizes are large enough // compute the maximal number of threads from the size of the product: - // FIXME this has to be fine tuned + // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once. Index size = transpose ? 
rows : cols; - Index pb_max_threads = std::max(1,size / 32); + Index pb_max_threads = std::max(1,size / Functor::Traits::nr); + // compute the maximal number of threads from the total amount of work: double work = static_cast(rows) * static_cast(cols) * static_cast(depth); - double kMinTaskSize = 50000; // Heuristic. + double kMinTaskSize = 50000; // FIXME improve this heuristic. pb_max_threads = std::max(1, std::min(pb_max_threads, work / kMinTaskSize)); // compute the number of threads we are going to use diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h deleted file mode 100644 index dfa687fef..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +++ /dev/null @@ -1,295 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM. - ******************************************************************************** -*/ - -#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H -#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H - -namespace Eigen { - -namespace internal { - - -/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ - -#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_selfadjoint_matrix \ -{\ -\ - static void run( \ - Index rows, Index cols, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ - { \ - char side='L', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX b_tmp; \ - EIGTYPE myone(1);\ -\ -/* Set transpose options */ \ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if (LhsStorageOrder==RowMajor) uplo='U'; \ - a = _lhs; \ -\ - if (RhsStorageOrder==RowMajor) { \ - Map > 
rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ - b_tmp = rhs.adjoint(); \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } else b = _rhs; \ -\ - MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ -\ - } \ -}; - - -#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_selfadjoint_matrix \ -{\ - static void run( \ - Index rows, Index cols, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ - { \ - char side='L', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX b_tmp; \ - Matrix a_tmp; \ - EIGTYPE myone(1); \ -\ -/* Set transpose options */ \ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \ - Map, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \ - a_tmp = lhs.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else a = _lhs; \ - if (LhsStorageOrder==RowMajor) uplo='U'; \ -\ - if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \ - b = _rhs; } \ - else { \ - if (RhsStorageOrder==ColMajor && ConjugateRhs) { \ - Map > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \ - b_tmp = rhs.conjugate(); \ - } else \ - if (ConjugateRhs) { \ - Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ - b_tmp = rhs.adjoint(); \ - } else { \ - Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ - b_tmp = rhs.transpose(); \ - } \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } \ -\ - MKLPREFIX##hemm(&side, &uplo, 
&m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ -\ - } \ -}; - -EIGEN_MKL_SYMM_L(double, double, d, d) -EIGEN_MKL_SYMM_L(float, float, f, s) -EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c) - - -/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ - -#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_selfadjoint_matrix \ -{\ -\ - static void run( \ - Index rows, Index cols, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ - { \ - char side='R', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX b_tmp; \ - EIGTYPE myone(1);\ -\ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)rhsStride; \ - ldb = (MKL_INT)lhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if (RhsStorageOrder==RowMajor) uplo='U'; \ - a = _rhs; \ -\ - if (LhsStorageOrder==RowMajor) { \ - Map > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \ - b_tmp = lhs.adjoint(); \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } else b = _lhs; \ -\ - MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ -\ - } \ -}; - - -#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_selfadjoint_matrix \ -{\ - static void run( \ - Index rows, Index cols, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ - { \ - char side='R', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, 
beta_; \ - MatrixX##EIGPREFIX b_tmp; \ - Matrix a_tmp; \ - EIGTYPE myone(1); \ -\ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)rhsStride; \ - ldb = (MKL_INT)lhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \ - Map, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \ - a_tmp = rhs.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else a = _rhs; \ - if (RhsStorageOrder==RowMajor) uplo='U'; \ -\ - if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \ - b = _lhs; } \ - else { \ - if (LhsStorageOrder==ColMajor && ConjugateLhs) { \ - Map > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \ - b_tmp = lhs.conjugate(); \ - } else \ - if (ConjugateLhs) { \ - Map > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ - b_tmp = lhs.adjoint(); \ - } else { \ - Map > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ - b_tmp = lhs.transpose(); \ - } \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } \ -\ - MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ - } \ -}; - -EIGEN_MKL_SYMM_R(double, double, d, d) -EIGEN_MKL_SYMM_R(float, float, f, s) -EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h index d97f8caa7..3fd180e6c 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ 
-83,10 +83,10 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product(t3); - size_t starti = FirstTriangular ? 0 : j+2; - size_t endi = FirstTriangular ? j : size; - size_t alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti); - size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize); + Index starti = FirstTriangular ? 0 : j+2; + Index endi = FirstTriangular ? j : size; + Index alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti); + Index alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize); res[j] += cjd.pmul(numext::real(A0[j]), t0); res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1); @@ -101,7 +101,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product(a0It); a0It += PacketSize; Packet A1i = ploadu(a1It); a1It += PacketSize; @@ -125,7 +125,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product -struct selfadjoint_matrix_vector_product_symv : - selfadjoint_matrix_vector_product {}; - -#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ -template \ -struct selfadjoint_matrix_vector_product { \ -static void run( \ - Index size, const Scalar* lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ - enum {\ - IsColMajor = StorageOrder==ColMajor \ - }; \ - if (IsColMajor == ConjugateLhs) {\ - selfadjoint_matrix_vector_product::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ - } else {\ - selfadjoint_matrix_vector_product_symv::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ - }\ - } \ -}; \ - -EIGEN_MKL_SYMV_SPECIALIZE(double) -EIGEN_MKL_SYMV_SPECIALIZE(float) -EIGEN_MKL_SYMV_SPECIALIZE(dcomplex) -EIGEN_MKL_SYMV_SPECIALIZE(scomplex) - -#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \ -template \ -struct selfadjoint_matrix_vector_product_symv \ -{ \ -typedef Matrix SYMVVector;\ -\ -static void run( \ -Index size, const EIGTYPE* lhs, Index lhsStride, \ -const EIGTYPE* _rhs, 
Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ -{ \ - enum {\ - IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ - IsLower = UpLo == Lower ? 1 : 0 \ - }; \ - MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ - MKLTYPE alpha_, beta_; \ - const EIGTYPE *x_ptr, myone(1); \ - char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ - SYMVVector x_tmp; \ - if (ConjugateRhs) { \ - Map > map_x(_rhs,size,1,InnerStride<>(incx)); \ - x_tmp=map_x.conjugate(); \ - x_ptr=x_tmp.data(); \ - incx=1; \ - } else x_ptr=_rhs; \ - MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ -}\ -}; - -EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv) -EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv) -EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) -EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h index 8a2f7cd78..6ec5a8a0b 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -137,7 +137,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix triangularBuffer; + Matrix triangularBuffer((internal::constructor_without_unaligned_array_assert())); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -284,7 +284,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix triangularBuffer; + Matrix triangularBuffer((internal::constructor_without_unaligned_array_assert())); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); diff --git 
a/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h deleted file mode 100644 index 4cc56a42f..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +++ /dev/null @@ -1,309 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Triangular matrix * matrix product functionality based on ?TRMM. - ******************************************************************************** -*/ - -#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H -#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H - -namespace Eigen { - -namespace internal { - - -template -struct product_triangular_matrix_matrix_trmm : - product_triangular_matrix_matrix {}; - - -// try to go to BLAS specialization -#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ -template \ -struct product_triangular_matrix_matrix { \ - static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ - const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking& blocking) { \ - product_triangular_matrix_matrix_trmm::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ - } \ -}; - -EIGEN_MKL_TRMM_SPECIALIZE(double, true) -EIGEN_MKL_TRMM_SPECIALIZE(double, false) -EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) -EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) -EIGEN_MKL_TRMM_SPECIALIZE(float, true) -EIGEN_MKL_TRMM_SPECIALIZE(float, false) -EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) -EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) - -// implements col-major += alpha * op(triangular) * op(general) -#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_triangular_matrix_matrix_trmm \ -{ \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - LowUp = IsLower ? Lower : Upper, \ - conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 
1 : 0 \ - }; \ -\ - static void run( \ - Index _rows, Index _cols, Index _depth, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha, level3_blocking& blocking) \ - { \ - Index diagSize = (std::min)(_rows,_depth); \ - Index rows = IsLower ? _rows : diagSize; \ - Index depth = IsLower ? diagSize : _depth; \ - Index cols = _cols; \ -\ - typedef Matrix MatrixLhs; \ - typedef Matrix MatrixRhs; \ -\ -/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ - if (rows != depth) { \ -\ - int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ -\ - if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ - /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ - product_triangular_matrix_matrix::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ - /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ - } else { \ - /* Make sense to call GEMM */ \ - Map > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ - MatrixLhs aa_tmp=lhsMap.template triangularView(); \ - MKL_INT aStride = aa_tmp.outerStride(); \ - gemm_blocking_space gemm_blocking(_rows,_cols,_depth); \ - general_matrix_matrix_product::run( \ - rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \ -\ - /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ - } \ - return; \ - } \ - char side = 'L', transa, uplo, diag = 'N'; \ - EIGTYPE *b; \ - const EIGTYPE *a; \ - MKL_INT m, n, lda, ldb; \ - MKLTYPE alpha_; \ -\ -/* Set alpha_*/ \ - assign_scalar_eig2mkl(alpha_, alpha); \ -\ -/* Set m, n */ \ - m = (MKL_INT)diagSize; \ - n = (MKL_INT)cols; \ -\ -/* Set trans */ \ - transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 
'C' : 'T') : 'N'; \ -\ -/* Set b, ldb */ \ - Map > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \ - MatrixX##EIGPREFIX b_tmp; \ -\ - if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ -\ -/* Set uplo */ \ - uplo = IsLower ? 'L' : 'U'; \ - if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ -/* Set a, lda */ \ - Map > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ - MatrixLhs a_tmp; \ -\ - if ((conjA!=0) || (SetDiag==0)) { \ - if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \ - if (IsZeroDiag) \ - a_tmp.diagonal().setZero(); \ - else if (IsUnitDiag) \ - a_tmp.diagonal().setOnes();\ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else { \ - a = _lhs; \ - lda = lhsStride; \ - } \ - /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ -/* call ?trmm*/ \ - MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ -\ -/* Add op(a_triangular)*b into res*/ \ - Map > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ - res_tmp=res_tmp+b_tmp; \ - } \ -}; - -EIGEN_MKL_TRMM_L(double, double, d, d) -EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_TRMM_L(float, float, f, s) -EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) - -// implements col-major += alpha * op(general) * op(triangular) -#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_triangular_matrix_matrix_trmm \ -{ \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - LowUp = IsLower ? Lower : Upper, \ - conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 
1 : 0 \ - }; \ -\ - static void run( \ - Index _rows, Index _cols, Index _depth, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha, level3_blocking& blocking) \ - { \ - Index diagSize = (std::min)(_cols,_depth); \ - Index rows = _rows; \ - Index depth = IsLower ? _depth : diagSize; \ - Index cols = IsLower ? diagSize : _cols; \ -\ - typedef Matrix MatrixLhs; \ - typedef Matrix MatrixRhs; \ -\ -/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ - if (cols != depth) { \ -\ - int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ -\ - if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ - /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ - product_triangular_matrix_matrix::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ - /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ - } else { \ - /* Make sense to call GEMM */ \ - Map > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ - MatrixRhs aa_tmp=rhsMap.template triangularView(); \ - MKL_INT aStride = aa_tmp.outerStride(); \ - gemm_blocking_space gemm_blocking(_rows,_cols,_depth); \ - general_matrix_matrix_product::run( \ - rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \ -\ - /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ - } \ - return; \ - } \ - char side = 'R', transa, uplo, diag = 'N'; \ - EIGTYPE *b; \ - const EIGTYPE *a; \ - MKL_INT m, n, lda, ldb; \ - MKLTYPE alpha_; \ -\ -/* Set alpha_*/ \ - assign_scalar_eig2mkl(alpha_, alpha); \ -\ -/* Set m, n */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)diagSize; \ -\ -/* Set trans */ \ - transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 
'C' : 'T') : 'N'; \ -\ -/* Set b, ldb */ \ - Map > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ - MatrixX##EIGPREFIX b_tmp; \ -\ - if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ -\ -/* Set uplo */ \ - uplo = IsLower ? 'L' : 'U'; \ - if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ -/* Set a, lda */ \ - Map > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \ - MatrixRhs a_tmp; \ -\ - if ((conjA!=0) || (SetDiag==0)) { \ - if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \ - if (IsZeroDiag) \ - a_tmp.diagonal().setZero(); \ - else if (IsUnitDiag) \ - a_tmp.diagonal().setOnes();\ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else { \ - a = _rhs; \ - lda = rhsStride; \ - } \ - /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ -/* call ?trmm*/ \ - MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ -\ -/* Add op(a_triangular)*b into res*/ \ - Map > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ - res_tmp=res_tmp+b_tmp; \ - } \ -}; - -EIGEN_MKL_TRMM_R(double, double, d, d) -EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_TRMM_R(float, float, f, s) -EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixVector_MKL.h deleted file mode 100644 index 09f110da7..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/products/TriangularMatrixVector_MKL.h +++ /dev/null @@ -1,247 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. 
- - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Triangular matrix-vector product functionality based on ?TRMV. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H -#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H - -namespace Eigen { - -namespace internal { - -/********************************************************************** -* This file implements triangular matrix-vector multiplication using BLAS -**********************************************************************/ - -// trmv/hemv specialization - -template -struct triangular_matrix_vector_product_trmv : - triangular_matrix_vector_product {}; - -#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \ -template \ -struct triangular_matrix_vector_product { \ - static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ - triangular_matrix_vector_product_trmv::run( \ - _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ - } \ -}; \ -template \ -struct triangular_matrix_vector_product { \ - static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ - triangular_matrix_vector_product_trmv::run( \ - _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ - } \ -}; - -EIGEN_MKL_TRMV_SPECIALIZE(double) -EIGEN_MKL_TRMV_SPECIALIZE(float) -EIGEN_MKL_TRMV_SPECIALIZE(dcomplex) -EIGEN_MKL_TRMV_SPECIALIZE(scomplex) - -// implements col-major: res += alpha * op(triangular) * vector -#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct triangular_matrix_vector_product_trmv { \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - LowUp = IsLower ? 
Lower : Upper \ - }; \ - static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ - { \ - if (ConjLhs || IsZeroDiag) { \ - triangular_matrix_vector_product::run( \ - _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ - return; \ - }\ - Index size = (std::min)(_rows,_cols); \ - Index rows = IsLower ? _rows : size; \ - Index cols = IsLower ? size : _cols; \ -\ - typedef VectorX##EIGPREFIX VectorRhs; \ - EIGTYPE *x, *y;\ -\ -/* Set x*/ \ - Map > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ - VectorRhs x_tmp; \ - if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ - x = x_tmp.data(); \ -\ -/* Square part handling */\ -\ - char trans, uplo, diag; \ - MKL_INT m, n, lda, incx, incy; \ - EIGTYPE const *a; \ - MKLTYPE alpha_, beta_; \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ -\ -/* Set m, n */ \ - n = (MKL_INT)size; \ - lda = lhsStride; \ - incx = 1; \ - incy = resIncr; \ -\ -/* Set uplo, trans and diag*/ \ - trans = 'N'; \ - uplo = IsLower ? 'L' : 'U'; \ - diag = IsUnitDiag ? 'U' : 'N'; \ -\ -/* call ?TRMV*/ \ - MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ -\ -/* Add op(a_tr)rhs into res*/ \ - MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ -/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ - if (size<(std::max)(rows,cols)) { \ - typedef Matrix MatrixLhs; \ - if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ - x = x_tmp.data(); \ - if (size \ -struct triangular_matrix_vector_product_trmv { \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - LowUp = IsLower ? 
Lower : Upper \ - }; \ - static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ - { \ - if (IsZeroDiag) { \ - triangular_matrix_vector_product::run( \ - _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ - return; \ - }\ - Index size = (std::min)(_rows,_cols); \ - Index rows = IsLower ? _rows : size; \ - Index cols = IsLower ? size : _cols; \ -\ - typedef VectorX##EIGPREFIX VectorRhs; \ - EIGTYPE *x, *y;\ -\ -/* Set x*/ \ - Map > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ - VectorRhs x_tmp; \ - if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ - x = x_tmp.data(); \ -\ -/* Square part handling */\ -\ - char trans, uplo, diag; \ - MKL_INT m, n, lda, incx, incy; \ - EIGTYPE const *a; \ - MKLTYPE alpha_, beta_; \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ -\ -/* Set m, n */ \ - n = (MKL_INT)size; \ - lda = lhsStride; \ - incx = 1; \ - incy = resIncr; \ -\ -/* Set uplo, trans and diag*/ \ - trans = ConjLhs ? 'C' : 'T'; \ - uplo = IsLower ? 'U' : 'L'; \ - diag = IsUnitDiag ? 'U' : 'N'; \ -\ -/* call ?TRMV*/ \ - MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ -\ -/* Add op(a_tr)rhs into res*/ \ - MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ -/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ - if (size<(std::max)(rows,cols)) { \ - typedef Matrix MatrixLhs; \ - if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ - x = x_tmp.data(); \ - if (size \ -struct triangular_solve_matrix \ -{ \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 
1 : 0 \ - }; \ - static void run( \ - Index size, Index otherSize, \ - const EIGTYPE* _tri, Index triStride, \ - EIGTYPE* _other, Index otherStride, level3_blocking& /*blocking*/) \ - { \ - MKL_INT m = size, n = otherSize, lda, ldb; \ - char side = 'L', uplo, diag='N', transa; \ - /* Set alpha_ */ \ - MKLTYPE alpha; \ - EIGTYPE myone(1); \ - assign_scalar_eig2mkl(alpha, myone); \ - ldb = otherStride;\ -\ - const EIGTYPE *a; \ -/* Set trans */ \ - transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ -/* Set uplo */ \ - uplo = IsLower ? 'L' : 'U'; \ - if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ -/* Set a, lda */ \ - typedef Matrix MatrixTri; \ - Map > tri(_tri,size,size,OuterStride<>(triStride)); \ - MatrixTri a_tmp; \ -\ - if (conjA) { \ - a_tmp = tri.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else { \ - a = _tri; \ - lda = triStride; \ - } \ - if (IsUnitDiag) diag='U'; \ -/* call ?trsm*/ \ - MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ - } \ -}; - -EIGEN_MKL_TRSM_L(double, double, d) -EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z) -EIGEN_MKL_TRSM_L(float, float, s) -EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c) - - -// implements RightSide general * op(triangular)^-1 -#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template \ -struct triangular_solve_matrix \ -{ \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 
1 : 0 \ - }; \ - static void run( \ - Index size, Index otherSize, \ - const EIGTYPE* _tri, Index triStride, \ - EIGTYPE* _other, Index otherStride, level3_blocking& /*blocking*/) \ - { \ - MKL_INT m = otherSize, n = size, lda, ldb; \ - char side = 'R', uplo, diag='N', transa; \ - /* Set alpha_ */ \ - MKLTYPE alpha; \ - EIGTYPE myone(1); \ - assign_scalar_eig2mkl(alpha, myone); \ - ldb = otherStride;\ -\ - const EIGTYPE *a; \ -/* Set trans */ \ - transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ -/* Set uplo */ \ - uplo = IsLower ? 'L' : 'U'; \ - if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ -/* Set a, lda */ \ - typedef Matrix MatrixTri; \ - Map > tri(_tri,size,size,OuterStride<>(triStride)); \ - MatrixTri a_tmp; \ -\ - if (conjA) { \ - a_tmp = tri.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else { \ - a = _tri; \ - lda = triStride; \ - } \ - if (IsUnitDiag) diag='U'; \ -/* call ?trsm*/ \ - MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ - /*std::cout << "TRMS_L specialization!\n";*/ \ - } \ -}; - -EIGEN_MKL_TRSM_R(double, double, d) -EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z) -EIGEN_MKL_TRSM_R(float, float, s) -EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c) - - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/util/BlasUtil.h b/uppsrc/plugin/Eigen/Eigen/src/Core/util/BlasUtil.h index b1791fb3a..6e6ee119b 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/util/BlasUtil.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/util/BlasUtil.h @@ -222,11 +222,6 @@ class blas_data_mapper { return ploadt(&operator()(i, j)); } - template - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { - return ploadt(&operator()(i, j)); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { 
return ploadt(&operator()(i, j)); } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/util/DisableStupidWarnings.h b/uppsrc/plugin/Eigen/Eigen/src/Core/util/DisableStupidWarnings.h index 4431f2fc4..7559e129c 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/util/DisableStupidWarnings.h @@ -4,6 +4,7 @@ #ifdef _MSC_VER // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable + // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data @@ -19,7 +20,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -41,9 +42,6 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" - #if __clang_major__ >= 3 && __clang_minor__ >= 5 - #pragma clang diagnostic ignored "-Wabsolute-value" - #endif #elif defined __GNUC__ && __GNUC__>=6 diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/util/Macros.h b/uppsrc/plugin/Eigen/Eigen/src/Core/util/Macros.h index 95960b448..38d6ddb9a 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/util/Macros.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 90 +#define EIGEN_MINOR_VERSION 4 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -80,8 
+80,8 @@ // 2015 14 1900 // "15" 15 1900 -/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC -#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC) +/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl +#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG) #define EIGEN_COMP_MSVC_STRICT _MSC_VER #else #define EIGEN_COMP_MSVC_STRICT 0 @@ -356,7 +356,7 @@ #define EIGEN_MAX_CPP_VER 99 #endif -#if EIGEN_MAX_CPP_VER>=11 && defined(__cplusplus) && (__cplusplus >= 201103L) +#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900) #define EIGEN_HAS_CXX11 1 #else #define EIGEN_HAS_CXX11 0 @@ -380,8 +380,7 @@ #if EIGEN_MAX_CPP_VER>=11 && \ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ - || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ - || (EIGEN_COMP_MSVC >= 1900) || defined(__SYCL_DEVICE_ONLY__)) + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))) #define EIGEN_HAS_C99_MATH 1 #else #define EIGEN_HAS_C99_MATH 0 @@ -400,12 +399,10 @@ // Does the compiler support variadic templates? 
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 -#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(__SYCL_DEVICE_ONLY__) -#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #else #define EIGEN_HAS_VARIADIC_TEMPLATES 0 #endif @@ -414,14 +411,13 @@ // Does the compiler fully support const expressions? (as in c++14) #ifndef EIGEN_HAS_CONSTEXPR -#if defined(__CUDACC__) +#ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ - (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \ - (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L))) + (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))) #define EIGEN_HAS_CONSTEXPR 1 #endif @@ -628,14 +624,6 @@ namespace Eigen { #endif -#if EIGEN_COMP_MSVC - // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. - // This workaround is ugly, but it does the job. 
-# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond -#else -# define EIGEN_CONST_CONDITIONAL(cond) cond -#endif - //------------------------------------------------------------------------------------------ // Static and dynamic alignment control // @@ -824,7 +812,7 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || __CUDACC_VER__) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/util/Memory.h b/uppsrc/plugin/Eigen/Eigen/src/Core/util/Memory.h index 67053db62..c634d7ea0 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/util/Memory.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/util/Memory.h @@ -150,7 +150,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() /** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements. * On allocation error, the returned pointer is null, and std::bad_alloc is thrown. */ -EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size) +EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) { check_that_malloc_is_allowed(); @@ -185,7 +185,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) * \brief Reallocates an aligned block of memory. 
* \throws std::bad_alloc on allocation failure */ -inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) +inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) { EIGEN_UNUSED_VARIABLE(old_size); @@ -209,12 +209,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) /** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned. * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown. */ -template EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size) +template EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) { return aligned_malloc(size); } -template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size) +template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) { check_that_malloc_is_allowed(); @@ -235,12 +235,12 @@ template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *p std::free(ptr); } -template inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size) +template inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) { return aligned_realloc(ptr, new_size, old_size); } -template<> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t) +template<> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t) { return std::realloc(ptr, new_size); } @@ -252,7 +252,7 @@ template<> inline void* conditional_aligned_realloc(void* ptr, size_t new /** \internal Destructs the elements of an array. * The \a size parameters tells on how many objects to call the destructor of T. */ -template EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size) { // always destruct an array starting from the end. 
if(ptr) @@ -262,9 +262,9 @@ template EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T /** \internal Constructs the elements of an array. * The \a size parameter tells on how many objects to call the constructor of T. */ -template EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size) { - size_t i; + std::size_t i; EIGEN_TRY { for (i = 0; i < size; ++i) ::new (ptr + i) T; @@ -283,9 +283,9 @@ template EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T * *****************************************************************************/ template -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) { - if(size > size_t(-1) / sizeof(T)) + if(size > std::size_t(-1) / sizeof(T)) throw_std_bad_alloc(); } @@ -293,7 +293,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size) * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown. * The default constructor of T is called. */ -template EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size) +template EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) { check_size_for_overflow(size); T *result = reinterpret_cast(aligned_malloc(sizeof(T)*size)); @@ -309,7 +309,7 @@ template EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size) return result; } -template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size) +template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) { check_size_for_overflow(size); T *result = reinterpret_cast(conditional_aligned_malloc(sizeof(T)*size)); @@ -328,7 +328,7 @@ template EIGEN_DEVICE_FUNC inline T* conditional_aligned /** \internal Deletes objects constructed with aligned_new * The \a size parameters tells on how many objects to call the destructor of T. 
*/ -template EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size) { destruct_elements_of_array(ptr, size); aligned_free(ptr); @@ -337,13 +337,13 @@ template EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t /** \internal Deletes objects constructed with conditional_aligned_new * The \a size parameters tells on how many objects to call the destructor of T. */ -template EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size) { destruct_elements_of_array(ptr, size); conditional_aligned_free(ptr); } -template EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size) +template EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size) { check_size_for_overflow(new_size); check_size_for_overflow(old_size); @@ -366,7 +366,7 @@ template EIGEN_DEVICE_FUNC inline T* conditional_aligned } -template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(size_t size) +template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) { if(size==0) return 0; // short-cut. 
Also fixes Bug 884 @@ -387,7 +387,7 @@ template EIGEN_DEVICE_FUNC inline T* conditional_aligned return result; } -template inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size) +template inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) { check_size_for_overflow(new_size); check_size_for_overflow(old_size); @@ -409,7 +409,7 @@ template inline T* conditional_aligned_realloc_new_auto( return result; } -template EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size) { if(NumTraits::RequireInitialization) destruct_elements_of_array(ptr, size); @@ -561,7 +561,7 @@ template class aligned_stack_memory_handler : noncopyable * In this case, the buffer elements will also be destructed when this handler will be destructed. * Finally, if \a dealloc is true, then the pointer \a ptr is freed. **/ - aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc) + aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc) : m_ptr(ptr), m_size(size), m_deallocate(dealloc) { if(NumTraits::RequireInitialization && m_ptr) @@ -576,7 +576,7 @@ template class aligned_stack_memory_handler : noncopyable } protected: T* m_ptr; - size_t m_size; + std::size_t m_size; bool m_deallocate; }; @@ -655,15 +655,15 @@ template void swap(scoped_array &a,scoped_array &b) #if EIGEN_MAX_ALIGN_BYTES!=0 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void* operator new(size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ + void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc(size); } \ EIGEN_CATCH (...) 
{ return 0; } \ } #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ - void *operator new(size_t size) { \ + void *operator new(std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc(size); \ } \ - void *operator new[](size_t size) { \ + void *operator new[](std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc(size); \ } \ void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ @@ -673,8 +673,8 @@ template void swap(scoped_array &a,scoped_array &b) /* in-place new and delete. since (at least afaik) there is no actual */ \ /* memory allocated we can safely let the default implementation handle */ \ /* this particular case. */ \ - static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \ - static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \ + static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \ + static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \ void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ /* nothrow-new (returns zero instead of std::bad_alloc) */ \ @@ -713,7 +713,7 @@ template class aligned_allocator : public std::allocator { public: - typedef size_t size_type; + typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; typedef T* pointer; typedef const T* const_pointer; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Core/util/XprHelper.h b/uppsrc/plugin/Eigen/Eigen/src/Core/util/XprHelper.h index efd179b35..ba5bd186d 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Core/util/XprHelper.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Core/util/XprHelper.h @@ -532,6 +532,15 @@ template struct cwise_promote_s template struct cwise_promote_storage_type { 
typedef Sparse ret; }; template struct cwise_promote_storage_type { typedef Sparse ret; }; +template struct cwise_promote_storage_order { + enum { value = LhsOrder }; +}; + +template struct cwise_promote_storage_order { enum { value = RhsOrder }; }; +template struct cwise_promote_storage_order { enum { value = LhsOrder }; }; +template struct cwise_promote_storage_order { enum { value = Order }; }; + + /** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B. * The template parameter ProductTag permits to specialize the resulting storage kind wrt to * some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct. @@ -629,7 +638,7 @@ struct plain_constant_type template struct is_lvalue { - enum { value = !bool(is_const::value) && + enum { value = (!bool(is_const::value)) && bool(traits::Flags & LvalueBit) }; }; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Block.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Block.h deleted file mode 100644 index 604456f40..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Block.h +++ /dev/null @@ -1,126 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_BLOCK2_H -#define EIGEN_BLOCK2_H - -namespace Eigen { - -/** \returns a dynamic-size expression of a corner of *this. - * - * \param type the type of corner. Can be \a Eigen::TopLeft, \a Eigen::TopRight, - * \a Eigen::BottomLeft, \a Eigen::BottomRight. 
- * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_corner_enum_int_int.cpp - * Output: \verbinclude MatrixBase_corner_enum_int_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size matrix, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. - * - * \sa class Block, block(Index,Index,Index,Index) - */ -template -inline Block DenseBase - ::corner(CornerType type, Index cRows, Index cCols) -{ - switch(type) - { - default: - eigen_assert(false && "Bad corner type."); - case TopLeft: - return Block(derived(), 0, 0, cRows, cCols); - case TopRight: - return Block(derived(), 0, cols() - cCols, cRows, cCols); - case BottomLeft: - return Block(derived(), rows() - cRows, 0, cRows, cCols); - case BottomRight: - return Block(derived(), rows() - cRows, cols() - cCols, cRows, cCols); - } -} - -/** This is the const version of corner(CornerType, Index, Index).*/ -template -inline const Block -DenseBase::corner(CornerType type, Index cRows, Index cCols) const -{ - switch(type) - { - default: - eigen_assert(false && "Bad corner type."); - case TopLeft: - return Block(derived(), 0, 0, cRows, cCols); - case TopRight: - return Block(derived(), 0, cols() - cCols, cRows, cCols); - case BottomLeft: - return Block(derived(), rows() - cRows, 0, cRows, cCols); - case BottomRight: - return Block(derived(), rows() - cRows, cols() - cCols, cRows, cCols); - } -} - -/** \returns a fixed-size expression of a corner of *this. - * - * \param type the type of corner. Can be \a Eigen::TopLeft, \a Eigen::TopRight, - * \a Eigen::BottomLeft, \a Eigen::BottomRight. - * - * The template parameters CRows and CCols arethe number of rows and columns in the corner. 
- * - * Example: \include MatrixBase_template_int_int_corner_enum.cpp - * Output: \verbinclude MatrixBase_template_int_int_corner_enum.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ -template -template -inline Block -DenseBase::corner(CornerType type) -{ - switch(type) - { - default: - eigen_assert(false && "Bad corner type."); - case TopLeft: - return Block(derived(), 0, 0); - case TopRight: - return Block(derived(), 0, cols() - CCols); - case BottomLeft: - return Block(derived(), rows() - CRows, 0); - case BottomRight: - return Block(derived(), rows() - CRows, cols() - CCols); - } -} - -/** This is the const version of corner(CornerType).*/ -template -template -inline const Block -DenseBase::corner(CornerType type) const -{ - switch(type) - { - default: - eigen_assert(false && "Bad corner type."); - case TopLeft: - return Block(derived(), 0, 0); - case TopRight: - return Block(derived(), 0, cols() - CCols); - case BottomLeft: - return Block(derived(), rows() - CRows, 0); - case BottomRight: - return Block(derived(), rows() - CRows, cols() - CCols); - } -} - -} // end namespace Eigen - -#endif // EIGEN_BLOCK2_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Cwise.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Cwise.h deleted file mode 100644 index d95009b6e..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Cwise.h +++ /dev/null @@ -1,192 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CWISE_H -#define EIGEN_CWISE_H - -namespace Eigen { - -/** \internal - * convenient macro to defined the return type of a cwise binary operation */ -#define EIGEN_CWISE_BINOP_RETURN_TYPE(OP) \ - CwiseBinaryOp::Scalar>, ExpressionType, OtherDerived> - -/** \internal - * convenient macro to defined the return type of a cwise unary operation */ -#define EIGEN_CWISE_UNOP_RETURN_TYPE(OP) \ - CwiseUnaryOp::Scalar>, ExpressionType> - -/** \internal - * convenient macro to defined the return type of a cwise comparison to a scalar */ -#define EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(OP) \ - CwiseBinaryOp::Scalar>, ExpressionType, \ - typename ExpressionType::ConstantReturnType > - -/** \class Cwise - * - * \brief Pseudo expression providing additional coefficient-wise operations - * - * \param ExpressionType the type of the object on which to do coefficient-wise operations - * - * This class represents an expression with additional coefficient-wise features. - * It is the return type of MatrixBase::cwise() - * and most of the time this is the only way it is used. - * - * Example: \include MatrixBase_cwise_const.cpp - * Output: \verbinclude MatrixBase_cwise_const.out - * - * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_CWISE_PLUGIN. 
- * - * \sa MatrixBase::cwise() const, MatrixBase::cwise() - */ -template class Cwise -{ - public: - - typedef typename internal::traits::Scalar Scalar; - typedef typename internal::conditional::ret, - ExpressionType, const ExpressionType&>::type ExpressionTypeNested; - typedef CwiseUnaryOp, ExpressionType> ScalarAddReturnType; - - inline Cwise(const ExpressionType& matrix) : m_matrix(matrix) {} - - /** \internal */ - inline const ExpressionType& _expression() const { return m_matrix; } - - template - const EIGEN_CWISE_PRODUCT_RETURN_TYPE(ExpressionType,OtherDerived) - operator*(const MatrixBase &other) const; - - template - const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_quotient_op) - operator/(const MatrixBase &other) const; - - /** \deprecated ArrayBase::min() */ - template - const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_min_op) - (min)(const MatrixBase &other) const - { return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_min_op)(_expression(), other.derived()); } - - /** \deprecated ArrayBase::max() */ - template - const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_max_op) - (max)(const MatrixBase &other) const - { return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_max_op)(_expression(), other.derived()); } - - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs_op) abs() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs2_op) abs2() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_square_op) square() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_cube_op) cube() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_inverse_op) inverse() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_sqrt_op) sqrt() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_exp_op) exp() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_log_op) log() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_cos_op) cos() const; - const 
EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_sin_op) sin() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_pow_op) pow(const Scalar& exponent) const; - - const ScalarAddReturnType - operator+(const Scalar& scalar) const; - - /** \relates Cwise */ - friend const ScalarAddReturnType - operator+(const Scalar& scalar, const Cwise& mat) - { return mat + scalar; } - - ExpressionType& operator+=(const Scalar& scalar); - - const ScalarAddReturnType - operator-(const Scalar& scalar) const; - - ExpressionType& operator-=(const Scalar& scalar); - - template - inline ExpressionType& operator*=(const MatrixBase &other); - - template - inline ExpressionType& operator/=(const MatrixBase &other); - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less) - operator<(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less_equal) - operator<=(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater) - operator>(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater_equal) - operator>=(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::equal_to) - operator==(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::not_equal_to) - operator!=(const MatrixBase& other) const; - - // comparisons to a scalar value - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less) - operator<(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less_equal) - operator<=(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater) - operator>(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater_equal) - operator>=(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::equal_to) - operator==(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::not_equal_to) - operator!=(Scalar s) const; - - // allow to extend Cwise 
outside Eigen - #ifdef EIGEN_CWISE_PLUGIN - #include EIGEN_CWISE_PLUGIN - #endif - - protected: - ExpressionTypeNested m_matrix; -}; - - -/** \returns a Cwise wrapper of *this providing additional coefficient-wise operations - * - * Example: \include MatrixBase_cwise_const.cpp - * Output: \verbinclude MatrixBase_cwise_const.out - * - * \sa class Cwise, cwise() - */ -template -inline const Cwise MatrixBase::cwise() const -{ - return derived(); -} - -/** \returns a Cwise wrapper of *this providing additional coefficient-wise operations - * - * Example: \include MatrixBase_cwise.cpp - * Output: \verbinclude MatrixBase_cwise.out - * - * \sa class Cwise, cwise() const - */ -template -inline Cwise MatrixBase::cwise() -{ - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_CWISE_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/CwiseOperators.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/CwiseOperators.h deleted file mode 100644 index 482f30648..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/CwiseOperators.h +++ /dev/null @@ -1,298 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_ARRAY_CWISE_OPERATORS_H -#define EIGEN_ARRAY_CWISE_OPERATORS_H - -namespace Eigen { - -/*************************************************************************** -* The following functions were defined in Core -***************************************************************************/ - - -/** \deprecated ArrayBase::abs() */ -template -EIGEN_STRONG_INLINE const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs_op) -Cwise::abs() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::abs2() */ -template -EIGEN_STRONG_INLINE const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs2_op) -Cwise::abs2() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::exp() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_exp_op) -Cwise::exp() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::log() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_log_op) -Cwise::log() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::operator*() */ -template -template -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(ExpressionType,OtherDerived) -Cwise::operator*(const MatrixBase &other) const -{ - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(ExpressionType,OtherDerived)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator/() */ -template -template -EIGEN_STRONG_INLINE const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_quotient_op) -Cwise::operator/(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_quotient_op)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator*=() */ -template -template -inline ExpressionType& Cwise::operator*=(const MatrixBase &other) -{ - return m_matrix.const_cast_derived() = *this * other; -} - -/** \deprecated ArrayBase::operator/=() */ -template -template -inline ExpressionType& Cwise::operator/=(const MatrixBase &other) -{ - return m_matrix.const_cast_derived() = 
*this / other; -} - -/*************************************************************************** -* The following functions were defined in Array -***************************************************************************/ - -// -- unary operators -- - -/** \deprecated ArrayBase::sqrt() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_sqrt_op) -Cwise::sqrt() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::cos() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_cos_op) -Cwise::cos() const -{ - return _expression(); -} - - -/** \deprecated ArrayBase::sin() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_sin_op) -Cwise::sin() const -{ - return _expression(); -} - - -/** \deprecated ArrayBase::log() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_pow_op) -Cwise::pow(const Scalar& exponent) const -{ - return EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_pow_op)(_expression(), internal::scalar_pow_op(exponent)); -} - - -/** \deprecated ArrayBase::inverse() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_inverse_op) -Cwise::inverse() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::square() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_square_op) -Cwise::square() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::cube() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_cube_op) -Cwise::cube() const -{ - return _expression(); -} - - -// -- binary operators -- - -/** \deprecated ArrayBase::operator<() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less) -Cwise::operator<(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::less)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::<=() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less_equal) -Cwise::operator<=(const 
MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::less_equal)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator>() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater) -Cwise::operator>(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator>=() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater_equal) -Cwise::operator>=(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater_equal)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator==() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::equal_to) -Cwise::operator==(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::equal_to)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator!=() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::not_equal_to) -Cwise::operator!=(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::not_equal_to)(_expression(), other.derived()); -} - -// comparisons to scalar value - -/** \deprecated ArrayBase::operator<(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less) -Cwise::operator<(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator<=(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less_equal) -Cwise::operator<=(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less_equal)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator>(Scalar) */ -template -inline const 
EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater) -Cwise::operator>(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator>=(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater_equal) -Cwise::operator>=(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater_equal)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator==(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::equal_to) -Cwise::operator==(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::equal_to)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator!=(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::not_equal_to) -Cwise::operator!=(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::not_equal_to)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -// scalar addition - -/** \deprecated ArrayBase::operator+(Scalar) */ -template -inline const typename Cwise::ScalarAddReturnType -Cwise::operator+(const Scalar& scalar) const -{ - return typename Cwise::ScalarAddReturnType(m_matrix, internal::scalar_add_op(scalar)); -} - -/** \deprecated ArrayBase::operator+=(Scalar) */ -template -inline ExpressionType& Cwise::operator+=(const Scalar& scalar) -{ - return m_matrix.const_cast_derived() = *this + scalar; -} - -/** \deprecated ArrayBase::operator-(Scalar) */ -template -inline const typename Cwise::ScalarAddReturnType -Cwise::operator-(const Scalar& scalar) const -{ - return *this + (-scalar); -} - -/** \deprecated 
ArrayBase::operator-=(Scalar) */ -template -inline ExpressionType& Cwise::operator-=(const Scalar& scalar) -{ - return m_matrix.const_cast_derived() = *this - scalar; -} - -} // end namespace Eigen - -#endif // EIGEN_ARRAY_CWISE_OPERATORS_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/AlignedBox.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/AlignedBox.h deleted file mode 100644 index 2e4309dd9..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/AlignedBox.h +++ /dev/null @@ -1,159 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * \nonstableyet - * - * \class AlignedBox - * - * \brief An axis aligned box - * - * \param _Scalar the type of the scalar coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. - * - * This class represents an axis aligned box as a pair of the minimal and maximal corners. - */ -template -class AlignedBox -{ -public: -EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1) - enum { AmbientDimAtCompileTime = _AmbientDim }; - typedef _Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - /** Default constructor initializing a null box. */ - inline AlignedBox() - { if (AmbientDimAtCompileTime!=Dynamic) setNull(); } - - /** Constructs a null box with \a _dim the dimension of the ambient space. 
*/ - inline explicit AlignedBox(int _dim) : m_min(_dim), m_max(_dim) - { setNull(); } - - /** Constructs a box with extremities \a _min and \a _max. */ - inline AlignedBox(const VectorType& _min, const VectorType& _max) : m_min(_min), m_max(_max) {} - - /** Constructs a box containing a single point \a p. */ - inline explicit AlignedBox(const VectorType& p) : m_min(p), m_max(p) {} - - ~AlignedBox() {} - - /** \returns the dimension in which the box holds */ - inline int dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size()-1 : AmbientDimAtCompileTime; } - - /** \returns true if the box is null, i.e, empty. */ - inline bool isNull() const { return (m_min.cwise() > m_max).any(); } - - /** Makes \c *this a null/empty box. */ - inline void setNull() - { - m_min.setConstant( (std::numeric_limits::max)()); - m_max.setConstant(-(std::numeric_limits::max)()); - } - - /** \returns the minimal corner */ - inline const VectorType& (min)() const { return m_min; } - /** \returns a non const reference to the minimal corner */ - inline VectorType& (min)() { return m_min; } - /** \returns the maximal corner */ - inline const VectorType& (max)() const { return m_max; } - /** \returns a non const reference to the maximal corner */ - inline VectorType& (max)() { return m_max; } - - /** \returns true if the point \a p is inside the box \c *this. */ - inline bool contains(const VectorType& p) const - { return (m_min.cwise()<=p).all() && (p.cwise()<=m_max).all(); } - - /** \returns true if the box \a b is entirely inside the box \c *this. */ - inline bool contains(const AlignedBox& b) const - { return (m_min.cwise()<=(b.min)()).all() && ((b.max)().cwise()<=m_max).all(); } - - /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. 
*/ - inline AlignedBox& extend(const VectorType& p) - { m_min = (m_min.cwise().min)(p); m_max = (m_max.cwise().max)(p); return *this; } - - /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. */ - inline AlignedBox& extend(const AlignedBox& b) - { m_min = (m_min.cwise().min)(b.m_min); m_max = (m_max.cwise().max)(b.m_max); return *this; } - - /** Clamps \c *this by the box \a b and returns a reference to \c *this. */ - inline AlignedBox& clamp(const AlignedBox& b) - { m_min = (m_min.cwise().max)(b.m_min); m_max = (m_max.cwise().min)(b.m_max); return *this; } - - /** Translate \c *this by the vector \a t and returns a reference to \c *this. */ - inline AlignedBox& translate(const VectorType& t) - { m_min += t; m_max += t; return *this; } - - /** \returns the squared distance between the point \a p and the box \c *this, - * and zero if \a p is inside the box. - * \sa exteriorDistance() - */ - inline Scalar squaredExteriorDistance(const VectorType& p) const; - - /** \returns the distance between the point \a p and the box \c *this, - * and zero if \a p is inside the box. - * \sa squaredExteriorDistance() - */ - inline Scalar exteriorDistance(const VectorType& p) const - { return ei_sqrt(squaredExteriorDistance(p)); } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { - return typename internal::cast_return_type >::type(*this); - } - - /** Copy constructor with scalar type conversion */ - template - inline explicit AlignedBox(const AlignedBox& other) - { - m_min = (other.min)().template cast(); - m_max = (other.max)().template cast(); - } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const AlignedBox& other, typename NumTraits::Real prec = precision()) const - { return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); } - -protected: - - VectorType m_min, m_max; -}; - -template -inline Scalar AlignedBox::squaredExteriorDistance(const VectorType& p) const -{ - Scalar dist2(0); - Scalar aux; - for (int k=0; k - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - -#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS -#include "RotationBase.h" -#include "Rotation2D.h" -#include "Quaternion.h" -#include "AngleAxis.h" -#include "Transform.h" -#include "Translation.h" -#include "Scaling.h" -#include "AlignedBox.h" -#include "Hyperplane.h" -#include "ParametrizedLine.h" -#endif - - -#define RotationBase eigen2_RotationBase -#define Rotation2D eigen2_Rotation2D -#define Rotation2Df eigen2_Rotation2Df -#define Rotation2Dd eigen2_Rotation2Dd - -#define Quaternion eigen2_Quaternion -#define Quaternionf eigen2_Quaternionf -#define Quaterniond eigen2_Quaterniond - -#define AngleAxis eigen2_AngleAxis -#define AngleAxisf eigen2_AngleAxisf -#define AngleAxisd eigen2_AngleAxisd - -#define Transform eigen2_Transform -#define Transform2f eigen2_Transform2f -#define Transform2d eigen2_Transform2d -#define Transform3f eigen2_Transform3f -#define Transform3d eigen2_Transform3d - -#define Translation eigen2_Translation -#define Translation2f eigen2_Translation2f -#define Translation2d eigen2_Translation2d -#define Translation3f eigen2_Translation3f -#define Translation3d eigen2_Translation3d - -#define Scaling eigen2_Scaling -#define Scaling2f eigen2_Scaling2f -#define Scaling2d eigen2_Scaling2d -#define Scaling3f eigen2_Scaling3f -#define Scaling3d eigen2_Scaling3d - -#define AlignedBox eigen2_AlignedBox - -#define Hyperplane eigen2_Hyperplane -#define ParametrizedLine eigen2_ParametrizedLine - -#define ei_toRotationMatrix eigen2_ei_toRotationMatrix -#define 
ei_quaternion_assign_impl eigen2_ei_quaternion_assign_impl -#define ei_transform_product_impl eigen2_ei_transform_product_impl - -#include "RotationBase.h" -#include "Rotation2D.h" -#include "Quaternion.h" -#include "AngleAxis.h" -#include "Transform.h" -#include "Translation.h" -#include "Scaling.h" -#include "AlignedBox.h" -#include "Hyperplane.h" -#include "ParametrizedLine.h" - -#undef ei_toRotationMatrix -#undef ei_quaternion_assign_impl -#undef ei_transform_product_impl - -#undef RotationBase -#undef Rotation2D -#undef Rotation2Df -#undef Rotation2Dd - -#undef Quaternion -#undef Quaternionf -#undef Quaterniond - -#undef AngleAxis -#undef AngleAxisf -#undef AngleAxisd - -#undef Transform -#undef Transform2f -#undef Transform2d -#undef Transform3f -#undef Transform3d - -#undef Translation -#undef Translation2f -#undef Translation2d -#undef Translation3f -#undef Translation3d - -#undef Scaling -#undef Scaling2f -#undef Scaling2d -#undef Scaling3f -#undef Scaling3d - -#undef AlignedBox - -#undef Hyperplane -#undef ParametrizedLine - -#endif // EIGEN2_GEOMETRY_MODULE_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/AngleAxis.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/AngleAxis.h deleted file mode 100644 index af598a403..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/AngleAxis.h +++ /dev/null @@ -1,214 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class AngleAxis - * - * \brief Represents a 3D rotation as a rotation angle around an arbitrary 3D axis - * - * \param _Scalar the scalar type, i.e., the type of the coefficients. - * - * The following two typedefs are provided for convenience: - * \li \c AngleAxisf for \c float - * \li \c AngleAxisd for \c double - * - * \addexample AngleAxisForEuler \label How to define a rotation from Euler-angles - * - * Combined with MatrixBase::Unit{X,Y,Z}, AngleAxis can be used to easily - * mimic Euler-angles. Here is an example: - * \include AngleAxis_mimic_euler.cpp - * Output: \verbinclude AngleAxis_mimic_euler.out - * - * \note This class is not aimed to be used to store a rotation transformation, - * but rather to make easier the creation of other rotation (Quaternion, rotation Matrix) - * and transformation objects. - * - * \sa class Quaternion, class Transform, MatrixBase::UnitX() - */ - -template struct ei_traits > -{ - typedef _Scalar Scalar; -}; - -template -class AngleAxis : public RotationBase,3> -{ - typedef RotationBase,3> Base; - -public: - - using Base::operator*; - - enum { Dim = 3 }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - typedef Matrix Matrix3; - typedef Matrix Vector3; - typedef Quaternion QuaternionType; - -protected: - - Vector3 m_axis; - Scalar m_angle; - -public: - - /** Default constructor without initialization. */ - AngleAxis() {} - /** Constructs and initialize the angle-axis rotation from an \a angle in radian - * and an \a axis which must be normalized. */ - template - inline AngleAxis(Scalar angle, const MatrixBase& axis) : m_axis(axis), m_angle(angle) {} - /** Constructs and initialize the angle-axis rotation from a quaternion \a q. 
*/ - inline AngleAxis(const QuaternionType& q) { *this = q; } - /** Constructs and initialize the angle-axis rotation from a 3x3 rotation matrix. */ - template - inline explicit AngleAxis(const MatrixBase& m) { *this = m; } - - Scalar angle() const { return m_angle; } - Scalar& angle() { return m_angle; } - - const Vector3& axis() const { return m_axis; } - Vector3& axis() { return m_axis; } - - /** Concatenates two rotations */ - inline QuaternionType operator* (const AngleAxis& other) const - { return QuaternionType(*this) * QuaternionType(other); } - - /** Concatenates two rotations */ - inline QuaternionType operator* (const QuaternionType& other) const - { return QuaternionType(*this) * other; } - - /** Concatenates two rotations */ - friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b) - { return a * QuaternionType(b); } - - /** Concatenates two rotations */ - inline Matrix3 operator* (const Matrix3& other) const - { return toRotationMatrix() * other; } - - /** Concatenates two rotations */ - inline friend Matrix3 operator* (const Matrix3& a, const AngleAxis& b) - { return a * b.toRotationMatrix(); } - - /** Applies rotation to vector */ - inline Vector3 operator* (const Vector3& other) const - { return toRotationMatrix() * other; } - - /** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */ - AngleAxis inverse() const - { return AngleAxis(-m_angle, m_axis); } - - AngleAxis& operator=(const QuaternionType& q); - template - AngleAxis& operator=(const MatrixBase& m); - - template - AngleAxis& fromRotationMatrix(const MatrixBase& m); - Matrix3 toRotationMatrix(void) const; - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. 
- */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit AngleAxis(const AngleAxis& other) - { - m_axis = other.axis().template cast(); - m_angle = Scalar(other.angle()); - } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const AngleAxis& other, typename NumTraits::Real prec = precision()) const - { return m_axis.isApprox(other.m_axis, prec) && ei_isApprox(m_angle,other.m_angle, prec); } -}; - -/** \ingroup Geometry_Module - * single precision angle-axis type */ -typedef AngleAxis AngleAxisf; -/** \ingroup Geometry_Module - * double precision angle-axis type */ -typedef AngleAxis AngleAxisd; - -/** Set \c *this from a quaternion. - * The axis is normalized. - */ -template -AngleAxis& AngleAxis::operator=(const QuaternionType& q) -{ - Scalar n2 = q.vec().squaredNorm(); - if (n2 < precision()*precision()) - { - m_angle = 0; - m_axis << 1, 0, 0; - } - else - { - m_angle = 2*std::acos(q.w()); - m_axis = q.vec() / ei_sqrt(n2); - } - return *this; -} - -/** Set \c *this from a 3x3 rotation matrix \a mat. - */ -template -template -AngleAxis& AngleAxis::operator=(const MatrixBase& mat) -{ - // Since a direct conversion would not be really faster, - // let's use the robust Quaternion implementation: - return *this = QuaternionType(mat); -} - -/** Constructs and \returns an equivalent 3x3 rotation matrix. 
- */ -template -typename AngleAxis::Matrix3 -AngleAxis::toRotationMatrix(void) const -{ - Matrix3 res; - Vector3 sin_axis = ei_sin(m_angle) * m_axis; - Scalar c = ei_cos(m_angle); - Vector3 cos1_axis = (Scalar(1)-c) * m_axis; - - Scalar tmp; - tmp = cos1_axis.x() * m_axis.y(); - res.coeffRef(0,1) = tmp - sin_axis.z(); - res.coeffRef(1,0) = tmp + sin_axis.z(); - - tmp = cos1_axis.x() * m_axis.z(); - res.coeffRef(0,2) = tmp + sin_axis.y(); - res.coeffRef(2,0) = tmp - sin_axis.y(); - - tmp = cos1_axis.y() * m_axis.z(); - res.coeffRef(1,2) = tmp - sin_axis.x(); - res.coeffRef(2,1) = tmp + sin_axis.x(); - - res.diagonal() = (cos1_axis.cwise() * m_axis).cwise() + c; - - return res; -} - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Hyperplane.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Hyperplane.h deleted file mode 100644 index b95bf00ec..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Hyperplane.h +++ /dev/null @@ -1,254 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class Hyperplane - * - * \brief A hyperplane - * - * A hyperplane is an affine subspace of dimension n-1 in a space of dimension n. - * For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane. - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. 
- * Notice that the dimension of the hyperplane is _AmbientDim-1. - * - * This class represents an hyperplane as the zero set of the implicit equation - * \f$ n \cdot x + d = 0 \f$ where \f$ n \f$ is a unit normal vector of the plane (linear part) - * and \f$ d \f$ is the distance (offset) to the origin. - */ -template -class Hyperplane -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1) - enum { AmbientDimAtCompileTime = _AmbientDim }; - typedef _Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - typedef Matrix Coefficients; - typedef Block NormalReturnType; - - /** Default constructor without initialization */ - inline Hyperplane() {} - - /** Constructs a dynamic-size hyperplane with \a _dim the dimension - * of the ambient space */ - inline explicit Hyperplane(int _dim) : m_coeffs(_dim+1) {} - - /** Construct a plane from its normal \a n and a point \a e onto the plane. - * \warning the vector normal is assumed to be normalized. - */ - inline Hyperplane(const VectorType& n, const VectorType& e) - : m_coeffs(n.size()+1) - { - normal() = n; - offset() = -e.eigen2_dot(n); - } - - /** Constructs a plane from its normal \a n and distance to the origin \a d - * such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$. - * \warning the vector normal is assumed to be normalized. - */ - inline Hyperplane(const VectorType& n, Scalar d) - : m_coeffs(n.size()+1) - { - normal() = n; - offset() = d; - } - - /** Constructs a hyperplane passing through the two points. If the dimension of the ambient space - * is greater than 2, then there isn't uniqueness, so an arbitrary choice is made. 
- */ - static inline Hyperplane Through(const VectorType& p0, const VectorType& p1) - { - Hyperplane result(p0.size()); - result.normal() = (p1 - p0).unitOrthogonal(); - result.offset() = -result.normal().eigen2_dot(p0); - return result; - } - - /** Constructs a hyperplane passing through the three points. The dimension of the ambient space - * is required to be exactly 3. - */ - static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2) - { - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3) - Hyperplane result(p0.size()); - result.normal() = (p2 - p0).cross(p1 - p0).normalized(); - result.offset() = -result.normal().eigen2_dot(p0); - return result; - } - - /** Constructs a hyperplane passing through the parametrized line \a parametrized. - * If the dimension of the ambient space is greater than 2, then there isn't uniqueness, - * so an arbitrary choice is made. - */ - // FIXME to be consitent with the rest this could be implemented as a static Through function ?? - explicit Hyperplane(const ParametrizedLine& parametrized) - { - normal() = parametrized.direction().unitOrthogonal(); - offset() = -normal().eigen2_dot(parametrized.origin()); - } - - ~Hyperplane() {} - - /** \returns the dimension in which the plane holds */ - inline int dim() const { return int(AmbientDimAtCompileTime)==Dynamic ? m_coeffs.size()-1 : int(AmbientDimAtCompileTime); } - - /** normalizes \c *this */ - void normalize(void) - { - m_coeffs /= normal().norm(); - } - - /** \returns the signed distance between the plane \c *this and a point \a p. - * \sa absDistance() - */ - inline Scalar signedDistance(const VectorType& p) const { return p.eigen2_dot(normal()) + offset(); } - - /** \returns the absolute distance between the plane \c *this and a point \a p. - * \sa signedDistance() - */ - inline Scalar absDistance(const VectorType& p) const { return ei_abs(signedDistance(p)); } - - /** \returns the projection of a point \a p onto the plane \c *this. 
- */ - inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); } - - /** \returns a constant reference to the unit normal vector of the plane, which corresponds - * to the linear part of the implicit equation. - */ - inline const NormalReturnType normal() const { return NormalReturnType(*const_cast(&m_coeffs),0,0,dim(),1); } - - /** \returns a non-constant reference to the unit normal vector of the plane, which corresponds - * to the linear part of the implicit equation. - */ - inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); } - - /** \returns the distance to the origin, which is also the "constant term" of the implicit equation - * \warning the vector normal is assumed to be normalized. - */ - inline const Scalar& offset() const { return m_coeffs.coeff(dim()); } - - /** \returns a non-constant reference to the distance to the origin, which is also the constant part - * of the implicit equation */ - inline Scalar& offset() { return m_coeffs(dim()); } - - /** \returns a constant reference to the coefficients c_i of the plane equation: - * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$ - */ - inline const Coefficients& coeffs() const { return m_coeffs; } - - /** \returns a non-constant reference to the coefficients c_i of the plane equation: - * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$ - */ - inline Coefficients& coeffs() { return m_coeffs; } - - /** \returns the intersection of *this with \a other. - * - * \warning The ambient space must be a plane, i.e. have dimension 2, so that \c *this and \a other are lines. - * - * \note If \a other is approximately parallel to *this, this method will return any point on *this. 
- */ - VectorType intersection(const Hyperplane& other) - { - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2) - Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0); - // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests - // whether the two lines are approximately parallel. - if(ei_isMuchSmallerThan(det, Scalar(1))) - { // special case where the two lines are approximately parallel. Pick any point on the first line. - if(ei_abs(coeffs().coeff(1))>ei_abs(coeffs().coeff(0))) - return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0)); - else - return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0)); - } - else - { // general case - Scalar invdet = Scalar(1) / det; - return VectorType(invdet*(coeffs().coeff(1)*other.coeffs().coeff(2)-other.coeffs().coeff(1)*coeffs().coeff(2)), - invdet*(other.coeffs().coeff(0)*coeffs().coeff(2)-coeffs().coeff(0)*other.coeffs().coeff(2))); - } - } - - /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this. - * - * \param mat the Dim x Dim transformation matrix - * \param traits specifies whether the matrix \a mat represents an Isometry - * or a more generic Affine transformation. The default is Affine. - */ - template - inline Hyperplane& transform(const MatrixBase& mat, TransformTraits traits = Affine) - { - if (traits==Affine) - normal() = mat.inverse().transpose() * normal(); - else if (traits==Isometry) - normal() = mat * normal(); - else - { - ei_assert("invalid traits value in Hyperplane::transform()"); - } - return *this; - } - - /** Applies the transformation \a t to \c *this and returns a reference to \c *this. - * - * \param t the transformation of dimension Dim - * \param traits specifies whether the transformation \a t represents an Isometry - * or a more generic Affine transformation. The default is Affine. 
- * Other kind of transformations are not supported. - */ - inline Hyperplane& transform(const Transform& t, - TransformTraits traits = Affine) - { - transform(t.linear(), traits); - offset() -= t.translation().eigen2_dot(normal()); - return *this; - } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { - return typename internal::cast_return_type >::type(*this); - } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Hyperplane(const Hyperplane& other) - { m_coeffs = other.coeffs().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Hyperplane& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - -protected: - - Coefficients m_coeffs; -}; - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h deleted file mode 100644 index 9b57b7e0b..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h +++ /dev/null @@ -1,141 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class ParametrizedLine - * - * \brief A parametrized line - * - * A parametrized line is defined by an origin point \f$ \mathbf{o} \f$ and a unit - * direction vector \f$ \mathbf{d} \f$ such that the line corresponds to - * the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ l \in \mathbf{R} \f$. - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. - */ -template -class ParametrizedLine -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) - enum { AmbientDimAtCompileTime = _AmbientDim }; - typedef _Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - /** Default constructor without initialization */ - inline ParametrizedLine() {} - - /** Constructs a dynamic-size line with \a _dim the dimension - * of the ambient space */ - inline explicit ParametrizedLine(int _dim) : m_origin(_dim), m_direction(_dim) {} - - /** Initializes a parametrized line of direction \a direction and origin \a origin. - * \warning the vector direction is assumed to be normalized. - */ - ParametrizedLine(const VectorType& origin, const VectorType& direction) - : m_origin(origin), m_direction(direction) {} - - explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim>& hyperplane); - - /** Constructs a parametrized line going from \a p0 to \a p1. 
*/ - static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1) - { return ParametrizedLine(p0, (p1-p0).normalized()); } - - ~ParametrizedLine() {} - - /** \returns the dimension in which the line holds */ - inline int dim() const { return m_direction.size(); } - - const VectorType& origin() const { return m_origin; } - VectorType& origin() { return m_origin; } - - const VectorType& direction() const { return m_direction; } - VectorType& direction() { return m_direction; } - - /** \returns the squared distance of a point \a p to its projection onto the line \c *this. - * \sa distance() - */ - RealScalar squaredDistance(const VectorType& p) const - { - VectorType diff = p-origin(); - return (diff - diff.eigen2_dot(direction())* direction()).squaredNorm(); - } - /** \returns the distance of a point \a p to its projection onto the line \c *this. - * \sa squaredDistance() - */ - RealScalar distance(const VectorType& p) const { return ei_sqrt(squaredDistance(p)); } - - /** \returns the projection of a point \a p onto the line \c *this. */ - VectorType projection(const VectorType& p) const - { return origin() + (p-origin()).eigen2_dot(direction()) * direction(); } - - Scalar intersection(const Hyperplane<_Scalar, _AmbientDim>& hyperplane); - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. 
- */ - template - inline typename internal::cast_return_type >::type cast() const - { - return typename internal::cast_return_type >::type(*this); - } - - /** Copy constructor with scalar type conversion */ - template - inline explicit ParametrizedLine(const ParametrizedLine& other) - { - m_origin = other.origin().template cast(); - m_direction = other.direction().template cast(); - } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const ParametrizedLine& other, typename NumTraits::Real prec = precision()) const - { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); } - -protected: - - VectorType m_origin, m_direction; -}; - -/** Constructs a parametrized line from a 2D hyperplane - * - * \warning the ambient space must have dimension 2 such that the hyperplane actually describes a line - */ -template -inline ParametrizedLine<_Scalar, _AmbientDim>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim>& hyperplane) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2) - direction() = hyperplane.normal().unitOrthogonal(); - origin() = -hyperplane.normal()*hyperplane.offset(); -} - -/** \returns the parameter value of the intersection between \c *this and the given hyperplane - */ -template -inline _Scalar ParametrizedLine<_Scalar, _AmbientDim>::intersection(const Hyperplane<_Scalar, _AmbientDim>& hyperplane) -{ - return -(hyperplane.offset()+origin().eigen2_dot(hyperplane.normal())) - /(direction().eigen2_dot(hyperplane.normal())); -} - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Quaternion.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Quaternion.h deleted file mode 100644 index 4b6390cf1..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Quaternion.h +++ /dev/null @@ -1,495 +0,0 @@ -// This file is part of 
Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -template -struct ei_quaternion_assign_impl; - -/** \geometry_module \ingroup Geometry_Module - * - * \class Quaternion - * - * \brief The quaternion class used to represent 3D orientations and rotations - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * - * This class represents a quaternion \f$ w+xi+yj+zk \f$ that is a convenient representation of - * orientations and rotations of objects in three dimensions. Compared to other representations - * like Euler angles or 3x3 matrices, quatertions offer the following advantages: - * \li \b compact storage (4 scalars) - * \li \b efficient to compose (28 flops), - * \li \b stable spherical interpolation - * - * The following two typedefs are provided for convenience: - * \li \c Quaternionf for \c float - * \li \c Quaterniond for \c double - * - * \sa class AngleAxis, class Transform - */ - -template struct ei_traits > -{ - typedef _Scalar Scalar; -}; - -template -class Quaternion : public RotationBase,3> -{ - typedef RotationBase,3> Base; - -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,4) - - using Base::operator*; - - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - - /** the type of the Coefficients 4-vector */ - typedef Matrix Coefficients; - /** the type of a 3D vector */ - typedef Matrix Vector3; - /** the equivalent rotation matrix type */ - typedef Matrix Matrix3; - /** the equivalent angle-axis type */ - typedef AngleAxis AngleAxisType; - - /** \returns the \c x coefficient */ - inline Scalar 
x() const { return m_coeffs.coeff(0); } - /** \returns the \c y coefficient */ - inline Scalar y() const { return m_coeffs.coeff(1); } - /** \returns the \c z coefficient */ - inline Scalar z() const { return m_coeffs.coeff(2); } - /** \returns the \c w coefficient */ - inline Scalar w() const { return m_coeffs.coeff(3); } - - /** \returns a reference to the \c x coefficient */ - inline Scalar& x() { return m_coeffs.coeffRef(0); } - /** \returns a reference to the \c y coefficient */ - inline Scalar& y() { return m_coeffs.coeffRef(1); } - /** \returns a reference to the \c z coefficient */ - inline Scalar& z() { return m_coeffs.coeffRef(2); } - /** \returns a reference to the \c w coefficient */ - inline Scalar& w() { return m_coeffs.coeffRef(3); } - - /** \returns a read-only vector expression of the imaginary part (x,y,z) */ - inline const Block vec() const { return m_coeffs.template start<3>(); } - - /** \returns a vector expression of the imaginary part (x,y,z) */ - inline Block vec() { return m_coeffs.template start<3>(); } - - /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ - inline const Coefficients& coeffs() const { return m_coeffs; } - - /** \returns a vector expression of the coefficients (x,y,z,w) */ - inline Coefficients& coeffs() { return m_coeffs; } - - /** Default constructor leaving the quaternion uninitialized. */ - inline Quaternion() {} - - /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from - * its four coefficients \a w, \a x, \a y and \a z. 
- * - * \warning Note the order of the arguments: the real \a w coefficient first, - * while internally the coefficients are stored in the following order: - * [\c x, \c y, \c z, \c w] - */ - inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z) - { m_coeffs << x, y, z, w; } - - /** Copy constructor */ - inline Quaternion(const Quaternion& other) { m_coeffs = other.m_coeffs; } - - /** Constructs and initializes a quaternion from the angle-axis \a aa */ - explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; } - - /** Constructs and initializes a quaternion from either: - * - a rotation matrix expression, - * - a 4D vector expression representing quaternion coefficients. - * \sa operator=(MatrixBase) - */ - template - explicit inline Quaternion(const MatrixBase& other) { *this = other; } - - Quaternion& operator=(const Quaternion& other); - Quaternion& operator=(const AngleAxisType& aa); - template - Quaternion& operator=(const MatrixBase& m); - - /** \returns a quaternion representing an identity rotation - * \sa MatrixBase::Identity() - */ - static inline Quaternion Identity() { return Quaternion(1, 0, 0, 0); } - - /** \sa Quaternion::Identity(), MatrixBase::setIdentity() - */ - inline Quaternion& setIdentity() { m_coeffs << 0, 0, 0, 1; return *this; } - - /** \returns the squared norm of the quaternion's coefficients - * \sa Quaternion::norm(), MatrixBase::squaredNorm() - */ - inline Scalar squaredNorm() const { return m_coeffs.squaredNorm(); } - - /** \returns the norm of the quaternion's coefficients - * \sa Quaternion::squaredNorm(), MatrixBase::norm() - */ - inline Scalar norm() const { return m_coeffs.norm(); } - - /** Normalizes the quaternion \c *this - * \sa normalized(), MatrixBase::normalize() */ - inline void normalize() { m_coeffs.normalize(); } - /** \returns a normalized version of \c *this - * \sa normalize(), MatrixBase::normalized() */ - inline Quaternion normalized() const { return Quaternion(m_coeffs.normalized()); } - - /** 
\returns the dot product of \c *this and \a other - * Geometrically speaking, the dot product of two unit quaternions - * corresponds to the cosine of half the angle between the two rotations. - * \sa angularDistance() - */ - inline Scalar eigen2_dot(const Quaternion& other) const { return m_coeffs.eigen2_dot(other.m_coeffs); } - - inline Scalar angularDistance(const Quaternion& other) const; - - Matrix3 toRotationMatrix(void) const; - - template - Quaternion& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); - - inline Quaternion operator* (const Quaternion& q) const; - inline Quaternion& operator*= (const Quaternion& q); - - Quaternion inverse(void) const; - Quaternion conjugate(void) const; - - Quaternion slerp(Scalar t, const Quaternion& other) const; - - template - Vector3 operator* (const MatrixBase& vec) const; - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Quaternion(const Quaternion& other) - { m_coeffs = other.coeffs().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Quaternion& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - -protected: - Coefficients m_coeffs; -}; - -/** \ingroup Geometry_Module - * single precision quaternion type */ -typedef Quaternion Quaternionf; -/** \ingroup Geometry_Module - * double precision quaternion type */ -typedef Quaternion Quaterniond; - -// Generic Quaternion * Quaternion product -template inline Quaternion -ei_quaternion_product(const Quaternion& a, const Quaternion& b) -{ - return Quaternion - ( - a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), - a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(), - a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(), - a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x() - ); -} - -/** \returns the concatenation of two rotations as a quaternion-quaternion product */ -template -inline Quaternion Quaternion::operator* (const Quaternion& other) const -{ - return ei_quaternion_product(*this,other); -} - -/** \sa operator*(Quaternion) */ -template -inline Quaternion& Quaternion::operator*= (const Quaternion& other) -{ - return (*this = *this * other); -} - -/** Rotation of a vector by a quaternion. - * \remarks If the quaternion is used to rotate several points (>1) - * then it is much more efficient to first convert it to a 3x3 Matrix. - * Comparison of the operation cost for n transformations: - * - Quaternion: 30n - * - Via a Matrix3: 24 + 15n - */ -template -template -inline typename Quaternion::Vector3 -Quaternion::operator* (const MatrixBase& v) const -{ - // Note that this algorithm comes from the optimization by hand - // of the conversion to a Matrix followed by a Matrix/Vector product. - // It appears to be much faster than the common algorithm found - // in the litterature (30 versus 39 flops). It also requires two - // Vector3 as temporaries. 
- Vector3 uv; - uv = 2 * this->vec().cross(v); - return v + this->w() * uv + this->vec().cross(uv); -} - -template -inline Quaternion& Quaternion::operator=(const Quaternion& other) -{ - m_coeffs = other.m_coeffs; - return *this; -} - -/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this - */ -template -inline Quaternion& Quaternion::operator=(const AngleAxisType& aa) -{ - Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings - this->w() = ei_cos(ha); - this->vec() = ei_sin(ha) * aa.axis(); - return *this; -} - -/** Set \c *this from the expression \a xpr: - * - if \a xpr is a 4x1 vector, then \a xpr is assumed to be a quaternion - * - if \a xpr is a 3x3 matrix, then \a xpr is assumed to be rotation matrix - * and \a xpr is converted to a quaternion - */ -template -template -inline Quaternion& Quaternion::operator=(const MatrixBase& xpr) -{ - ei_quaternion_assign_impl::run(*this, xpr.derived()); - return *this; -} - -/** Convert the quaternion to a 3x3 rotation matrix */ -template -inline typename Quaternion::Matrix3 -Quaternion::toRotationMatrix(void) const -{ - // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!) 
- // if not inlined then the cost of the return by value is huge ~ +35%, - // however, not inlining this function is an order of magnitude slower, so - // it has to be inlined, and so the return by value is not an issue - Matrix3 res; - - const Scalar tx = Scalar(2)*this->x(); - const Scalar ty = Scalar(2)*this->y(); - const Scalar tz = Scalar(2)*this->z(); - const Scalar twx = tx*this->w(); - const Scalar twy = ty*this->w(); - const Scalar twz = tz*this->w(); - const Scalar txx = tx*this->x(); - const Scalar txy = ty*this->x(); - const Scalar txz = tz*this->x(); - const Scalar tyy = ty*this->y(); - const Scalar tyz = tz*this->y(); - const Scalar tzz = tz*this->z(); - - res.coeffRef(0,0) = Scalar(1)-(tyy+tzz); - res.coeffRef(0,1) = txy-twz; - res.coeffRef(0,2) = txz+twy; - res.coeffRef(1,0) = txy+twz; - res.coeffRef(1,1) = Scalar(1)-(txx+tzz); - res.coeffRef(1,2) = tyz-twx; - res.coeffRef(2,0) = txz-twy; - res.coeffRef(2,1) = tyz+twx; - res.coeffRef(2,2) = Scalar(1)-(txx+tyy); - - return res; -} - -/** Sets *this to be a quaternion representing a rotation sending the vector \a a to the vector \a b. - * - * \returns a reference to *this. - * - * Note that the two input vectors do \b not have to be normalized. 
- */ -template -template -inline Quaternion& Quaternion::setFromTwoVectors(const MatrixBase& a, const MatrixBase& b) -{ - Vector3 v0 = a.normalized(); - Vector3 v1 = b.normalized(); - Scalar c = v0.eigen2_dot(v1); - - // if dot == 1, vectors are the same - if (ei_isApprox(c,Scalar(1))) - { - // set to identity - this->w() = 1; this->vec().setZero(); - return *this; - } - // if dot == -1, vectors are opposites - if (ei_isApprox(c,Scalar(-1))) - { - this->vec() = v0.unitOrthogonal(); - this->w() = 0; - return *this; - } - - Vector3 axis = v0.cross(v1); - Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2)); - Scalar invs = Scalar(1)/s; - this->vec() = axis * invs; - this->w() = s * Scalar(0.5); - - return *this; -} - -/** \returns the multiplicative inverse of \c *this - * Note that in most cases, i.e., if you simply want the opposite rotation, - * and/or the quaternion is normalized, then it is enough to use the conjugate. - * - * \sa Quaternion::conjugate() - */ -template -inline Quaternion Quaternion::inverse() const -{ - // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? - Scalar n2 = this->squaredNorm(); - if (n2 > 0) - return Quaternion(conjugate().coeffs() / n2); - else - { - // return an invalid result to flag the error - return Quaternion(Coefficients::Zero()); - } -} - -/** \returns the conjugate of the \c *this which is equal to the multiplicative inverse - * if the quaternion is normalized. - * The conjugate of a quaternion represents the opposite rotation. 
- * - * \sa Quaternion::inverse() - */ -template -inline Quaternion Quaternion::conjugate() const -{ - return Quaternion(this->w(),-this->x(),-this->y(),-this->z()); -} - -/** \returns the angle (in radian) between two rotations - * \sa eigen2_dot() - */ -template -inline Scalar Quaternion::angularDistance(const Quaternion& other) const -{ - double d = ei_abs(this->eigen2_dot(other)); - if (d>=1.0) - return 0; - return Scalar(2) * std::acos(d); -} - -/** \returns the spherical linear interpolation between the two quaternions - * \c *this and \a other at the parameter \a t - */ -template -Quaternion Quaternion::slerp(Scalar t, const Quaternion& other) const -{ - static const Scalar one = Scalar(1) - machine_epsilon(); - Scalar d = this->eigen2_dot(other); - Scalar absD = ei_abs(d); - - Scalar scale0; - Scalar scale1; - - if (absD>=one) - { - scale0 = Scalar(1) - t; - scale1 = t; - } - else - { - // theta is the angle between the 2 quaternions - Scalar theta = std::acos(absD); - Scalar sinTheta = ei_sin(theta); - - scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta; - scale1 = ei_sin( ( t * theta) ) / sinTheta; - if (d<0) - scale1 = -scale1; - } - - return Quaternion(scale0 * coeffs() + scale1 * other.coeffs()); -} - -// set from a rotation matrix -template -struct ei_quaternion_assign_impl -{ - typedef typename Other::Scalar Scalar; - static inline void run(Quaternion& q, const Other& mat) - { - // This algorithm comes from "Quaternion Calculus and Fast Animation", - // Ken Shoemake, 1987 SIGGRAPH course notes - Scalar t = mat.trace(); - if (t > 0) - { - t = ei_sqrt(t + Scalar(1.0)); - q.w() = Scalar(0.5)*t; - t = Scalar(0.5)/t; - q.x() = (mat.coeff(2,1) - mat.coeff(1,2)) * t; - q.y() = (mat.coeff(0,2) - mat.coeff(2,0)) * t; - q.z() = (mat.coeff(1,0) - mat.coeff(0,1)) * t; - } - else - { - int i = 0; - if (mat.coeff(1,1) > mat.coeff(0,0)) - i = 1; - if (mat.coeff(2,2) > mat.coeff(i,i)) - i = 2; - int j = (i+1)%3; - int k = (j+1)%3; - - t = 
ei_sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0)); - q.coeffs().coeffRef(i) = Scalar(0.5) * t; - t = Scalar(0.5)/t; - q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t; - q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t; - q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t; - } - } -}; - -// set from a vector of coefficients assumed to be a quaternion -template -struct ei_quaternion_assign_impl -{ - typedef typename Other::Scalar Scalar; - static inline void run(Quaternion& q, const Other& vec) - { - q.coeffs() = vec; - } -}; - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Rotation2D.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Rotation2D.h deleted file mode 100644 index 19b8582a1..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Rotation2D.h +++ /dev/null @@ -1,145 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class Rotation2D - * - * \brief Represents a rotation/orientation in a 2 dimensional space. - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * - * This class is equivalent to a single scalar representing a counter clock wise rotation - * as a single angle in radian. It provides some additional features such as the automatic - * conversion from/to a 2x2 rotation matrix. Moreover this class aims to provide a similar - * interface to Quaternion in order to facilitate the writing of generic algorithms - * dealing with rotations. 
- * - * \sa class Quaternion, class Transform - */ -template struct ei_traits > -{ - typedef _Scalar Scalar; -}; - -template -class Rotation2D : public RotationBase,2> -{ - typedef RotationBase,2> Base; - -public: - - using Base::operator*; - - enum { Dim = 2 }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - typedef Matrix Vector2; - typedef Matrix Matrix2; - -protected: - - Scalar m_angle; - -public: - - /** Construct a 2D counter clock wise rotation from the angle \a a in radian. */ - inline Rotation2D(Scalar a) : m_angle(a) {} - - /** \returns the rotation angle */ - inline Scalar angle() const { return m_angle; } - - /** \returns a read-write reference to the rotation angle */ - inline Scalar& angle() { return m_angle; } - - /** \returns the inverse rotation */ - inline Rotation2D inverse() const { return -m_angle; } - - /** Concatenates two rotations */ - inline Rotation2D operator*(const Rotation2D& other) const - { return m_angle + other.m_angle; } - - /** Concatenates two rotations */ - inline Rotation2D& operator*=(const Rotation2D& other) - { return m_angle += other.m_angle; return *this; } - - /** Applies the rotation to a 2D vector */ - Vector2 operator* (const Vector2& vec) const - { return toRotationMatrix() * vec; } - - template - Rotation2D& fromRotationMatrix(const MatrixBase& m); - Matrix2 toRotationMatrix(void) const; - - /** \returns the spherical interpolation between \c *this and \a other using - * parameter \a t. It is in fact equivalent to a linear interpolation. - */ - inline Rotation2D slerp(Scalar t, const Rotation2D& other) const - { return m_angle * (1-t) + other.angle() * t; } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. 
- */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Rotation2D(const Rotation2D& other) - { - m_angle = Scalar(other.angle()); - } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Rotation2D& other, typename NumTraits::Real prec = precision()) const - { return ei_isApprox(m_angle,other.m_angle, prec); } -}; - -/** \ingroup Geometry_Module - * single precision 2D rotation type */ -typedef Rotation2D Rotation2Df; -/** \ingroup Geometry_Module - * double precision 2D rotation type */ -typedef Rotation2D Rotation2Dd; - -/** Set \c *this from a 2x2 rotation matrix \a mat. - * In other words, this function extract the rotation angle - * from the rotation matrix. - */ -template -template -Rotation2D& Rotation2D::fromRotationMatrix(const MatrixBase& mat) -{ - EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE) - m_angle = ei_atan2(mat.coeff(1,0), mat.coeff(0,0)); - return *this; -} - -/** Constructs and \returns an equivalent 2x2 rotation matrix. - */ -template -typename Rotation2D::Matrix2 -Rotation2D::toRotationMatrix(void) const -{ - Scalar sinA = ei_sin(m_angle); - Scalar cosA = ei_cos(m_angle); - return (Matrix2() << cosA, -sinA, sinA, cosA).finished(); -} - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/RotationBase.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/RotationBase.h deleted file mode 100644 index b1c8f38da..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/RotationBase.h +++ /dev/null @@ -1,123 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -// this file aims to contains the various representations of rotation/orientation -// in 2D and 3D space excepted Matrix and Quaternion. - -/** \class RotationBase - * - * \brief Common base class for compact rotation representations - * - * \param Derived is the derived type, i.e., a rotation type - * \param _Dim the dimension of the space - */ -template -class RotationBase -{ - public: - enum { Dim = _Dim }; - /** the scalar type of the coefficients */ - typedef typename ei_traits::Scalar Scalar; - - /** corresponding linear transformation matrix type */ - typedef Matrix RotationMatrixType; - - inline const Derived& derived() const { return *static_cast(this); } - inline Derived& derived() { return *static_cast(this); } - - /** \returns an equivalent rotation matrix */ - inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); } - - /** \returns the inverse rotation */ - inline Derived inverse() const { return derived().inverse(); } - - /** \returns the concatenation of the rotation \c *this with a translation \a t */ - inline Transform operator*(const Translation& t) const - { return toRotationMatrix() * t; } - - /** \returns the concatenation of the rotation \c *this with a scaling \a s */ - inline RotationMatrixType operator*(const Scaling& s) const - { return toRotationMatrix() * s; } - - /** \returns the concatenation of the rotation \c *this with an affine transformation \a t */ - inline Transform operator*(const Transform& t) const - { return toRotationMatrix() * t; } -}; - -/** \geometry_module - * - * Constructs a Dim x Dim rotation matrix from the 
rotation \a r - */ -template -template -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> -::Matrix(const RotationBase& r) -{ - EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim)) - *this = r.toRotationMatrix(); -} - -/** \geometry_module - * - * Set a Dim x Dim rotation matrix from the rotation \a r - */ -template -template -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>& -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> -::operator=(const RotationBase& r) -{ - EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim)) - return *this = r.toRotationMatrix(); -} - -/** \internal - * - * Helper function to return an arbitrary rotation object to a rotation matrix. - * - * \param Scalar the numeric type of the matrix coefficients - * \param Dim the dimension of the current space - * - * It returns a Dim x Dim fixed size matrix. - * - * Default specializations are provided for: - * - any scalar type (2D), - * - any matrix expression, - * - any type based on RotationBase (e.g., Quaternion, AngleAxis, Rotation2D) - * - * Currently ei_toRotationMatrix is only used by Transform. 
- * - * \sa class Transform, class Rotation2D, class Quaternion, class AngleAxis - */ -template -static inline Matrix ei_toRotationMatrix(const Scalar& s) -{ - EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE) - return Rotation2D(s).toRotationMatrix(); -} - -template -static inline Matrix ei_toRotationMatrix(const RotationBase& r) -{ - return r.toRotationMatrix(); -} - -template -static inline const MatrixBase& ei_toRotationMatrix(const MatrixBase& mat) -{ - EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim, - YOU_MADE_A_PROGRAMMING_MISTAKE) - return mat; -} - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Scaling.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Scaling.h deleted file mode 100644 index b8fa6cd3f..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Scaling.h +++ /dev/null @@ -1,167 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class Scaling - * - * \brief Represents a possibly non uniform scaling transformation - * - * \param _Scalar the scalar type, i.e., the type of the coefficients. - * \param _Dim the dimension of the space, can be a compile time value or Dynamic - * - * \note This class is not aimed to be used to store a scaling transformation, - * but rather to make easier the constructions and updates of Transform objects. 
- * - * \sa class Translation, class Transform - */ -template -class Scaling -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim) - /** dimension of the space */ - enum { Dim = _Dim }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - /** corresponding vector type */ - typedef Matrix VectorType; - /** corresponding linear transformation matrix type */ - typedef Matrix LinearMatrixType; - /** corresponding translation type */ - typedef Translation TranslationType; - /** corresponding affine transformation type */ - typedef Transform TransformType; - -protected: - - VectorType m_coeffs; - -public: - - /** Default constructor without initialization. */ - Scaling() {} - /** Constructs and initialize a uniform scaling transformation */ - explicit inline Scaling(const Scalar& s) { m_coeffs.setConstant(s); } - /** 2D only */ - inline Scaling(const Scalar& sx, const Scalar& sy) - { - ei_assert(Dim==2); - m_coeffs.x() = sx; - m_coeffs.y() = sy; - } - /** 3D only */ - inline Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz) - { - ei_assert(Dim==3); - m_coeffs.x() = sx; - m_coeffs.y() = sy; - m_coeffs.z() = sz; - } - /** Constructs and initialize the scaling transformation from a vector of scaling coefficients */ - explicit inline Scaling(const VectorType& coeffs) : m_coeffs(coeffs) {} - - const VectorType& coeffs() const { return m_coeffs; } - VectorType& coeffs() { return m_coeffs; } - - /** Concatenates two scaling */ - inline Scaling operator* (const Scaling& other) const - { return Scaling(coeffs().cwise() * other.coeffs()); } - - /** Concatenates a scaling and a translation */ - inline TransformType operator* (const TranslationType& t) const; - - /** Concatenates a scaling and an affine transformation */ - inline TransformType operator* (const TransformType& t) const; - - /** Concatenates a scaling and a linear transformation matrix */ - // TODO returns an expression - inline LinearMatrixType operator* 
(const LinearMatrixType& other) const - { return coeffs().asDiagonal() * other; } - - /** Concatenates a linear transformation matrix and a scaling */ - // TODO returns an expression - friend inline LinearMatrixType operator* (const LinearMatrixType& other, const Scaling& s) - { return other * s.coeffs().asDiagonal(); } - - template - inline LinearMatrixType operator*(const RotationBase& r) const - { return *this * r.toRotationMatrix(); } - - /** Applies scaling to vector */ - inline VectorType operator* (const VectorType& other) const - { return coeffs().asDiagonal() * other; } - - /** \returns the inverse scaling */ - inline Scaling inverse() const - { return Scaling(coeffs().cwise().inverse()); } - - inline Scaling& operator=(const Scaling& other) - { - m_coeffs = other.m_coeffs; - return *this; - } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Scaling(const Scaling& other) - { m_coeffs = other.coeffs().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Scaling& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - -}; - -/** \addtogroup Geometry_Module */ -//@{ -typedef Scaling Scaling2f; -typedef Scaling Scaling2d; -typedef Scaling Scaling3f; -typedef Scaling Scaling3d; -//@} - -template -inline typename Scaling::TransformType -Scaling::operator* (const TranslationType& t) const -{ - TransformType res; - res.matrix().setZero(); - res.linear().diagonal() = coeffs(); - res.translation() = m_coeffs.cwise() * t.vector(); - res(Dim,Dim) = Scalar(1); - return res; -} - -template -inline typename Scaling::TransformType -Scaling::operator* (const TransformType& t) const -{ - TransformType res = t; - res.prescale(m_coeffs); - return res; -} - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Transform.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Transform.h deleted file mode 100644 index fab60b251..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Transform.h +++ /dev/null @@ -1,786 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -// Note that we have to pass Dim and HDim because it is not allowed to use a template -// parameter to define a template specialization. To be more precise, in the following -// specializations, it is not allowed to use Dim+1 instead of HDim. 
-template< typename Other, - int Dim, - int HDim, - int OtherRows=Other::RowsAtCompileTime, - int OtherCols=Other::ColsAtCompileTime> -struct ei_transform_product_impl; - -/** \geometry_module \ingroup Geometry_Module - * - * \class Transform - * - * \brief Represents an homogeneous transformation in a N dimensional space - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * \param _Dim the dimension of the space - * - * The homography is internally represented and stored as a (Dim+1)^2 matrix which - * is available through the matrix() method. - * - * Conversion methods from/to Qt's QMatrix and QTransform are available if the - * preprocessor token EIGEN_QT_SUPPORT is defined. - * - * \sa class Matrix, class Quaternion - */ -template -class Transform -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim==Dynamic ? Dynamic : (_Dim+1)*(_Dim+1)) - enum { - Dim = _Dim, ///< space dimension in which the transformation holds - HDim = _Dim+1 ///< size of a respective homogeneous vector - }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - /** type of the matrix used to represent the transformation */ - typedef Matrix MatrixType; - /** type of the matrix used to represent the linear part of the transformation */ - typedef Matrix LinearMatrixType; - /** type of read/write reference to the linear part of the transformation */ - typedef Block LinearPart; - /** type of read/write reference to the linear part of the transformation */ - typedef const Block ConstLinearPart; - /** type of a vector */ - typedef Matrix VectorType; - /** type of a read/write reference to the translation part of the rotation */ - typedef Block TranslationPart; - /** type of a read/write reference to the translation part of the rotation */ - typedef const Block ConstTranslationPart; - /** corresponding translation type */ - typedef Translation TranslationType; - /** corresponding scaling transformation type */ - typedef 
Scaling ScalingType; - -protected: - - MatrixType m_matrix; - -public: - - /** Default constructor without initialization of the coefficients. */ - inline Transform() { } - - inline Transform(const Transform& other) - { - m_matrix = other.m_matrix; - } - - inline explicit Transform(const TranslationType& t) { *this = t; } - inline explicit Transform(const ScalingType& s) { *this = s; } - template - inline explicit Transform(const RotationBase& r) { *this = r; } - - inline Transform& operator=(const Transform& other) - { m_matrix = other.m_matrix; return *this; } - - template // MSVC 2005 will commit suicide if BigMatrix has a default value - struct construct_from_matrix - { - static inline void run(Transform *transform, const MatrixBase& other) - { - transform->matrix() = other; - } - }; - - template struct construct_from_matrix - { - static inline void run(Transform *transform, const MatrixBase& other) - { - transform->linear() = other; - transform->translation().setZero(); - transform->matrix()(Dim,Dim) = Scalar(1); - transform->matrix().template block<1,Dim>(Dim,0).setZero(); - } - }; - - /** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */ - template - inline explicit Transform(const MatrixBase& other) - { - construct_from_matrix::run(this, other); - } - - /** Set \c *this from a (Dim+1)^2 matrix. 
*/ - template - inline Transform& operator=(const MatrixBase& other) - { m_matrix = other; return *this; } - - #ifdef EIGEN_QT_SUPPORT - inline Transform(const QMatrix& other); - inline Transform& operator=(const QMatrix& other); - inline QMatrix toQMatrix(void) const; - inline Transform(const QTransform& other); - inline Transform& operator=(const QTransform& other); - inline QTransform toQTransform(void) const; - #endif - - /** shortcut for m_matrix(row,col); - * \sa MatrixBase::operaror(int,int) const */ - inline Scalar operator() (int row, int col) const { return m_matrix(row,col); } - /** shortcut for m_matrix(row,col); - * \sa MatrixBase::operaror(int,int) */ - inline Scalar& operator() (int row, int col) { return m_matrix(row,col); } - - /** \returns a read-only expression of the transformation matrix */ - inline const MatrixType& matrix() const { return m_matrix; } - /** \returns a writable expression of the transformation matrix */ - inline MatrixType& matrix() { return m_matrix; } - - /** \returns a read-only expression of the linear (linear) part of the transformation */ - inline ConstLinearPart linear() const { return m_matrix.template block(0,0); } - /** \returns a writable expression of the linear (linear) part of the transformation */ - inline LinearPart linear() { return m_matrix.template block(0,0); } - - /** \returns a read-only expression of the translation vector of the transformation */ - inline ConstTranslationPart translation() const { return m_matrix.template block(0,Dim); } - /** \returns a writable expression of the translation vector of the transformation */ - inline TranslationPart translation() { return m_matrix.template block(0,Dim); } - - /** \returns an expression of the product between the transform \c *this and a matrix expression \a other - * - * The right hand side \a other might be either: - * \li a vector of size Dim, - * \li an homogeneous vector of size Dim+1, - * \li a transformation matrix of size Dim+1 x Dim+1. 
- */ - // note: this function is defined here because some compilers cannot find the respective declaration - template - inline const typename ei_transform_product_impl::ResultType - operator * (const MatrixBase &other) const - { return ei_transform_product_impl::run(*this,other.derived()); } - - /** \returns the product expression of a transformation matrix \a a times a transform \a b - * The transformation matrix \a a must have a Dim+1 x Dim+1 sizes. */ - template - friend inline const typename ProductReturnType::Type - operator * (const MatrixBase &a, const Transform &b) - { return a.derived() * b.matrix(); } - - /** Contatenates two transformations */ - inline const Transform - operator * (const Transform& other) const - { return Transform(m_matrix * other.matrix()); } - - /** \sa MatrixBase::setIdentity() */ - void setIdentity() { m_matrix.setIdentity(); } - static const typename MatrixType::IdentityReturnType Identity() - { - return MatrixType::Identity(); - } - - template - inline Transform& scale(const MatrixBase &other); - - template - inline Transform& prescale(const MatrixBase &other); - - inline Transform& scale(Scalar s); - inline Transform& prescale(Scalar s); - - template - inline Transform& translate(const MatrixBase &other); - - template - inline Transform& pretranslate(const MatrixBase &other); - - template - inline Transform& rotate(const RotationType& rotation); - - template - inline Transform& prerotate(const RotationType& rotation); - - Transform& shear(Scalar sx, Scalar sy); - Transform& preshear(Scalar sx, Scalar sy); - - inline Transform& operator=(const TranslationType& t); - inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); } - inline Transform operator*(const TranslationType& t) const; - - inline Transform& operator=(const ScalingType& t); - inline Transform& operator*=(const ScalingType& s) { return scale(s.coeffs()); } - inline Transform operator*(const ScalingType& s) const; - friend inline 
Transform operator*(const LinearMatrixType& mat, const Transform& t) - { - Transform res = t; - res.matrix().row(Dim) = t.matrix().row(Dim); - res.matrix().template block(0,0) = (mat * t.matrix().template block(0,0)).lazy(); - return res; - } - - template - inline Transform& operator=(const RotationBase& r); - template - inline Transform& operator*=(const RotationBase& r) { return rotate(r.toRotationMatrix()); } - template - inline Transform operator*(const RotationBase& r) const; - - LinearMatrixType rotation() const; - template - void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const; - template - void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const; - - template - Transform& fromPositionOrientationScale(const MatrixBase &position, - const OrientationType& orientation, const MatrixBase &scale); - - inline const MatrixType inverse(TransformTraits traits = Affine) const; - - /** \returns a const pointer to the column major internal matrix */ - const Scalar* data() const { return m_matrix.data(); } - /** \returns a non-const pointer to the column major internal matrix */ - Scalar* data() { return m_matrix.data(); } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Transform(const Transform& other) - { m_matrix = other.matrix().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Transform& other, typename NumTraits::Real prec = precision()) const - { return m_matrix.isApprox(other.m_matrix, prec); } - - #ifdef EIGEN_TRANSFORM_PLUGIN - #include EIGEN_TRANSFORM_PLUGIN - #endif - -protected: - -}; - -/** \ingroup Geometry_Module */ -typedef Transform Transform2f; -/** \ingroup Geometry_Module */ -typedef Transform Transform3f; -/** \ingroup Geometry_Module */ -typedef Transform Transform2d; -/** \ingroup Geometry_Module */ -typedef Transform Transform3d; - -/************************** -*** Optional QT support *** -**************************/ - -#ifdef EIGEN_QT_SUPPORT -/** Initialises \c *this from a QMatrix assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -Transform::Transform(const QMatrix& other) -{ - *this = other; -} - -/** Set \c *this from a QMatrix assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -Transform& Transform::operator=(const QMatrix& other) -{ - EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - m_matrix << other.m11(), other.m21(), other.dx(), - other.m12(), other.m22(), other.dy(), - 0, 0, 1; - return *this; -} - -/** \returns a QMatrix from \c *this assuming the dimension is 2. - * - * \warning this convertion might loss data if \c *this is not affine - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -QMatrix Transform::toQMatrix(void) const -{ - EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - return QMatrix(m_matrix.coeff(0,0), m_matrix.coeff(1,0), - m_matrix.coeff(0,1), m_matrix.coeff(1,1), - m_matrix.coeff(0,2), m_matrix.coeff(1,2)); -} - -/** Initialises \c *this from a QTransform assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. 
- */ -template -Transform::Transform(const QTransform& other) -{ - *this = other; -} - -/** Set \c *this from a QTransform assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -Transform& Transform::operator=(const QTransform& other) -{ - EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - m_matrix << other.m11(), other.m21(), other.dx(), - other.m12(), other.m22(), other.dy(), - other.m13(), other.m23(), other.m33(); - return *this; -} - -/** \returns a QTransform from \c *this assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -QTransform Transform::toQTransform(void) const -{ - EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(2,0), - m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(2,1), - m_matrix.coeff(0,2), m_matrix.coeff(1,2), m_matrix.coeff(2,2)); -} -#endif - -/********************* -*** Procedural API *** -*********************/ - -/** Applies on the right the non uniform scale transformation represented - * by the vector \a other to \c *this and returns a reference to \c *this. - * \sa prescale() - */ -template -template -Transform& -Transform::scale(const MatrixBase &other) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - linear() = (linear() * other.asDiagonal()).lazy(); - return *this; -} - -/** Applies on the right a uniform scale of a factor \a c to \c *this - * and returns a reference to \c *this. - * \sa prescale(Scalar) - */ -template -inline Transform& Transform::scale(Scalar s) -{ - linear() *= s; - return *this; -} - -/** Applies on the left the non uniform scale transformation represented - * by the vector \a other to \c *this and returns a reference to \c *this. 
- * \sa scale() - */ -template -template -Transform& -Transform::prescale(const MatrixBase &other) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - m_matrix.template block(0,0) = (other.asDiagonal() * m_matrix.template block(0,0)).lazy(); - return *this; -} - -/** Applies on the left a uniform scale of a factor \a c to \c *this - * and returns a reference to \c *this. - * \sa scale(Scalar) - */ -template -inline Transform& Transform::prescale(Scalar s) -{ - m_matrix.template corner(TopLeft) *= s; - return *this; -} - -/** Applies on the right the translation matrix represented by the vector \a other - * to \c *this and returns a reference to \c *this. - * \sa pretranslate() - */ -template -template -Transform& -Transform::translate(const MatrixBase &other) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - translation() += linear() * other; - return *this; -} - -/** Applies on the left the translation matrix represented by the vector \a other - * to \c *this and returns a reference to \c *this. - * \sa translate() - */ -template -template -Transform& -Transform::pretranslate(const MatrixBase &other) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - translation() += other; - return *this; -} - -/** Applies on the right the rotation represented by the rotation \a rotation - * to \c *this and returns a reference to \c *this. - * - * The template parameter \a RotationType is the type of the rotation which - * must be known by ei_toRotationMatrix<>. - * - * Natively supported types includes: - * - any scalar (2D), - * - a Dim x Dim matrix expression, - * - a Quaternion (3D), - * - a AngleAxis (3D) - * - * This mechanism is easily extendable to support user types such as Euler angles, - * or a pair of Quaternion for 4D rotations. 
- * - * \sa rotate(Scalar), class Quaternion, class AngleAxis, prerotate(RotationType) - */ -template -template -Transform& -Transform::rotate(const RotationType& rotation) -{ - linear() *= ei_toRotationMatrix(rotation); - return *this; -} - -/** Applies on the left the rotation represented by the rotation \a rotation - * to \c *this and returns a reference to \c *this. - * - * See rotate() for further details. - * - * \sa rotate() - */ -template -template -Transform& -Transform::prerotate(const RotationType& rotation) -{ - m_matrix.template block(0,0) = ei_toRotationMatrix(rotation) - * m_matrix.template block(0,0); - return *this; -} - -/** Applies on the right the shear transformation represented - * by the vector \a other to \c *this and returns a reference to \c *this. - * \warning 2D only. - * \sa preshear() - */ -template -Transform& -Transform::shear(Scalar sx, Scalar sy) -{ - EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - VectorType tmp = linear().col(0)*sy + linear().col(1); - linear() << linear().col(0) + linear().col(1)*sx, tmp; - return *this; -} - -/** Applies on the left the shear transformation represented - * by the vector \a other to \c *this and returns a reference to \c *this. - * \warning 2D only. 
- * \sa shear() - */ -template -Transform& -Transform::preshear(Scalar sx, Scalar sy) -{ - EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - m_matrix.template block(0,0) = LinearMatrixType(1, sx, sy, 1) * m_matrix.template block(0,0); - return *this; -} - -/****************************************************** -*** Scaling, Translation and Rotation compatibility *** -******************************************************/ - -template -inline Transform& Transform::operator=(const TranslationType& t) -{ - linear().setIdentity(); - translation() = t.vector(); - m_matrix.template block<1,Dim>(Dim,0).setZero(); - m_matrix(Dim,Dim) = Scalar(1); - return *this; -} - -template -inline Transform Transform::operator*(const TranslationType& t) const -{ - Transform res = *this; - res.translate(t.vector()); - return res; -} - -template -inline Transform& Transform::operator=(const ScalingType& s) -{ - m_matrix.setZero(); - linear().diagonal() = s.coeffs(); - m_matrix.coeffRef(Dim,Dim) = Scalar(1); - return *this; -} - -template -inline Transform Transform::operator*(const ScalingType& s) const -{ - Transform res = *this; - res.scale(s.coeffs()); - return res; -} - -template -template -inline Transform& Transform::operator=(const RotationBase& r) -{ - linear() = ei_toRotationMatrix(r); - translation().setZero(); - m_matrix.template block<1,Dim>(Dim,0).setZero(); - m_matrix.coeffRef(Dim,Dim) = Scalar(1); - return *this; -} - -template -template -inline Transform Transform::operator*(const RotationBase& r) const -{ - Transform res = *this; - res.rotate(r.derived()); - return res; -} - -/************************ -*** Special functions *** -************************/ - -/** \returns the rotation part of the transformation - * \nonstableyet - * - * \svd_module - * - * \sa computeRotationScaling(), computeScalingRotation(), class SVD - */ -template -typename Transform::LinearMatrixType -Transform::rotation() const -{ - LinearMatrixType result; - 
computeRotationScaling(&result, (LinearMatrixType*)0); - return result; -} - - -/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being - * not necessarily positive. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * \nonstableyet - * - * \svd_module - * - * \sa computeScalingRotation(), rotation(), class SVD - */ -template -template -void Transform::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const -{ - JacobiSVD svd(linear(), ComputeFullU|ComputeFullV); - Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1 - Matrix sv(svd.singularValues()); - sv.coeffRef(0) *= x; - if(scaling) - { - scaling->noalias() = svd.matrixV() * sv.asDiagonal() * svd.matrixV().adjoint(); - } - if(rotation) - { - LinearMatrixType m(svd.matrixU()); - m.col(0) /= x; - rotation->noalias() = m * svd.matrixV().adjoint(); - } -} - -/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being - * not necessarily positive. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * \nonstableyet - * - * \svd_module - * - * \sa computeRotationScaling(), rotation(), class SVD - */ -template -template -void Transform::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const -{ - JacobiSVD svd(linear(), ComputeFullU|ComputeFullV); - Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1 - Matrix sv(svd.singularValues()); - sv.coeffRef(0) *= x; - if(scaling) - { - scaling->noalias() = svd.matrixU() * sv.asDiagonal() * svd.matrixU().adjoint(); - } - if(rotation) - { - LinearMatrixType m(svd.matrixU()); - m.col(0) /= x; - rotation->noalias() = m * svd.matrixV().adjoint(); - } -} - -/** Convenient method to set \c *this from a position, orientation and scale - * of a 3D object. 
- */ -template -template -Transform& -Transform::fromPositionOrientationScale(const MatrixBase &position, - const OrientationType& orientation, const MatrixBase &scale) -{ - linear() = ei_toRotationMatrix(orientation); - linear() *= scale.asDiagonal(); - translation() = position; - m_matrix.template block<1,Dim>(Dim,0).setZero(); - m_matrix(Dim,Dim) = Scalar(1); - return *this; -} - -/** \nonstableyet - * - * \returns the inverse transformation matrix according to some given knowledge - * on \c *this. - * - * \param traits allows to optimize the inversion process when the transformion - * is known to be not a general transformation. The possible values are: - * - Projective if the transformation is not necessarily affine, i.e., if the - * last row is not guaranteed to be [0 ... 0 1] - * - Affine is the default, the last row is assumed to be [0 ... 0 1] - * - Isometry if the transformation is only a concatenations of translations - * and rotations. - * - * \warning unless \a traits is always set to NoShear or NoScaling, this function - * requires the generic inverse method of MatrixBase defined in the LU module. If - * you forget to include this module, then you will get hard to debug linking errors. 
- * - * \sa MatrixBase::inverse() - */ -template -inline const typename Transform::MatrixType -Transform::inverse(TransformTraits traits) const -{ - if (traits == Projective) - { - return m_matrix.inverse(); - } - else - { - MatrixType res; - if (traits == Affine) - { - res.template corner(TopLeft) = linear().inverse(); - } - else if (traits == Isometry) - { - res.template corner(TopLeft) = linear().transpose(); - } - else - { - ei_assert("invalid traits value in Transform::inverse()"); - } - // translation and remaining parts - res.template corner(TopRight) = - res.template corner(TopLeft) * translation(); - res.template corner<1,Dim>(BottomLeft).setZero(); - res.coeffRef(Dim,Dim) = Scalar(1); - return res; - } -} - -/***************************************************** -*** Specializations of operator* with a MatrixBase *** -*****************************************************/ - -template -struct ei_transform_product_impl -{ - typedef Transform TransformType; - typedef typename TransformType::MatrixType MatrixType; - typedef typename ProductReturnType::Type ResultType; - static ResultType run(const TransformType& tr, const Other& other) - { return tr.matrix() * other; } -}; - -template -struct ei_transform_product_impl -{ - typedef Transform TransformType; - typedef typename TransformType::MatrixType MatrixType; - typedef TransformType ResultType; - static ResultType run(const TransformType& tr, const Other& other) - { - TransformType res; - res.translation() = tr.translation(); - res.matrix().row(Dim) = tr.matrix().row(Dim); - res.linear() = (tr.linear() * other).lazy(); - return res; - } -}; - -template -struct ei_transform_product_impl -{ - typedef Transform TransformType; - typedef typename TransformType::MatrixType MatrixType; - typedef typename ProductReturnType::Type ResultType; - static ResultType run(const TransformType& tr, const Other& other) - { return tr.matrix() * other; } -}; - -template -struct ei_transform_product_impl -{ - typedef typename 
Other::Scalar Scalar; - typedef Transform TransformType; - typedef Matrix ResultType; - static ResultType run(const TransformType& tr, const Other& other) - { return ((tr.linear() * other) + tr.translation()) - * (Scalar(1) / ( (tr.matrix().template block<1,Dim>(Dim,0) * other).coeff(0) + tr.matrix().coeff(Dim,Dim))); } -}; - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Translation.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Translation.h deleted file mode 100644 index 2b9859f6f..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Geometry/Translation.h +++ /dev/null @@ -1,184 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class Translation - * - * \brief Represents a translation transformation - * - * \param _Scalar the scalar type, i.e., the type of the coefficients. - * \param _Dim the dimension of the space, can be a compile time value or Dynamic - * - * \note This class is not aimed to be used to store a translation transformation, - * but rather to make easier the constructions and updates of Transform objects. 
- * - * \sa class Scaling, class Transform - */ -template -class Translation -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim) - /** dimension of the space */ - enum { Dim = _Dim }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - /** corresponding vector type */ - typedef Matrix VectorType; - /** corresponding linear transformation matrix type */ - typedef Matrix LinearMatrixType; - /** corresponding scaling transformation type */ - typedef Scaling ScalingType; - /** corresponding affine transformation type */ - typedef Transform TransformType; - -protected: - - VectorType m_coeffs; - -public: - - /** Default constructor without initialization. */ - Translation() {} - /** */ - inline Translation(const Scalar& sx, const Scalar& sy) - { - ei_assert(Dim==2); - m_coeffs.x() = sx; - m_coeffs.y() = sy; - } - /** */ - inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz) - { - ei_assert(Dim==3); - m_coeffs.x() = sx; - m_coeffs.y() = sy; - m_coeffs.z() = sz; - } - /** Constructs and initialize the scaling transformation from a vector of scaling coefficients */ - explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {} - - const VectorType& vector() const { return m_coeffs; } - VectorType& vector() { return m_coeffs; } - - /** Concatenates two translation */ - inline Translation operator* (const Translation& other) const - { return Translation(m_coeffs + other.m_coeffs); } - - /** Concatenates a translation and a scaling */ - inline TransformType operator* (const ScalingType& other) const; - - /** Concatenates a translation and a linear transformation */ - inline TransformType operator* (const LinearMatrixType& linear) const; - - template - inline TransformType operator*(const RotationBase& r) const - { return *this * r.toRotationMatrix(); } - - /** Concatenates a linear transformation and a translation */ - // its a nightmare to define a templated friend function outside its 
declaration - friend inline TransformType operator* (const LinearMatrixType& linear, const Translation& t) - { - TransformType res; - res.matrix().setZero(); - res.linear() = linear; - res.translation() = linear * t.m_coeffs; - res.matrix().row(Dim).setZero(); - res(Dim,Dim) = Scalar(1); - return res; - } - - /** Concatenates a translation and an affine transformation */ - inline TransformType operator* (const TransformType& t) const; - - /** Applies translation to vector */ - inline VectorType operator* (const VectorType& other) const - { return m_coeffs + other; } - - /** \returns the inverse translation (opposite) */ - Translation inverse() const { return Translation(-m_coeffs); } - - Translation& operator=(const Translation& other) - { - m_coeffs = other.m_coeffs; - return *this; - } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Translation(const Translation& other) - { m_coeffs = other.vector().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Translation& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - -}; - -/** \addtogroup Geometry_Module */ -//@{ -typedef Translation Translation2f; -typedef Translation Translation2d; -typedef Translation Translation3f; -typedef Translation Translation3d; -//@} - - -template -inline typename Translation::TransformType -Translation::operator* (const ScalingType& other) const -{ - TransformType res; - res.matrix().setZero(); - res.linear().diagonal() = other.coeffs(); - res.translation() = m_coeffs; - res(Dim,Dim) = Scalar(1); - return res; -} - -template -inline typename Translation::TransformType -Translation::operator* (const LinearMatrixType& linear) const -{ - TransformType res; - res.matrix().setZero(); - res.linear() = linear; - res.translation() = m_coeffs; - res.matrix().row(Dim).setZero(); - res(Dim,Dim) = Scalar(1); - return res; -} - -template -inline typename Translation::TransformType -Translation::operator* (const TransformType& t) const -{ - TransformType res = t; - res.pretranslate(m_coeffs); - return res; -} - -} // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/LU.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/LU.h deleted file mode 100644 index 49f19ad76..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/LU.h +++ /dev/null @@ -1,120 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2_LU_H -#define EIGEN2_LU_H - -namespace Eigen { - -template -class LU : public FullPivLU -{ - public: - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix IntRowVectorType; - typedef Matrix IntColVectorType; - typedef Matrix RowVectorType; - typedef Matrix ColVectorType; - - typedef Matrix KernelResultType; - - typedef Matrix ImageResultType; - - typedef FullPivLU Base; - - template - explicit LU(const T& t) : Base(t), m_originalMatrix(t) {} - - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = static_cast(this)->solve(b); - return true; - } - - template - inline void computeInverse(ResultType *result) const - { - solve(MatrixType::Identity(this->rows(), this->cols()), result); - } - - template - void computeKernel(KernelMatrixType *result) const - { - *result = static_cast(this)->kernel(); - } - - template - void computeImage(ImageMatrixType *result) const - { - *result = static_cast(this)->image(m_originalMatrix); - } - - const ImageResultType image() const - { - return static_cast(this)->image(m_originalMatrix); - } - - const MatrixType& m_originalMatrix; -}; - -#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS -/** \lu_module - * - * Synonym of partialPivLu(). - * - * \return the partial-pivoting LU decomposition of \c *this. - * - * \sa class PartialPivLU - */ -template -inline const LU::PlainObject> -MatrixBase::lu() const -{ - return LU(eval()); -} -#endif - -#ifdef EIGEN2_SUPPORT -/** \lu_module - * - * Synonym of partialPivLu(). - * - * \return the partial-pivoting LU decomposition of \c *this. 
- * - * \sa class PartialPivLU - */ -template -inline const LU::PlainObject> -MatrixBase::eigen2_lu() const -{ - return LU(eval()); -} -#endif - -} // end namespace Eigen - -#endif // EIGEN2_LU_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Lazy.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Lazy.h deleted file mode 100644 index 593fc78e6..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Lazy.h +++ /dev/null @@ -1,71 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_LAZY_H -#define EIGEN_LAZY_H - -namespace Eigen { - -/** \deprecated it is only used by lazy() which is deprecated - * - * \returns an expression of *this with added flags - * - * Example: \include MatrixBase_marked.cpp - * Output: \verbinclude MatrixBase_marked.out - * - * \sa class Flagged, extract(), part() - */ -template -template -inline const Flagged -MatrixBase::marked() const -{ - return derived(); -} - -/** \deprecated use MatrixBase::noalias() - * - * \returns an expression of *this with the EvalBeforeAssigningBit flag removed. 
- * - * Example: \include MatrixBase_lazy.cpp - * Output: \verbinclude MatrixBase_lazy.out - * - * \sa class Flagged, marked() - */ -template -inline const Flagged -MatrixBase::lazy() const -{ - return derived(); -} - - -/** \internal - * Overloaded to perform an efficient C += (A*B).lazy() */ -template -template -Derived& MatrixBase::operator+=(const Flagged, 0, - EvalBeforeAssigningBit>& other) -{ - other._expression().derived().addTo(derived()); return derived(); -} - -/** \internal - * Overloaded to perform an efficient C -= (A*B).lazy() */ -template -template -Derived& MatrixBase::operator-=(const Flagged, 0, - EvalBeforeAssigningBit>& other) -{ - other._expression().derived().subTo(derived()); return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_LAZY_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/LeastSquares.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/LeastSquares.h deleted file mode 100644 index 7992d4944..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/LeastSquares.h +++ /dev/null @@ -1,169 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_LEASTSQUARES_H -#define EIGEN2_LEASTSQUARES_H - -namespace Eigen { - -/** \ingroup LeastSquares_Module - * - * \leastsquares_module - * - * For a set of points, this function tries to express - * one of the coords as a linear (affine) function of the other coords. - * - * This is best explained by an example. This function works in full - * generality, for points in a space of arbitrary dimension, and also over - * the complex numbers, but for this example we will work in dimension 3 - * over the real numbers (doubles). 
- * - * So let us work with the following set of 5 points given by their - * \f$(x,y,z)\f$ coordinates: - * @code - Vector3d points[5]; - points[0] = Vector3d( 3.02, 6.89, -4.32 ); - points[1] = Vector3d( 2.01, 5.39, -3.79 ); - points[2] = Vector3d( 2.41, 6.01, -4.01 ); - points[3] = Vector3d( 2.09, 5.55, -3.86 ); - points[4] = Vector3d( 2.58, 6.32, -4.10 ); - * @endcode - * Suppose that we want to express the second coordinate (\f$y\f$) as a linear - * expression in \f$x\f$ and \f$z\f$, that is, - * \f[ y=ax+bz+c \f] - * for some constants \f$a,b,c\f$. Thus, we want to find the best possible - * constants \f$a,b,c\f$ so that the plane of equation \f$y=ax+bz+c\f$ fits - * best the five above points. To do that, call this function as follows: - * @code - Vector3d coeffs; // will store the coefficients a, b, c - linearRegression( - 5, - &points, - &coeffs, - 1 // the coord to express as a function of - // the other ones. 0 means x, 1 means y, 2 means z. - ); - * @endcode - * Now the vector \a coeffs is approximately - * \f$( 0.495 , -1.927 , -2.906 )\f$. - * Thus, we get \f$a=0.495, b = -1.927, c = -2.906\f$. Let us check for - * instance how near points[0] is from the plane of equation \f$y=ax+bz+c\f$. - * Looking at the coords of points[0], we see that: - * \f[ax+bz+c = 0.495 * 3.02 + (-1.927) * (-4.32) + (-2.906) = 6.91.\f] - * On the other hand, we have \f$y=6.89\f$. We see that the values - * \f$6.91\f$ and \f$6.89\f$ - * are near, so points[0] is very near the plane of equation \f$y=ax+bz+c\f$. - * - * Let's now describe precisely the parameters: - * @param numPoints the number of points - * @param points the array of pointers to the points on which to perform the linear regression - * @param result pointer to the vector in which to store the result. - This vector must be of the same type and size as the - data points. The meaning of its coords is as follows. - For brevity, let \f$n=Size\f$, - \f$r_i=result[i]\f$, - and \f$f=funcOfOthers\f$. 
Denote by - \f$x_0,\ldots,x_{n-1}\f$ - the n coordinates in the n-dimensional space. - Then the resulting equation is: - \f[ x_f = r_0 x_0 + \cdots + r_{f-1}x_{f-1} - + r_{f+1}x_{f+1} + \cdots + r_{n-1}x_{n-1} + r_n. \f] - * @param funcOfOthers Determines which coord to express as a function of the - others. Coords are numbered starting from 0, so that a - value of 0 means \f$x\f$, 1 means \f$y\f$, - 2 means \f$z\f$, ... - * - * \sa fitHyperplane() - */ -template -void linearRegression(int numPoints, - VectorType **points, - VectorType *result, - int funcOfOthers ) -{ - typedef typename VectorType::Scalar Scalar; - typedef Hyperplane HyperplaneType; - const int size = points[0]->size(); - result->resize(size); - HyperplaneType h(size); - fitHyperplane(numPoints, points, &h); - for(int i = 0; i < funcOfOthers; i++) - result->coeffRef(i) = - h.coeffs()[i] / h.coeffs()[funcOfOthers]; - for(int i = funcOfOthers; i < size; i++) - result->coeffRef(i) = - h.coeffs()[i+1] / h.coeffs()[funcOfOthers]; -} - -/** \ingroup LeastSquares_Module - * - * \leastsquares_module - * - * This function is quite similar to linearRegression(), so we refer to the - * documentation of this function and only list here the differences. - * - * The main difference from linearRegression() is that this function doesn't - * take a \a funcOfOthers argument. Instead, it finds a general equation - * of the form - * \f[ r_0 x_0 + \cdots + r_{n-1}x_{n-1} + r_n = 0, \f] - * where \f$n=Size\f$, \f$r_i=retCoefficients[i]\f$, and we denote by - * \f$x_0,\ldots,x_{n-1}\f$ the n coordinates in the n-dimensional space. - * - * Thus, the vector \a retCoefficients has size \f$n+1\f$, which is another - * difference from linearRegression(). 
- * - * In practice, this function performs an hyper-plane fit in a total least square sense - * via the following steps: - * 1 - center the data to the mean - * 2 - compute the covariance matrix - * 3 - pick the eigenvector corresponding to the smallest eigenvalue of the covariance matrix - * The ratio of the smallest eigenvalue and the second one gives us a hint about the relevance - * of the solution. This value is optionally returned in \a soundness. - * - * \sa linearRegression() - */ -template -void fitHyperplane(int numPoints, - VectorType **points, - HyperplaneType *result, - typename NumTraits::Real* soundness = 0) -{ - typedef typename VectorType::Scalar Scalar; - typedef Matrix CovMatrixType; - EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType) - ei_assert(numPoints >= 1); - int size = points[0]->size(); - ei_assert(size+1 == result->coeffs().size()); - - // compute the mean of the data - VectorType mean = VectorType::Zero(size); - for(int i = 0; i < numPoints; ++i) - mean += *(points[i]); - mean /= numPoints; - - // compute the covariance matrix - CovMatrixType covMat = CovMatrixType::Zero(size, size); - for(int i = 0; i < numPoints; ++i) - { - VectorType diff = (*(points[i]) - mean).conjugate(); - covMat += diff * diff.adjoint(); - } - - // now we just have to pick the eigen vector with smallest eigen value - SelfAdjointEigenSolver eig(covMat); - result->normal() = eig.eigenvectors().col(0); - if (soundness) - *soundness = eig.eigenvalues().coeff(0)/eig.eigenvalues().coeff(1); - - // let's compute the constant coefficient such that the - // plane pass trough the mean point: - result->offset() = - (result->normal().cwise()* mean).sum(); -} - -} // end namespace Eigen - -#endif // EIGEN2_LEASTSQUARES_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Macros.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Macros.h deleted file mode 100644 index 351c32afb..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Macros.h +++ /dev/null @@ -1,20 +0,0 @@ 
-// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_MACROS_H -#define EIGEN2_MACROS_H - -#define ei_assert eigen_assert -#define ei_internal_assert eigen_internal_assert - -#define EIGEN_ALIGN_128 EIGEN_ALIGN16 - -#define EIGEN_ARCH_WANTS_ALIGNMENT EIGEN_ALIGN_STATICALLY - -#endif // EIGEN2_MACROS_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/MathFunctions.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/MathFunctions.h deleted file mode 100644 index 3544af253..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/MathFunctions.h +++ /dev/null @@ -1,57 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2_MATH_FUNCTIONS_H -#define EIGEN2_MATH_FUNCTIONS_H - -namespace Eigen { - -template inline typename NumTraits::Real ei_real(const T& x) { return numext::real(x); } -template inline typename NumTraits::Real ei_imag(const T& x) { return numext::imag(x); } -template inline T ei_conj(const T& x) { return numext::conj(x); } -template inline typename NumTraits::Real ei_abs (const T& x) { using std::abs; return abs(x); } -template inline typename NumTraits::Real ei_abs2(const T& x) { return numext::abs2(x); } -template inline T ei_sqrt(const T& x) { using std::sqrt; return sqrt(x); } -template inline T ei_exp (const T& x) { using std::exp; return exp(x); } -template inline T ei_log (const T& x) { using std::log; return log(x); } -template inline T ei_sin (const T& x) { using std::sin; return sin(x); } -template inline T ei_cos (const T& x) { using std::cos; return cos(x); } -template inline T ei_atan2(const T& x,const T& y) { using std::atan2; return atan2(x,y); } -template inline T ei_pow (const T& x,const T& y) { return numext::pow(x,y); } -template inline T ei_random () { return internal::random(); } -template inline T ei_random (const T& x, const T& y) { return internal::random(x, y); } - -template inline T precision () { return NumTraits::dummy_precision(); } -template inline T machine_epsilon () { return NumTraits::epsilon(); } - - -template -inline bool ei_isMuchSmallerThan(const Scalar& x, const OtherScalar& y, - typename NumTraits::Real precision = NumTraits::dummy_precision()) -{ - return internal::isMuchSmallerThan(x, y, precision); -} - -template -inline bool ei_isApprox(const Scalar& x, const Scalar& y, - typename NumTraits::Real precision = NumTraits::dummy_precision()) -{ - return internal::isApprox(x, y, precision); -} - -template -inline bool ei_isApproxOrLessThan(const Scalar& x, const Scalar& y, - typename NumTraits::Real precision = NumTraits::dummy_precision()) -{ - return internal::isApproxOrLessThan(x, y, precision); -} - -} // end 
namespace Eigen - -#endif // EIGEN2_MATH_FUNCTIONS_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Memory.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Memory.h deleted file mode 100644 index f86372b6b..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Memory.h +++ /dev/null @@ -1,45 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_MEMORY_H -#define EIGEN2_MEMORY_H - -namespace Eigen { - -inline void* ei_aligned_malloc(size_t size) { return internal::aligned_malloc(size); } -inline void ei_aligned_free(void *ptr) { internal::aligned_free(ptr); } -inline void* ei_aligned_realloc(void *ptr, size_t new_size, size_t old_size) { return internal::aligned_realloc(ptr, new_size, old_size); } -inline void* ei_handmade_aligned_malloc(size_t size) { return internal::handmade_aligned_malloc(size); } -inline void ei_handmade_aligned_free(void *ptr) { internal::handmade_aligned_free(ptr); } - -template inline void* ei_conditional_aligned_malloc(size_t size) -{ - return internal::conditional_aligned_malloc(size); -} -template inline void ei_conditional_aligned_free(void *ptr) -{ - internal::conditional_aligned_free(ptr); -} -template inline void* ei_conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size) -{ - return internal::conditional_aligned_realloc(ptr, new_size, old_size); -} - -template inline T* ei_aligned_new(size_t size) -{ - return internal::aligned_new(size); -} -template inline void ei_aligned_delete(T *ptr, size_t size) -{ - return internal::aligned_delete(ptr, size); -} - -} // end namespace Eigen - -#endif // EIGEN2_MACROS_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Meta.h 
b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Meta.h deleted file mode 100644 index fa37cfc96..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Meta.h +++ /dev/null @@ -1,75 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_META_H -#define EIGEN2_META_H - -namespace Eigen { - -template -struct ei_traits : internal::traits -{}; - -struct ei_meta_true { enum { ret = 1 }; }; -struct ei_meta_false { enum { ret = 0 }; }; - -template -struct ei_meta_if { typedef Then ret; }; - -template -struct ei_meta_if { typedef Else ret; }; - -template struct ei_is_same_type { enum { ret = 0 }; }; -template struct ei_is_same_type { enum { ret = 1 }; }; - -template struct ei_unref { typedef T type; }; -template struct ei_unref { typedef T type; }; - -template struct ei_unpointer { typedef T type; }; -template struct ei_unpointer { typedef T type; }; -template struct ei_unpointer { typedef T type; }; - -template struct ei_unconst { typedef T type; }; -template struct ei_unconst { typedef T type; }; -template struct ei_unconst { typedef T & type; }; -template struct ei_unconst { typedef T * type; }; - -template struct ei_cleantype { typedef T type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; - -/** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer. 
- * Usage example: \code ei_meta_sqrt<1023>::ret \endcode - */ -template Y))) > - // use ?: instead of || just to shut up a stupid gcc 4.3 warning -class ei_meta_sqrt -{ - enum { - MidX = (InfX+SupX)/2, - TakeInf = MidX*MidX > Y ? 1 : 0, - NewInf = int(TakeInf) ? InfX : int(MidX), - NewSup = int(TakeInf) ? int(MidX) : SupX - }; - public: - enum { ret = ei_meta_sqrt::ret }; -}; - -template -class ei_meta_sqrt { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; }; - -} // end namespace Eigen - -#endif // EIGEN2_META_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Minor.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Minor.h deleted file mode 100644 index 4cded5734..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/Minor.h +++ /dev/null @@ -1,117 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MINOR_H -#define EIGEN_MINOR_H - -namespace Eigen { - -/** - * \class Minor - * - * \brief Expression of a minor - * - * \param MatrixType the type of the object in which we are taking a minor - * - * This class represents an expression of a minor. It is the return - * type of MatrixBase::minor() and most of the time this is the only way it - * is used. - * - * \sa MatrixBase::minor() - */ - -namespace internal { -template -struct traits > - : traits -{ - typedef typename nested::type MatrixTypeNested; - typedef typename remove_reference::type _MatrixTypeNested; - typedef typename MatrixType::StorageKind StorageKind; - enum { - RowsAtCompileTime = (MatrixType::RowsAtCompileTime != Dynamic) ? - int(MatrixType::RowsAtCompileTime) - 1 : Dynamic, - ColsAtCompileTime = (MatrixType::ColsAtCompileTime != Dynamic) ? 
- int(MatrixType::ColsAtCompileTime) - 1 : Dynamic, - MaxRowsAtCompileTime = (MatrixType::MaxRowsAtCompileTime != Dynamic) ? - int(MatrixType::MaxRowsAtCompileTime) - 1 : Dynamic, - MaxColsAtCompileTime = (MatrixType::MaxColsAtCompileTime != Dynamic) ? - int(MatrixType::MaxColsAtCompileTime) - 1 : Dynamic, - Flags = _MatrixTypeNested::Flags & (HereditaryBits | LvalueBit), - CoeffReadCost = _MatrixTypeNested::CoeffReadCost // minor is used typically on tiny matrices, - // where loops are unrolled and the 'if' evaluates at compile time - }; -}; -} - -template class Minor - : public MatrixBase > -{ - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(Minor) - - inline Minor(const MatrixType& matrix, - Index row, Index col) - : m_matrix(matrix), m_row(row), m_col(col) - { - eigen_assert(row >= 0 && row < matrix.rows() - && col >= 0 && col < matrix.cols()); - } - - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Minor) - - inline Index rows() const { return m_matrix.rows() - 1; } - inline Index cols() const { return m_matrix.cols() - 1; } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(row + (row >= m_row), col + (col >= m_col)); - } - - inline const Scalar coeff(Index row, Index col) const - { - return m_matrix.coeff(row + (row >= m_row), col + (col >= m_col)); - } - - protected: - const typename MatrixType::Nested m_matrix; - const Index m_row, m_col; -}; - -/** - * \return an expression of the (\a row, \a col)-minor of *this, - * i.e. an expression constructed from *this by removing the specified - * row and column. - * - * Example: \include MatrixBase_minor.cpp - * Output: \verbinclude MatrixBase_minor.out - * - * \sa class Minor - */ -template -inline Minor -MatrixBase::minor(Index row, Index col) -{ - return Minor(derived(), row, col); -} - -/** - * This is the const version of minor(). 
*/ -template -inline const Minor -MatrixBase::minor(Index row, Index col) const -{ - return Minor(derived(), row, col); -} - -} // end namespace Eigen - -#endif // EIGEN_MINOR_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/QR.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/QR.h deleted file mode 100644 index 2042c9851..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/QR.h +++ /dev/null @@ -1,67 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_QR_H -#define EIGEN2_QR_H - -namespace Eigen { - -template -class QR : public HouseholderQR -{ - public: - - typedef HouseholderQR Base; - typedef Block MatrixRBlockType; - - QR() : Base() {} - - template - explicit QR(const T& t) : Base(t) {} - - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = static_cast(this)->solve(b); - return true; - } - - MatrixType matrixQ(void) const { - MatrixType ret = MatrixType::Identity(this->rows(), this->cols()); - ret = this->householderQ() * ret; - return ret; - } - - bool isFullRank() const { - return true; - } - - const TriangularView - matrixR(void) const - { - int cols = this->cols(); - return MatrixRBlockType(this->matrixQR(), 0, 0, cols, cols).template triangularView(); - } -}; - -/** \return the QR decomposition of \c *this. 
- * - * \sa class QR - */ -template -const QR::PlainObject> -MatrixBase::qr() const -{ - return QR(eval()); -} - -} // end namespace Eigen - -#endif // EIGEN2_QR_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/SVD.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/SVD.h deleted file mode 100644 index 3d03d2288..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/SVD.h +++ /dev/null @@ -1,637 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_SVD_H -#define EIGEN2_SVD_H - -namespace Eigen { - -/** \ingroup SVD_Module - * \nonstableyet - * - * \class SVD - * - * \brief Standard SVD decomposition of a matrix and associated features - * - * \param MatrixType the type of the matrix of which we are computing the SVD decomposition - * - * This class performs a standard SVD decomposition of a real matrix A of size \c M x \c N - * with \c M \>= \c N. 
- * - * - * \sa MatrixBase::SVD() - */ -template class SVD -{ - private: - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - - enum { - PacketSize = internal::packet_traits::size, - AlignmentMask = int(PacketSize)-1, - MinSize = EIGEN_SIZE_MIN_PREFER_DYNAMIC(MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime) - }; - - typedef Matrix ColVector; - typedef Matrix RowVector; - - typedef Matrix MatrixUType; - typedef Matrix MatrixVType; - typedef Matrix SingularValuesType; - - public: - - SVD() {} // a user who relied on compiler-generated default compiler reported problems with MSVC in 2.0.7 - - SVD(const MatrixType& matrix) - : m_matU(matrix.rows(), (std::min)(matrix.rows(), matrix.cols())), - m_matV(matrix.cols(),matrix.cols()), - m_sigma((std::min)(matrix.rows(),matrix.cols())) - { - compute(matrix); - } - - template - bool solve(const MatrixBase &b, ResultType* result) const; - - const MatrixUType& matrixU() const { return m_matU; } - const SingularValuesType& singularValues() const { return m_sigma; } - const MatrixVType& matrixV() const { return m_matV; } - - void compute(const MatrixType& matrix); - SVD& sort(); - - template - void computeUnitaryPositive(UnitaryType *unitary, PositiveType *positive) const; - template - void computePositiveUnitary(PositiveType *positive, UnitaryType *unitary) const; - template - void computeRotationScaling(RotationType *unitary, ScalingType *positive) const; - template - void computeScalingRotation(ScalingType *positive, RotationType *unitary) const; - - protected: - /** \internal */ - MatrixUType m_matU; - /** \internal */ - MatrixVType m_matV; - /** \internal */ - SingularValuesType m_sigma; -}; - -/** Computes / recomputes the SVD decomposition A = U S V^* of \a matrix - * - * \note this code has been adapted from JAMA (public domain) - */ -template -void SVD::compute(const MatrixType& matrix) -{ - const int m = matrix.rows(); - const int n = matrix.cols(); - const int nu = 
(std::min)(m,n); - ei_assert(m>=n && "In Eigen 2.0, SVD only works for MxN matrices with M>=N. Sorry!"); - ei_assert(m>1 && "In Eigen 2.0, SVD doesn't work on 1x1 matrices"); - - m_matU.resize(m, nu); - m_matU.setZero(); - m_sigma.resize((std::min)(m,n)); - m_matV.resize(n,n); - - RowVector e(n); - ColVector work(m); - MatrixType matA(matrix); - const bool wantu = true; - const bool wantv = true; - int i=0, j=0, k=0; - - // Reduce A to bidiagonal form, storing the diagonal elements - // in s and the super-diagonal elements in e. - int nct = (std::min)(m-1,n); - int nrt = (std::max)(0,(std::min)(n-2,m)); - for (k = 0; k < (std::max)(nct,nrt); ++k) - { - if (k < nct) - { - // Compute the transformation for the k-th column and - // place the k-th diagonal in m_sigma[k]. - m_sigma[k] = matA.col(k).end(m-k).norm(); - if (m_sigma[k] != 0.0) // FIXME - { - if (matA(k,k) < 0.0) - m_sigma[k] = -m_sigma[k]; - matA.col(k).end(m-k) /= m_sigma[k]; - matA(k,k) += 1.0; - } - m_sigma[k] = -m_sigma[k]; - } - - for (j = k+1; j < n; ++j) - { - if ((k < nct) && (m_sigma[k] != 0.0)) - { - // Apply the transformation. - Scalar t = matA.col(k).end(m-k).eigen2_dot(matA.col(j).end(m-k)); // FIXME dot product or cwise prod + .sum() ?? - t = -t/matA(k,k); - matA.col(j).end(m-k) += t * matA.col(k).end(m-k); - } - - // Place the k-th row of A into e for the - // subsequent calculation of the row transformation. - e[j] = matA(k,j); - } - - // Place the transformation in U for subsequent back multiplication. - if (wantu & (k < nct)) - m_matU.col(k).end(m-k) = matA.col(k).end(m-k); - - if (k < nrt) - { - // Compute the k-th row transformation and place the - // k-th super-diagonal in e[k]. - e[k] = e.end(n-k-1).norm(); - if (e[k] != 0.0) - { - if (e[k+1] < 0.0) - e[k] = -e[k]; - e.end(n-k-1) /= e[k]; - e[k+1] += 1.0; - } - e[k] = -e[k]; - if ((k+1 < m) & (e[k] != 0.0)) - { - // Apply the transformation. 
- work.end(m-k-1) = matA.corner(BottomRight,m-k-1,n-k-1) * e.end(n-k-1); - for (j = k+1; j < n; ++j) - matA.col(j).end(m-k-1) += (-e[j]/e[k+1]) * work.end(m-k-1); - } - - // Place the transformation in V for subsequent back multiplication. - if (wantv) - m_matV.col(k).end(n-k-1) = e.end(n-k-1); - } - } - - - // Set up the final bidiagonal matrix or order p. - int p = (std::min)(n,m+1); - if (nct < n) - m_sigma[nct] = matA(nct,nct); - if (m < p) - m_sigma[p-1] = 0.0; - if (nrt+1 < p) - e[nrt] = matA(nrt,p-1); - e[p-1] = 0.0; - - // If required, generate U. - if (wantu) - { - for (j = nct; j < nu; ++j) - { - m_matU.col(j).setZero(); - m_matU(j,j) = 1.0; - } - for (k = nct-1; k >= 0; k--) - { - if (m_sigma[k] != 0.0) - { - for (j = k+1; j < nu; ++j) - { - Scalar t = m_matU.col(k).end(m-k).eigen2_dot(m_matU.col(j).end(m-k)); // FIXME is it really a dot product we want ? - t = -t/m_matU(k,k); - m_matU.col(j).end(m-k) += t * m_matU.col(k).end(m-k); - } - m_matU.col(k).end(m-k) = - m_matU.col(k).end(m-k); - m_matU(k,k) = Scalar(1) + m_matU(k,k); - if (k-1>0) - m_matU.col(k).start(k-1).setZero(); - } - else - { - m_matU.col(k).setZero(); - m_matU(k,k) = 1.0; - } - } - } - - // If required, generate V. - if (wantv) - { - for (k = n-1; k >= 0; k--) - { - if ((k < nrt) & (e[k] != 0.0)) - { - for (j = k+1; j < nu; ++j) - { - Scalar t = m_matV.col(k).end(n-k-1).eigen2_dot(m_matV.col(j).end(n-k-1)); // FIXME is it really a dot product we want ? - t = -t/m_matV(k+1,k); - m_matV.col(j).end(n-k-1) += t * m_matV.col(k).end(n-k-1); - } - } - m_matV.col(k).setZero(); - m_matV(k,k) = 1.0; - } - } - - // Main iteration loop for the singular values. - int pp = p-1; - int iter = 0; - Scalar eps = ei_pow(Scalar(2),ei_is_same_type::ret ? Scalar(-23) : Scalar(-52)); - while (p > 0) - { - int k=0; - int kase=0; - - // Here is where a test for too many iterations would go. - - // This section of the program inspects for - // negligible elements in the s and e arrays. 
On - // completion the variables kase and k are set as follows. - - // kase = 1 if s(p) and e[k-1] are negligible and k

= -1; --k) - { - if (k == -1) - break; - if (ei_abs(e[k]) <= eps*(ei_abs(m_sigma[k]) + ei_abs(m_sigma[k+1]))) - { - e[k] = 0.0; - break; - } - } - if (k == p-2) - { - kase = 4; - } - else - { - int ks; - for (ks = p-1; ks >= k; --ks) - { - if (ks == k) - break; - Scalar t = (ks != p ? ei_abs(e[ks]) : Scalar(0)) + (ks != k+1 ? ei_abs(e[ks-1]) : Scalar(0)); - if (ei_abs(m_sigma[ks]) <= eps*t) - { - m_sigma[ks] = 0.0; - break; - } - } - if (ks == k) - { - kase = 3; - } - else if (ks == p-1) - { - kase = 1; - } - else - { - kase = 2; - k = ks; - } - } - ++k; - - // Perform the task indicated by kase. - switch (kase) - { - - // Deflate negligible s(p). - case 1: - { - Scalar f(e[p-2]); - e[p-2] = 0.0; - for (j = p-2; j >= k; --j) - { - Scalar t(numext::hypot(m_sigma[j],f)); - Scalar cs(m_sigma[j]/t); - Scalar sn(f/t); - m_sigma[j] = t; - if (j != k) - { - f = -sn*e[j-1]; - e[j-1] = cs*e[j-1]; - } - if (wantv) - { - for (i = 0; i < n; ++i) - { - t = cs*m_matV(i,j) + sn*m_matV(i,p-1); - m_matV(i,p-1) = -sn*m_matV(i,j) + cs*m_matV(i,p-1); - m_matV(i,j) = t; - } - } - } - } - break; - - // Split at negligible s(k). - case 2: - { - Scalar f(e[k-1]); - e[k-1] = 0.0; - for (j = k; j < p; ++j) - { - Scalar t(numext::hypot(m_sigma[j],f)); - Scalar cs( m_sigma[j]/t); - Scalar sn(f/t); - m_sigma[j] = t; - f = -sn*e[j]; - e[j] = cs*e[j]; - if (wantu) - { - for (i = 0; i < m; ++i) - { - t = cs*m_matU(i,j) + sn*m_matU(i,k-1); - m_matU(i,k-1) = -sn*m_matU(i,j) + cs*m_matU(i,k-1); - m_matU(i,j) = t; - } - } - } - } - break; - - // Perform one qr step. - case 3: - { - // Calculate the shift. 
- Scalar scale = (std::max)((std::max)((std::max)((std::max)( - ei_abs(m_sigma[p-1]),ei_abs(m_sigma[p-2])),ei_abs(e[p-2])), - ei_abs(m_sigma[k])),ei_abs(e[k])); - Scalar sp = m_sigma[p-1]/scale; - Scalar spm1 = m_sigma[p-2]/scale; - Scalar epm1 = e[p-2]/scale; - Scalar sk = m_sigma[k]/scale; - Scalar ek = e[k]/scale; - Scalar b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/Scalar(2); - Scalar c = (sp*epm1)*(sp*epm1); - Scalar shift(0); - if ((b != 0.0) || (c != 0.0)) - { - shift = ei_sqrt(b*b + c); - if (b < 0.0) - shift = -shift; - shift = c/(b + shift); - } - Scalar f = (sk + sp)*(sk - sp) + shift; - Scalar g = sk*ek; - - // Chase zeros. - - for (j = k; j < p-1; ++j) - { - Scalar t = numext::hypot(f,g); - Scalar cs = f/t; - Scalar sn = g/t; - if (j != k) - e[j-1] = t; - f = cs*m_sigma[j] + sn*e[j]; - e[j] = cs*e[j] - sn*m_sigma[j]; - g = sn*m_sigma[j+1]; - m_sigma[j+1] = cs*m_sigma[j+1]; - if (wantv) - { - for (i = 0; i < n; ++i) - { - t = cs*m_matV(i,j) + sn*m_matV(i,j+1); - m_matV(i,j+1) = -sn*m_matV(i,j) + cs*m_matV(i,j+1); - m_matV(i,j) = t; - } - } - t = numext::hypot(f,g); - cs = f/t; - sn = g/t; - m_sigma[j] = t; - f = cs*e[j] + sn*m_sigma[j+1]; - m_sigma[j+1] = -sn*e[j] + cs*m_sigma[j+1]; - g = sn*e[j+1]; - e[j+1] = cs*e[j+1]; - if (wantu && (j < m-1)) - { - for (i = 0; i < m; ++i) - { - t = cs*m_matU(i,j) + sn*m_matU(i,j+1); - m_matU(i,j+1) = -sn*m_matU(i,j) + cs*m_matU(i,j+1); - m_matU(i,j) = t; - } - } - } - e[p-2] = f; - iter = iter + 1; - } - break; - - // Convergence. - case 4: - { - // Make the singular values positive. - if (m_sigma[k] <= 0.0) - { - m_sigma[k] = m_sigma[k] < Scalar(0) ? -m_sigma[k] : Scalar(0); - if (wantv) - m_matV.col(k).start(pp+1) = -m_matV.col(k).start(pp+1); - } - - // Order the singular values. 
- while (k < pp) - { - if (m_sigma[k] >= m_sigma[k+1]) - break; - Scalar t = m_sigma[k]; - m_sigma[k] = m_sigma[k+1]; - m_sigma[k+1] = t; - if (wantv && (k < n-1)) - m_matV.col(k).swap(m_matV.col(k+1)); - if (wantu && (k < m-1)) - m_matU.col(k).swap(m_matU.col(k+1)); - ++k; - } - iter = 0; - p--; - } - break; - } // end big switch - } // end iterations -} - -template -SVD& SVD::sort() -{ - int mu = m_matU.rows(); - int mv = m_matV.rows(); - int n = m_matU.cols(); - - for (int i=0; i p) - { - k = j; - p = m_sigma.coeff(j); - } - } - if (k != i) - { - m_sigma.coeffRef(k) = m_sigma.coeff(i); // i.e. - m_sigma.coeffRef(i) = p; // swaps the i-th and the k-th elements - - int j = mu; - for(int s=0; j!=0; ++s, --j) - std::swap(m_matU.coeffRef(s,i), m_matU.coeffRef(s,k)); - - j = mv; - for (int s=0; j!=0; ++s, --j) - std::swap(m_matV.coeffRef(s,i), m_matV.coeffRef(s,k)); - } - } - return *this; -} - -/** \returns the solution of \f$ A x = b \f$ using the current SVD decomposition of A. - * The parts of the solution corresponding to zero singular values are ignored. - * - * \sa MatrixBase::svd(), LU::solve(), LLT::solve() - */ -template -template -bool SVD::solve(const MatrixBase &b, ResultType* result) const -{ - ei_assert(b.rows() == m_matU.rows()); - - Scalar maxVal = m_sigma.cwise().abs().maxCoeff(); - for (int j=0; j aux = m_matU.transpose() * b.col(j); - - for (int i = 0; i col(j) = m_matV * aux; - } - return true; -} - -/** Computes the polar decomposition of the matrix, as a product unitary x positive. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * Only for square matrices. 
- * - * \sa computePositiveUnitary(), computeRotationScaling() - */ -template -template -void SVD::computeUnitaryPositive(UnitaryType *unitary, - PositiveType *positive) const -{ - ei_assert(m_matU.cols() == m_matV.cols() && "Polar decomposition is only for square matrices"); - if(unitary) *unitary = m_matU * m_matV.adjoint(); - if(positive) *positive = m_matV * m_sigma.asDiagonal() * m_matV.adjoint(); -} - -/** Computes the polar decomposition of the matrix, as a product positive x unitary. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * Only for square matrices. - * - * \sa computeUnitaryPositive(), computeRotationScaling() - */ -template -template -void SVD::computePositiveUnitary(UnitaryType *positive, - PositiveType *unitary) const -{ - ei_assert(m_matU.rows() == m_matV.rows() && "Polar decomposition is only for square matrices"); - if(unitary) *unitary = m_matU * m_matV.adjoint(); - if(positive) *positive = m_matU * m_sigma.asDiagonal() * m_matU.adjoint(); -} - -/** decomposes the matrix as a product rotation x scaling, the scaling being - * not necessarily positive. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * This method requires the Geometry module. - * - * \sa computeScalingRotation(), computeUnitaryPositive() - */ -template -template -void SVD::computeRotationScaling(RotationType *rotation, ScalingType *scaling) const -{ - ei_assert(m_matU.rows() == m_matV.rows() && "Polar decomposition is only for square matrices"); - Scalar x = (m_matU * m_matV.adjoint()).determinant(); // so x has absolute value 1 - Matrix sv(m_sigma); - sv.coeffRef(0) *= x; - if(scaling) scaling->lazyAssign(m_matV * sv.asDiagonal() * m_matV.adjoint()); - if(rotation) - { - MatrixType m(m_matU); - m.col(0) /= x; - rotation->lazyAssign(m * m_matV.adjoint()); - } -} - -/** decomposes the matrix as a product scaling x rotation, the scaling being - * not necessarily positive. 
- * - * If either pointer is zero, the corresponding computation is skipped. - * - * This method requires the Geometry module. - * - * \sa computeRotationScaling(), computeUnitaryPositive() - */ -template -template -void SVD::computeScalingRotation(ScalingType *scaling, RotationType *rotation) const -{ - ei_assert(m_matU.rows() == m_matV.rows() && "Polar decomposition is only for square matrices"); - Scalar x = (m_matU * m_matV.adjoint()).determinant(); // so x has absolute value 1 - Matrix sv(m_sigma); - sv.coeffRef(0) *= x; - if(scaling) scaling->lazyAssign(m_matU * sv.asDiagonal() * m_matU.adjoint()); - if(rotation) - { - MatrixType m(m_matU); - m.col(0) /= x; - rotation->lazyAssign(m * m_matV.adjoint()); - } -} - - -/** \svd_module - * \returns the SVD decomposition of \c *this - */ -template -inline SVD::PlainObject> -MatrixBase::svd() const -{ - return SVD(derived()); -} - -} // end namespace Eigen - -#endif // EIGEN2_SVD_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/TriangularSolver.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/TriangularSolver.h deleted file mode 100644 index ebbeb3b49..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/TriangularSolver.h +++ /dev/null @@ -1,42 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_TRIANGULAR_SOLVER2_H -#define EIGEN_TRIANGULAR_SOLVER2_H - -namespace Eigen { - -const unsigned int UnitDiagBit = UnitDiag; -const unsigned int SelfAdjointBit = SelfAdjoint; -const unsigned int UpperTriangularBit = Upper; -const unsigned int LowerTriangularBit = Lower; - -const unsigned int UpperTriangular = Upper; -const unsigned int LowerTriangular = Lower; -const unsigned int UnitUpperTriangular = UnitUpper; -const unsigned int UnitLowerTriangular = UnitLower; - -template -template -typename ExpressionType::PlainObject -Flagged::solveTriangular(const MatrixBase& other) const -{ - return m_matrix.template triangularView().solve(other.derived()); -} - -template -template -void Flagged::solveTriangularInPlace(const MatrixBase& other) const -{ - m_matrix.template triangularView().solveInPlace(other.derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_TRIANGULAR_SOLVER2_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/VectorBlock.h b/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/VectorBlock.h deleted file mode 100644 index 71a8080a9..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigen2Support/VectorBlock.h +++ /dev/null @@ -1,94 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2_VECTORBLOCK_H -#define EIGEN2_VECTORBLOCK_H - -namespace Eigen { - -/** \deprecated use DenseMase::head(Index) */ -template -inline VectorBlock -MatrixBase::start(Index size) -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), 0, size); -} - -/** \deprecated use DenseMase::head(Index) */ -template -inline const VectorBlock -MatrixBase::start(Index size) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), 0, size); -} - -/** \deprecated use DenseMase::tail(Index) */ -template -inline VectorBlock -MatrixBase::end(Index size) -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), this->size() - size, size); -} - -/** \deprecated use DenseMase::tail(Index) */ -template -inline const VectorBlock -MatrixBase::end(Index size) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), this->size() - size, size); -} - -/** \deprecated use DenseMase::head() */ -template -template -inline VectorBlock -MatrixBase::start() -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), 0); -} - -/** \deprecated use DenseMase::head() */ -template -template -inline const VectorBlock -MatrixBase::start() const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), 0); -} - -/** \deprecated use DenseMase::tail() */ -template -template -inline VectorBlock -MatrixBase::end() -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), size() - Size); -} - -/** \deprecated use DenseMase::tail() */ -template -template -inline const VectorBlock -MatrixBase::end() const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), size() - Size); -} - -} // end namespace Eigen - -#endif // EIGEN2_VECTORBLOCK_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h index ec3b1633e..dc5fae06a 100644 --- 
a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h @@ -250,7 +250,7 @@ template class ComplexEigenSolver EigenvectorType m_matX; private: - void doComputeEigenvectors(const RealScalar& matrixnorm); + void doComputeEigenvectors(RealScalar matrixnorm); void sortEigenvalues(bool computeEigenvectors); }; @@ -284,10 +284,12 @@ ComplexEigenSolver::compute(const EigenBase& matrix, bool template -void ComplexEigenSolver::doComputeEigenvectors(const RealScalar& matrixnorm) +void ComplexEigenSolver::doComputeEigenvectors(RealScalar matrixnorm) { const Index n = m_eivalues.size(); + matrixnorm = numext::maxi(matrixnorm,(std::numeric_limits::min)()); + // Compute X such that T = X D X^(-1), where D is the diagonal of T. // The matrix X is unit triangular. m_matX = EigenvectorType::Zero(n, n); diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/ComplexSchur_MKL.h deleted file mode 100644 index 91496ae5b..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Complex Schur needed to complex unsymmetrical eigenvalues/eigenvectors. - ******************************************************************************** -*/ - -#ifndef EIGEN_COMPLEX_SCHUR_MKL_H -#define EIGEN_COMPLEX_SCHUR_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ -ComplexSchur >& \ -ComplexSchur >::compute(const Matrix& matrix, bool computeU) \ -{ \ - typedef Matrix MatrixType; \ - typedef MatrixType::Scalar Scalar; \ - typedef MatrixType::RealScalar RealScalar; \ - typedef std::complex ComplexScalar; \ -\ - eigen_assert(matrix.cols() == matrix.rows()); \ -\ - m_matUisUptodate = false; \ - if(matrix.cols() == 1) \ - { \ - m_matT = matrix.cast(); \ - if(computeU) m_matU = ComplexMatrixType::Identity(1,1); \ - m_info = Success; \ - m_isInitialized = true; \ - m_matUisUptodate = computeU; \ - return *this; \ - } \ - lapack_int n = matrix.cols(), 
sdim, info; \ - lapack_int lda = matrix.outerStride(); \ - lapack_int matrix_order = MKLCOLROW; \ - char jobvs, sort='N'; \ - LAPACK_##MKLPREFIX_U##_SELECT1 select = 0; \ - jobvs = (computeU) ? 'V' : 'N'; \ - m_matU.resize(n, n); \ - lapack_int ldvs = m_matU.outerStride(); \ - m_matT = matrix; \ - Matrix w; \ - w.resize(n, 1);\ - info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ - if(info == 0) \ - m_info = Success; \ - else \ - m_info = NoConvergence; \ -\ - m_isInitialized = true; \ - m_matUisUptodate = computeU; \ - return *this; \ -\ -} - -EIGEN_MKL_SCHUR_COMPLEX(dcomplex, MKL_Complex16, z, Z, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(scomplex, MKL_Complex8, c, C, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(dcomplex, MKL_Complex16, z, Z, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(scomplex, MKL_Complex8, c, C, RowMajor, LAPACK_ROW_MAJOR) - -} // end namespace Eigen - -#endif // EIGEN_COMPLEX_SCHUR_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/RealSchur.h b/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/RealSchur.h index d6a339f07..f5c86041d 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/RealSchur.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/RealSchur.h @@ -248,12 +248,24 @@ template template RealSchur& RealSchur::compute(const EigenBase& matrix, bool computeU) { + const Scalar considerAsZero = (std::numeric_limits::min)(); + eigen_assert(matrix.cols() == matrix.rows()); Index maxIters = m_maxIters; if (maxIters == -1) maxIters = m_maxIterationsPerRow * matrix.rows(); Scalar scale = matrix.derived().cwiseAbs().maxCoeff(); + if(scale inline \ -RealSchur >& \ -RealSchur >::compute(const Matrix& matrix, bool computeU) \ -{ \ - typedef Matrix MatrixType; \ - typedef MatrixType::Scalar Scalar; \ - typedef MatrixType::RealScalar RealScalar; \ -\ - eigen_assert(matrix.cols() == matrix.rows()); \ -\ - 
lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ - lapack_int matrix_order = MKLCOLROW; \ - char jobvs, sort='N'; \ - LAPACK_##MKLPREFIX_U##_SELECT2 select = 0; \ - jobvs = (computeU) ? 'V' : 'N'; \ - m_matU.resize(n, n); \ - lapack_int ldvs = m_matU.outerStride(); \ - m_matT = matrix; \ - Matrix wr, wi; \ - wr.resize(n, 1); wi.resize(n, 1); \ - info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ - if(info == 0) \ - m_info = Success; \ - else \ - m_info = NoConvergence; \ -\ - m_isInitialized = true; \ - m_matUisUptodate = computeU; \ - return *this; \ -\ -} - -EIGEN_MKL_SCHUR_REAL(double, double, d, D, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_REAL(float, float, s, S, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_REAL(double, double, d, D, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_SCHUR_REAL(float, float, s, S, RowMajor, LAPACK_ROW_MAJOR) - -} // end namespace Eigen - -#endif // EIGEN_REAL_SCHUR_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index a9f56c4f5..9ddd553f2 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -414,7 +414,8 @@ SelfAdjointEigenSolver& SelfAdjointEigenSolver if(n==1) { - m_eivalues.coeffRef(0,0) = numext::real(matrix.diagonal()[0]); + m_eivec = matrix; + m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0)); if(computeEigenvectors) m_eivec.setOnes(n,n); m_info = Success; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h deleted file mode 100644 index 17c0dadd2..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +++ 
/dev/null @@ -1,92 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Self-adjoint eigenvalues/eigenvectors. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_SAEIGENSOLVER_MKL_H -#define EIGEN_SAEIGENSOLVER_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \ -template<> inline \ -SelfAdjointEigenSolver >& \ -SelfAdjointEigenSolver >::compute(const Matrix& matrix, int options) \ -{ \ - eigen_assert(matrix.cols() == matrix.rows()); \ - eigen_assert((options&~(EigVecMask|GenEigMask))==0 \ - && (options&EigVecMask)!=EigVecMask \ - && "invalid option parameter"); \ - bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ - lapack_int n = matrix.cols(), lda, matrix_order, info; \ - m_eivalues.resize(n,1); \ - m_subdiag.resize(n-1); \ - m_eivec = matrix; \ -\ - if(n==1) \ - { \ - m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); \ - if(computeEigenvectors) m_eivec.setOnes(n,n); \ - m_info = Success; \ - m_isInitialized = true; \ - m_eigenvectorsOk = computeEigenvectors; \ - return *this; \ - } \ -\ - lda = matrix.outerStride(); \ - matrix_order=MKLCOLROW; \ - char jobz, uplo='L'/*, range='A'*/; \ - jobz = computeEigenvectors ? 'V' : 'N'; \ -\ - info = LAPACKE_##MKLNAME( matrix_order, jobz, uplo, n, (MKLTYPE*)m_eivec.data(), lda, (MKLRTYPE*)m_eivalues.data() ); \ - m_info = (info==0) ? 
Success : NoConvergence; \ - m_isInitialized = true; \ - m_eigenvectorsOk = computeEigenvectors; \ - return *this; \ -} - - -EIGEN_MKL_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(dcomplex, MKL_Complex16, double, zheev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(scomplex, MKL_Complex8, float, cheev, ColMajor, LAPACK_COL_MAJOR) - -EIGEN_MKL_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(dcomplex, MKL_Complex16, double, zheev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(scomplex, MKL_Complex8, float, cheev, RowMajor, LAPACK_ROW_MAJOR) - -} // end namespace Eigen - -#endif // EIGEN_SAEIGENSOLVER_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/Geometry/AlignedBox.h b/uppsrc/plugin/Eigen/Eigen/src/Geometry/AlignedBox.h index c902d8f0a..066eae4f9 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Geometry/AlignedBox.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Geometry/AlignedBox.h @@ -63,7 +63,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Default constructor initializing a null box. */ EIGEN_DEVICE_FUNC inline AlignedBox() - { if (EIGEN_CONST_CONDITIONAL(AmbientDimAtCompileTime!=Dynamic)) setEmpty(); } + { if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); } /** Constructs a null box with \a _dim the dimension of the ambient space. 
*/ EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim) diff --git a/uppsrc/plugin/Eigen/Eigen/src/Geometry/ParametrizedLine.h b/uppsrc/plugin/Eigen/Eigen/src/Geometry/ParametrizedLine.h index 3929ca87f..1e985d8cd 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Geometry/ParametrizedLine.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Geometry/ParametrizedLine.h @@ -104,44 +104,7 @@ public: template EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; - /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this. - * - * \param mat the Dim x Dim transformation matrix - * \param traits specifies whether the matrix \a mat represents an #Isometry - * or a more generic #Affine transformation. The default is #Affine. - */ - template - EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const MatrixBase& mat, TransformTraits traits = Affine) - { - if (traits==Affine) - direction() = (mat * direction()).normalized(); - else if (traits==Isometry) - direction() = mat * direction(); - else - { - eigen_assert(0 && "invalid traits value in ParametrizedLine::transform()"); - } - origin() = mat * origin(); - return *this; - } - - /** Applies the transformation \a t to \c *this and returns a reference to \c *this. - * - * \param t the transformation of dimension Dim - * \param traits specifies whether the transformation \a t represents an #Isometry - * or a more generic #Affine transformation. The default is #Affine. - * Other kind of transformations are not supported. 
- */ - template - EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const Transform& t, - TransformTraits traits = Affine) - { - transform(t.linear(), traits); - origin() += t.translation(); - return *this; - } - -/** \returns \c *this with scalar type casted to \a NewScalarType + /** \returns \c *this with scalar type casted to \a NewScalarType * * Note that if \a NewScalarType is equal to the current scalar type of \c *this * then this function smartly returns a const reference to \c *this. diff --git a/uppsrc/plugin/Eigen/Eigen/src/Geometry/Quaternion.h b/uppsrc/plugin/Eigen/Eigen/src/Geometry/Quaternion.h index f6ef1bcf6..3e5a9badb 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Geometry/Quaternion.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Geometry/Quaternion.h @@ -423,7 +423,7 @@ typedef Map, Aligned> QuaternionMapAlignedd; // Generic Quaternion * Quaternion product // This product can be specialized for a given architecture via the Arch template argument. namespace internal { -template struct quat_product +template struct quat_product { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& a, const QuaternionBase& b){ return Quaternion @@ -446,8 +446,7 @@ QuaternionBase::operator* (const QuaternionBase& other) c EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return internal::quat_product::Scalar, - EIGEN_PLAIN_ENUM_MIN(internal::traits::Alignment, internal::traits::Alignment)>::run(*this, other); + typename internal::traits::Scalar>::run(*this, other); } /** \sa operator*(Quaternion) */ @@ -672,7 +671,7 @@ EIGEN_DEVICE_FUNC inline Quaternion::Scalar> // Generic conjugate of a Quaternion namespace internal { -template struct quat_conj +template struct quat_conj { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& q){ return Quaternion(q.w(),-q.x(),-q.y(),-q.z()); @@ -691,8 +690,7 @@ 
EIGEN_DEVICE_FUNC inline Quaternion::Scalar> QuaternionBase::conjugate() const { return internal::quat_conj::Scalar, - internal::traits::Alignment>::run(*this); + typename internal::traits::Scalar>::run(*this); } diff --git a/uppsrc/plugin/Eigen/Eigen/src/Geometry/Transform.h b/uppsrc/plugin/Eigen/Eigen/src/Geometry/Transform.h index 2d36dfadf..3f31ee45d 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Geometry/Transform.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Geometry/Transform.h @@ -335,7 +335,7 @@ public: OtherModeIsAffineCompact = OtherMode == int(AffineCompact) }; - if(EIGEN_CONST_CONDITIONAL(ModeIsAffineCompact == OtherModeIsAffineCompact)) + if(ModeIsAffineCompact == OtherModeIsAffineCompact) { // We need the block expression because the code is compiled for all // combinations of transformations and will trigger a compile time error @@ -343,7 +343,7 @@ public: m_matrix.template block(0,0) = other.matrix().template block(0,0); makeAffine(); } - else if(EIGEN_CONST_CONDITIONAL(OtherModeIsAffineCompact)) + else if(OtherModeIsAffineCompact) { typedef typename Transform::MatrixType OtherMatrixType; internal::transform_construct_from_matrix::run(this, other.matrix()); @@ -481,7 +481,7 @@ public: TransformTimeDiagonalReturnType res; res.linear().noalias() = a*b.linear(); res.translation().noalias() = a*b.translation(); - if (EIGEN_CONST_CONDITIONAL(Mode!=int(AffineCompact))) + if (Mode!=int(AffineCompact)) res.matrix().row(Dim) = b.matrix().row(Dim); return res; } @@ -755,7 +755,7 @@ template Transform& Transform::operator=(const QMatrix& other) { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -801,7 +801,7 @@ Transform& Transform::operator { check_template_params(); EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == 
int(AffineCompact))) + if (Mode == int(AffineCompact)) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -819,7 +819,7 @@ template QTransform Transform::toQTransform(void) const { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(0,2), m_matrix.coeff(1,2)); @@ -912,7 +912,7 @@ EIGEN_DEVICE_FUNC Transform& Transform::pretranslate(const MatrixBase &other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - if(EIGEN_CONST_CONDITIONAL(int(Mode)==int(Projective))) + if(int(Mode)==int(Projective)) affine() += other * m_matrix.row(Dim); else translation() += other; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Geometry/arch/Geometry_SSE.h b/uppsrc/plugin/Eigen/Eigen/src/Geometry/arch/Geometry_SSE.h index 1a86ff837..f68cab583 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -16,17 +16,23 @@ namespace Eigen { namespace internal { template -struct quat_product +struct quat_product { + enum { + AAlignment = traits::Alignment, + BAlignment = traits::Alignment, + ResAlignment = traits >::Alignment + }; static inline Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { Quaternion res; const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); - __m128 a = _a.coeffs().template packet(0); - __m128 b = _b.coeffs().template packet(0); + __m128 a = _a.coeffs().template packet(0); + __m128 b = _b.coeffs().template packet(0); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); - pstore(&res.x(), + pstoret( + &res.x(), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), 
_mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), vec4f_swizzle1(b,1,2,0,0))), @@ -36,14 +42,17 @@ struct quat_product } }; -template -struct quat_conj +template +struct quat_conj { + enum { + ResAlignment = traits >::Alignment + }; static inline Quaternion run(const QuaternionBase& q) { Quaternion res; const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); - pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet(0))); + pstoret(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet::Alignment>(0))); return res; } }; @@ -52,6 +61,9 @@ struct quat_conj template struct cross3_impl { + enum { + ResAlignment = traits::type>::Alignment + }; static inline typename plain_matrix_type::type run(const VectorLhs& lhs, const VectorRhs& rhs) { @@ -60,7 +72,7 @@ struct cross3_impl __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); typename plain_matrix_type::type res; - pstore(&res.x(),_mm_sub_ps(mul1,mul2)); + pstoret(&res.x(),_mm_sub_ps(mul1,mul2)); return res; } }; @@ -68,9 +80,14 @@ struct cross3_impl -template -struct quat_product +template +struct quat_product { + enum { + BAlignment = traits::Alignment, + ResAlignment = traits >::Alignment + }; + static inline Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); @@ -78,8 +95,8 @@ struct quat_product Quaternion res; const double* a = _a.coeffs().data(); - Packet2d b_xy = _b.coeffs().template packet(0); - Packet2d b_zw = _b.coeffs().template packet(2); + Packet2d b_xy = _b.coeffs().template packet(0); + Packet2d b_zw = _b.coeffs().template packet(2); Packet2d a_xx = pset1(a[0]); Packet2d a_yy = pset1(a[1]); Packet2d a_zz = pset1(a[2]); @@ -97,9 +114,9 @@ struct quat_product t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); + 
pstoret(&res.x(), _mm_addsub_pd(t1, preverse(t2))); #else - pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); + pstoret(&res.x(), padd(t1, pxor(mask,preverse(t2)))); #endif /* @@ -111,25 +128,28 @@ struct quat_product t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); + pstoret(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); #else - pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); + pstoret(&res.z(), psub(t1, pxor(mask,preverse(t2)))); #endif return res; } }; -template -struct quat_conj +template +struct quat_conj { + enum { + ResAlignment = traits >::Alignment + }; static inline Quaternion run(const QuaternionBase& q) { Quaternion res; const __m128d mask0 = _mm_setr_pd(-0.,-0.); const __m128d mask2 = _mm_setr_pd(-0.,0.); - pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet(0))); - pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet(2))); + pstoret(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet::Alignment>(0))); + pstoret(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet::Alignment>(2))); return res; } }; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Householder/BlockHouseholder.h b/uppsrc/plugin/Eigen/Eigen/src/Householder/BlockHouseholder.h index 39bf8c83d..01a7ed188 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Householder/BlockHouseholder.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Householder/BlockHouseholder.h @@ -87,7 +87,8 @@ void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vec const TriangularView V(vectors); // A -= V T V^* A - Matrix tmp = V.adjoint() * mat; // FIXME add .noalias() once the triangular product can work inplace if(forward) tmp = T.template triangularView() * tmp; diff --git a/uppsrc/plugin/Eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/uppsrc/plugin/Eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index 358444aff..facdaf890 100644 --- 
a/uppsrc/plugin/Eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/uppsrc/plugin/Eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -152,13 +152,28 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { // Compute the inverse squared-norm of each column of mat m_invdiag.resize(mat.cols()); - for(Index j=0; j0) - m_invdiag(j) = RealScalar(1)/sum; - else - m_invdiag(j) = RealScalar(1); + m_invdiag.setZero(); + for(Index j=0; jRealScalar(0)) + m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j)); + } + else + { + for(Index j=0; jRealScalar(0)) + m_invdiag(j) = RealScalar(1)/sum; + else + m_invdiag(j) = RealScalar(1); + } } Base::m_isInitialized = true; return *this; diff --git a/uppsrc/plugin/Eigen/Eigen/src/Jacobi/Jacobi.h b/uppsrc/plugin/Eigen/Eigen/src/Jacobi/Jacobi.h index d25af8e90..c30326e1d 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/Jacobi/Jacobi.h +++ b/uppsrc/plugin/Eigen/Eigen/src/Jacobi/Jacobi.h @@ -302,8 +302,12 @@ template void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) { typedef typename VectorX::Scalar Scalar; - enum { PacketSize = packet_traits::size }; + enum { + PacketSize = packet_traits::size, + OtherPacketSize = packet_traits::size + }; typedef typename packet_traits::type Packet; + typedef typename packet_traits::type OtherPacket; eigen_assert(xpr_x.size() == xpr_y.size()); Index size = xpr_x.size(); Index incrx = xpr_x.derived().innerStride(); @@ -321,6 +325,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x if(VectorX::SizeAtCompileTime == Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && ((incrx==1 && incry==1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization @@ -329,9 +334,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x Index alignedStart = 
internal::first_default_aligned(y, size); Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - const Packet pc = pset1(c); - const Packet ps = pset1(s); - conj_helper::IsComplex,false> pcj; + const OtherPacket pc = pset1(c); + const OtherPacket ps = pset1(s); + conj_helper::IsComplex,false> pcj; + conj_helper pm; for(Index i=0; i& xpr_x { Packet xi = pload(px); Packet yi = pload(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } @@ -365,10 +371,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x Packet xi1 = ploadu(px+PacketSize); Packet yi = pload (py); Packet yi1 = pload (py+PacketSize); - pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); px += Peeling*PacketSize; py += Peeling*PacketSize; } @@ -376,8 +382,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x { Packet xi = ploadu(x+peelingEnd); Packet yi = pload (y+peelingEnd); - pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); } } @@ -393,19 +399,21 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x /*** fixed-size vectorized path ***/ else if(VectorX::SizeAtCompileTime != Dynamic && (VectorX::Flags & VectorY::Flags & 
PacketAccessBit) && + (PacketSize == OtherPacketSize) && (EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment)>0)) // FIXME should be compared to the required alignment { - const Packet pc = pset1(c); - const Packet ps = pset1(s); - conj_helper::IsComplex,false> pcj; + const OtherPacket pc = pset1(c); + const OtherPacket ps = pset1(s); + conj_helper::IsComplex,false> pcj; + conj_helper pm; Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT py = y; for(Index i=0; i(px); Packet yi = pload(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } diff --git a/uppsrc/plugin/Eigen/Eigen/src/LU/Inverse.h b/uppsrc/plugin/Eigen/Eigen/src/LU/Inverse.h deleted file mode 100644 index 3cf887193..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/LU/Inverse.h +++ /dev/null @@ -1,400 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_INVERSE_H -#define EIGEN_INVERSE_H - -namespace Eigen { - -namespace internal { - -/********************************** -*** General case implementation *** -**********************************/ - -template -struct compute_inverse -{ - static inline void run(const MatrixType& matrix, ResultType& result) - { - result = matrix.partialPivLu().inverse(); - } -}; - -template -struct compute_inverse_and_det_with_check { /* nothing! general case not supported. 
*/ }; - -/**************************** -*** Size 1 implementation *** -****************************/ - -template -struct compute_inverse -{ - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename MatrixType::Scalar Scalar; - result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0); - } -}; - -template -struct compute_inverse_and_det_with_check -{ - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& result, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - determinant = matrix.coeff(0,0); - invertible = abs(determinant) > absDeterminantThreshold; - if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant; - } -}; - -/**************************** -*** Size 2 implementation *** -****************************/ - -template -inline void compute_inverse_size2_helper( - const MatrixType& matrix, const typename ResultType::Scalar& invdet, - ResultType& result) -{ - result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; - result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet; - result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet; - result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; -} - -template -struct compute_inverse -{ - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename ResultType::Scalar Scalar; - const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant(); - compute_inverse_size2_helper(matrix, invdet, result); - } -}; - -template -struct compute_inverse_and_det_with_check -{ - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - typedef typename ResultType::Scalar Scalar; - determinant = matrix.determinant(); - invertible = abs(determinant) > 
absDeterminantThreshold; - if(!invertible) return; - const Scalar invdet = Scalar(1) / determinant; - compute_inverse_size2_helper(matrix, invdet, inverse); - } -}; - -/**************************** -*** Size 3 implementation *** -****************************/ - -template -inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m) -{ - enum { - i1 = (i+1) % 3, - i2 = (i+2) % 3, - j1 = (j+1) % 3, - j2 = (j+2) % 3 - }; - return m.coeff(i1, j1) * m.coeff(i2, j2) - - m.coeff(i1, j2) * m.coeff(i2, j1); -} - -template -inline void compute_inverse_size3_helper( - const MatrixType& matrix, - const typename ResultType::Scalar& invdet, - const Matrix& cofactors_col0, - ResultType& result) -{ - result.row(0) = cofactors_col0 * invdet; - result.coeffRef(1,0) = cofactor_3x3(matrix) * invdet; - result.coeffRef(1,1) = cofactor_3x3(matrix) * invdet; - result.coeffRef(1,2) = cofactor_3x3(matrix) * invdet; - result.coeffRef(2,0) = cofactor_3x3(matrix) * invdet; - result.coeffRef(2,1) = cofactor_3x3(matrix) * invdet; - result.coeffRef(2,2) = cofactor_3x3(matrix) * invdet; -} - -template -struct compute_inverse -{ - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename ResultType::Scalar Scalar; - Matrix cofactors_col0; - cofactors_col0.coeffRef(0) = cofactor_3x3(matrix); - cofactors_col0.coeffRef(1) = cofactor_3x3(matrix); - cofactors_col0.coeffRef(2) = cofactor_3x3(matrix); - const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); - const Scalar invdet = Scalar(1) / det; - compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result); - } -}; - -template -struct compute_inverse_and_det_with_check -{ - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - typedef typename ResultType::Scalar Scalar; - Matrix cofactors_col0; - 
cofactors_col0.coeffRef(0) = cofactor_3x3(matrix); - cofactors_col0.coeffRef(1) = cofactor_3x3(matrix); - cofactors_col0.coeffRef(2) = cofactor_3x3(matrix); - determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); - invertible = abs(determinant) > absDeterminantThreshold; - if(!invertible) return; - const Scalar invdet = Scalar(1) / determinant; - compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse); - } -}; - -/**************************** -*** Size 4 implementation *** -****************************/ - -template -inline const typename Derived::Scalar general_det3_helper -(const MatrixBase& matrix, int i1, int i2, int i3, int j1, int j2, int j3) -{ - return matrix.coeff(i1,j1) - * (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2)); -} - -template -inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix) -{ - enum { - i1 = (i+1) % 4, - i2 = (i+2) % 4, - i3 = (i+3) % 4, - j1 = (j+1) % 4, - j2 = (j+2) % 4, - j3 = (j+3) % 4 - }; - return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3) - + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3) - + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3); -} - -template -struct compute_inverse_size4 -{ - static void run(const MatrixType& matrix, ResultType& result) - { - result.coeffRef(0,0) = cofactor_4x4(matrix); - result.coeffRef(1,0) = -cofactor_4x4(matrix); - result.coeffRef(2,0) = cofactor_4x4(matrix); - result.coeffRef(3,0) = -cofactor_4x4(matrix); - result.coeffRef(0,2) = cofactor_4x4(matrix); - result.coeffRef(1,2) = -cofactor_4x4(matrix); - result.coeffRef(2,2) = cofactor_4x4(matrix); - result.coeffRef(3,2) = -cofactor_4x4(matrix); - result.coeffRef(0,1) = -cofactor_4x4(matrix); - result.coeffRef(1,1) = cofactor_4x4(matrix); - result.coeffRef(2,1) = -cofactor_4x4(matrix); - result.coeffRef(3,1) = cofactor_4x4(matrix); - result.coeffRef(0,3) = -cofactor_4x4(matrix); - result.coeffRef(1,3) = cofactor_4x4(matrix); - result.coeffRef(2,3) 
= -cofactor_4x4(matrix); - result.coeffRef(3,3) = cofactor_4x4(matrix); - result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum(); - } -}; - -template -struct compute_inverse - : compute_inverse_size4 -{ -}; - -template -struct compute_inverse_and_det_with_check -{ - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - determinant = matrix.determinant(); - invertible = abs(determinant) > absDeterminantThreshold; - if(invertible) compute_inverse::run(matrix, inverse); - } -}; - -/************************* -*** MatrixBase methods *** -*************************/ - -template -struct traits > -{ - typedef typename MatrixType::PlainObject ReturnType; -}; - -template -struct inverse_impl : public ReturnByValue > -{ - typedef typename MatrixType::Index Index; - typedef typename internal::eval::type MatrixTypeNested; - typedef typename remove_all::type MatrixTypeNestedCleaned; - MatrixTypeNested m_matrix; - - inverse_impl(const MatrixType& matrix) - : m_matrix(matrix) - {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - template inline void evalTo(Dest& dst) const - { - const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime); - EIGEN_ONLY_USED_FOR_DEBUG(Size); - eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=extract_data(dst))) - && "Aliasing problem detected in inverse(), you need to do inverse().eval() here."); - - compute_inverse::run(m_matrix, dst); - } -}; - -} // end namespace internal - -/** \lu_module - * - * \returns the matrix inverse of this matrix. - * - * For small fixed sizes up to 4x4, this method uses cofactors. - * In the general case, this method uses class PartialPivLU. 
- * - * \note This matrix must be invertible, otherwise the result is undefined. If you need an - * invertibility check, do the following: - * \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck(). - * \li for the general case, use class FullPivLU. - * - * Example: \include MatrixBase_inverse.cpp - * Output: \verbinclude MatrixBase_inverse.out - * - * \sa computeInverseAndDetWithCheck() - */ -template -inline const internal::inverse_impl MatrixBase::inverse() const -{ - EIGEN_STATIC_ASSERT(!NumTraits::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) - eigen_assert(rows() == cols()); - return internal::inverse_impl(derived()); -} - -/** \lu_module - * - * Computation of matrix inverse and determinant, with invertibility check. - * - * This is only for fixed-size square matrices of size up to 4x4. - * - * \param inverse Reference to the matrix in which to store the inverse. - * \param determinant Reference to the variable in which to store the determinant. - * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. - * \param absDeterminantThreshold Optional parameter controlling the invertibility check. - * The matrix will be declared invertible if the absolute value of its - * determinant is greater than this threshold. - * - * Example: \include MatrixBase_computeInverseAndDetWithCheck.cpp - * Output: \verbinclude MatrixBase_computeInverseAndDetWithCheck.out - * - * \sa inverse(), computeInverseWithCheck() - */ -template -template -inline void MatrixBase::computeInverseAndDetWithCheck( - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible, - const RealScalar& absDeterminantThreshold - ) const -{ - // i'd love to put some static assertions there, but SFINAE means that they have no effect... - eigen_assert(rows() == cols()); - // for 2x2, it's worth giving a chance to avoid evaluating. - // for larger sizes, evaluating has negligible cost and limits code size. 
- typedef typename internal::conditional< - RowsAtCompileTime == 2, - typename internal::remove_all::type>::type, - PlainObject - >::type MatrixType; - internal::compute_inverse_and_det_with_check::run - (derived(), absDeterminantThreshold, inverse, determinant, invertible); -} - -/** \lu_module - * - * Computation of matrix inverse, with invertibility check. - * - * This is only for fixed-size square matrices of size up to 4x4. - * - * \param inverse Reference to the matrix in which to store the inverse. - * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. - * \param absDeterminantThreshold Optional parameter controlling the invertibility check. - * The matrix will be declared invertible if the absolute value of its - * determinant is greater than this threshold. - * - * Example: \include MatrixBase_computeInverseWithCheck.cpp - * Output: \verbinclude MatrixBase_computeInverseWithCheck.out - * - * \sa inverse(), computeInverseAndDetWithCheck() - */ -template -template -inline void MatrixBase::computeInverseWithCheck( - ResultType& inverse, - bool& invertible, - const RealScalar& absDeterminantThreshold - ) const -{ - RealScalar determinant; - // i'd love to put some static assertions there, but SFINAE means that they have no effect... - eigen_assert(rows() == cols()); - computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); -} - -} // end namespace Eigen - -#endif // EIGEN_INVERSE_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/LU/PartialPivLU_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/LU/PartialPivLU_MKL.h deleted file mode 100644 index 9035953c8..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/LU/PartialPivLU_MKL.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. 
- - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * LU decomposition with partial pivoting based on LAPACKE_?getrf function. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_PARTIALLU_LAPACK_H -#define EIGEN_PARTIALLU_LAPACK_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -namespace internal { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template \ -struct partial_lu_impl \ -{ \ - /* \internal performs the LU decomposition in-place of the matrix represented */ \ - static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ - { \ - EIGEN_UNUSED_VARIABLE(maxBlockSize);\ - lapack_int matrix_order, first_zero_pivot; \ - lapack_int m, n, lda, *ipiv, info; \ - EIGTYPE* a; \ -/* Set up parameters for ?getrf */ \ - matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - lda = luStride; \ - a = lu_data; \ - ipiv = row_transpositions; \ - m = rows; \ - n = cols; \ - nb_transpositions = 0; \ -\ - info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \ -\ - for(int i=0;i= 0); \ -/* something should be done with nb_transpositions */ \ -\ - first_zero_pivot = info; \ - return first_zero_pivot; \ - } \ -}; - -EIGEN_MKL_LU_PARTPIV(double, double, d) -EIGEN_MKL_LU_PARTPIV(float, float, s) -EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_PARTIALLU_LAPACK_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h b/uppsrc/plugin/Eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h index 933cd564b..da85b4d6e 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/uppsrc/plugin/Eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -1004,7 +1004,7 @@ static IndexType find_ordering /* return the number 
of garbage collections */ COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ; /* get pivot column from head of minimum degree list */ - while (head [min_score] == COLAMD_EMPTY && min_score < n_col) + while (min_score < n_col && head [min_score] == COLAMD_EMPTY) { min_score++ ; } diff --git a/uppsrc/plugin/Eigen/Eigen/src/QR/ColPivHouseholderQR.h b/uppsrc/plugin/Eigen/Eigen/src/QR/ColPivHouseholderQR.h index 0e47c8332..a7b47d55d 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/QR/ColPivHouseholderQR.h +++ b/uppsrc/plugin/Eigen/Eigen/src/QR/ColPivHouseholderQR.h @@ -506,8 +506,8 @@ void ColPivHouseholderQR::computeInPlace() m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); } - RealScalar threshold_helper = numext::abs2(m_colNormsUpdated.maxCoeff() * NumTraits::epsilon()) / RealScalar(rows); - RealScalar norm_downdate_threshold = numext::sqrt(NumTraits::epsilon()); + RealScalar threshold_helper = numext::abs2(m_colNormsUpdated.maxCoeff() * NumTraits::epsilon()) / RealScalar(rows); + RealScalar norm_downdate_threshold = numext::sqrt(NumTraits::epsilon()); m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_maxpivot = RealScalar(0); @@ -553,12 +553,12 @@ void ColPivHouseholderQR::computeInPlace() // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // and used in LAPACK routines xGEQPF and xGEQP3. // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html - if (m_colNormsUpdated.coeffRef(j) != 0) { + if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) { RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); - temp = temp < 0 ? 0 : temp; - RealScalar temp2 = temp * numext::abs2(m_colNormsUpdated.coeffRef(j) / - m_colNormsDirect.coeffRef(j)); + temp = temp < RealScalar(0) ? 
RealScalar(0) : temp; + RealScalar temp2 = temp * numext::abs2(m_colNormsUpdated.coeffRef(j) / + m_colNormsDirect.coeffRef(j)); if (temp2 <= norm_downdate_threshold) { // The updated norm has become too inaccurate so re-compute the column // norm directly. diff --git a/uppsrc/plugin/Eigen/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/QR/ColPivHouseholderQR_MKL.h deleted file mode 100644 index b5b198326..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/QR/ColPivHouseholderQR_MKL.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Householder QR decomposition of a matrix with column pivoting based on - * LAPACKE_?geqp3 function. - ******************************************************************************** -*/ - -#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_MKL_H -#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_QR_COLPIV(EIGTYPE, MKLTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ -template<> inline \ -ColPivHouseholderQR >& \ -ColPivHouseholderQR >::compute( \ - const Matrix& matrix) \ -\ -{ \ - using std::abs; \ - typedef Matrix MatrixType; \ - typedef MatrixType::Scalar Scalar; \ - typedef MatrixType::RealScalar RealScalar; \ - Index rows = matrix.rows();\ - Index cols = matrix.cols();\ - Index size = matrix.diagonalSize();\ -\ - m_qr = matrix;\ - m_hCoeffs.resize(size);\ -\ - m_colsTranspositions.resize(cols);\ - /*Index number_of_transpositions = 0;*/ \ -\ - m_nonzero_pivots = 0; \ - m_maxpivot = RealScalar(0);\ - m_colsPermutation.resize(cols); \ - m_colsPermutation.indices().setZero(); \ -\ - lapack_int lda = m_qr.outerStride(), i; \ - lapack_int matrix_order = MKLCOLROW; \ - LAPACKE_##MKLPREFIX##geqp3( matrix_order, rows, cols, (MKLTYPE*)m_qr.data(), lda, 
(lapack_int*)m_colsPermutation.indices().data(), (MKLTYPE*)m_hCoeffs.data()); \ - m_isInitialized = true; \ - m_maxpivot=m_qr.diagonal().cwiseAbs().maxCoeff(); \ - m_hCoeffs.adjointInPlace(); \ - RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold(); \ - lapack_int *perm = m_colsPermutation.indices().data(); \ - for(i=0;i premultiplied_threshold);\ - } \ - for(i=0;i \ -struct householder_qr_inplace_blocked \ -{ \ - static void run(MatrixQR& mat, HCoeffs& hCoeffs, \ - typename MatrixQR::Index = 32, \ - typename MatrixQR::Scalar* = 0) \ - { \ - lapack_int m = (lapack_int) mat.rows(); \ - lapack_int n = (lapack_int) mat.cols(); \ - lapack_int lda = (lapack_int) mat.outerStride(); \ - lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \ - hCoeffs.adjointInPlace(); \ - } \ -}; - -EIGEN_MKL_QR_NOPIV(double, double, d) -EIGEN_MKL_QR_NOPIV(float, float, s) -EIGEN_MKL_QR_NOPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_QR_NOPIV(scomplex, MKL_Complex8, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_QR_MKL_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/SVD/BDCSVD.h b/uppsrc/plugin/Eigen/Eigen/src/SVD/BDCSVD.h index 25fca6f4d..d7a4271cb 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SVD/BDCSVD.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SVD/BDCSVD.h @@ -77,6 +77,7 @@ public: typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; + typedef typename NumTraits::Literal Literal; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -259,7 +260,7 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); - if(scale==RealScalar(0)) scale = RealScalar(1); + 
if(scale==Literal(0)) scale = Literal(1); MatrixX copy; if (m_isTranspose) copy = matrix.adjoint()/scale; else copy = matrix/scale; @@ -351,13 +352,13 @@ void BDCSVD::structured_update(Block A, co Index k1=0, k2=0; for(Index j=0; j::divide (Index firstCol, Index lastCol, Index firstRowW, l = m_naiveU.row(1).segment(firstCol, k); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); } - if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; + if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1); if (r0::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); ArrayRef diag = m_workspace.head(n); - diag(0) = 0; + diag(0) = Literal(0); // Allocate space for singular values and vectors singVals.resize(n); @@ -590,7 +591,7 @@ void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec // but others are interleaved and we must ignore them at this stage. 
// To this end, let's compute a permutation skipping them: Index actual_n = n; - while(actual_n>1 && diag(actual_n-1)==0) --actual_n; + while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n; Index m = 0; // size of the deflated problem for(Index k=0;kconsiderZero) @@ -691,7 +692,7 @@ template typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) { Index m = perm.size(); - RealScalar res = 1; + RealScalar res = Literal(1); for(Index i=0; i::computeSingVals(const ArrayRef& col0, const ArrayRef& d Index n = col0.size(); Index actual_n = n; - while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) { - if (col0(k) == 0 || actual_n==1) + if (col0(k) == Literal(0) || actual_n==1) { // if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if actual_n==1, then the deflated problem is already diagonalized singVals(k) = k==0 ? col0(0) : diag(k); - mus(k) = 0; + mus(k) = Literal(0); shifts(k) = k==0 ? col0(0) : diag(k); continue; } @@ -733,13 +734,13 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d { // Skip deflated singular values Index l = k+1; - while(col0(l)==0) { ++l; eigen_internal_assert(l::computeSingVals(const ArrayRef& col0, const ArrayRef& d << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; #endif - RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; + RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? 
left : right; // measure everything relative to shift Map diagShifted(m_workspace.data()+4*n, n); @@ -785,13 +786,13 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate - bool useBisection = fPrev*fCur>0; - while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits::epsilon() && !useBisection) + bool useBisection = fPrev*fCur>Literal(0); + while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits::epsilon() && !useBisection) { ++m_numIters; // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. - RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); + RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev); RealScalar b = fCur - a / muCur; // And find mu such that f(mu)==0: RealScalar muZero = -a/b; @@ -803,8 +804,8 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d fCur = fZero; - if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; + if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true; + if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true; } @@ -841,13 +842,13 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; } #endif - eigen_internal_assert(fLeft * fRight < 0); + eigen_internal_assert(fLeft * fRight < 
Literal(0)); - while (rightShifted - leftShifted > 2 * NumTraits::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted))) + while (rightShifted - leftShifted > Literal(2) * NumTraits::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted))) { - RealScalar midShifted = (leftShifted + rightShifted) / 2; + RealScalar midShifted = (leftShifted + rightShifted) / Literal(2); fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); - if (fLeft * fMid < 0) + if (fLeft * fMid < Literal(0)) { rightShifted = midShifted; } @@ -858,7 +859,7 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d } } - muCur = (leftShifted + rightShifted) / 2; + muCur = (leftShifted + rightShifted) / Literal(2); } singVals[k] = shift + muCur; @@ -892,8 +893,8 @@ void BDCSVD::perturbCol0 // The offset permits to skip deflated entries while computing zhat for (Index k = 0; k < n; ++k) { - if (col0(k) == 0) // deflated - zhat(k) = 0; + if (col0(k) == Literal(0)) // deflated + zhat(k) = Literal(0); else { // see equation (3.6) @@ -918,7 +919,7 @@ void BDCSVD::perturbCol0 std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; #endif RealScalar tmp = sqrt(prod); - zhat(k) = col0(k) > 0 ? tmp : -tmp; + zhat(k) = col0(k) > Literal(0) ? 
tmp : -tmp; } } } @@ -934,7 +935,7 @@ void BDCSVD::computeSingVecs for (Index k = 0; k < n; ++k) { - if (zhat(k) == 0) + if (zhat(k) == Literal(0)) { U.col(k) = VectorType::Unit(n+1, k); if (m_compV) V.col(k) = VectorType::Unit(n, k); @@ -947,7 +948,7 @@ void BDCSVD::computeSingVecs Index i = perm(l); U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - U(n,k) = 0; + U(n,k) = Literal(0); U.col(k).normalize(); if (m_compV) @@ -958,7 +959,7 @@ void BDCSVD::computeSingVecs Index i = perm(l); V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - V(0,k) = -1; + V(0,k) = Literal(-1); V.col(k).normalize(); } } @@ -980,14 +981,14 @@ void BDCSVD::deflation43(Index firstCol, Index shift, Index i, Index RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); - if (r == 0) + if (r == Literal(0)) { - m_computed(start+i, start+i) = 0; + m_computed(start+i, start+i) = Literal(0); return; } m_computed(start,start) = r; - m_computed(start+i, start) = 0; - m_computed(start+i, start+i) = 0; + m_computed(start+i, start) = Literal(0); + m_computed(start+i, start+i) = Literal(0); JacobiRotation J(c/r,-s/r); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); @@ -1020,7 +1021,7 @@ void BDCSVD::deflation44(Index firstColu , Index firstColm, Index fi << m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; #endif - if (r==0) + if (r==Literal(0)) { m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); return; @@ -1029,7 +1030,7 @@ void BDCSVD::deflation44(Index firstColu , Index firstColm, Index fi s/=r; m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); - m_computed(firstColm + j, firstColm) = 0; + m_computed(firstColm + j, firstColm) = 
Literal(0); JacobiRotation J(c,-s); if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); @@ -1053,7 +1054,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index const RealScalar considerZero = (std::numeric_limits::min)(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = numext::maxi(considerZero,NumTraits::epsilon() * maxDiag); - RealScalar epsilon_coarse = 8 * NumTraits::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); + RealScalar epsilon_coarse = Literal(8) * NumTraits::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS assert(m_naiveU.allFinite()); @@ -1081,7 +1082,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; #endif - col0(i) = 0; + col0(i) = Literal(0); } //condition 4.3 diff --git a/uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD.h b/uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD.h index e0cfb6283..43488b1e0 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD.h @@ -112,9 +112,11 @@ public: ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Options = MatrixType::Options + TrOptions = RowsAtCompileTime==1 ? (MatrixType::Options & ~(RowMajor)) + : ColsAtCompileTime==1 ? 
(MatrixType::Options | RowMajor) + : MatrixType::Options }; - typedef Matrix + typedef Matrix TransposeTypeWithSameStorageOrder; void allocate(const JacobiSVD& svd) @@ -200,10 +202,12 @@ public: ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Options = MatrixType::Options + TrOptions = RowsAtCompileTime==1 ? (MatrixType::Options & ~(RowMajor)) + : ColsAtCompileTime==1 ? (MatrixType::Options | RowMajor) + : MatrixType::Options }; - typedef Matrix + typedef Matrix TransposeTypeWithSameStorageOrder; void allocate(const JacobiSVD& svd) diff --git a/uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD_MKL.h b/uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD_MKL.h deleted file mode 100644 index decda7540..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/SVD/JacobiSVD_MKL.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Singular Value Decomposition - SVD. - ******************************************************************************** -*/ - -#ifndef EIGEN_JACOBISVD_MKL_H -#define EIGEN_JACOBISVD_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_SVD(EIGTYPE, MKLTYPE, MKLRTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ -template<> inline \ -JacobiSVD, ColPivHouseholderQRPreconditioner>& \ -JacobiSVD, ColPivHouseholderQRPreconditioner>::compute(const Matrix& matrix, unsigned int computationOptions) \ -{ \ - typedef Matrix MatrixType; \ - typedef MatrixType::Scalar Scalar; \ - typedef MatrixType::RealScalar RealScalar; \ - allocate(matrix.rows(), matrix.cols(), computationOptions); \ -\ - /*const RealScalar precision = RealScalar(2) * NumTraits::epsilon();*/ \ - m_nonzeroSingularValues = m_diagSize; \ -\ - lapack_int lda = matrix.outerStride(), ldu, ldvt; \ - lapack_int matrix_order = MKLCOLROW; \ - char jobu, jobvt; \ - MKLTYPE *u, *vt, dummy; \ - jobu = (m_computeFullU) ? 'A' : (m_computeThinU) ? 'S' : 'N'; \ - jobvt = (m_computeFullV) ? 'A' : (m_computeThinV) ? 
'S' : 'N'; \ - if (computeU()) { \ - ldu = m_matrixU.outerStride(); \ - u = (MKLTYPE*)m_matrixU.data(); \ - } else { ldu=1; u=&dummy; }\ - MatrixType localV; \ - ldvt = (m_computeFullV) ? m_cols : (m_computeThinV) ? m_diagSize : 1; \ - if (computeV()) { \ - localV.resize(ldvt, m_cols); \ - vt = (MKLTYPE*)localV.data(); \ - } else { ldvt=1; vt=&dummy; }\ - Matrix superb; superb.resize(m_diagSize, 1); \ - MatrixType m_temp; m_temp = matrix; \ - LAPACKE_##MKLPREFIX##gesvd( matrix_order, jobu, jobvt, m_rows, m_cols, (MKLTYPE*)m_temp.data(), lda, (MKLRTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ - if (computeV()) m_matrixV = localV.adjoint(); \ - /* for(int i=0;i::Flags & RowMajorBit> > Y) { typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename NumTraits::Literal Literal; enum { StorageOrder = traits::Flags & RowMajorBit }; typedef InnerStride ColInnerStride; typedef InnerStride RowInnerStride; @@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, SubMatType A10( A.block(bs,0, brows-bs,bs) ); SubMatType A01( A.block(0,bs, bs,bcols-bs) ); Scalar tmp = A01(bs-1,0); - A01(bs-1,0) = 1; + A01(bs-1,0) = Literal(1); A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; A01(bs-1,0) = tmp; diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/AmbiVector.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/AmbiVector.h index 1233e164e..8a5cc91f2 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/AmbiVector.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/AmbiVector.h @@ -336,7 +336,7 @@ class AmbiVector<_Scalar,_StorageIndex>::Iterator { do { ++m_cachedIndex; - } while (m_cachedIndex::Iterator ListEl* EIGEN_RESTRICT llElements = reinterpret_cast(m_vector.m_buffer); do { m_currentEl = llElements[m_currentEl].next; - } while (m_currentEl>=0 && abs(llElements[m_currentEl].value)=0 && 
abs(llElements[m_currentEl].value)<=m_epsilon); if (m_currentEl<0) { m_cachedIndex = -1; @@ -363,9 +363,9 @@ class AmbiVector<_Scalar,_StorageIndex>::Iterator protected: const AmbiVector& m_vector; // the target vector - StorageIndex m_currentEl; // the current element in sparse/linked-list mode + StorageIndex m_currentEl; // the current element in sparse/linked-list mode RealScalar m_epsilon; // epsilon used to prune zero coefficients - StorageIndex m_cachedIndex; // current coordinate + StorageIndex m_cachedIndex; // current coordinate Scalar m_cachedValue; // current value bool m_isDense; // mode of the vector }; diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseAssign.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseAssign.h index 83776645b..18352a847 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseAssign.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseAssign.h @@ -143,10 +143,7 @@ struct Assignment dst.setZero(); internal::evaluator srcEval(src); - Index dstRows = src.rows(); - Index dstCols = src.cols(); - if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) - dst.resize(dstRows, dstCols); + resize_if_allowed(dst, src, func); internal::evaluator dstEval(dst); const Index outerEvaluationSize = (internal::evaluator::Flags&RowMajorBit) ? 
src.rows() : src.cols(); diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseCompressedBase.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseCompressedBase.h index e0850795c..5ccb46656 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseCompressedBase.h @@ -279,11 +279,11 @@ struct evaluator > Flags = Derived::Flags }; - evaluator() : m_matrix(0) + evaluator() : m_matrix(0), m_zero(0) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - explicit evaluator(const Derived &mat) : m_matrix(&mat) + explicit evaluator(const Derived &mat) : m_matrix(&mat), m_zero(0) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -296,26 +296,42 @@ struct evaluator > operator const Derived&() const { return *m_matrix; } typedef typename DenseCoeffsBase::CoeffReturnType CoeffReturnType; - Scalar coeff(Index row, Index col) const - { return m_matrix->coeff(row,col); } - + const Scalar& coeff(Index row, Index col) const + { + Index p = find(row,col); + + if(p==Dynamic) + return m_zero; + else + return m_matrix->const_cast_derived().valuePtr()[p]; + } + Scalar& coeffRef(Index row, Index col) + { + Index p = find(row,col); + eigen_assert(p!=Dynamic && "written coefficient does not exist"); + return m_matrix->const_cast_derived().valuePtr()[p]; + } + +protected: + + Index find(Index row, Index col) const { eigen_internal_assert(row>=0 && rowrows() && col>=0 && colcols()); - + const Index outer = Derived::IsRowMajor ? row : col; const Index inner = Derived::IsRowMajor ? col : row; Index start = m_matrix->outerIndexPtr()[outer]; Index end = m_matrix->isCompressed() ? 
m_matrix->outerIndexPtr()[outer+1] : m_matrix->outerIndexPtr()[outer] + m_matrix->innerNonZeroPtr()[outer]; - eigen_assert(end>start && "you are using a non finalized sparse matrix or written coefficient does not exist"); - const Index p = std::lower_bound(m_matrix->innerIndexPtr()+start, m_matrix->innerIndexPtr()+end,inner) - - m_matrix->innerIndexPtr(); - eigen_assert((pinnerIndexPtr()[p]==inner) && "written coefficient does not exist"); - return m_matrix->const_cast_derived().valuePtr()[p]; + eigen_assert(end>=start && "you are using a non finalized sparse matrix or written coefficient does not exist"); + const Index p = std::lower_bound(m_matrix->innerIndexPtr()+start, m_matrix->innerIndexPtr()+end,inner) - m_matrix->innerIndexPtr(); + + return ((pinnerIndexPtr()[p]==inner)) ? p : Dynamic; } const Derived *m_matrix; + const Scalar m_zero; }; } diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index c1ddd1ac1..e315e3550 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -45,7 +45,7 @@ class CwiseBinaryOpImpl EIGEN_STATIC_ASSERT(( (!internal::is_same::StorageKind, typename internal::traits::StorageKind>::value) - || ((Lhs::Flags&RowMajorBit) == (Rhs::Flags&RowMajorBit))), + || ((internal::evaluator::Flags&RowMajorBit) == (internal::evaluator::Flags&RowMajorBit))), THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH); } }; @@ -110,6 +110,7 @@ public: EIGEN_STRONG_INLINE Scalar value() const { return m_value; } EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; } + EIGEN_STRONG_INLINE Index outer() const { return m_lhsIter.outer(); } EIGEN_STRONG_INLINE Index row() const { return Lhs::IsRowMajor ? m_lhsIter.row() : index(); } EIGEN_STRONG_INLINE Index col() const { return Lhs::IsRowMajor ? 
index() : m_lhsIter.col(); } @@ -193,6 +194,7 @@ public: EIGEN_STRONG_INLINE Scalar value() const { eigen_internal_assert(m_id, Lhs, Rhs>, Itera explicit binary_evaluator(const XprType& xpr) : Base(xpr) {} }; +// "sparse ./ dense" +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> + : sparse_conjunction_evaluator, Lhs, Rhs> > +{ + typedef CwiseBinaryOp, Lhs, Rhs> XprType; + typedef sparse_conjunction_evaluator Base; + explicit binary_evaluator(const XprType& xpr) : Base(xpr) {} +}; + // "sparse && sparse" template struct binary_evaluator, IteratorBased, IteratorBased> @@ -432,6 +445,7 @@ public: EIGEN_STRONG_INLINE Scalar value() const { return m_functor(m_lhsIter.value(), m_rhsIter.value()); } EIGEN_STRONG_INLINE StorageIndex index() const { return m_lhsIter.index(); } + EIGEN_STRONG_INLINE Index outer() const { return m_lhsIter.outer(); } EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); } EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); } @@ -503,6 +517,7 @@ public: { return m_functor(m_lhsEval.coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); } EIGEN_STRONG_INLINE StorageIndex index() const { return m_rhsIter.index(); } + EIGEN_STRONG_INLINE Index outer() const { return m_rhsIter.outer(); } EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); } EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); } @@ -577,6 +592,7 @@ public: m_rhsEval.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); } EIGEN_STRONG_INLINE StorageIndex index() const { return m_lhsIter.index(); } + EIGEN_STRONG_INLINE Index outer() const { return m_lhsIter.outer(); } EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); } EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); } diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h 
index e4af49e09..941c03be3 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -80,6 +80,8 @@ public: sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagonalCoeffType &diagCoeff) : m_sparseXprImpl(sparseXpr), m_diagCoeffImpl(diagCoeff) {} + + Index nonZerosEstimate() const { return m_sparseXprImpl.nonZerosEstimate(); } protected: evaluator m_sparseXprImpl; @@ -121,6 +123,8 @@ struct sparse_diagonal_product_evaluator m_sparseXprEval; diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseMatrix.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseMatrix.h index fcf6dbbad..323c2323b 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseMatrix.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseMatrix.h @@ -32,18 +32,22 @@ namespace Eigen { * \tparam _Scalar the scalar type, i.e. the type of the coefficients * \tparam _Options Union of bit flags controlling the storage scheme. Currently the only possibility * is ColMajor or RowMajor. The default is 0 which means column-major. - * \tparam _Index the type of the indices. It has to be a \b signed type (e.g., short, int, std::ptrdiff_t). Default is \c int. + * \tparam _StorageIndex the type of the indices. It has to be a \b signed type (e.g., short, int, std::ptrdiff_t). Default is \c int. + * + * \warning In %Eigen 3.2, the undocumented type \c SparseMatrix::Index was improperly defined as the storage index type (e.g., int), + * whereas it is now (starting from %Eigen 3.3) deprecated and always defined as Eigen::Index. + * Codes making use of \c SparseMatrix::Index, might thus likely have to be changed to use \c SparseMatrix::StorageIndex instead. * * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_SPARSEMATRIX_PLUGIN. 
*/ namespace internal { -template -struct traits > +template +struct traits > { typedef _Scalar Scalar; - typedef _Index StorageIndex; + typedef _StorageIndex StorageIndex; typedef Sparse StorageKind; typedef MatrixXpr XprKind; enum { @@ -56,16 +60,16 @@ struct traits > }; }; -template -struct traits, DiagIndex> > +template +struct traits, DiagIndex> > { - typedef SparseMatrix<_Scalar, _Options, _Index> MatrixType; + typedef SparseMatrix<_Scalar, _Options, _StorageIndex> MatrixType; typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; typedef _Scalar Scalar; typedef Dense StorageKind; - typedef _Index StorageIndex; + typedef _StorageIndex StorageIndex; typedef MatrixXpr XprKind; enum { @@ -77,9 +81,9 @@ struct traits, DiagIndex> > }; }; -template -struct traits, DiagIndex> > - : public traits, DiagIndex> > +template +struct traits, DiagIndex> > + : public traits, DiagIndex> > { enum { Flags = 0 @@ -88,13 +92,13 @@ struct traits, DiagIndex> } // end namespace internal -template +template class SparseMatrix - : public SparseCompressedBase > + : public SparseCompressedBase > { typedef SparseCompressedBase Base; using Base::convert_index; - friend class SparseVector<_Scalar,0,_Index>; + friend class SparseVector<_Scalar,0,_StorageIndex>; public: using Base::isCompressed; using Base::nonZeros; @@ -984,11 +988,11 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa * an abstract iterator over a complex data-structure that would be expensive to evaluate. The triplets should rather * be explicitely stored into a std::vector for instance. 
*/ -template +template template -void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) +void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); + internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); } /** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: @@ -1000,17 +1004,17 @@ void SparseMatrix::setFromTriplets(const InputIterators& * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; }); * \endcode */ -template +template template -void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) +void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) { - internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); + internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); } /** \internal */ -template +template template -void SparseMatrix::collapseDuplicates(DupFunctor dup_func) +void SparseMatrix::collapseDuplicates(DupFunctor dup_func) { eigen_assert(!isCompressed()); // TODO, in practice we should be able to use m_innerNonZeros for that task @@ -1048,9 +1052,9 @@ void SparseMatrix::collapseDuplicates(DupFunctor dup_fun m_data.resize(m_outerIndex[m_outerSize]); } -template +template template -EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::operator=(const SparseMatrixBase& other) +EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::operator=(const SparseMatrixBase& other) { EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) @@ -1121,8 +1125,8 @@ EIGEN_DONT_INLINE SparseMatrix& SparseMatrix -typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& 
SparseMatrix<_Scalar,_Options,_Index>::insert(Index row, Index col) +template +typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insert(Index row, Index col) { eigen_assert(row>=0 && row=0 && col::Scalar& SparseMatrix<_Scalar,_Op return insertUncompressed(row,col); } -template -EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertUncompressed(Index row, Index col) +template +EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insertUncompressed(Index row, Index col) { eigen_assert(!isCompressed()); @@ -1273,8 +1277,8 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse return (m_data.value(p) = 0); } -template -EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertCompressed(Index row, Index col) +template +EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insertCompressed(Index row, Index col) { eigen_assert(isCompressed()); @@ -1297,11 +1301,11 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g., // the 2nd inner vector... 
bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0)) - && (size_t(m_outerIndex[outer+1]) == m_data.size()); + && (std::size_t(m_outerIndex[outer+1]) == m_data.size()); - size_t startId = m_outerIndex[outer]; - // FIXME let's make sure sizeof(long int) == sizeof(size_t) - size_t p = m_outerIndex[outer+1]; + std::size_t startId = m_outerIndex[outer]; + // FIXME let's make sure sizeof(long int) == sizeof(std::size_t) + std::size_t p = m_outerIndex[outer+1]; ++m_outerIndex[outer+1]; double reallocRatio = 1; @@ -1382,12 +1386,12 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse namespace internal { -template -struct evaluator > - : evaluator > > +template +struct evaluator > + : evaluator > > { - typedef evaluator > > Base; - typedef SparseMatrix<_Scalar,_Options,_Index> SparseMatrixType; + typedef evaluator > > Base; + typedef SparseMatrix<_Scalar,_Options,_StorageIndex> SparseMatrixType; evaluator() : Base() {} explicit evaluator(const SparseMatrixType &mat) : Base(mat) {} }; diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseMatrixBase.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseMatrixBase.h index 0da4c2a36..c6b548f11 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseMatrixBase.h @@ -37,7 +37,11 @@ template class SparseMatrixBase typedef typename internal::packet_traits::type PacketScalar; typedef typename internal::traits::StorageKind StorageKind; + + /** The integer type used to \b store indices within a SparseMatrix. + * For a \c SparseMatrix it an alias of the third template parameter \c IndexType. 
*/ typedef typename internal::traits::StorageIndex StorageIndex; + typedef typename internal::add_const_on_value_type_if_arithmetic< typename internal::packet_traits::type >::type PacketReturnType; diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h index 9e39be738..5ab64f1a8 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -47,6 +47,7 @@ template class SparseSelfAdjointView enum { Mode = _Mode, + TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0), RowsAtCompileTime = internal::traits::RowsAtCompileTime, ColsAtCompileTime = internal::traits::ColsAtCompileTime }; @@ -368,7 +369,7 @@ struct generic_product_impl dstT(dst); - internal::sparse_selfadjoint_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); + internal::sparse_selfadjoint_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); } }; diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseTriangularView.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseTriangularView.h index 0c27855d5..9ac120266 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseTriangularView.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseTriangularView.h @@ -55,7 +55,10 @@ template class TriangularViewImplsolveInPlace(dst); } + /** Applies the inverse of \c *this to the dense vector or matrix \a other, "in-place" */ template void solveInPlace(MatrixBase& other) const; + + /** Applies the inverse of \c *this to the sparse vector or matrix \a other, "in-place" */ template void solveInPlace(SparseMatrixBase& other) const; }; diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseView.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseView.h index b867877d8..7c4aea743 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseView.h +++ 
b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/SparseView.h @@ -27,6 +27,20 @@ struct traits > : traits } // end namespace internal +/** \ingroup SparseCore_Module + * \class SparseView + * + * \brief Expression of a dense or sparse matrix with zero or too small values removed + * + * \tparam MatrixType the type of the object of which we are removing the small entries + * + * This class represents an expression of a given dense or sparse matrix with + * entries smaller than \c reference * \c epsilon are removed. + * It is the return type of MatrixBase::sparseView() and SparseMatrixBase::pruned() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::sparseView(), SparseMatrixBase::pruned() + */ template class SparseView : public SparseMatrixBase > { @@ -190,6 +204,23 @@ struct unary_evaluator, IndexBased> } // end namespace internal +/** \ingroup SparseCore_Module + * + * \returns a sparse expression of the dense expression \c *this with values smaller than + * \a reference * \a epsilon removed. + * + * This method is typically used when prototyping to convert a quickly assembled dense Matrix \c D to a SparseMatrix \c S: + * \code + * MatrixXd D(n,m); + * SparseMatrix S; + * S = D.sparseView(); // suppress numerical zeros (exact) + * S = D.sparseView(reference); + * S = D.sparseView(reference,epsilon); + * \endcode + * where \a reference is a meaningful non zero reference value, + * and \a epsilon is a tolerance factor defaulting to NumTraits::dummy_precision(). + * + * \sa SparseMatrixBase::pruned(), class SparseView */ template const SparseView MatrixBase::sparseView(const Scalar& reference, const typename NumTraits::Real& epsilon) const @@ -198,7 +229,7 @@ const SparseView MatrixBase::sparseView(const Scalar& referenc } /** \returns an expression of \c *this with values smaller than - * \a reference * \a epsilon are removed. + * \a reference * \a epsilon removed. 
* * This method is typically used in conjunction with the product of two sparse matrices * to automatically prune the smallest values as follows: diff --git a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/TriangularSolver.h b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/TriangularSolver.h index 19f8f6704..f9c56ba79 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SparseCore/TriangularSolver.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SparseCore/TriangularSolver.h @@ -171,6 +171,8 @@ struct sparse_solve_triangular_selector } // end namespace internal +#ifndef EIGEN_PARSED_BY_DOXYGEN + template template void TriangularViewImpl::solveInPlace(MatrixBase& other) const @@ -189,6 +191,7 @@ void TriangularViewImpl::solveInPlace(MatrixBase } // end namespace internal +#ifndef EIGEN_PARSED_BY_DOXYGEN template template void TriangularViewImpl::solveInPlace(SparseMatrixBase& other) const @@ -304,6 +308,7 @@ void TriangularViewImpl::solveInPlace(SparseMatrixBa // if (copy) // other = otherCopy; } +#endif } // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/Eigen/src/StlSupport/details.h b/uppsrc/plugin/Eigen/Eigen/src/StlSupport/details.h index e42ec024f..2cfd13e03 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/StlSupport/details.h +++ b/uppsrc/plugin/Eigen/Eigen/src/StlSupport/details.h @@ -22,13 +22,13 @@ namespace Eigen { class aligned_allocator_indirection : public EIGEN_ALIGNED_ALLOCATOR { public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; template struct rebind diff --git a/uppsrc/plugin/Eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h b/uppsrc/plugin/Eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h index 
88c44bcd0..50a69f306 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/uppsrc/plugin/Eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -967,6 +967,7 @@ void SuperILU::factorize(const MatrixType& a) m_factorizationIsOk = true; } +#ifndef EIGEN_PARSED_BY_DOXYGEN template template void SuperILU::_solve_impl(const MatrixBase &b, MatrixBase& x) const @@ -1019,6 +1020,8 @@ void SuperILU::_solve_impl(const MatrixBase &b, MatrixBase > * * \sa umfpackControl() */ - void printUmfpackControl() + void umfpackReportControl() { umfpack_report_control(m_control.data(), Scalar()); } @@ -329,7 +329,7 @@ class UmfPackLU : public SparseSolverBase > * * \sa analyzePattern(), compute() */ - void printUmfpackInfo() + void umfpackReportInfo() { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar()); @@ -339,7 +339,7 @@ class UmfPackLU : public SparseSolverBase > * * \sa analyzePattern(), compute() */ - void printUmfpackStatus() { + void umfpackReportStatus() { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar()); } @@ -410,7 +410,7 @@ class UmfPackLU : public SparseSolverBase > mutable LUMatrixType m_l; int m_fact_errorCode; UmfpackControl m_control; - UmfpackInfo m_umfpackInfo; + mutable UmfpackInfo m_umfpackInfo; mutable LUMatrixType m_u; mutable IntColVectorType m_p; diff --git a/uppsrc/plugin/Eigen/Eigen/src/misc/Solve.h b/uppsrc/plugin/Eigen/Eigen/src/misc/Solve.h deleted file mode 100644 index 7f70d60af..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/misc/Solve.h +++ /dev/null @@ -1,76 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MISC_SOLVE_H -#define EIGEN_MISC_SOLVE_H - -namespace Eigen { - -namespace internal { - -/** \class solve_retval_base - * - */ -template -struct traits > -{ - typedef typename DecompositionType::MatrixType MatrixType; - typedef Matrix ReturnType; -}; - -template struct solve_retval_base - : public ReturnByValue > -{ - typedef typename remove_all::type RhsNestedCleaned; - typedef _DecompositionType DecompositionType; - typedef ReturnByValue Base; - typedef typename Base::Index Index; - - solve_retval_base(const DecompositionType& dec, const Rhs& rhs) - : m_dec(dec), m_rhs(rhs) - {} - - inline Index rows() const { return m_dec.cols(); } - inline Index cols() const { return m_rhs.cols(); } - inline const DecompositionType& dec() const { return m_dec; } - inline const RhsNestedCleaned& rhs() const { return m_rhs; } - - template inline void evalTo(Dest& dst) const - { - static_cast*>(this)->evalTo(dst); - } - - protected: - const DecompositionType& m_dec; - typename Rhs::Nested m_rhs; -}; - -} // end namespace internal - -#define EIGEN_MAKE_SOLVE_HELPERS(DecompositionType,Rhs) \ - typedef typename DecompositionType::MatrixType MatrixType; \ - typedef typename MatrixType::Scalar Scalar; \ - typedef typename MatrixType::RealScalar RealScalar; \ - typedef typename MatrixType::Index Index; \ - typedef Eigen::internal::solve_retval_base Base; \ - using Base::dec; \ - using Base::rhs; \ - using Base::rows; \ - using Base::cols; \ - solve_retval(const DecompositionType& dec, const Rhs& rhs) \ - : Base(dec, rhs) {} - -} // end namespace Eigen - -#endif // EIGEN_MISC_SOLVE_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/misc/SparseSolve.h b/uppsrc/plugin/Eigen/Eigen/src/misc/SparseSolve.h deleted file mode 100644 index 244bb8ec7..000000000 --- a/uppsrc/plugin/Eigen/Eigen/src/misc/SparseSolve.h +++ /dev/null @@ -1,128 +0,0 @@ -// This file is part of 
Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_SOLVE_H -#define EIGEN_SPARSE_SOLVE_H - -namespace Eigen { - -namespace internal { - -template struct sparse_solve_retval_base; -template struct sparse_solve_retval; - -template -struct traits > -{ - typedef typename DecompositionType::MatrixType MatrixType; - typedef SparseMatrix ReturnType; -}; - -template struct sparse_solve_retval_base - : public ReturnByValue > -{ - typedef typename remove_all::type RhsNestedCleaned; - typedef _DecompositionType DecompositionType; - typedef ReturnByValue Base; - typedef typename Base::Index Index; - - sparse_solve_retval_base(const DecompositionType& dec, const Rhs& rhs) - : m_dec(dec), m_rhs(rhs) - {} - - inline Index rows() const { return m_dec.cols(); } - inline Index cols() const { return m_rhs.cols(); } - inline const DecompositionType& dec() const { return m_dec; } - inline const RhsNestedCleaned& rhs() const { return m_rhs; } - - template inline void evalTo(Dest& dst) const - { - static_cast*>(this)->evalTo(dst); - } - - protected: - template - inline void defaultEvalTo(SparseMatrix& dst) const - { - // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix. 
- static const int NbColsAtOnce = 4; - int rhsCols = m_rhs.cols(); - int size = m_rhs.rows(); - Eigen::Matrix tmp(size,rhsCols); - Eigen::Matrix tmpX(size,rhsCols); - for(int k=0; k(rhsCols-k, NbColsAtOnce); - tmp.leftCols(actualCols) = m_rhs.middleCols(k,actualCols); - tmpX.leftCols(actualCols) = m_dec.solve(tmp.leftCols(actualCols)); - dst.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView(); - } - } - const DecompositionType& m_dec; - typename Rhs::Nested m_rhs; -}; - -#define EIGEN_MAKE_SPARSE_SOLVE_HELPERS(DecompositionType,Rhs) \ - typedef typename DecompositionType::MatrixType MatrixType; \ - typedef typename MatrixType::Scalar Scalar; \ - typedef typename MatrixType::RealScalar RealScalar; \ - typedef typename MatrixType::Index Index; \ - typedef Eigen::internal::sparse_solve_retval_base Base; \ - using Base::dec; \ - using Base::rhs; \ - using Base::rows; \ - using Base::cols; \ - sparse_solve_retval(const DecompositionType& dec, const Rhs& rhs) \ - : Base(dec, rhs) {} - - - -template struct solve_retval_with_guess; - -template -struct traits > -{ - typedef typename DecompositionType::MatrixType MatrixType; - typedef Matrix ReturnType; -}; - -template struct solve_retval_with_guess - : public ReturnByValue > -{ - typedef typename DecompositionType::Index Index; - - solve_retval_with_guess(const DecompositionType& dec, const Rhs& rhs, const Guess& guess) - : m_dec(dec), m_rhs(rhs), m_guess(guess) - {} - - inline Index rows() const { return m_dec.cols(); } - inline Index cols() const { return m_rhs.cols(); } - - template inline void evalTo(Dest& dst) const - { - dst = m_guess; - m_dec._solveWithGuess(m_rhs,dst); - } - - protected: - const DecompositionType& m_dec; - const typename Rhs::Nested m_rhs; - const typename Guess::Nested m_guess; -}; - -} // namepsace internal - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_SOLVE_H diff --git a/uppsrc/plugin/Eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h 
b/uppsrc/plugin/Eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 43615bd56..ebaa3f192 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/uppsrc/plugin/Eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -10,7 +10,6 @@ typedef CwiseUnaryOp, const Derived> Inverse typedef CwiseUnaryOp, const Derived> BooleanNotReturnType; typedef CwiseUnaryOp, const Derived> ExpReturnType; -typedef CwiseUnaryOp, const Derived> Expm1ReturnType; typedef CwiseUnaryOp, const Derived> LogReturnType; typedef CwiseUnaryOp, const Derived> Log1pReturnType; typedef CwiseUnaryOp, const Derived> Log10ReturnType; @@ -91,20 +90,6 @@ exp() const return ExpReturnType(derived()); } -/** \returns an expression of the coefficient-wise exponential of *this minus 1. - * - * In exact arithmetic, \c x.expm1() is equivalent to \c x.exp() - 1, - * however, with finite precision, this function is much more accurate when \c x is close to zero. - * - * \sa Math functions, exp() - */ -EIGEN_DEVICE_FUNC -inline const Expm1ReturnType -expm1() const -{ - return Expm1ReturnType(derived()); -} - /** \returns an expression of the coefficient-wise logarithm of *this. * * This function computes the coefficient-wise logarithm. The function MatrixBase::log() in the @@ -113,7 +98,7 @@ expm1() const * Example: \include Cwise_log.cpp * Output: \verbinclude Cwise_log.out * - * \sa Math functions, log() + * \sa Math functions, exp() */ EIGEN_DEVICE_FUNC inline const LogReturnType diff --git a/uppsrc/plugin/Eigen/Eigen/src/plugins/BlockMethods.h b/uppsrc/plugin/Eigen/Eigen/src/plugins/BlockMethods.h index b76973613..ac35a0086 100644 --- a/uppsrc/plugin/Eigen/Eigen/src/plugins/BlockMethods.h +++ b/uppsrc/plugin/Eigen/Eigen/src/plugins/BlockMethods.h @@ -818,7 +818,7 @@ inline typename FixedBlockXpr::Type block(Index startRow, Index sta return typename FixedBlockXpr::Type(derived(), startRow, startCol, blockRows, blockCols); } -/// This is the const version of block<>(Index, Index, Index, Index). 
*/ +/// This is the const version of block<>(Index, Index, Index, Index). template inline const typename ConstFixedBlockXpr::Type block(Index startRow, Index startCol, Index blockRows, Index blockCols) const @@ -832,15 +832,15 @@ inline const typename ConstFixedBlockXpr::Type block(Index startRow /// Output: \verbinclude MatrixBase_col.out /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) -/// -/// \sa row(), class Block */ +/** + * \sa row(), class Block */ EIGEN_DEVICE_FUNC inline ColXpr col(Index i) { return ColXpr(derived(), i); } -/// This is the const version of col(). */ +/// This is the const version of col(). EIGEN_DEVICE_FUNC inline ConstColXpr col(Index i) const { @@ -853,8 +853,8 @@ inline ConstColXpr col(Index i) const /// Output: \verbinclude MatrixBase_row.out /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) -/// -/// \sa col(), class Block */ +/** + * \sa col(), class Block */ EIGEN_DEVICE_FUNC inline RowXpr row(Index i) { diff --git a/uppsrc/plugin/Eigen/README.md b/uppsrc/plugin/Eigen/README.md new file mode 100644 index 000000000..4654a81c3 --- /dev/null +++ b/uppsrc/plugin/Eigen/README.md @@ -0,0 +1,3 @@ +**Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.** + +For more information go to http://eigen.tuxfamily.org/. diff --git a/uppsrc/plugin/Eigen/srcdoc.tpp/Eigen$en-us.tpp b/uppsrc/plugin/Eigen/srcdoc.tpp/Eigen$en-us.tpp index e17812e93..2b72cecaf 100644 --- a/uppsrc/plugin/Eigen/srcdoc.tpp/Eigen$en-us.tpp +++ b/uppsrc/plugin/Eigen/srcdoc.tpp/Eigen$en-us.tpp @@ -17,7 +17,7 @@ vectors, numerical solvers and related algorithms.]&] [s0; [C2 -|Matrix2d res `= a`*b;-|// Just multiply them using `*]&] [s0;#2 &] [s0;#2 &] -[s0;# [2 Eigen package is a wrapper of Eigen 3.3.1 library. It includes +[s0;# [2 Eigen package is a wrapper of Eigen 3.3.4 library. It includes the library and helper functions to integrate better Eigen with U`+`+. 
Starting from the 3.1.1 version, it is licensed under the ][^http`:`/`/www`.mozilla`.org`/MPL`/2`.0`/^2 MPL2][2 , which diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/Tensor b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/Tensor index 39916092b..7ecb4c74d 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/Tensor +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/Tensor @@ -13,14 +13,13 @@ #include "../../../Eigen/Core" -#if defined(EIGEN_USE_SYCL) +#ifdef EIGEN_USE_SYCL #undef min #undef max #undef isnan #undef isinf #undef isfinite #include -#include #include #include #include @@ -53,10 +52,8 @@ typedef __int32 int32_t; typedef unsigned __int32 uint32_t; typedef __int64 int64_t; typedef unsigned __int64 uint64_t; -#include #else #include -#include #endif #if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 @@ -71,10 +68,6 @@ typedef unsigned __int64 uint64_t; #include #endif -#if defined(EIGEN_USE_LIBXSMM) -#include "libxsmm.h" -#endif - #ifdef EIGEN_USE_THREADS #include "ThreadPool" #endif diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/ThreadPool b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/ThreadPool index c34614194..09d637e9a 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/ThreadPool +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/ThreadPool @@ -50,7 +50,6 @@ #include "src/ThreadPool/ThreadLocal.h" #include "src/ThreadPool/ThreadYield.h" -#include "src/ThreadPool/ThreadCancel.h" #include "src/ThreadPool/EventCount.h" #include "src/ThreadPool/RunQueue.h" #include "src/ThreadPool/ThreadPoolInterface.h" @@ -58,18 +57,6 @@ #include "src/ThreadPool/SimpleThreadPool.h" #include "src/ThreadPool/NonBlockingThreadPool.h" - -// Use the more efficient NonBlockingThreadPool by default. 
-namespace Eigen { -#ifndef EIGEN_USE_SIMPLE_THREAD_POOL -template using ThreadPoolTempl = NonBlockingThreadPoolTempl; -typedef NonBlockingThreadPool ThreadPool; -#else -template using ThreadPoolTempl = SimpleThreadPoolTempl; -typedef SimpleThreadPool ThreadPool; -#endif -} // namespace Eigen - #endif #include diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/README.md b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/README.md index fbb7f3bfc..98e83811b 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -75,16 +75,16 @@ large enough to hold all the data. // Map a tensor of ints on top of stack-allocated storage. int storage[128]; // 2 x 4 x 2 x 8 = 128 - TensorMap t_4d(storage, 2, 4, 2, 8); + TensorMap> t_4d(storage, 2, 4, 2, 8); // The same storage can be viewed as a different tensor. // You can also pass the sizes as an array. - TensorMap t_2d(storage, 16, 8); + TensorMap> t_2d(storage, 16, 8); // You can also map fixed-size tensors. Here we get a 1d view of // the 2d fixed-size tensor. Tensor> t_4x3; - TensorMap t_12(t_4x3, 12); + TensorMap> t_12(t_4x3, 12); #### Class TensorRef @@ -1737,9 +1737,11 @@ TODO ## Representation of scalar values -Scalar values are often represented by tensors of size 1 and rank 0.For example -Tensor::maximum() currently returns a Tensor. Similarly, the inner -product of 2 1d tensors (through contractions) returns a 0d tensor. +Scalar values are often represented by tensors of size 1 and rank 1. It would be +more logical and user friendly to use tensors of rank 0 instead. For example +Tensor::maximum() currently returns a Tensor. Similarly, the inner +product of 2 1d tensors (through contractions) returns a 1d tensor. In the +future these operations might be updated to return 0d tensors instead. 
## Limitations diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index fbe340820..7a45a5cf4 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -185,12 +185,6 @@ class TensorBase return unaryExpr(internal::scalar_exp_op()); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> - expm1() const { - return unaryExpr(internal::scalar_expm1_op()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> log() const { diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index 2c7ba961c..59bf90d93 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -276,12 +276,6 @@ struct TensorEvaluator& left_impl() const { return m_leftImpl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator& right_impl() const { return m_rightImpl; } - /// required by sycl in order to extract the accessor - const Axis& axis() const { return m_axis; } protected: Dimensions m_dimensions; diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 442c14fac..20b29e5fd 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -20,70 +20,6 @@ namespace Eigen { * */ namespace internal { -#if defined(EIGEN_VECTORIZE_AVX) && defined(EIGEN_USE_LIBXSMM) -template -void pack_simple(Scalar * dst, const Scalar * src, Index cols, Index rows, Index lddst, Index ldsrc) 
{ - size_t psize = packet_traits::size; // Packet size - typedef typename packet_traits::type Packet; // Packet type - size_t alignment = psize*sizeof(Scalar); // Needed alignment - if (rows % psize == 0 && (lddst*sizeof(Scalar)) % alignment == 0 && - (ldsrc*sizeof(Scalar)) % alignment == 0 && - reinterpret_cast(src) % alignment == 0 && - reinterpret_cast(dst) % alignment == 0) { - // Optimized version using packets - size_t num_packets = rows / psize; - for (Index col = 0; col < cols; ++col) { - EIGEN_ASM_COMMENT("begin pack_simple inner copy"); - // Unrolled manually 4 times. - for (size_t i=0; i < num_packets/4; ++i) { - internal::pstore(dst, internal::pload(src)); - dst += psize; src += psize; - internal::pstore(dst, internal::pload(src)); - dst += psize; src += psize; - internal::pstore(dst, internal::pload(src)); - dst += psize; src += psize; - internal::pstore(dst, internal::pload(src)); - dst += psize; src += psize; - } - for (size_t i=0; i < num_packets%4; ++i) { - internal::pstore(dst, internal::pload(src)); - dst += psize; src += psize; - } - dst += lddst - num_packets*psize; - src += ldsrc - num_packets*psize; - EIGEN_ASM_COMMENT("end pack_simple inner copy"); - } - } else { - // Naive memcpy calls - for (Index col = 0; col < cols; ++col) { - memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar)); - } - } -} - -template - struct libxsmm_wrapper { - libxsmm_wrapper() {} - libxsmm_wrapper(int flags, int m, int n, int k, int lda, int ldb, int ldc, float alpha, float beta, int prefetch) {} - void operator()(const LhsScalar* a, const RhsScalar* b, Scalar* c) {} - void operator()(const LhsScalar* a, const RhsScalar* b, Scalar* c, const LhsScalar* ap, const RhsScalar* bp, const Scalar* cp) {} - }; - - template<> - struct libxsmm_wrapper: public libxsmm_mmfunction { - libxsmm_wrapper(): libxsmm_mmfunction() {} - libxsmm_wrapper(int flags, int m, int n, int k, int lda, int ldb, int ldc, float alpha, float beta, int prefetch) : - libxsmm_mmfunction(flags, 
m, n, k, lda, ldb, ldc, alpha, beta, prefetch) {} - }; - - template<> - struct libxsmm_wrapper: public libxsmm_mmfunction { - libxsmm_wrapper(): libxsmm_mmfunction() {} - libxsmm_wrapper(int flags, int m, int n, int k, int lda, int ldb, int ldc, float alpha, float beta, int prefetch) : - libxsmm_mmfunction(flags, m, n, k, lda, ldb, ldc, alpha, beta, prefetch) {} - }; -#endif - template struct traits > @@ -220,9 +156,9 @@ struct TensorContractionEvaluatorBase m_rightImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), op.rhsExpression(), op.lhsExpression()), device), m_device(device), - m_result(NULL), m_expr_indices(op.indices()) { + m_result(NULL) { EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == - static_cast(TensorEvaluator::Layout)), + static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); @@ -381,8 +317,6 @@ struct TensorContractionEvaluatorBase } } - EnableXSMMIfPossible(eval_op_indices); - // If the layout is RowMajor, we need to reverse the m_dimensions if (static_cast(Layout) == static_cast(RowMajor)) { for (int i = 0, j = NumDims - 1; i < j; i++, j--) { @@ -393,7 +327,7 @@ struct TensorContractionEvaluatorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar * data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { m_leftImpl.evalSubExprsIfNeeded(NULL); m_rightImpl.evalSubExprsIfNeeded(NULL); if (data) { @@ -488,13 +422,6 @@ struct TensorContractionEvaluatorBase template EIGEN_DEVICE_FUNC void evalGemm(Scalar* buffer) const { - #if defined(EIGEN_VECTORIZE_AVX) && defined(EIGEN_USE_LIBXSMM) - if (m_can_use_xsmm) { - evalGemmXSMM(buffer); - return; - } - #endif - // columns in left side, rows in right side const Index k = this->m_k_size; @@ -611,212 +538,7 @@ struct TensorContractionEvaluatorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return 
m_result; } -protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void EnableXSMMIfPossible(const array, ContractDims>& eval_op_indices) { - m_can_use_xsmm = false; - -#if defined(EIGEN_VECTORIZE_AVX) && defined(EIGEN_USE_LIBXSMM) - typedef typename internal::remove_const::type LhsScalar; - typedef typename internal::remove_const::type RhsScalar; - if (!std::is_same::value || - !std::is_same::value || - !(std::is_same::value || - std::is_same::value) || - m_leftImpl.data() == NULL || - m_rightImpl.data() == NULL) { - return; - } - - // Check if we can use faster matmul algorithms. For contraction to be - // equivalent to matmul, we need both lhs and rhs contracting dims sequences - // to be either a prefix or suffix of all dims. Also, the order of both - // must be the same, so we don't have to do reordering. - // For example: - // * OK: lhs 4D, rhs 4D, contraction: [(0, 2), (1, 3)] - // * BAD: lhs 3D, rhs 3D, contraction: [(1,1)] - // * BAD: lhs 3D, rhs 3D, contraction: [(0, 0), (2, 2)] - // * BAD: lhs 3D, rhs 3D, contraction: [(0, 2), (1, 1)] - // Depending if contraction dims are prefix or suffix of all dims we need to - // pre-transpose matrices in matmul algorithm: - // lhs: prefix -> transpose, suffix -> no transpose - // rhs: prefix -> no transpose, suffix -> transpose - // For example, for lhs 2D, rhs 2D, contraction [(1, 0)] is regular, - // non-transposed matmul. - if (ContractDims == 0) { - // This case is totally uninteresting, filter it out to avoid problems - // with iterations in further tests. - return; - } - - // Check if RHS dims list is increasing. LHS already is, so if not, the - // order is different and we cannot do matmul. - for (int i = 1; i < ContractDims; i++) { - if (eval_op_indices[i].second < eval_op_indices[i-1].second) { - return; - } - } - - // Check if no holes. - int diff; - for (int i = 1; i < ContractDims; i++) { - // LHS contract dims are sorted to form an increasing seq. 
- diff = eval_op_indices[i].first - eval_op_indices[i-1].first; - if (diff != 1) { - return; - } - // Now we may already assume RHS contract dims seq is increasing too. - diff = eval_op_indices[i].second - eval_op_indices[i-1].second; - if (diff != 1) { - return; - } - } - - // Check if suffix or prefix. - if (eval_op_indices[0].first != 0 && - eval_op_indices[ContractDims-1].first != LDims-1) { - return; - } - if (eval_op_indices[0].second != 0 && - eval_op_indices[ContractDims-1].second != RDims-1) { - return; - } - - m_can_use_xsmm = true; - #endif - } - -#if defined(EIGEN_VECTORIZE_AVX) && defined(EIGEN_USE_LIBXSMM) - EIGEN_DEVICE_FUNC void evalGemmXSMM(Scalar* buffer) const { - // columns in left side, rows in right side - const Index k = this->m_k_size; - - // rows in left side - const Index m = this->m_i_size; - - // columns in right side - const Index n = this->m_j_size; - - const bool transposeA = !m_lhs_inner_dim_contiguous; - const bool transposeB = !m_rhs_inner_dim_contiguous; - - typedef typename internal::remove_const::type LhsScalar; - typedef typename internal::remove_const::type RhsScalar; - - internal::TensorXsmmContractionBlocking blocking( - k, m, n, 1, transposeA, transposeB); - - // Outer blocks sizes - const Index mc_outer = blocking.outer_m(); - const Index nc_outer = blocking.outer_n(); - const Index kc_outer = blocking.outer_k(); - // Inner blocks sizes - const Index mc = blocking.mc(); - const Index nc = blocking.nc(); - const Index kc = blocking.kc(); - // Decisions whether we should copy parts of matrices - const bool copyA = blocking.copyA(); - const bool copyB = blocking.copyB(); - - const LhsScalar* leftData = m_leftImpl.data(); - const RhsScalar* rightData = m_rightImpl.data(); - - const libxsmm_blasint stride_A = static_cast(transposeA ? k : m); - const libxsmm_blasint stride_B = static_cast(transposeB ? 
n : k); - const libxsmm_blasint stride_C = static_cast(m); - - const libxsmm_blasint stride_blockA = static_cast(mc); - // Use bigger stride to avoid hitting same cache line too often. - // This consistently gives +~0.5 Gflops. - const libxsmm_blasint stride_panelB = static_cast( - kc % 32 == 0 ? kc + 16 : kc - ); - - // Kernel for the general case (not edges) - internal::libxsmm_wrapper kernel; - - LhsScalar* blockA = NULL; - RhsScalar* panelB = NULL; - - if (copyA) { - blockA = static_cast(this->m_device.allocate(mc * kc * sizeof(LhsScalar))); - } - if (copyB) { - panelB = static_cast(this->m_device.allocate(nc_outer * stride_panelB * sizeof(RhsScalar))); - } - - const Index kernel_stride_A = copyA ? stride_blockA : stride_A; - const Index kernel_stride_B = copyB ? stride_panelB : stride_B; - kernel = internal::libxsmm_wrapper(0, mc, nc, kc, kernel_stride_A, kernel_stride_B, stride_C, 1, 1, blocking.prefetch()); - - // Outer blocking - for (Index ki_outer = 0; ki_outer < k; ki_outer += kc_outer) { - for (Index mi_outer = 0; mi_outer < m; mi_outer += mc_outer) { - for (Index ni_outer = 0; ni_outer < n; ni_outer += nc_outer) { - using numext::mini; - - Index actual_nc_outer = mini(ni_outer+nc_outer, n) - ni_outer; - - // Inner blocking - for (Index ki = ki_outer; ki < mini(ki_outer+kc_outer, k); ki += kc) { - const Index actual_kc = mini(ki_outer+kc_outer, mini(ki+kc, k)) - ki; - const float beta = ki == 0 ? 0 : 1; - - if (copyB) { - if (transposeB) { - libxsmm_otrans(panelB, rightData + ki*stride_B + ni_outer, sizeof(RhsScalar), actual_nc_outer, actual_kc, stride_B, stride_panelB); - } else { - internal::pack_simple(panelB, rightData + ni_outer*stride_B + ki, actual_nc_outer, actual_kc, stride_panelB, stride_B); - } - } - - for (Index mi = mi_outer; mi < mini(mi_outer+mc_outer, m); mi += mc) { - const Index actual_mc = mini(mi_outer+mc_outer, mini(mi+mc, m)) - mi; - - const LhsScalar* a = transposeA ? 
leftData + mi*stride_A + ki : - leftData + ki*stride_A + mi; - - if (copyA) { - if (transposeA) { - libxsmm_otrans(blockA, a, sizeof(LhsScalar), actual_kc, actual_mc, stride_A, stride_blockA); - } else { - internal::pack_simple(blockA, a, actual_kc, actual_mc, stride_blockA, stride_A); - } - } - const LhsScalar* actual_a = copyA ? blockA : a; - - for (Index ni = ni_outer; ni < mini(ni_outer+nc_outer, n); ni += nc) { - const Index actual_nc = mini(ni_outer+nc_outer, mini(ni+nc, n)) - ni; - - const RhsScalar* b = rightData + ni*stride_B + ki; - Scalar* c = buffer + ni*stride_C + mi; - const Scalar* cp = c + nc*stride_C; - - const RhsScalar* actual_b = copyB ? panelB + (ni-ni_outer)*stride_panelB : b; - const RhsScalar* bp = copyB ? panelB + nc*stride_panelB : b + nc*stride_B; - - if (actual_mc == mc && actual_kc == kc && actual_nc == nc && beta == 1) { - // Most used, cached kernel. - kernel(actual_a, actual_b, c, actual_a, bp, cp); - } else { - // Edges - use libxsmm kernel cache. - internal::libxsmm_wrapper(0, actual_mc, actual_nc, actual_kc, kernel_stride_A, kernel_stride_B, stride_C, 1, beta, blocking.prefetch())(actual_a, actual_b, c, actual_a, bp, cp); - } - } - } - } - } - } - } - - if (copyA) { - this->m_device.deallocate(blockA); - } - if (copyB) { - this->m_device.deallocate(panelB); - } - } -#endif - + protected: // Prevent assignment TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&); Dimensions m_dimensions; @@ -842,10 +564,6 @@ protected: TensorEvaluator m_rightImpl; const Device& m_device; Scalar* m_result; - /// required for sycl - const Indices m_expr_indices; - - bool m_can_use_xsmm; }; diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h index d34f9caee..5cf7b4f71 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +++ 
b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -50,140 +50,6 @@ class TensorContractionBlocking { }; - -#if defined(EIGEN_USE_LIBXSMM) -template -class TensorXsmmContractionBlocking { - public: - TensorXsmmContractionBlocking(Index k, Index m, Index n, - size_t max_num_threads = 1, bool transposeA = false, - bool transposeB = false): - k_(k), m_(m), n_(n), transposeA_(transposeA), - transposeB_(transposeB), num_threads_(max_num_threads) { -#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES - if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) { - mc_ = EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M; - kc_ = EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K; - nc_ = EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N; - outer_m_ = EIGEN_TEST_SPECIFIC_OUTER_BLOCKING_SIZE_M; - outer_k_ = EIGEN_TEST_SPECIFIC_OUTER_BLOCKING_SIZE_K; - outer_n_ = EIGEN_TEST_SPECIFIC_OUTER_BLOCKING_SIZE_N; - copyA_ = EIGEN_TEST_SPECIFIC_BLOCKING_COPY_A; - copyB_ = EIGEN_TEST_SPECIFIC_BLOCKING_COPY_B; - outer_m_ = outer_m_ != 0 ? outer_m_ : m; - outer_k_ = outer_k_ != 0 ? outer_k_ : k; - outer_n_ = outer_n_ != 0 ? outer_n_ : n; - } -#else - // Defaults, possibly overriden per-platform. - copyA_ = true; - copyB_ = false; - - // If the matrix is small enough, don't do blocking, just call single xsmm - // kernel. - if (static_cast(m)*k*n <= LIBXSMM_THRESHOLD) { - mc_ = m; kc_ = k; nc_ = n; - outer_m_ = m; outer_k_ = k; outer_n_ = n; - copyA_ = false; copyB_ = false; - } else { - int arch = libxsmm_cpuid_x86(); - - if (arch == LIBXSMM_X86_AVX512_CORE) { - // skylake - mc_ = 64; kc_ = 64; nc_ = 24; - outer_m_ = 512; outer_k_ = 512; outer_n_ = 24*22; - // Hack to use this kernel architecture as the other one has performance - // issues (no hardware prefetching). - // TODO(nishantpatil): This should be removed if the issues are fixed, - // or this one becomes the default. 
- setenv("LIBXSMM_AVX512_CLASSIC_GEMM", "1", 1); - } else if (arch == LIBXSMM_X86_AVX2) { - // haswell - mc_ = 32; kc_ = 192; nc_ = 33; - outer_m_ = 512; outer_k_ = 3*192; outer_n_ = 33*16; - } else if (arch == LIBXSMM_X86_AVX) { - // ivybridge - mc_ = 32; kc_ = 192; nc_ = 48; - outer_m_ = 512; outer_k_ = 3*192; outer_n_ = 48*11; - } else { - // generic kernel size, usually performing well - mc_ = 32; kc_ = 128; nc_ = 32; - outer_m_ = 512; outer_k_ = 512; outer_n_ = 512; - } - - // Only copy if it makes the stride smaller. - copyA_ = copyA_ && (m > mc_); - copyB_ = copyB_ && (k > kc_); - } - - // We need to copy anyway if transposing - copyA_ = copyA_ || transposeA; - copyB_ = copyB_ || transposeB; - - // See libxsmm_gemm_prefetch_type definition in libxsmm_typedefs.h - prefetch_ = LIBXSMM_PREFETCH_AL2CL2BL2_VIA_C; - -#endif - - mc_ = mc_ > m ? m : mc_; - nc_ = nc_ > n ? n : nc_; - kc_ = kc_ > k ? k : kc_; - - size_t compute_parallelism = (m / mc_) * (n / nc_); - size_t pack_parallelism = 0; - if (copyA_) { - pack_parallelism += (m / mc_) * (k / kc_); - } - if (copyB_) { - pack_parallelism += (n / nc_) * (k / kc_); - } - size_t parallelism = numext::maxi(compute_parallelism, pack_parallelism); - - num_threads_ = numext::mini(num_threads_, - parallelism / MIN_JOBS_PER_THREAD); - num_threads_ = numext::maxi(num_threads_, 1); - - // For optimal performance outer block sizes should be multiplies of kernel - // sizes, or bigger than matrix size (=no outer blocking). 
- eigen_assert(outer_m_ % mc_ == 0 || outer_m_ >= m); - eigen_assert(outer_k_ % kc_ == 0 || outer_k_ >= k); - eigen_assert(outer_n_ % nc_ == 0 || outer_n_ >= n); - } - - EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } - EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } - EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } - EIGEN_ALWAYS_INLINE Index outer_k() const { return outer_k_; } - EIGEN_ALWAYS_INLINE Index outer_m() const { return outer_m_; } - EIGEN_ALWAYS_INLINE Index outer_n() const { return outer_n_; } - EIGEN_ALWAYS_INLINE bool copyA() const { return copyA_; } - EIGEN_ALWAYS_INLINE bool copyB() const { return copyB_; } - EIGEN_ALWAYS_INLINE bool transposeA() const { return transposeA_; } - EIGEN_ALWAYS_INLINE bool transposeB() const { return transposeB_; } - EIGEN_ALWAYS_INLINE int num_threads() const { return num_threads_; } - EIGEN_ALWAYS_INLINE Index blocks_m() const { return divup(m_, mc_); } - EIGEN_ALWAYS_INLINE Index blocks_k() const { return divup(k_, kc_); } - EIGEN_ALWAYS_INLINE Index blocks_n() const { return divup(n_, nc_); } - EIGEN_ALWAYS_INLINE libxsmm_gemm_prefetch_type prefetch() const { - return prefetch_; - } - - private: - Index k_, m_, n_; - Index kc_, mc_, nc_; - Index outer_k_, outer_m_, outer_n_; - bool copyA_, copyB_, transposeA_, transposeB_; - size_t num_threads_; - - // Threshold for m*k*n to skip blocking and just call libxsmm - const double LIBXSMM_THRESHOLD = 80*80*80; - // For computing optimal number of threads - so that each thread gets at least - // that many jobs. 
- const double MIN_JOBS_PER_THREAD = 3; - libxsmm_gemm_prefetch_type prefetch_; -}; -#endif // EIGEN_USE_LIBXSMM - } // end namespace internal } // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h index ab320a50d..9b2cb3ff6 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -22,14 +22,8 @@ enum { /* * Implementation of the Eigen blas_data_mapper class for tensors. */ -/// The make pointer class is used by sycl in order to build the mapper class on the device. For other platform the default make pointer is used which -/// is scalar * for CoeffLoader. -template class MakePointer_ = MakePointer> struct CoeffLoader; -template class MakePointer_ = MakePointer> class BaseTensorContractionMapper; -template class MakePointer_> struct CoeffLoader { +template struct CoeffLoader { enum { DirectOffsets = false }; @@ -53,7 +47,7 @@ template class MakePointer const Tensor m_tensor; }; -template class MakePointer_> struct CoeffLoader { +template struct CoeffLoader { enum { DirectOffsets = true }; @@ -73,14 +67,13 @@ template class MakePointer_> struct CoeffLoad } private: typedef typename Tensor::Scalar Scalar; - - typename MakePointer_::Type m_data; + const Scalar* m_data; }; template class MakePointer_ = MakePointer> + int packet_size, bool inner_dim_contiguous, int Alignment> class SimpleTensorContractionMapper { public: EIGEN_DEVICE_FUNC @@ -96,7 +89,7 @@ class SimpleTensorContractionMapper { m_k_strides(k_strides) { } enum { - DirectOffsets = CoeffLoader::DirectOffsets + DirectOffsets = CoeffLoader::DirectOffsets }; EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { @@ -213,22 +206,23 @@ class SimpleTensorContractionMapper { } protected: - CoeffLoader m_tensor; 
+ CoeffLoader m_tensor; const nocontract_t m_nocontract_strides; const nocontract_t m_ij_strides; const contract_t m_contract_strides; const contract_t m_k_strides; }; + template class MakePointer_> -class BaseTensorContractionMapper : public SimpleTensorContractionMapper + bool inner_dim_reordered, int Alignment> +class BaseTensorContractionMapper : public SimpleTensorContractionMapper { public: - typedef SimpleTensorContractionMapper ParentMapper; + typedef SimpleTensorContractionMapper ParentMapper; EIGEN_DEVICE_FUNC BaseTensorContractionMapper(const Tensor& tensor, @@ -241,9 +235,9 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper::half HalfPacket; - template + template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE PacketT load(Index i, Index j) const { + EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { // whole method makes column major assumption // don't need to add offsets for now (because operator handles that) @@ -281,13 +275,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapperm_tensor.coeff(last); - return pload(data); - } - - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { - return this->load(i,j); + return pload(data); } template @@ -313,11 +301,11 @@ template class MakePointer_> -class BaseTensorContractionMapper : public SimpleTensorContractionMapper + bool inner_dim_reordered, int Alignment> +class BaseTensorContractionMapper : public SimpleTensorContractionMapper { public: - typedef SimpleTensorContractionMapper ParentMapper; + typedef SimpleTensorContractionMapper ParentMapper; EIGEN_DEVICE_FUNC BaseTensorContractionMapper(const Tensor& tensor, @@ -334,12 +322,6 @@ class BaseTensorContractionMapperm_tensor.coeff(this->computeIndex(i, j)); return pload(data); } - template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE PacketT load(Index i, Index j) const { - EIGEN_ALIGN_MAX Scalar data[1]; - data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); - 
return pload(data); - } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { return loadPacket(i, j); @@ -351,14 +333,14 @@ template class MakePointer_=MakePointer> + bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> class TensorContractionSubMapper { public: typedef typename Tensor::PacketReturnType Packet; typedef typename unpacket_traits::half HalfPacket; - typedef BaseTensorContractionMapper ParentMapper; - typedef TensorContractionSubMapper Self; + typedef BaseTensorContractionMapper ParentMapper; + typedef TensorContractionSubMapper Self; typedef Self LinearMapper; enum { @@ -403,14 +385,6 @@ class TensorContractionSubMapper { return m_base_mapper.template loadPacket(i + m_vert_offset, j + m_horiz_offset); } - template - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacket(Index i, Index j) const { - if (UseDirectOffsets) { - return m_base_mapper.template load(i, j); - } - return m_base_mapper.template loadPacket(i + m_vert_offset, j + m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { if (UseDirectOffsets) { return m_base_mapper.template loadHalfPacket(i, 0); @@ -418,7 +392,7 @@ class TensorContractionSubMapper { return m_base_mapper.template loadHalfPacket(i + m_vert_offset, m_horiz_offset); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet& p) const { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { if (UseDirectOffsets) { m_base_mapper.storePacket(i, 0, p); } @@ -458,14 +432,14 @@ template class MakePointer_=MakePointer> + bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> class TensorContractionInputMapper - : public BaseTensorContractionMapper { + : public BaseTensorContractionMapper { public: typedef Scalar_ Scalar; - typedef BaseTensorContractionMapper Base; - typedef TensorContractionSubMapper SubMapper; + typedef BaseTensorContractionMapper Base; + 
typedef TensorContractionSubMapper SubMapper; typedef SubMapper VectorMapper; EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h deleted file mode 100644 index b170a1a5c..000000000 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +++ /dev/null @@ -1,402 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensorSyclConvertToDeviceExpression.h - * - * \brief: - * TensorContractionsycl - * -*****************************************************************/ - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H -namespace Eigen { - -template struct LaunchSyclKernels; -template -struct TensorEvaluator, const Eigen::SyclDevice> : - public TensorContractionEvaluatorBase, const Eigen::SyclDevice> > { - - typedef const Eigen::SyclDevice Device; - - typedef TensorEvaluator, Device> Self; - typedef TensorContractionEvaluatorBase Base; - typedef TensorContractionOp XprType; - typedef typename internal::remove_const::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType::type PacketReturnType; - - enum { - Layout = TensorEvaluator::Layout, - }; - - // Most of the code is assuming that both input tensors are ColMajor. 
If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. - typedef typename internal::conditional< - static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size::Dimensions>::value; - static const int RDims = - internal::array_size::Dimensions>::value; - static const int ContractDims = internal::array_size::value; - - typedef array left_dim_mapper_t; - typedef array right_dim_mapper_t; - - typedef array contract_t; - typedef array left_nocontract_t; - typedef array right_nocontract_t; - - static const int NumDims = LDims + RDims - 2 * ContractDims; - - typedef DSizes Dimensions; - - // typedefs needed in evalTo - typedef typename internal::remove_const::type LhsScalar; - typedef typename internal::remove_const::type RhsScalar; - - typedef TensorEvaluator LeftEvaluator; - typedef TensorEvaluator RightEvaluator; - - typedef typename LeftEvaluator::Dimensions LeftDimensions; - typedef typename RightEvaluator::Dimensions RightDimensions; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : - Base(op, device) {} - - // We need to redefine this method to make nvcc happy - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - this->m_leftImpl.evalSubExprsIfNeeded(NULL); - this->m_rightImpl.evalSubExprsIfNeeded(NULL); - if (data) { - evalTo(data); - return false; - } else { - this->m_result = static_cast(this->m_device.allocate(this->dimensions().TotalSize() * sizeof(Scalar))); - evalTo(this->m_result); - return true; - } - } - const Eigen::SyclDevice& device() const {return this->m_device;} - void evalTo(Scalar* buffer) const { - // Here is the result - if 
(this->m_lhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - evalTyped(buffer); - } - else { - evalTyped(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - evalTyped(buffer); - } - else { - evalTyped(buffer); - } - } - } - else { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - evalTyped(buffer); - } - else { - evalTyped(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - evalTyped(buffer); - } - else { - evalTyped(buffer); - } - } - } - } - - template - void evalTyped(Scalar* buffer) const { - // columns in left side, rows in right side - const Index k = this->m_k_size; - EIGEN_UNUSED_VARIABLE(k) - // rows in left side - const Index m = this->m_i_size; - // columns in right side - const Index n = this->m_j_size; - - // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) - this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); - LaunchSyclKernels::Run(*this, buffer, m, n, k, - this->m_k_strides, this->m_left_contracting_strides, this->m_right_contracting_strides, - this->m_i_strides, this->m_j_strides, this->m_left_nocontract_strides, this->m_right_nocontract_strides); - } - // required by sycl to construct the expr on the device. Returns original left_impl - const TensorEvaluator& left_impl() const { - return choose(Cond(Layout) == static_cast(ColMajor)>(), this->m_leftImpl, this->m_rightImpl); - } - // required by sycl to construct the expr on the device. Returns original right_impl - const TensorEvaluator& right_impl() const { - return choose(Cond(Layout) == static_cast(ColMajor)>(), this->m_rightImpl, this->m_leftImpl); - } - // required by sycl to construct the expr on the device - const Indices& indices() const {return this->m_expr_indices;} -}; - -/// Dummy container on the device. This is used to avoid calling the constructor of TensorEvaluator for TensorContractionOp. 
This makes the code much faster. -template struct TensorEvaluatorContainer; -template -struct TensorEvaluatorContainer>{ - typedef Eigen::DefaultDevice Device; - typedef TensorContractionOp XprType; - typedef typename internal::remove_const::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType::type PacketReturnType; - enum { - Layout = TensorEvaluator::Layout, - }; - - typedef typename internal::conditional(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - typedef TensorEvaluator LeftEvaluator; - typedef TensorEvaluator RightEvaluator; - - TensorEvaluatorContainer(const XprType& op, const Eigen::DefaultDevice& device) - : m_leftImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), - op.lhsExpression(), op.rhsExpression()), device), - m_rightImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), - op.rhsExpression(), op.lhsExpression()), device){} -LeftEvaluator m_leftImpl; -RightEvaluator m_rightImpl; -}; - - -template struct KernelConstructor{ - - typedef typename Eigen::TensorSycl::internal::createPlaceHolderExpression::Type PlaceHolderExpr; - - FunctorExpr functors; - LhsLocalAcc localLhs; - RhsLocalAcc localRhs; - OutAccessor out_res; - Index roundUpK, M, N, K; - ContractT m_k_strides, m_left_contracting_strides, m_right_contracting_strides; - LeftNocontractT m_i_strides, m_left_nocontract_strides; - RightNocontractT m_j_strides, m_right_nocontract_strides; - TupleType tuple_of_accessors; - - KernelConstructor(FunctorExpr functors_, LhsLocalAcc localLhs_, RhsLocalAcc localRhs_, OutAccessor out_res_, - Index roundUpK_, Index M_, Index N_, Index K_, ContractT m_k_strides_, ContractT m_left_contracting_strides_, - ContractT m_right_contracting_strides_, LeftNocontractT m_i_strides_, RightNocontractT m_j_strides_, - 
LeftNocontractT m_left_nocontract_strides_, RightNocontractT m_right_nocontract_strides_, TupleType tuple_of_accessors_) - :functors(functors_), localLhs(localLhs_), localRhs(localRhs_), out_res(out_res_), roundUpK(roundUpK_), M(M_), N(N_), K(K_), - m_k_strides(m_k_strides_), m_left_contracting_strides(m_left_contracting_strides_), - m_right_contracting_strides(m_right_contracting_strides_), - m_i_strides(m_i_strides_), m_left_nocontract_strides(m_left_nocontract_strides_), - m_j_strides(m_j_strides_), m_right_nocontract_strides(m_right_nocontract_strides_), - tuple_of_accessors(tuple_of_accessors_){} - - void operator()(cl::sycl::nd_item<1> itemID) { - typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression::Type DevExpr; - auto device_expr =Eigen::TensorSycl::internal::createDeviceExpression(functors, tuple_of_accessors); - auto device_evaluator = TensorEvaluatorContainer(device_expr.expr, Eigen::DefaultDevice()); - typedef TensorEvaluatorContainer DevEvaluator; - typedef internal::TensorContractionInputMapper LhsMapper; - - typedef internal::TensorContractionInputMapper RhsMapper; - // initialize data mappers must happen inside the kernel for device eval - LhsMapper lhs(device_evaluator.m_leftImpl, m_left_nocontract_strides, m_i_strides, m_left_contracting_strides, m_k_strides); - RhsMapper rhs(device_evaluator.m_rightImpl, m_right_nocontract_strides, m_j_strides, m_right_contracting_strides, m_k_strides); - auto out_ptr = ConvertToActualTypeSycl(OutScalar, out_res); - // Matmul Kernel - // Thread identifiers - const int mLocalThreadId = itemID.get_local(0); // Local ID row - const int nLocalThreadId = itemID.get_local(1); // Local ID col - const int mGroupId = itemID.get_group(0); // Work-group ID row - const int nGroupId = itemID.get_group(1); // Work-group ID localCol - const int linearLocalThreadId = nLocalThreadId*LocalThreadSizeM + mLocalThreadId; // linear local thread ID - // Allocate register space - float privateLhs; - float 
privateRhs[WorkLoadPerThreadN]; - float privateRes[WorkLoadPerThreadM][WorkLoadPerThreadN]; - // Initialise the privateResumulation registers - for (int wLPTM=0; wLPTM(0); - } - // Tile Rhs - for (int lPTR=0; lPTR(0); - - } - // Loop over all tiles - const int numTiles = roundUpK/TileSizeDimK; - int firstHalf=0; - do { - // Synchronise - itemID.barrier(cl::sycl::access::fence_space::local_space); - // Load the next tile of Lhs and Rhs into local memory - int nextHalf = firstHalf + 1; - if (nextHalf < numTiles) { - // Tile A - for (int lPTL=0; lPTL(0); - } - // Tile B - for (int lPTR=0; lPTR(0); - } - } - // Loop over the values of a single tile - for (int k=0; k struct LaunchSyclKernels { - -static const int TileSizeDimM = 32; // Tile size for dimension M -static const int TileSizeDimN = 32; // Tile size for dimension N -static const int TileSizeDimK = 16; // Tile size for dimension K -static const int WorkLoadPerThreadM = 4; // Work load per thread in dimension M -static const int WorkLoadPerThreadN = 4; // work load per thread in dimension N -static const int LocalThreadSizeM = (TileSizeDimM/WorkLoadPerThreadM); // Local thread size for the first dimension (M here) -static const int LocalThreadSizeN = (TileSizeDimN/WorkLoadPerThreadN); // Local thread size for the second dimension (N here) -static const int LoadPerThreadLhs = ((TileSizeDimK*WorkLoadPerThreadM*WorkLoadPerThreadN)/(TileSizeDimN)); // workload per thread for Lhs expression -static const int LoadPerThreadRhs = ((TileSizeDimK*WorkLoadPerThreadM*WorkLoadPerThreadN)/(TileSizeDimM)); // workload per thread for Rhs expression - -// RoundUp function to make sure that the global threadId is divisable by local threadId -static int RoundUp(int x, int y) { - return ((((x) + (y) - 1) / (y))*(y)); -} - -template< typename Self, typename OutScalar, typename Index, typename ContractT, typename LeftNocontractT, typename RightNocontractT> - static void Run(const Self& self, OutScalar* buffer, Index M, Index N, Index 
K, - ContractT m_k_strides, ContractT m_left_contracting_strides, ContractT m_right_contracting_strides, - LeftNocontractT m_i_strides, RightNocontractT m_j_strides, LeftNocontractT m_left_nocontract_strides, RightNocontractT m_right_nocontract_strides){ - // create a tuple of accessors from Evaluator - typedef typename Self::XprType HostExpr; - // typedef typename Eigen::TensorSycl::internal::createPlaceHolderExpression::Type PlaceHolderExpr; - // typedef KernelNameConstructor KernelName; - auto functors = Eigen::TensorSycl::internal::extractFunctors(self); - typedef decltype(functors) FunctorExpr; - Index roundUpK = RoundUp(K, TileSizeDimK); - Index roundUpM = RoundUp(M, TileSizeDimM); - Index roundUpN = RoundUp(N, TileSizeDimN); - self.device().sycl_queue().submit([&](cl::sycl::handler &cgh) { - auto tuple_of_accessors = Eigen::TensorSycl::internal::createTupleOfAccessors(cgh, self); - typedef decltype(tuple_of_accessors) TupleType; - // Local memory for elements of Lhs - typedef cl::sycl::accessor LhsLocalAcc; - LhsLocalAcc localLhs(cl::sycl::range<1>(2* TileSizeDimM * TileSizeDimK), cgh); - // Local memory for elements of Rhs - typedef cl::sycl::accessor RhsLocalAcc; - RhsLocalAcc localRhs(cl::sycl::range<1>(2* TileSizeDimK * TileSizeDimN), cgh); - //OutScalar memory - auto out_res= self.device(). 
template get_sycl_accessor(cgh, buffer); - typedef decltype(out_res) OutAccessor; - // sycl parallel for - cgh.parallel_for(cl::sycl::nd_range<2>(cl::sycl::range<2>(roundUpM/WorkLoadPerThreadM, roundUpN/WorkLoadPerThreadN), - cl::sycl::range<2>(LocalThreadSizeM, LocalThreadSizeN)), - KernelConstructor(functors, - localLhs, localRhs, out_res, roundUpK, M, N, K, m_k_strides, m_left_contracting_strides, m_right_contracting_strides,m_i_strides, m_j_strides, - m_left_nocontract_strides,m_right_nocontract_strides, tuple_of_accessors)); - }); - self.device().asynchronousExec(); - } -}; - -} // end namespace Eigen -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index d30cc96ab..ee16cde9b 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -116,28 +116,6 @@ struct TensorEvaluator void evalProduct(Scalar* buffer) const { - const Index m = this->m_i_size; - const Index n = this->m_j_size; - const Index k = this->m_k_size; - if (m == 0 || n == 0 || k == 0) return; - -#if defined(EIGEN_VECTORIZE_AVX) && defined(EIGEN_USE_LIBXSMM) - if (this->m_can_use_xsmm) { - bool transposeA = !this->m_lhs_inner_dim_contiguous; - bool transposeB = !this->m_rhs_inner_dim_contiguous; - internal::TensorXsmmContractionBlocking - blocking(k, m, n, this->m_device.numThreads(), transposeA, - transposeB); - - if (blocking.num_threads() == 1) { - this->evalGemmXSMM(buffer); - } else { - ContextXsmm(this, buffer, m, n, k, blocking).run(); - } - return; - } -#endif - typedef typename internal::remove_const::type LhsScalar; @@ -169,7 +147,10 @@ struct TensorEvaluator GebpKernel; - + const Index m = this->m_i_size; + const Index n = this->m_j_size; + const Index k = 
this->m_k_size; + if (m == 0 || n == 0 || k == 0) return; // Compute a set of algorithm parameters: // - kernel block sizes (bm, bn, bk) @@ -1063,187 +1044,6 @@ struct TensorEvaluator - class ContextXsmm { - public: - ContextXsmm(const Self* self, Scalar* buffer, Index m, Index n, Index k, - const internal::TensorXsmmContractionBlocking& blocking): - device(self->m_device), - m(m), k(k), n(n), - stride_a(blocking.transposeA() ? k : m), - stride_b(blocking.transposeB() ? n : k), - stride_c(m), - bm(blocking.mc()), bk(blocking.kc()), bn(blocking.nc()), - blocks_m(blocking.blocks_m()), blocks_k(blocking.blocks_k()), - blocks_n(blocking.blocks_n()), - copyA(blocking.copyA()), copyB(blocking.copyB()), - transposeA(blocking.transposeA()), transposeB(blocking.transposeB()), - num_threads(blocking.num_threads()), - buffer(buffer), - leftData(self->m_leftImpl.data()), rightData(self->m_rightImpl.data()), - workers_done(blocking.num_threads()), - - packingA_jobs(0), packingB_jobs(0), compute_jobs(0), - packingA_done(blocking.blocks_m()), packingB_done(blocking.blocks_n()) {} - - void worker() { - // Pack - - if (copyA) { - while (true) { - uint32_t mk = packingA_jobs++; - Index mi = mk / blocks_k; - Index ki = mk % blocks_k; - if (mi >= blocks_m) break; - - LhsScalar * blockA = blocksA + (bk*bm) * (mi*blocks_k+ki); - if (transposeA) { - const LhsScalar * current_a = leftData + (bm*mi)*stride_a + (bk*ki); - libxsmm_otrans(blockA, current_a, sizeof(LhsScalar), actual_bk(ki), - actual_bm(mi), stride_a, bm); - } else { - const LhsScalar * current_a = leftData + (bk*ki)*stride_a + (bm*mi); - internal::pack_simple(blockA, current_a, - actual_bk(ki), actual_bm(mi), bm, stride_a); - } - packingA_done.at(mi)++; - } - } - - if (copyB) { - while (true) { - uint32_t nk = packingB_jobs++; - Index ni = nk / blocks_k; - Index ki = nk % blocks_k; - if (ni >= blocks_n) break; - - RhsScalar * blockB = blocksB + (bk*bn) * (ni*blocks_k+ki); - if (transposeB) { - const RhsScalar * current_b = 
rightData + (ki*bk)*stride_b + - (ni*bn); - libxsmm_otrans(blockB, current_b, sizeof(RhsScalar), actual_bn(ni), - actual_bk(ki), stride_b, bk); - } else { - const RhsScalar * current_b = rightData + (ni*bn)*stride_b + - (ki*bk); - internal::pack_simple(blockB, current_b, - actual_bn(ni), actual_bk(ki), bk, stride_b); - } - packingB_done.at(ni)++; - } - } - - // Compute - - while (true) { - uint32_t mn = compute_jobs++; - Index mi = mn / blocks_n; - Index ni = mn % blocks_n; - if (mi >= blocks_m) break; - - // Wait for mi, ni packings to be done. This is more fine-grained than - // waiting for all workers to finish packing. - while ((copyA && (packingA_done.at(mi) < blocks_k)) || - (copyB && (packingB_done.at(ni) < blocks_k))) - {} - - for (Index ki=0; ki < blocks_k; ++ki) { - const LhsScalar * current_a = copyA ? - blocksA + (bk*bm) * (mi*blocks_k+ki) : - leftData + (bk*ki)*stride_a + (bm*mi); - const RhsScalar * current_b = copyB ? - blocksB + (bk*bn) * (ni*blocks_k+ki) : - rightData + (ni*bn)*stride_b + (bk*ki); - - Index current_stride_a = copyA ? bm : stride_a; - Index current_stride_b = copyB ? bk : stride_b; - - // Memory may not be zeroed, overwrite instead of adding in first - // iteration. - float beta = ki == 0 ? 0 : 1; - - Scalar * current_c = buffer + (mi*bm) + (ni*bn)*stride_c; - internal::libxsmm_wrapper( - 0, actual_bm(mi), actual_bn(ni), actual_bk(ki), - current_stride_a, current_stride_b, stride_c, 1, beta, 0) - (current_a, current_b, current_c); - } - } - - workers_done.Notify(); - } - - void run() { - // Parallelization strategy. - // - // First pack A into blocks (sharding by m, k) and B (sharding by n,k), - // then shard by m, n. - // - // Do not use advanced ThreadPool queuing, just run a single long-standing - // function in each thread. 
- if (copyA) { - blocksA = static_cast(device.allocate( - (blocks_m*bm)*(blocks_k*bk)*sizeof(LhsScalar))); - } - if (copyB) { - blocksB = static_cast(device.allocate( - (blocks_n*bn)*(blocks_k*bk)*sizeof(RhsScalar))); - } - - for (Index i = 0; i < num_threads; ++i) { - device.enqueueNoNotification([=]() { worker(); }); - } - - workers_done.Wait(); - - if (copyA) { - device.deallocate(blocksA); - } - if (copyB) { - device.deallocate(blocksB); - } - } - - private: - // real block size for block index in [0, ..., blocks - 1]. - Index actual_bm(Index mi) const { - return mi != blocks_m - 1 ? bm : m + bm - bm * blocks_m; - } - Index actual_bk(Index ki) const { - return ki != blocks_k - 1 ? bk : k + bk - bk * blocks_k; - } - Index actual_bn(Index ni) const { - return ni != blocks_n - 1 ? bn : n + bn - bn * blocks_n; - } - - const Device& device; - Index m, k, n; - Index stride_a, stride_b, stride_c; - Index bm, bk, bn; // Block sizes. - Index blocks_m, blocks_k, blocks_n; // Number of blocks in each dimension. - bool copyA, copyB, transposeA, transposeB; - Index num_threads; - Scalar *buffer; - const LhsScalar *leftData; - const RhsScalar *rightData; - - LhsScalar *blocksA; - RhsScalar *blocksB; - // barrier for joining all threads after all done. - Barrier workers_done; - // "queues" of (mi,ki), (ki,ni), (mi,ni) jobs packed [0,p)x[0,q) -> [0, p*q) - std::atomic packingA_jobs; - std::atomic packingB_jobs; - std::atomic compute_jobs; - // already packed blocks for each mi-panel in A and ni-panel in B. 
- std::vector> packingA_done; - std::vector> packingB_done; - }; -#endif - }; } // end namespace Eigen diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index e6cee11ef..4f5767bc7 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -88,7 +88,7 @@ static void initializeDeviceProp() { #if __cplusplus >= 201103L std::atomic_thread_fence(std::memory_order_acquire); #endif - EIGEN_SLEEP(1000); + sleep(1); } } } diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h index ccaaa6cb2..9d141395b 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -45,7 +45,7 @@ struct DefaultDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { -#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) +#ifndef __CUDA_ARCH__ // Running on the host CPU return l1CacheSize(); #else @@ -55,7 +55,7 @@ struct DefaultDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { -#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) +#ifndef __CUDA_ARCH__ // Running single threaded on the host CPU return l3CacheSize(); #else diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index 16bbbf894..7c039890e 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -16,308 +16,107 @@ #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H namespace Eigen { - - #define 
ConvertToActualTypeSycl(Scalar, buf_acc) reinterpret_cast::pointer_t>((&(*buf_acc.get_pointer()))) - - template class MemCopyFunctor { - public: - typedef cl::sycl::accessor read_accessor; - typedef cl::sycl::accessor write_accessor; - - MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {} - - void operator()(cl::sycl::nd_item<1> itemID) { - auto src_ptr = ConvertToActualTypeSycl(Scalar, m_src_acc); - auto dst_ptr = ConvertToActualTypeSycl(Scalar, m_dst_acc); - auto globalid = itemID.get_global_linear_id(); - if (globalid < m_rng) { - dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset]; - } - } - - private: - read_accessor m_src_acc; - write_accessor m_dst_acc; - size_t m_rng; - size_t m_i; - size_t m_offset; - }; - - struct memsetkernelFunctor{ - typedef cl::sycl::accessor AccType; - AccType m_acc; - const size_t m_rng, m_c; - memsetkernelFunctor(AccType acc, const size_t rng, const size_t c):m_acc(acc), m_rng(rng), m_c(c){} - void operator()(cl::sycl::nd_item<1> itemID) { - auto globalid=itemID.get_global_linear_id(); - if (globalid< m_rng) m_acc[globalid] = m_c; - } - - }; - -EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){ - auto devices = cl::sycl::device::get_devices(); - std::vector::iterator it =devices.begin(); - while(it!=devices.end()) { - /// get_devices returns all the available opencl devices. 
Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU ) - auto s= (*it).template get_info(); - std::transform(s.begin(), s.end(), s.begin(), ::tolower); - if((*it).is_cpu() && s.find("amd")!=std::string::npos){ // remove amd cpu as it is not supported by computecpp - it=devices.erase(it); - } - else{ - ++it; - } - } - return devices; -} - -struct QueueInterface { - /// class members: - bool exception_caught_ = false; - - mutable std::mutex mutex_; - +struct SyclDevice { + /// class members + /// sycl queue + mutable cl::sycl::queue m_queue; /// std::map is the container used to make sure that we create only one buffer /// per pointer. The lifespan of the buffer now depends on the lifespan of SyclDevice. /// If a non-read-only pointer is needed to be accessed on the host we should manually deallocate it. - mutable std::map> buffer_map; - /// sycl queue - mutable cl::sycl::queue m_queue; - /// creating device by using cl::sycl::selector or cl::sycl::device both are the same and can be captured through dev_Selector typename - /// SyclStreamDevice is not owned. it is the caller's responsibility to destroy it. 
- template explicit QueueInterface(const dev_Selector& s): + mutable std::map> buffer_map; + /// creating device by using selector + template SyclDevice(dev_Selector s) + : #ifdef EIGEN_EXCEPTIONS - m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) { + m_queue(cl::sycl::queue(s, [=](cl::sycl::exception_list l) { for (const auto& e : l) { try { - if (e) { - exception_caught_ = true; - std::rethrow_exception(e); - } + std::rethrow_exception(e); } catch (cl::sycl::exception e) { - std::cerr << e.what() << std::endl; - } + std::cout << e.what() << std::endl; + } } })) #else -m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) { - for (const auto& e : l) { - if (e) { - exception_caught_ = true; - std::cerr << "Error detected Inside Sycl Device."<< std::endl; - - } - } -})) + m_queue(cl::sycl::queue(s)) #endif {} + // destructor + ~SyclDevice() { deallocate_all(); } - /// Allocating device pointer. This pointer is actually an 8 bytes host pointer used as key to access the sycl device buffer. - /// The reason is that we cannot use device buffer as a pointer as a m_data in Eigen leafNode expressions. So we create a key - /// pointer to be used in Eigen expression construction. When we convert the Eigen construction into the sycl construction we - /// use this pointer as a key in our buffer_map and we make sure that we dedicate only one buffer only for this pointer. - /// The device pointer would be deleted by calling deallocate function. - EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - auto buf = cl::sycl::buffer(cl::sycl::range<1>(num_bytes)); - auto ptr =buf.get_access().get_pointer(); - buf.set_final_data(nullptr); - std::lock_guard lock(mutex_); - buffer_map.insert(std::pair>(static_cast(ptr),buf)); - return static_cast(ptr); - } - - /// This is used to deallocate the device pointer. p is used as a key inside - /// the map to find the device buffer and delete it. 
- EIGEN_STRONG_INLINE void deallocate(void *p) const { - std::lock_guard lock(mutex_); - auto it = buffer_map.find(static_cast(p)); + template void deallocate(T *p) const { + auto it = buffer_map.find(p); if (it != buffer_map.end()) { - auto num_bytes =it->second.get_size(); buffer_map.erase(it); - // Temporary solution for memory leak in computecpp. It will be fixed in the next computecpp version - std::allocator a1; // Default allocator for buffer - a1.deallocate(static_cast(p), num_bytes); + internal::aligned_free(p); } } - - EIGEN_STRONG_INLINE void deallocate_all() const { - std::lock_guard lock(mutex_); + void deallocate_all() const { + std::map>::iterator it=buffer_map.begin(); + while (it!=buffer_map.end()) { + auto p=it->first; + buffer_map.erase(it); + internal::aligned_free(const_cast(p)); + it=buffer_map.begin(); + } buffer_map.clear(); } - EIGEN_STRONG_INLINE std::map>::iterator find_buffer(const void* ptr) const { - std::lock_guard lock(mutex_); - auto it1 = buffer_map.find(static_cast(ptr)); - if (it1 != buffer_map.end()){ - return it1; - } - else{ - for(std::map>::iterator it=buffer_map.begin(); it!=buffer_map.end(); ++it){ - auto size = it->second.get_size(); - if((it->first < (static_cast(ptr))) && ((static_cast(ptr)) < (it->first + size)) ) return it; - } - } - std::cerr << "No sycl buffer found. Make sure that you have allocated memory for your buffer by calling allocate function in SyclDevice"<< std::endl; - abort(); - } - - // This function checks if the runtime recorded an error for the - // underlying stream device. - EIGEN_STRONG_INLINE bool ok() const { - if (!exception_caught_) { - m_queue.wait_and_throw(); - } - return !exception_caught_; - } - - // destructor - ~QueueInterface() { buffer_map.clear(); } -}; - -struct SyclDevice { - // class member. - QueueInterface* m_queue_stream; - /// QueueInterface is not owned. it is the caller's responsibility to destroy it. 
- explicit SyclDevice(QueueInterface* queue_stream) : m_queue_stream(queue_stream){} - - /// Creation of sycl accessor for a buffer. This function first tries to find + /// creation of sycl accessor for a buffer. This function first tries to find /// the buffer in the buffer_map. If found it gets the accessor from it, if not, - /// the function then adds an entry by creating a sycl buffer for that particular pointer. - template EIGEN_STRONG_INLINE cl::sycl::accessor - get_sycl_accessor(cl::sycl::handler &cgh, const void* ptr) const { - return (get_sycl_buffer(ptr).template get_access(cgh)); + ///the function then adds an entry by creating a sycl buffer for that particular pointer. + template inline cl::sycl::accessor + get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, const T * ptr) const { + return (get_sycl_buffer(num_bytes, ptr)->template get_access(cgh)); } - /// Accessing the created sycl device buffer for the device pointer - EIGEN_STRONG_INLINE cl::sycl::buffer& get_sycl_buffer(const void * ptr) const { - return m_queue_stream->find_buffer(ptr)->second; + template inline std::pair>::iterator,bool> add_sycl_buffer(const T *ptr, size_t num_bytes) const { + using Type = cl::sycl::buffer; + std::pair>::iterator,bool> ret = buffer_map.insert(std::pair>(ptr, std::shared_ptr(new Type(cl::sycl::range<1>(num_bytes)), + [](void *dataMem) { delete static_cast(dataMem); }))); + (static_cast(buffer_map.at(ptr).get()))->set_final_data(nullptr); + return ret; } - /// This is used to prepare the number of threads and also the number of threads per block for sycl kernels - template - EIGEN_STRONG_INLINE void parallel_for_setup(Index n, Index &tileSize, Index &rng, Index &GRange) const { - tileSize =static_cast(sycl_queue().get_device(). 
template get_info()/2); - rng = n; - if (rng==0) rng=static_cast(1); - GRange=rng; - if (tileSize>GRange) tileSize=GRange; - else if(GRange>tileSize){ - Index xMode = static_cast(GRange % tileSize); - if (xMode != 0) GRange += static_cast(tileSize - xMode); - } + template inline cl::sycl::buffer* get_sycl_buffer(size_t num_bytes,const T * ptr) const { + return static_cast*>(add_sycl_buffer(ptr, num_bytes).first->second.get()); } - /// allocate device memory - EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const { - return m_queue_stream->allocate(num_bytes); + + /// allocating memory on the cpu + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t) const { + return internal::aligned_malloc(8); } - /// deallocate device memory - EIGEN_STRONG_INLINE void deallocate(void *p) const { - m_queue_stream->deallocate(p); - } // some runtime conditions that can be applied here - EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; } + bool isDeviceSuitable() const { return true; } - /// the memcpy function - template EIGEN_STRONG_INLINE void memcpy(void *dst, const T *src, size_t n) const { - auto it1 = m_queue_stream->find_buffer((void*)src); - auto it2 = m_queue_stream->find_buffer(dst); - auto offset= (static_cast(static_cast(src))) - it1->first; - auto i= (static_cast(dst)) - it2->first; - offset/=sizeof(T); - i/=sizeof(T); - size_t rng, GRange, tileSize; - parallel_for_setup(n/sizeof(T), tileSize, rng, GRange); - sycl_queue().submit([&](cl::sycl::handler &cgh) { - auto src_acc =it1->second.template get_access(cgh); - auto dst_acc =it2->second.template get_access(cgh); - cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor(src_acc, dst_acc, rng, i, offset)); - }); - asynchronousExec(); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const { + ::memcpy(dst, src, n); } - /// The memcpyHostToDevice is used to copy the device only pointer to a host 
pointer. Using the device - /// pointer created as a key we find the sycl buffer and get the host accessor with discard_write mode - /// on it. Using a discard_write accessor guarantees that we do not bring back the current value of the - /// buffer to host. Then we use the memcpy to copy the data to the host accessor. The first time that - /// this buffer is accessed, the data will be copied to the device. - template EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const { - auto host_acc= get_sycl_buffer(dst). template get_access(); - ::memcpy(host_acc.get_pointer(), src, n); + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const { + auto host_acc= (static_cast*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access(); + memcpy(host_acc.get_pointer(), src, n); } - /// The memcpyDeviceToHost is used to copy the data from host to device. Here, in order to avoid double copying the data. We create a sycl - /// buffer with map_allocator for the destination pointer with a discard_write accessor on it. The lifespan of the buffer is bound to the - /// lifespan of the memcpyDeviceToHost function. We create a kernel to copy the data, from the device- only source buffer to the destination - /// buffer with map_allocator on the gpu in parallel. At the end of the function call the destination buffer would be destroyed and the data - /// would be available on the dst pointer using fast copy technique (map_allocator). In this case we can make sure that we copy the data back - /// to the cpu only once per function call. 
- template EIGEN_STRONG_INLINE void memcpyDeviceToHost(void *dst, const T *src, size_t n) const { - auto it = m_queue_stream->find_buffer(src); - auto offset =static_cast(static_cast(src))- it->first; - offset/=sizeof(T); - size_t rng, GRange, tileSize; - parallel_for_setup(n/sizeof(T), tileSize, rng, GRange); - // Assuming that the dst is the start of the destination pointer - auto dest_buf = cl::sycl::buffer >(static_cast(dst), cl::sycl::range<1>(n)); - sycl_queue().submit([&](cl::sycl::handler &cgh) { - auto src_acc= it->second.template get_access(cgh); - auto dst_acc =dest_buf.template get_access(cgh); - cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor(src_acc, dst_acc, rng, 0, offset)); - }); - asynchronousExec(); - } - /// returning the sycl queue - EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;} - /// Here is the implementation of memset function on sycl. - EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const { - size_t rng, GRange, tileSize; - parallel_for_setup(n, tileSize, rng, GRange); - sycl_queue().submit(memsetCghFunctor(get_sycl_buffer(static_cast(static_cast(data))),rng, GRange, tileSize, c )); - asynchronousExec(); - } - - struct memsetCghFunctor{ - cl::sycl::buffer& m_buf; - const size_t& rng , GRange, tileSize; - const int &c; - memsetCghFunctor(cl::sycl::buffer& buff, const size_t& rng_, const size_t& GRange_, const size_t& tileSize_, const int& c_) - :m_buf(buff), rng(rng_), GRange(GRange_), tileSize(tileSize_), c(c_){} - - void operator()(cl::sycl::handler &cgh) const { - auto buf_acc = m_buf.template get_access(cgh); - cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), memsetkernelFunctor(buf_acc, rng, c)); + /// whith the current implementation of sycl, the data is copied twice from device to host. This will be fixed soon. 
+ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(T *dst, const T *src, size_t n) const { + auto it = buffer_map.find(src); + if (it != buffer_map.end()) { + auto host_acc= (static_cast*>(it->second.get()))-> template get_access(); + memcpy(dst,host_acc.get_pointer(), n); + } else{ + eigen_assert("no device memory found. The memory might be destroyed before creation"); } - }; - - EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { - // FIXME - return 48*1024; } - EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { - // We won't try to take advantage of the l2 cache for the time being, and - // there is no l3 cache on cuda devices. - return firstLevelCacheSize(); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c, size_t n) const { + ::memset(buffer, c, n); } - /// No need for sycl it should act the same as CPU version - EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } - - EIGEN_STRONG_INLINE void synchronize() const { - sycl_queue().wait_and_throw(); //pass - } - - EIGEN_STRONG_INLINE void asynchronousExec() const { - sycl_queue().throw_asynchronous();//pass - } - // This function checks if the runtime recorded an error for the - // underlying stream device. - EIGEN_STRONG_INLINE bool ok() const { - return m_queue_stream->ok(); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + return 1; } }; - } // end namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 16180ca69..069680a11 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -12,6 +12,17 @@ namespace Eigen { +// Use the SimpleThreadPool by default. 
We'll switch to the new non blocking +// thread pool later. +#ifndef EIGEN_USE_SIMPLE_THREAD_POOL +template using ThreadPoolTempl = NonBlockingThreadPoolTempl; +typedef NonBlockingThreadPool ThreadPool; +#else +template using ThreadPoolTempl = SimpleThreadPoolTempl; +typedef SimpleThreadPool ThreadPool; +#endif + + // Barrier is an object that allows one or more threads to wait until // Notify has been called a specified number of times. class Barrier { @@ -245,7 +256,7 @@ struct ThreadPoolDevice { // Split into halves and submit to the pool. Index mid = first + divup((last - first) / 2, block_size) * block_size; pool_->Schedule([=, &handleRange]() { handleRange(mid, last); }); - handleRange(first, mid); + pool_->Schedule([=, &handleRange]() { handleRange(first, mid); }); }; handleRange(0, n); barrier.Wait(); diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 86405e69b..b24cdebf1 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -33,7 +33,7 @@ namespace Eigen { namespace internal { template struct dget { - static const std::ptrdiff_t value = get::value; + static const std::size_t value = get::value; }; @@ -90,11 +90,9 @@ struct fixed_size_tensor_index_extraction_helper // Fixed size #ifndef EIGEN_EMULATE_CXX11_META_H template -struct Sizes { +struct Sizes : internal::numeric_list { typedef internal::numeric_list Base; - const Base t = Base(); static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); - static const size_t count = Base::count; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { return Base::count; @@ -122,16 +120,16 @@ struct Sizes { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { - return 
internal::fixed_size_tensor_index_extraction_helper::run(index, t); + return internal::fixed_size_tensor_index_extraction_helper::run(index, *this); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, t); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, t); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); } }; diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 82dd1e640..06987132b 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -41,9 +41,6 @@ struct traits > // Intermediate typedef to workaround MSVC issue. 
typedef MakePointer_ MakePointerT; typedef typename MakePointerT::Type Type; - typedef typename MakePointerT::RefType RefType; - - }; }; @@ -120,7 +117,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const { return m_op; } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { } diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index d6415817b..834ce07df 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -32,7 +32,6 @@ struct TensorEvaluator typedef typename Derived::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; - typedef Derived XprType; // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits::NumDimensions > 0 ? 
@@ -69,9 +68,7 @@ struct TensorEvaluator return m_data[index]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename internal::traits::template MakePointer::RefType - coeffRef(Index index) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { eigen_assert(m_data); return m_data[index]; } @@ -97,9 +94,7 @@ struct TensorEvaluator } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename internal::traits::template MakePointer::RefType - coeffRef(const array& coords) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array& coords) { eigen_assert(m_data); if (static_cast(Layout) == static_cast(ColMajor)) { return m_data[m_dims.IndexOfColMajor(coords)]; @@ -157,8 +152,6 @@ struct TensorEvaluator typedef typename Derived::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; - typedef const Derived XprType; - // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits::NumDimensions > 0 ? diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 930837021..bbd5eb374 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -46,8 +46,6 @@ struct traits > // Intermediate typedef to workaround MSVC issue. 
typedef MakePointer_ MakePointerT; typedef typename MakePointerT::Type Type; - typedef typename MakePointerT::RefType RefType; - }; }; @@ -109,7 +107,7 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - /// op_ is used for sycl + /// op_ is used for sycl : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) { } diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 9a012c176..52b803d7f 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -20,19 +20,7 @@ namespace Eigen { // map_allocator. template struct MakePointer { typedef T* Type; - typedef T& RefType; }; -#if defined(EIGEN_USE_SYCL) -namespace TensorSycl { -namespace internal{ -template class ReductionFunctor; -template -class FullReductionKernelFunctor; -} -} -#endif - - template class MakePointer_ = MakePointer> class TensorMap; template class Tensor; diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 3b4f8eda1..d73f6dc68 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -33,7 +33,7 @@ struct functor_traits > */ template struct scalar_mod2_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op) + EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op); EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } }; template @@ -42,7 +42,7 @@ struct functor_traits > template struct scalar_fmod_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op) + EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op); EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Scalar operator()(const Scalar& a, const Scalar& b) const { return numext::fmod(a, b); diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index 485a082e2..ede3939c2 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -37,8 +37,6 @@ namespace { { #ifdef __CUDA_ARCH__ return __clz(val); -#elif defined(__SYCL_DEVICE_ONLY__) - return cl::sycl::clz(val); #elif EIGEN_COMP_MSVC unsigned long index; _BitScanReverse(&index, val); @@ -55,8 +53,6 @@ namespace { { #ifdef __CUDA_ARCH__ return __clzll(val); -#elif defined(__SYCL_DEVICE_ONLY__) - return cl::sycl::clz(val); #elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64 unsigned long index; _BitScanReverse64(&index, val); @@ -92,8 +88,6 @@ namespace { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { #if defined(__CUDA_ARCH__) return __umulhi(a, b); -#elif defined(__SYCL_DEVICE_ONLY__) - return cl::sycl::mul_hi(a, static_cast(b)); #else return (static_cast(a) * b) >> 32; #endif @@ -103,8 +97,6 @@ namespace { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { #if defined(__CUDA_ARCH__) return __umul64hi(a, b); -#elif defined(__SYCL_DEVICE_ONLY__) - return cl::sycl::mul_hi(a, static_cast(b)); #elif defined(__SIZEOF_INT128__) __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); return static_cast(v >> 64); @@ -124,7 +116,7 @@ namespace { template struct DividerHelper<64, T> { static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { -#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) +#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) return static_cast((static_cast<__uint128_t>(1) << (64+log_div)) / 
static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); #else const uint64_t shift = 1ULL << log_div; diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h index f92e39d69..ee0078bbc 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -51,12 +51,4 @@ #endif -#if EIGEN_OS_WIN || EIGEN_OS_WIN64 -#define EIGEN_SLEEP(n) Sleep(n) -#elif EIGEN_OS_GNULINUX -#define EIGEN_SLEEP(n) usleep(n * 1000); -#else -#define EIGEN_SLEEP(n) sleep(std::max(1, n/1000)) -#endif - #endif diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index b5ef31d55..615559d44 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -75,7 +75,6 @@ struct PacketType { HasSqrt = 1, HasRsqrt = 1, HasExp = 1, - HasExpm1 = 0, HasLog = 1, HasLog1p = 0, HasLog10 = 0, @@ -169,12 +168,12 @@ template struct IndexPair { #ifdef EIGEN_HAS_SFINAE namespace internal { - template + template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array customIndices2Array(IndexType& idx, numeric_list) { return { idx[Is]... 
}; } - template + template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array customIndices2Array(IndexType&, numeric_list) { return array(); diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index d582ccbe1..d34f1e328 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -299,16 +299,6 @@ template struct MemcpyTriggerForSlicing { EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; } }; #endif - -// It is very expensive to start the memcpy kernel on GPU: we therefore only -// use it for large copies. -#ifdef EIGEN_USE_SYCL -template struct MemcpyTriggerForSlicing { - EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { } - EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; } -}; -#endif - } // Eval as rvalue @@ -503,14 +493,7 @@ struct TensorEvaluator, Devi } return NULL; } - /// used by sycl - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator& impl() const{ - return m_impl; - } - /// used by sycl - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& startIndices() const{ - return m_offsets; - } + protected: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { @@ -723,7 +706,7 @@ struct TensorEvaluator startIndicesClamped, stopIndicesClamped; @@ -828,15 +811,6 @@ struct TensorEvaluator& impl() const{return m_impl;} - protected: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { @@ -871,10 +845,6 @@ struct TensorEvaluator m_offsets; // offset in a flattened shape const Strides m_strides; std::size_t m_block_total_size_max; - //use by sycl - const StartIndices m_exprStartIndices; - //use by sycl - const StopIndices m_exprStopIndices; }; // Eval as lvalue diff --git 
a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index a8e255246..647bcf108 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -200,13 +200,6 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - /// used by sycl - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PaddingDimensions& padding() const { return m_padding; } - /// used by sycl - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& padding_value() const { return m_paddingValue; } - /// used by sycl - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator& impl() const{return m_impl;} - private: EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( Index index, int dim_index) const { diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index c841786b8..41d0d0022 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -13,7 +13,6 @@ namespace Eigen { - /** \class TensorReduction * \ingroup CXX11_Tensor_Module * @@ -692,12 +691,6 @@ struct TensorEvaluator, template friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); #endif -#if defined(EIGEN_USE_SYCL) - template < typename HostExpr_, typename FunctorExpr_, typename Tuple_of_Acc_, typename Dims_, typename Op_, typename Index_> friend class TensorSycl::internal::ReductionFunctor; - template friend class TensorSycl::internal::FullReductionKernelFunctor; -#endif - - template friend struct internal::InnerReducer; // Returns the Index in the input tensor of the first value that needs to be diff --git 
a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h index c9912d9d4..3daecb045 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h @@ -25,28 +25,61 @@ namespace Eigen { namespace internal { -template struct syclGenericBufferReducer{ +template struct syclGenericBufferReducer{ template -static void run(BufferTOut& bufOut, BufferTIn& bufI, const Eigen::SyclDevice& dev, size_t length, size_t local){ +static void run(BufferTOut* bufOut, BufferTIn& bufI, const Eigen::SyclDevice& dev, size_t length, size_t local){ do { auto f = [length, local, bufOut, &bufI](cl::sycl::handler& h) mutable { cl::sycl::nd_range<1> r{cl::sycl::range<1>{std::max(length, local)}, cl::sycl::range<1>{std::min(length, local)}}; /* Two accessors are used: one to the buffer that is being reduced, * and a second to local memory, used to store intermediate data. */ - auto aI =bufI.template get_access(h); - auto aOut =bufOut.template get_access(h); - typedef decltype(aI) InputAccessor; - typedef decltype(aOut) OutputAccessor; - typedef cl::sycl::accessor LocalAccessor; - LocalAccessor scratch(cl::sycl::range<1>(local), h); + auto aI = + bufI.template get_access(h); + auto aOut = + bufOut->template get_access(h); + cl::sycl::accessor + scratch(cl::sycl::range<1>(local), h); /* The parallel_for invocation chosen is the variant with an nd_item * parameter, since the code requires barriers for correctness. 
*/ - h.parallel_for(r, TensorSycl::internal::GenericKernelReducer< CoeffReturnType, OutputAccessor, InputAccessor, LocalAccessor>(aOut, aI, scratch, length, local)); + h.parallel_for( + r, [aOut, aI, scratch, local, length](cl::sycl::nd_item<1> id) { + size_t globalid = id.get_global(0); + size_t localid = id.get_local(0); + /* All threads collectively read from global memory into local. + * The barrier ensures all threads' IO is resolved before + * execution continues (strictly speaking, all threads within + * a single work-group - there is no co-ordination between + * work-groups, only work-items). */ + if (globalid < length) { + scratch[localid] = aI[globalid]; + } + id.barrier(cl::sycl::access::fence_space::local_space); + + /* Apply the reduction operation between the current local + * id and the one on the other half of the vector. */ + if (globalid < length) { + int min = (length < local) ? length : local; + for (size_t offset = min / 2; offset > 0; offset /= 2) { + if (localid < offset) { + scratch[localid] += scratch[localid + offset]; + } + id.barrier(cl::sycl::access::fence_space::local_space); + } + /* The final result will be stored in local id 0. */ + if (localid == 0) { + aI[id.get_group(0)] = scratch[localid]; + if((length<=local) && globalid ==0){ + aOut[globalid]=scratch[localid]; + } + } + } + }); }; - dev.sycl_queue().submit(f); - dev.asynchronousExec(); + dev.m_queue.submit(f); + dev.m_queue.throw_asynchronous(); /* At this point, you could queue::wait_and_throw() to ensure that * errors are caught quickly. However, this would likely impact @@ -61,11 +94,11 @@ static void run(BufferTOut& bufOut, BufferTIn& bufI, const Eigen::SyclDevice& de }; +/// For now let's start with a full reducer /// Self is useless here because in expression construction we are going to treat reduction as a leafnode. /// we want to take reduction child and then build a construction and apply the full reducer function on it. 
Fullreducre applies the /// reduction operation on the child of the reduction. once it is done the reduction is an empty shell and can be thrown away and treated as // a leafNode. - template struct FullReducer { @@ -74,8 +107,8 @@ struct FullReducer { static void run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output) { typedef const typename Self::ChildType HostExpr; /// this is the child of reduction + typedef typename TensorSycl::internal::createPlaceHolderExpression::Type PlaceHolderExpr; auto functors = TensorSycl::internal::extractFunctors(self.impl()); - typedef decltype(functors) FunctorExpr; int red_factor =256; /// initial reduction. If the size is less than red_factor we only creates one thread. size_t inputSize =self.impl().dimensions().TotalSize(); size_t rng = inputSize/red_factor; // the total number of thread initially is half the size of the input @@ -83,7 +116,7 @@ struct FullReducer { if(rng ==0) { red_factor=1; }; - size_t tileSize =dev.sycl_queue().get_device(). template get_info()/2; + size_t tileSize =dev.m_queue.get_device(). template get_info()/2; size_t GRange=std::max((size_t )1, rng); // convert global range to power of 2 for redecution @@ -100,34 +133,51 @@ struct FullReducer { size_t outTileSize = tileSize; /// if the shared memory is less than the GRange, we set shared_mem size to the TotalSize and in this case one kernel would be created for recursion to reduce all to one. if (GRange < outTileSize) outTileSize=GRange; + // getting final out buffer at the moment the created buffer is true because there is no need for assign + auto out_buffer =dev.template get_sycl_buffer::type>(self.dimensions().TotalSize(), output); /// creating the shared memory for calculating reduction. /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can /// recursively apply reduction on it in order to reduce the whole. 
auto temp_global_buffer =cl::sycl::buffer(cl::sycl::range<1>(GRange)); typedef typename Eigen::internal::remove_all::type Dims; - // Dims dims= self.xprDims(); - //Op functor = reducer; - dev.sycl_queue().submit([&](cl::sycl::handler &cgh) { + Dims dims= self.xprDims(); + Op functor = reducer; + dev.m_queue.submit([&](cl::sycl::handler &cgh) { // create a tuple of accessors from Evaluator auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); - typedef decltype(tuple_of_accessors) TupleType; auto tmp_global_accessor = temp_global_buffer. template get_access(cgh); - typedef decltype(tmp_global_accessor) OutAccessor; - cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(outTileSize)), - TensorSycl::internal::FullReductionKernelFunctor - (tmp_global_accessor, rng, remaining, red_factor, reducer, self.xprDims(), functors, tuple_of_accessors)); - }); - dev.asynchronousExec(); - // getting final out buffer at the moment the created buffer is true because there is no need for assign - auto out_buffer =dev.get_sycl_buffer(output); - /// This is used to recursively reduce the tmp value to an element of 1; - syclGenericBufferReducer::run(out_buffer, temp_global_buffer,dev, GRange, outTileSize); + cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(outTileSize)), [=](cl::sycl::nd_item<1> itemID) { + typedef typename TensorSycl::internal::ConvertToDeviceExpression::Type DevExpr; + auto device_expr = TensorSycl::internal::createDeviceExpression(functors, tuple_of_accessors); + /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour + /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the + /// calculated result to its parent kernel. 
While the latter is automatically detected through our device expression generator. The former is created here. + const auto device_self_expr= TensorReductionOp(device_expr.expr, dims, functor); + /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is + /// the device_evaluator is detectable and recognisable on the device. + auto device_self_evaluator = Eigen::TensorEvaluator(device_self_expr, Eigen::DefaultDevice()); + /// const cast added as a naive solution to solve the qualifier drop error + auto globalid=itemID.get_global_linear_id(); + + if(globalid::reduce(device_self_evaluator, red_factor*globalid, red_factor, const_cast(functor)); + else + tmp_global_accessor.get_pointer()[globalid]=static_cast(0); + + if(remaining!=0 && globalid==0 ) + // this will add the rest of input buffer when the input size is not devidable to red_factor. + tmp_global_accessor.get_pointer()[globalid]+=InnerMostDimReducer::reduce(device_self_evaluator, red_factor*(rng), remaining, const_cast(functor)); + }); + }); + dev.m_queue.throw_asynchronous(); + +/// This is used to recursively reduce the tmp value to an element of 1; + syclGenericBufferReducer::run(out_buffer, temp_global_buffer,dev, GRange, outTileSize); } }; - template struct InnerReducer { @@ -136,28 +186,52 @@ struct InnerReducer { static bool run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output, typename Self::Index , typename Self::Index num_coeffs_to_preserve) { typedef const typename Self::ChildType HostExpr; /// this is the child of reduction + typedef typename TensorSycl::internal::createPlaceHolderExpression::Type PlaceHolderExpr; auto functors = TensorSycl::internal::extractFunctors(self.impl()); - typedef decltype(functors) FunctorExpr; - typename Self::Index range, GRange, tileSize; - typedef typename Eigen::internal::remove_all::type Dims; + size_t tileSize =dev.m_queue.get_device(). 
template get_info()/2; + + size_t GRange=num_coeffs_to_preserve; + if (tileSize>GRange) tileSize=GRange; + else if(GRange>tileSize){ + size_t xMode = GRange % tileSize; + if (xMode != 0) GRange += (tileSize - xMode); + } // getting final out buffer at the moment the created buffer is true because there is no need for assign /// creating the shared memory for calculating reduction. /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can /// recursively apply reduction on it in order to reduce the whole. - dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange); - dev.sycl_queue().submit([&](cl::sycl::handler &cgh) { + typedef typename Eigen::internal::remove_all::type Dims; + Dims dims= self.xprDims(); + Op functor = reducer; + + dev.m_queue.submit([&](cl::sycl::handler &cgh) { // create a tuple of accessors from Evaluator auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); - typedef typename Eigen::internal::remove_all::type Tuple_of_Acc; - auto output_accessor = dev.template get_sycl_accessor(cgh, output); - - cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), - TensorSycl::internal::ReductionFunctor - (output_accessor, functors, tuple_of_accessors, self.xprDims(), reducer, range)); + auto output_accessor = dev.template get_sycl_accessor(num_coeffs_to_preserve,cgh, output); + cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { + typedef typename TensorSycl::internal::ConvertToDeviceExpression::Type DevExpr; + auto device_expr = TensorSycl::internal::createDeviceExpression(functors, tuple_of_accessors); + /// reduction cannot be captured automatically through our device conversion recursion. 
The reason is that reduction has two behaviour + /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the + /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. + const auto device_self_expr= TensorReductionOp(device_expr.expr, dims, functor); + /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is + /// the device_evaluator is detectable and recognisable on the device. + typedef Eigen::TensorEvaluator DeiceSelf; + auto device_self_evaluator = Eigen::TensorEvaluator(device_self_expr, Eigen::DefaultDevice()); + /// const cast added as a naive solution to solve the qualifier drop error + auto globalid=itemID.get_global_linear_id(); + if (globalid< static_cast(num_coeffs_to_preserve)) { + typename DeiceSelf::CoeffReturnType accum = functor.initialize(); + GenericDimReducer::reduce(device_self_evaluator, device_self_evaluator.firstInput(globalid),const_cast(functor), &accum); + functor.finalize(accum); + output_accessor.get_pointer()[globalid]= accum; + } + }); }); - dev.asynchronousExec(); + dev.m_queue.throw_asynchronous(); return false; } }; diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index edc9dd3f3..113c060e3 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -117,7 +117,7 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_shuffle(op.shufflePermutation()) + : m_impl(op.expression(), device) { const typename TensorEvaluator::Dimensions& input_dims 
= m_impl.dimensions(); const Shuffle& shuffle = op.shufflePermutation(); @@ -187,11 +187,6 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - // required by sycl - EIGEN_STRONG_INLINE const Shuffle& shufflePermutation() const {return m_shuffle;} - // required by sycl - EIGEN_STRONG_INLINE const TensorEvaluator& impl() const {return m_impl;} - protected: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { Index inputIndex = 0; @@ -211,12 +206,11 @@ struct TensorEvaluator, Device> return inputIndex + index * m_inputStrides[NumDims - 1]; } } + Dimensions m_dimensions; array m_outputStrides; array m_inputStrides; TensorEvaluator m_impl; - /// required by sycl - Shuffle m_shuffle; }; diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h index f8121d17b..2854a4a17 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h @@ -126,7 +126,7 @@ class TensorStorage, Options_> } else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_dimensions = nbDimensions; } diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h index 2e61ee049..bb8800d45 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h @@ -20,14 +20,12 @@ template struct MakeGlobalPointer { typedef typename cl::sycl::global_ptr::pointer_t Type; - typedef typename cl::sycl::global_ptr::reference_t RefType; }; // global pointer to set different attribute state for a class template struct MakeLocalPointer { typedef typename cl::sycl::local_ptr::pointer_t Type; - typedef typename 
cl::sycl::local_ptr::reference_t RefType; }; @@ -35,9 +33,6 @@ namespace Eigen { namespace TensorSycl { namespace internal { - template struct GenericKernelReducer; - - /// This struct is used for special expression nodes with no operations (for example assign and selectOP). struct NoOP; @@ -82,10 +77,6 @@ template struct GetType{ // kernel execution using fusion #include "TensorSyclRun.h" -//sycl functors -#include "TensorSyclFunctors.h" - -#include "TensorContractionSycl.h" #endif // end of EIGEN_USE_SYCL #endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h index 113dd2557..8729c86ee 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h @@ -48,9 +48,9 @@ struct DeviceConvertor{ /// specialisation of the \ref ConvertToDeviceExpression struct when the node /// type is TensorMap #define TENSORMAPCONVERT(CVQual)\ -template class MakePointer_>\ -struct ConvertToDeviceExpression > {\ - typedef CVQual TensorMap Type;\ +template class MakePointer_>\ +struct ConvertToDeviceExpression, Options2_, MakePointer_> > {\ + typedef CVQual TensorMap, Options2_, MakeGlobalPointer> Type;\ }; TENSORMAPCONVERT(const) @@ -114,28 +114,6 @@ KERNELBROKERCONVERTREDUCTION(const) KERNELBROKERCONVERTREDUCTION() #undef KERNELBROKERCONVERTREDUCTION -#define KERNELBROKERCONVERTSLICEOP(CVQual)\ -template\ -struct ConvertToDeviceExpression >{\ - typedef CVQual TensorSlicingOp::Type> Type;\ -}; - -KERNELBROKERCONVERTSLICEOP(const) -KERNELBROKERCONVERTSLICEOP() -#undef KERNELBROKERCONVERTSLICEOP - - -#define KERNELBROKERCONVERTERSLICESTRIDEOP(CVQual)\ -template\ -struct ConvertToDeviceExpression >{\ - typedef CVQual TensorStridingSlicingOp::Type> 
Type;\ -}; - -KERNELBROKERCONVERTERSLICESTRIDEOP(const) -KERNELBROKERCONVERTERSLICESTRIDEOP() -#undef KERNELBROKERCONVERTERSLICESTRIDEOP - - } // namespace internal } // namespace TensorSycl } // namespace Eigen diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h index df1a732e7..7ed3a3a56 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h @@ -25,21 +25,12 @@ namespace Eigen { namespace TensorSycl { namespace internal { - -template -struct DeviceFixedSizeTensor; - -template -struct DeviceFixedSizeTensor>{ - template - static EIGEN_ALWAYS_INLINE Expr instantiate(Data& dt) {return Expr(ConvertToActualTypeSycl(typename Expr::Scalar, dt), Indices...);} -}; /// this class is used by EvalToOp in order to create an lhs expression which is /// a pointer from an accessor on device-only buffer template struct EvalToLHSConstructor { PtrType expr; - EvalToLHSConstructor(const utility::tuple::Tuple &t) : expr(ConvertToActualTypeSycl(typename Eigen::internal::remove_all::type, utility::tuple::get(t))) {} + EvalToLHSConstructor(const utility::tuple::Tuple &t): expr((&(*(utility::tuple::get(t).get_pointer())))) {} }; /// \struct ExprConstructor is used to reconstruct the expression on the device and @@ -54,39 +45,21 @@ struct ExprConstructor; /// specialisation of the \ref ExprConstructor struct when the node type is /// TensorMap #define TENSORMAP(CVQual)\ -template class MakePointer_, size_t N, typename... 
Params>\ -struct ExprConstructor< CVQual TensorMap,\ -CVQual PlaceHolder, N>, Params...>{\ - typedef CVQual TensorMap Type;\ +struct ExprConstructor< CVQual TensorMap, Options2_, MakeGlobalPointer>,\ +CVQual PlaceHolder, Options3_, MakePointer_>, N>, Params...>{\ + typedef CVQual TensorMap, Options2_, MakeGlobalPointer> Type;\ Type expr;\ template \ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t)\ - : expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get(t)), fd.dimensions())){}\ + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), fd.dimensions())) {}\ }; - TENSORMAP(const) TENSORMAP() #undef TENSORMAP -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorMap -#define TENSORMAPFIXEDSIZE(CVQual)\ -template class MakePointer_, size_t N, typename... Params>\ -struct ExprConstructor< CVQual TensorMap, Options_, MakeGlobalPointer>,\ -CVQual PlaceHolder, Options_, MakePointer_>, N>, Params...>{\ - typedef CVQual TensorMap, Options_, MakeGlobalPointer> Type;\ - Type expr;\ - template \ - ExprConstructor(FuncDetector &, const utility::tuple::Tuple &t)\ - : expr(DeviceFixedSizeTensor::instantiate(utility::tuple::get(t))){}\ -}; -TENSORMAPFIXEDSIZE(const) -TENSORMAPFIXEDSIZE() -#undef TENSORMAPFIXEDSIZE - #define UNARYCATEGORY(CVQual)\ template class UnaryCategory, typename OP, typename OrigRHSExpr, typename RHSExpr, typename... 
Params>\ struct ExprConstructor, CVQual UnaryCategory, Params...> {\ @@ -189,7 +162,7 @@ struct ExprConstructor, CVQual ASSIGN() #undef ASSIGN /// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorEvalToOp /// 0 here is the output number in the buffer +/// TensorEvalToOp #define EVALTO(CVQual)\ template \ struct ExprConstructor, CVQual TensorEvalToOp, Params...> {\ @@ -215,11 +188,11 @@ template \ struct ExprConstructor,\ CVQual PlaceHolder, N>, Params...> {\ typedef CVQual TensorMap::Scalar,\ - TensorForcedEvalOp::NumDimensions, Eigen::internal::traits>::Layout, typename TensorForcedEvalOp::Index>, Eigen::internal::traits>::Layout, MakeGlobalPointer> Type;\ + TensorForcedEvalOp::NumDimensions, 0, typename TensorForcedEvalOp::Index>, 0, MakeGlobalPointer> Type;\ Type expr;\ template \ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t)\ - : expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get(t)), fd.dimensions())) {}\ + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), fd.dimensions())) {}\ }; FORCEDEVAL(const) @@ -240,89 +213,17 @@ struct ExprConstructor, N>, Params...> {\ static const size_t NumIndices= ValueCondition< TensorReductionOp::NumDimensions==0, 1, TensorReductionOp::NumDimensions >::Res;\ typedef CVQual TensorMap::Scalar,\ - NumIndices, Eigen::internal::traits>::Layout, typename TensorReductionOp::Index>, Eigen::internal::traits>::Layout, MakeGlobalPointer> Type;\ + NumIndices, 0, typename TensorReductionOp::Index>, 0, MakeGlobalPointer> Type;\ Type expr;\ template \ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t)\ - :expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get(t)), fd.dimensions())) {}\ + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), fd.dimensions())) {}\ }; SYCLREDUCTIONEXPR(const) SYCLREDUCTIONEXPR() #undef SYCLREDUCTIONEXPR - - -#define SYCLSLICEOPEXPR(CVQual)\ -template\ -struct ExprConstructor , CVQual 
TensorSlicingOp, Params... >{\ - typedef ExprConstructor my_xpr_type;\ - typedef CVQual TensorSlicingOp Type;\ - my_xpr_type xprExpr;\ - Type expr;\ - template \ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple &t)\ - : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.startIndices(), funcD.dimensions()) {}\ -}; - -SYCLSLICEOPEXPR(const) -SYCLSLICEOPEXPR() -#undef SYCLSLICEOPEXPR - - -#define SYCLSLICESTRIDEOPEXPR(CVQual)\ -template\ -struct ExprConstructor, CVQual TensorStridingSlicingOp, Params... >{\ - typedef ExprConstructor my_xpr_type;\ - typedef CVQual TensorStridingSlicingOp Type;\ - my_xpr_type xprExpr;\ - Type expr;\ - template \ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple &t)\ - : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.startIndices(), funcD.stopIndices(),funcD.strides()) {}\ -}; - -SYCLSLICESTRIDEOPEXPR(const) -SYCLSLICESTRIDEOPEXPR() -#undef SYCLSLICESTRIDEOPEXPR - -#define SYCLRESHAPEANDSHUFFLEOPEXPRCONST(OPEXPR, CVQual)\ -template\ -struct ExprConstructor , CVQual OPEXPR , Params... >{\ - typedef ExprConstructor my_xpr_type;\ - typedef CVQual OPEXPR Type ;\ - my_xpr_type xprExpr;\ - Type expr;\ - template \ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple &t)\ - : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.param()) {}\ -}; - -SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorReshapingOp, const) -SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorReshapingOp, ) - -SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, const) -SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, ) -#undef SYCLRESHAPEANDSHUFFLEOPEXPRCONST - -#define SYCLPADDINGOPEXPRCONST(OPEXPR, CVQual)\ -template\ -struct ExprConstructor , CVQual OPEXPR , Params... 
>{\ - typedef ExprConstructor my_xpr_type;\ - typedef CVQual OPEXPR Type ;\ - my_xpr_type xprExpr;\ - Type expr;\ - template \ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple &t)\ - : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.param() , funcD.scalar_param()) {}\ -}; - -SYCLPADDINGOPEXPRCONST(TensorPaddingOp, const) -SYCLPADDINGOPEXPRCONST(TensorPaddingOp, ) -#undef SYCLPADDINGOPEXPRCONST - - - /// template deduction for \ref ExprConstructor struct template auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple &t) diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h index 876fcd45e..b1da6858e 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h @@ -43,191 +43,159 @@ template struct ExtractAccessor; struct AccessorConstructor{ - template static inline auto getTuple(cl::sycl::handler& cgh, const Arg& eval) + template static inline auto getTuple(cl::sycl::handler& cgh, Arg eval) -> decltype(ExtractAccessor::getTuple(cgh, eval)) { return ExtractAccessor::getTuple(cgh, eval); } - template static inline auto getTuple(cl::sycl::handler& cgh, const Arg1& eval1, const Arg2& eval2) + template static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2) -> decltype(utility::tuple::append(ExtractAccessor::getTuple(cgh, eval1), ExtractAccessor::getTuple(cgh, eval2))) { return utility::tuple::append(ExtractAccessor::getTuple(cgh, eval1), ExtractAccessor::getTuple(cgh, eval2)); } - template static inline auto getTuple(cl::sycl::handler& cgh, const Arg1& eval1 , const Arg2& eval2 , const Arg3& eval3) + template static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3) -> decltype(utility::tuple::append(ExtractAccessor::getTuple(cgh, 
eval1),utility::tuple::append(ExtractAccessor::getTuple(cgh, eval2), ExtractAccessor::getTuple(cgh, eval3)))) { return utility::tuple::append(ExtractAccessor::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor::getTuple(cgh, eval2), ExtractAccessor::getTuple(cgh, eval3))); } - template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, const Arg& eval) - -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor(cgh,eval.data()))){ - return utility::tuple::make_tuple(eval.device().template get_sycl_accessor(cgh,eval.data())); + template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval) + -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor::type>(eval.dimensions().TotalSize(), cgh,eval.data()))){ + return utility::tuple::make_tuple(eval.device().template get_sycl_accessor::type>(eval.dimensions().TotalSize(), cgh,eval.data())); } }; /// specialisation of the \ref ExtractAccessor struct when the node type is -/// TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp -#define SYCLUNARYCATEGORYEXTACC(CVQual)\ -template class UnaryCategory, typename OP, typename RHSExpr, typename Dev>\ -struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ - return AccessorConstructor::getTuple(cgh, eval.impl());\ - }\ +/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp +template class UnaryCategory, typename OP, typename RHSExpr, typename Dev> +struct ExtractAccessor, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){ + return AccessorConstructor::getTuple(cgh, eval.impl()); + } }; -SYCLUNARYCATEGORYEXTACC(const) 
-SYCLUNARYCATEGORYEXTACC() -#undef SYCLUNARYCATEGORYEXTACC - +/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp +template class UnaryCategory, typename OP, typename RHSExpr, typename Dev> +struct ExtractAccessor, Dev> > +: ExtractAccessor, Dev> > {}; +/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorCwiseBinaryOp +template class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> +struct ExtractAccessor, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ + return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); + } +}; /// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseBinaryOp -#define SYCLBINARYCATEGORYEXTACC(CVQual)\ -template class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>\ -struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){\ - return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());\ - }\ -}; - -SYCLBINARYCATEGORYEXTACC(const) -SYCLBINARYCATEGORYEXTACC() -#undef SYCLBINARYCATEGORYEXTACC +template class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> +struct ExtractAccessor, Dev> > +: ExtractAccessor, Dev> >{}; /// specialisation of the \ref ExtractAccessor struct when the node type is /// const TensorCwiseTernaryOp -#define SYCLTERNARYCATEGORYEXTACC(CVQual)\ -template class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>\ -struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh, const 
TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){\ - return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl());\ - }\ +template class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> +struct ExtractAccessor, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){ + return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl()); + } }; -SYCLTERNARYCATEGORYEXTACC(const) -SYCLTERNARYCATEGORYEXTACC() -#undef SYCLTERNARYCATEGORYEXTACC +/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseTernaryOp +template class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> +struct ExtractAccessor, Dev> > +: ExtractAccessor, Dev> >{}; +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseSelectOp. This is a special case where there is no OP +template +struct ExtractAccessor, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){ + return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl()); + } +}; /// specialisation of the \ref ExtractAccessor struct when the node type is /// TensorCwiseSelectOp. 
This is a special case where there is no OP -#define SYCLSELECTOPEXTACC(CVQual)\ -template \ -struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){\ - return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl());\ - }\ -}; +template +struct ExtractAccessor, Dev> > +: ExtractAccessor, Dev> >{}; -SYCLSELECTOPEXTACC(const) -SYCLSELECTOPEXTACC() -#undef SYCLSELECTOPEXTACC +/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorAssignOp +template +struct ExtractAccessor, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ + return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); + } +}; /// specialisation of the \ref ExtractAccessor struct when the node type is TensorAssignOp -#define SYCLTENSORASSIGNOPEXTACC(CVQual)\ -template \ -struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){\ - return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());\ - }\ -}; - - SYCLTENSORASSIGNOPEXTACC(const) - SYCLTENSORASSIGNOPEXTACC() - #undef SYCLTENSORASSIGNOPEXTACC +template +struct ExtractAccessor, Dev> > +: ExtractAccessor, Dev> >{}; /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorMap #define TENSORMAPEXPR(CVQual, ACCType)\ template \ struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator, Dev>& eval)\ + static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator, Dev> eval)\ -> 
decltype(AccessorConstructor::template getAccessor(cgh, eval)){\ return AccessorConstructor::template getAccessor(cgh, eval);\ }\ }; - TENSORMAPEXPR(const, cl::sycl::access::mode::read) TENSORMAPEXPR(, cl::sycl::access::mode::read_write) #undef TENSORMAPEXPR -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorForcedEvalOp -#define SYCLFORCEDEVALEXTACC(CVQual)\ -template \ -struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){\ - return AccessorConstructor::template getAccessor(cgh, eval);\ - }\ +/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorForcedEvalOp +template +struct ExtractAccessor, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) + -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){ + return AccessorConstructor::template getAccessor(cgh, eval); + } }; -SYCLFORCEDEVALEXTACC(const) -SYCLFORCEDEVALEXTACC() -#undef SYCLFORCEDEVALEXTACC +/// specialisation of the \ref ExtractAccessor struct when the node type is TensorForcedEvalOp +template +struct ExtractAccessor, Dev> > +: ExtractAccessor, Dev> >{}; +/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorEvalToOp +template +struct ExtractAccessor, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){ + return utility::tuple::append(AccessorConstructor::template getAccessor(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl())); + } +}; /// specialisation of the \ref ExtractAccessor struct when the node type is TensorEvalToOp -#define SYCLEVALTOEXTACC(CVQual)\ -template \ -struct ExtractAccessor, Dev> > {\ - static inline 
auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator, Dev>& eval)\ - -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){\ - return utility::tuple::append(AccessorConstructor::template getAccessor(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()));\ - }\ -}; +template +struct ExtractAccessor, Dev> > +: ExtractAccessor, Dev> >{}; -SYCLEVALTOEXTACC(const) -SYCLEVALTOEXTACC() -#undef SYCLEVALTOEXTACC +/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorReductionOp +template +struct ExtractAccessor, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) + -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){ + return AccessorConstructor::template getAccessor(cgh, eval); + } +}; /// specialisation of the \ref ExtractAccessor struct when the node type is TensorReductionOp -#define SYCLREDUCTIONEXTACC(CVQual)\ -template \ -struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){\ - return AccessorConstructor::template getAccessor(cgh, eval);\ - }\ -}; - -SYCLREDUCTIONEXTACC(const) -SYCLREDUCTIONEXTACC() -#undef SYCLREDUCTIONEXTACC - -/// specialisation of the \ref ExtractAccessor struct when the node type is -/// const TensorSlicingOp. 
This is a special case where there is no OP -#define SYCLSLICEOPEXTACC(CVQual)\ -template \ -struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ - return AccessorConstructor::getTuple(cgh, eval.impl());\ - }\ -}; - -SYCLSLICEOPEXTACC(const) -SYCLSLICEOPEXTACC() -#undef SYCLSLICEOPEXTACC - -#define SYCLSLICESTRIDEOPEXTACC(CVQual)\ -template\ -struct ExtractAccessor, Dev> >{\ - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ - -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ - return AccessorConstructor::getTuple(cgh, eval.impl());\ - }\ -}; - -SYCLSLICESTRIDEOPEXTACC(const) -SYCLSLICESTRIDEOPEXTACC() -#undef SYCLSLICESTRIDEOPEXTACC - +template +struct ExtractAccessor, Dev> > +: ExtractAccessor, Dev> >{}; /// template deduction for \ref ExtractAccessor template -auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& eval) --> decltype(ExtractAccessor::getTuple(cgh, eval)) { - return ExtractAccessor::getTuple(cgh, eval); +auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr) +-> decltype(ExtractAccessor::getTuple(cgh, expr)) { + return ExtractAccessor::getTuple(cgh, expr); } } /// namespace TensorSycl diff --git a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h index 6f9ab57af..427125343 100644 --- a/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h +++ b/uppsrc/plugin/Eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h @@ -36,232 +36,135 @@ namespace internal { template struct FunctorExtractor{ typedef typename Evaluator::Dimensions Dimensions; const Dimensions m_dimensions; - EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + const Dimensions& 
dimensions() const { return m_dimensions; } FunctorExtractor(const Evaluator& expr) : m_dimensions(expr.dimensions()) {} }; -#define SYCLEXTRTENSORMAPFIXEDSIZE(CVQual)\ -template class MakePointer_, typename Dev>\ -struct FunctorExtractor< TensorEvaluator , Options_, MakePointer_> , Dev> >{\ -FunctorExtractor(const TensorEvaluator , Options_, MakePointer_> , Dev>& ){}\ +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp +template