Class LibMatrixMult
- java.lang.Object
-
- org.apache.sysds.runtime.matrix.data.LibMatrixMult
-
public class LibMatrixMult extends Object
MB: Library for matrix multiplications including MM, MV, VV for all combinations of dense, sparse, ultra-sparse representations and special operations such as transpose-self matrix multiplication. In general, all implementations internally use dense outputs for direct access, but change the final result to sparse if necessary. The only exceptions are ultra-sparse matrix mult, wsloss and wsigmoid.
-
-
Field Summary
Fields Modifier and Type Field Description static int
L2_CACHESIZE
static int
L3_CACHESIZE
-
Method Summary
All Methods Static Methods Concrete Methods Modifier and Type Method Description static long
copyUpperToLowerTriangle(MatrixBlock ret)
static long
copyUpperToLowerTriangleDense(MatrixBlock ret)
Used for all versions of TSMM where the result is known to be symmetric.
static long
copyUpperToLowerTriangleSparse(MatrixBlock ret)
static double
dotProduct(double[] a, double[] b, int[] aix, int ai, int bi, int len)
static double
dotProduct(double[] a, double[] b, int ai, int bi, int len)
static MatrixBlock
emptyMatrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret)
static boolean
isOuterProductTSMM(int rlen, int clen, boolean left)
static boolean
isSkinnyRightHandSide(long m1rlen, long m1clen, long m2rlen, long m2clen, boolean inclCacheSize)
static boolean
isSparseOutputMatrixMult(MatrixBlock m1, MatrixBlock m2)
static boolean
isSparseOutputTSMM(MatrixBlock m1)
static boolean
isSparseOutputTSMM(MatrixBlock m1, boolean ultraSparse)
static boolean
isUltraSparseMatrixMult(MatrixBlock m1, MatrixBlock m2, boolean m1Perm)
static MatrixBlock
matrixMult(MatrixBlock m1, MatrixBlock m2)
Performs a matrix multiplication. All variants use an IKJ access pattern and internally use dense output.
static MatrixBlock
matrixMult(MatrixBlock m1, MatrixBlock m2, int k)
Performs a matrix multiplication. All variants use an IKJ access pattern and internally use dense output.
static MatrixBlock
matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret)
Performs a matrix multiplication and stores the result in the output matrix.
static MatrixBlock
matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean fixedRet)
This method allows one to disable the sparsity examination of the output.
static MatrixBlock
matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean fixedRet, int k)
Performs a matrix multiplication and stores the result in the output matrix.
static MatrixBlock
matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int k)
Performs a multi-threaded matrix multiplication and stores the result in the output matrix.
static void
matrixMultChain(MatrixBlock mX, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, MapMultChain.ChainType ct)
Performs a matrix multiplication chain operation of type t(X)%*%(X%*%v) or t(X)%*%(w*(X%*%v)).
static void
matrixMultChain(MatrixBlock mX, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, MapMultChain.ChainType ct, int k)
Performs a parallel matrix multiplication chain operation of type t(X)%*%(X%*%v) or t(X)%*%(w*(X%*%v)).
static void
matrixMultDenseDenseMM(DenseBlock a, DenseBlock b, DenseBlock c, int n, int cd, int rl, int ru, int cl, int cu)
static void
matrixMultDenseDenseMMDedup(DenseBlockFP64DEDUP a, DenseBlock b, DenseBlockFP64DEDUP c, int n, int cd, int rl, int ru, ConcurrentHashMap<double[],double[]> cache)
static void
matrixMultPermute(MatrixBlock pm1, MatrixBlock m2, MatrixBlock ret1, MatrixBlock ret2)
static void
matrixMultPermute(MatrixBlock pm1, MatrixBlock m2, MatrixBlock ret1, MatrixBlock ret2, int k)
static void
matrixMultTransposeSelf(MatrixBlock m1, MatrixBlock ret, boolean leftTranspose)
static void
matrixMultTransposeSelf(MatrixBlock m1, MatrixBlock ret, boolean leftTranspose, boolean copyToLowerTriangle)
static void
matrixMultTransposeSelf(MatrixBlock m1, MatrixBlock ret, boolean leftTranspose, int k)
Transpose-self matrix multiplication (TSMM), with the left side optionally transposed.
static void
matrixMultWCeMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, double eps, MatrixBlock ret, WeightedCrossEntropy.WCeMMType wt)
static void
matrixMultWCeMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, double eps, MatrixBlock ret, WeightedCrossEntropy.WCeMMType wt, int k)
static void
matrixMultWDivMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock mX, MatrixBlock ret, WeightedDivMM.WDivMMType wt)
NOTE: This operation has limited NaN support, which is acceptable because all our sparse-safe operations have only limited NaN support.
static void
matrixMultWDivMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock mX, MatrixBlock ret, WeightedDivMM.WDivMMType wt, int k)
NOTE: This operation has limited NaN support, which is acceptable because all our sparse-safe operations have only limited NaN support.
static void
matrixMultWSigmoid(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WeightedSigmoid.WSigmoidType wt)
static void
matrixMultWSigmoid(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WeightedSigmoid.WSigmoidType wt, int k)
static void
matrixMultWSLoss(MatrixBlock mX, MatrixBlock mU, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, WeightedSquaredLoss.WeightsType wt)
static void
matrixMultWSLoss(MatrixBlock mX, MatrixBlock mU, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, WeightedSquaredLoss.WeightsType wt, int k)
static void
matrixMultWuMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WeightedUnaryMM.WUMMType wt, ValueFunction fn)
static void
matrixMultWuMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WeightedUnaryMM.WUMMType wt, ValueFunction fn, int k)
static MatrixBlock
prepMatrixMultTransposeSelfInput(MatrixBlock m1, boolean leftTranspose, boolean par)
static boolean
satisfiesMultiThreadingConstraints(MatrixBlock m1, boolean checkMem, boolean checkFLOPs, long FPfactor, int k)
static boolean
satisfiesMultiThreadingConstraints(MatrixBlock m1, int k)
static boolean
satisfiesMultiThreadingConstraints(MatrixBlock m1, MatrixBlock m2, boolean checkMem, boolean checkFLOPs, long FPfactor, int k)
static void
vectAdd(double[] a, double[] c, int[] aix, int ai, int ci, int alen)
static void
vectAdd(double[] a, double[] c, int ai, int ci, int len)
static void
vectAdd(double[] a, double bval, double[] c, int ai, int ci, int len)
static void
vectAddInPlace(double aval, double[] c, int ci, int len)
static void
vectMultiplyAdd(double aval, double[] b, double[] c, int[] bix, int bi, int ci, int len)
static void
vectMultiplyAdd(double aval, double[] b, double[] c, int bi, int ci, int len)
static void
vectMultiplyInPlace(double aval, double[] c, int ci, int len)
static void
vectMultiplyWrite(double[] a, double[] b, double[] c, int[] bix, int ai, int bi, int ci, int len)
static void
vectMultiplyWrite(double[] a, double[] b, double[] c, int ai, int bi, int ci, int len)
static void
vectMultiplyWrite(double aval, double[] b, double[] c, int bi, int ci, int len)
-
-
-
Field Detail
-
L2_CACHESIZE
public static final int L2_CACHESIZE
- See Also:
- Constant Field Values
-
L3_CACHESIZE
public static final int L3_CACHESIZE
- See Also:
- Constant Field Values
-
-
Method Detail
-
matrixMult
public static MatrixBlock matrixMult(MatrixBlock m1, MatrixBlock m2)
Performs a matrix multiplication. All variants use an IKJ access pattern and internally use dense output. After the actual computation, we recompute nnz and check for sparse/dense representation.
- Parameters:
m1 - first matrix
m2 - second matrix
- Returns:
the result matrix block
-
matrixMult
public static MatrixBlock matrixMult(MatrixBlock m1, MatrixBlock m2, int k)
Performs a matrix multiplication. All variants use an IKJ access pattern and internally use dense output. After the actual computation, we recompute nnz and check for sparse/dense representation.
- Parameters:
m1 - first matrix
m2 - second matrix
k - maximum parallelism
- Returns:
the result matrix block
-
matrixMult
public static MatrixBlock matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret)
Performs a matrix multiplication and stores the result in the output matrix. All variants use an IKJ access pattern and internally use dense output. After the actual computation, we recompute nnz and check for sparse/dense representation.
- Parameters:
m1 - first matrix
m2 - second matrix
ret - result matrix
- Returns:
the result matrix block
-
matrixMult
public static MatrixBlock matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean fixedRet)
This method allows one to disable the sparsity examination of the output. This feature is useful if matrixMult is used as an intermediate operation (for example: LibMatrixDNN). It makes sense for LibMatrixDNN because the output is internally consumed by another dense instruction, which makes repeated conversion to sparse wasteful. This should be used only in rare cases; if you are unsure, use the method 'matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret)' instead.
- Parameters:
m1 - first matrix
m2 - second matrix
ret - result matrix
fixedRet - if true, output representation is fixed and nnzs are not recomputed
- Returns:
the result matrix block
-
matrixMult
public static MatrixBlock matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int k)
Performs a multi-threaded matrix multiplication and stores the result in the output matrix. The parameter k (k>=1) determines the max parallelism k' with k'=min(k, vcores, m1.rlen).
- Parameters:
m1 - first matrix
m2 - second matrix
ret - result matrix
k - maximum parallelism
- Returns:
the result matrix block
-
matrixMult
public static MatrixBlock matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean fixedRet, int k)
Performs a matrix multiplication and stores the result in the output matrix. All variants use an IKJ access pattern and internally use dense output. After the actual computation, we recompute nnz and check for sparse/dense representation. This method allows one to disable the sparsity examination of the output. This feature is useful if matrixMult is used as an intermediate operation (for example: LibMatrixDNN). It makes sense for LibMatrixDNN because the output is internally consumed by another dense instruction, which makes repeated conversion to sparse wasteful. This should be used only in rare cases; if you are unsure, use the method 'matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret)' instead. The parameter k (k>=1) determines the max parallelism k' with k'=min(k, vcores, m1.rlen).
- Parameters:
m1 - first matrix
m2 - second matrix
ret - result matrix
fixedRet - if true, output representation is fixed and nnzs are not recomputed
k - maximum parallelism
- Returns:
the result matrix block
-
emptyMatrixMult
public static MatrixBlock emptyMatrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret)
-
matrixMultChain
public static void matrixMultChain(MatrixBlock mX, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, MapMultChain.ChainType ct)
Performs a matrix multiplication chain operation of type t(X)%*%(X%*%v) or t(X)%*%(w*(X%*%v)). All variants use an IKJ access pattern and internally use dense output. After the actual computation, we recompute nnz and check for sparse/dense representation.
- Parameters:
mX - X matrix
mV - v matrix
mW - w matrix
ret - result matrix
ct - chain type
-
matrixMultChain
public static void matrixMultChain(MatrixBlock mX, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, MapMultChain.ChainType ct, int k)
Performs a parallel matrix multiplication chain operation of type t(X)%*%(X%*%v) or t(X)%*%(w*(X%*%v)). The parameter k (k>=1) determines the max parallelism k' with k'=min(k, vcores, m1.rlen). NOTE: This multi-threaded mmchain operation has additional memory requirements of k*ncol(X)*8 bytes for partial aggregation. Current max memory: 256KB; otherwise, execution is redirected to the sequential implementation.
- Parameters:
mX - X matrix
mV - v matrix
mW - w matrix
ret - result matrix
ct - chain type
k - maximum parallelism
-
matrixMultTransposeSelf
public static void matrixMultTransposeSelf(MatrixBlock m1, MatrixBlock ret, boolean leftTranspose)
-
matrixMultTransposeSelf
public static void matrixMultTransposeSelf(MatrixBlock m1, MatrixBlock ret, boolean leftTranspose, boolean copyToLowerTriangle)
-
matrixMultTransposeSelf
public static void matrixMultTransposeSelf(MatrixBlock m1, MatrixBlock ret, boolean leftTranspose, int k)
Transpose-self matrix multiplication (TSMM), with the left side optionally transposed.
- Parameters:
m1 - the matrix on which to perform TSMM
ret - the output matrix that receives the result
leftTranspose - whether the left side should be considered transposed
k - the number of threads to use
-
matrixMultPermute
public static void matrixMultPermute(MatrixBlock pm1, MatrixBlock m2, MatrixBlock ret1, MatrixBlock ret2)
-
matrixMultPermute
public static void matrixMultPermute(MatrixBlock pm1, MatrixBlock m2, MatrixBlock ret1, MatrixBlock ret2, int k)
-
matrixMultWSLoss
public static void matrixMultWSLoss(MatrixBlock mX, MatrixBlock mU, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, WeightedSquaredLoss.WeightsType wt)
-
matrixMultWSLoss
public static void matrixMultWSLoss(MatrixBlock mX, MatrixBlock mU, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, WeightedSquaredLoss.WeightsType wt, int k)
-
matrixMultWSigmoid
public static void matrixMultWSigmoid(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WeightedSigmoid.WSigmoidType wt)
-
matrixMultWSigmoid
public static void matrixMultWSigmoid(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WeightedSigmoid.WSigmoidType wt, int k)
-
matrixMultWDivMM
public static void matrixMultWDivMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock mX, MatrixBlock ret, WeightedDivMM.WDivMMType wt)
NOTE: This operation has limited NaN support, which is acceptable because all our sparse-safe operations have only limited NaN support. If this is not the intended behavior, please disable the rewrite. In detail, this operator will produce for W/(U%*%t(V)) a zero intermediate for each zero in W (even if UVij is zero, which would give 0/0=NaN) but INF/-INF for non-zero entries in V where the corresponding cell in (Y%*%X) is zero.
- Parameters:
mW - matrix W
mU - matrix U
mV - matrix V
mX - matrix X
ret - result matrix
wt - weighted divide matrix multiplication type
-
matrixMultWDivMM
public static void matrixMultWDivMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock mX, MatrixBlock ret, WeightedDivMM.WDivMMType wt, int k)
NOTE: This operation has limited NaN support, which is acceptable because all our sparse-safe operations have only limited NaN support. If this is not the intended behavior, please disable the rewrite. In detail, this operator will produce for W/(U%*%t(V)) a zero intermediate for each zero in W (even if UVij is zero, which would give 0/0=NaN) but INF/-INF for non-zero entries in V where the corresponding cell in (Y%*%X) is zero.
- Parameters:
mW - matrix W
mU - matrix U
mV - matrix V
mX - matrix X
ret - result matrix
wt - weighted divide matrix multiplication type
k - maximum parallelism
-
matrixMultWCeMM
public static void matrixMultWCeMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, double eps, MatrixBlock ret, WeightedCrossEntropy.WCeMMType wt)
-
matrixMultWCeMM
public static void matrixMultWCeMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, double eps, MatrixBlock ret, WeightedCrossEntropy.WCeMMType wt, int k)
-
matrixMultWuMM
public static void matrixMultWuMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WeightedUnaryMM.WUMMType wt, ValueFunction fn)
-
matrixMultWuMM
public static void matrixMultWuMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WeightedUnaryMM.WUMMType wt, ValueFunction fn, int k)
-
matrixMultDenseDenseMMDedup
public static void matrixMultDenseDenseMMDedup(DenseBlockFP64DEDUP a, DenseBlock b, DenseBlockFP64DEDUP c, int n, int cd, int rl, int ru, ConcurrentHashMap<double[],double[]> cache)
-
matrixMultDenseDenseMM
public static void matrixMultDenseDenseMM(DenseBlock a, DenseBlock b, DenseBlock c, int n, int cd, int rl, int ru, int cl, int cu)
-
dotProduct
public static double dotProduct(double[] a, double[] b, int ai, int bi, int len)
-
dotProduct
public static double dotProduct(double[] a, double[] b, int[] aix, int ai, int bi, int len)
-
vectMultiplyAdd
public static void vectMultiplyAdd(double aval, double[] b, double[] c, int bi, int ci, int len)
-
vectMultiplyAdd
public static void vectMultiplyAdd(double aval, double[] b, double[] c, int[] bix, int bi, int ci, int len)
-
vectMultiplyWrite
public static void vectMultiplyWrite(double aval, double[] b, double[] c, int bi, int ci, int len)
-
vectMultiplyInPlace
public static void vectMultiplyInPlace(double aval, double[] c, int ci, int len)
-
vectMultiplyWrite
public static void vectMultiplyWrite(double[] a, double[] b, double[] c, int ai, int bi, int ci, int len)
-
vectMultiplyWrite
public static void vectMultiplyWrite(double[] a, double[] b, double[] c, int[] bix, int ai, int bi, int ci, int len)
-
vectAdd
public static void vectAdd(double[] a, double bval, double[] c, int ai, int ci, int len)
-
vectAdd
public static void vectAdd(double[] a, double[] c, int ai, int ci, int len)
-
vectAdd
public static void vectAdd(double[] a, double[] c, int[] aix, int ai, int ci, int alen)
-
vectAddInPlace
public static void vectAddInPlace(double aval, double[] c, int ci, int len)
-
copyUpperToLowerTriangle
public static long copyUpperToLowerTriangle(MatrixBlock ret)
-
copyUpperToLowerTriangleDense
public static long copyUpperToLowerTriangleDense(MatrixBlock ret)
Used for all versions of TSMM where the result is known to be symmetric. Hence, we compute only the upper triangular matrix and copy this partial result down to the lower triangular matrix once.
- Parameters:
ret - matrix
- Returns:
number of non-zeros
-
copyUpperToLowerTriangleSparse
public static long copyUpperToLowerTriangleSparse(MatrixBlock ret)
-
prepMatrixMultTransposeSelfInput
public static MatrixBlock prepMatrixMultTransposeSelfInput(MatrixBlock m1, boolean leftTranspose, boolean par)
-
isSkinnyRightHandSide
public static boolean isSkinnyRightHandSide(long m1rlen, long m1clen, long m2rlen, long m2clen, boolean inclCacheSize)
-
satisfiesMultiThreadingConstraints
public static boolean satisfiesMultiThreadingConstraints(MatrixBlock m1, int k)
-
satisfiesMultiThreadingConstraints
public static boolean satisfiesMultiThreadingConstraints(MatrixBlock m1, boolean checkMem, boolean checkFLOPs, long FPfactor, int k)
-
satisfiesMultiThreadingConstraints
public static boolean satisfiesMultiThreadingConstraints(MatrixBlock m1, MatrixBlock m2, boolean checkMem, boolean checkFLOPs, long FPfactor, int k)
-
isUltraSparseMatrixMult
public static boolean isUltraSparseMatrixMult(MatrixBlock m1, MatrixBlock m2, boolean m1Perm)
-
isSparseOutputMatrixMult
public static boolean isSparseOutputMatrixMult(MatrixBlock m1, MatrixBlock m2)
-
isSparseOutputTSMM
public static boolean isSparseOutputTSMM(MatrixBlock m1)
-
isSparseOutputTSMM
public static boolean isSparseOutputTSMM(MatrixBlock m1, boolean ultraSparse)
-
isOuterProductTSMM
public static boolean isOuterProductTSMM(int rlen, int clen, boolean left)
-
-