Package org.apache.sysds.runtime.util
Class UtilFunctions
- java.lang.Object
-
- org.apache.sysds.runtime.util.UtilFunctions
-
public class UtilFunctions extends Object
-
-
Field Summary
Fields Modifier and Type Field Description static long
ADD_PRIME1
static int
DIVIDE_PRIME
static double
DOUBLE_EPS
-
Method Summary
All Methods Static Methods Concrete Methods Modifier and Type Method Description static String
addTimeToDate(String dateString, int amountToAdd, String timeformat)
static String
columnStringToCSVString(String input, String separator)
static int
compareTo(Types.ValueType vt, Object in1, Object in2)
static int
compareVersion(String version1, String version2)
Compares two version strings of format x.y.z, where x is major, y is minor, and z is maintenance release.static long
computeBlockIndex(long cellIndex, int blockSize)
Computes the 1-based block index based on the global cell index and block size meta data.static long
computeBlockNumber(int[] ix, long[] dims, int blen)
Calculates the number of the block this index refers to (basically a linearisation).static int
computeBlockSize(long len, long blockIndex, long blockSize)
Computes the actual block size based on matrix dimension, block index, and block size meta data.static int
computeCellInBlock(long cellIndex, int blockSize)
Computes the 0-based cell-in-block index based on the global cell index and block size meta data.static long
computeCellIndex(long blockIndex, int blockSize, int cellInBlock)
Computes the global 1-based cell index based on the block index, block size meta data, and specific 0-based in-block cell index.static long[]
computeNextTensorIndexes(TensorCharacteristics tc, long[] ix)
Computes the next tensor indexes array.static int
computeNnz(double[] a, int ai, int len)
static int
computeNnz(float[] a, int ai, int len)
static int
computeNnz(int[] a, int ai, int len)
static int
computeNnz(long[] a, int ai, int len)
static int
computeNnz(String[] a, int ai, int len)
static int
computeNnz(BitSet a, int ai, int len)
static long
computeNnz(SparseBlock a, int[] aix, int ai, int alen)
static void
computeSliceInfo(TensorCharacteristics tc, long[] blockIx, int[] outDims, int[] offset)
Computes the slice dimensions and offsets for the block slice of another tensor with the size given by TensorCharacteristics
.static long[]
computeTensorIndexes(TensorCharacteristics tc, long blockIndex)
Computes the tensor indexes array given a blockIndex we want to compute.static boolean
containsZero(double[] data, int pos, int len)
static double[]
convertStringToDoubleArray(String[] original)
static String[]
copyAsStringToArray(String[] input, Object value)
static Types.ValueType[]
copyOf(Types.ValueType[] schema1, Types.ValueType[] schema2)
static int
countNonZeros(double[] data, int pos, int len)
static String
dateFormat(long date, String outputFormat)
static String
dateFormat(String dateString, String outputFormat)
static String
dateFormat(String dateString, String inputFormat, String outputFormat)
static Object
doubleToObject(Types.ValueType vt, double in)
static Object
doubleToObject(Types.ValueType vt, double in, boolean sparse)
static String
formatMemorySize(long arg)
Formats a byte count as a readable memory size string with g/m/k quantifiers.static int
frequency(Types.ValueType[] schema, Types.ValueType vt)
static FrameBlock
generateRandomFrameBlock(int rows, int cols, Types.ValueType[] schema, Random random)
Generates a random FrameBlock with given parameters.static Object
generateRandomValueFromValueType(Types.ValueType valueType, Random random)
Generates a random value for a given Value Typestatic ArrayList<Integer>
getAlignedBlockSizes(int len, int k, int align)
static int
getAsciiAtIdx(String s, int idx)
static ArrayList<Integer>
getBalancedBlockSizesDefault(int len, int k, boolean constK)
static void
getBlockBounds(TensorIndexes ix, long[] dims, int blen, int[] lower, int[] upper)
static int[]
getBlockSizes(int num, int numBlocks)
static String[]
getDominantDateFormat(String[] values)
static double
getDouble(Object obj)
static int
getEndIndex(int arrayLength, int startIndex, int blockSize)
static <T> T
getSafe(Future<T> task)
static IndexRange
getSelectedRangeForZeroOut(IndexedMatrixValue in, int blen, IndexRange indexRange)
static IndexRange
getSelectedRangeForZeroOut(Pair<Long,FrameBlock> in, int blen, IndexRange indexRange, long lSrcRowIndex, long lDestRowIndex)
static int[]
getSeqArray(int low, int up, int incr)
Obtain sequence arraystatic long
getSeqLength(double from, double to, double incr)
static long
getSeqLength(double from, double to, double incr, boolean check)
static List<Integer>
getSeqList(int low, int up, int incr)
Obtain sequence liststatic int[]
getSortedSampleIndexes(int range, int sampleSize)
static int[]
getSortedSampleIndexes(int range, int sampleSize, long seed)
static String[]
getSplittedStringAsArray(String input)
static List<Pair<Integer,Integer>>
getTaskRangesDefault(int len, int k)
static String[]
getTimestamp(String[] values)
static double
getWordErrorRate(String r, String h)
Computes the word error rate (Levenshtein distance at word level): wer = (numSubst + numDel + numIns) / length(r). This code has been adapted from Apache Commons Lang 3.12 (getLevenshteinDistance, but for words instead of characters).static int
intHashCode(int key1, int key2)
static int
intHashCodeRobust(int key1, int key2)
static boolean
isBoolean(String str)
static String
isDateColumn(String values)
static boolean
isInBlockRange(MatrixIndexes ix, int blen, long rl, long ru, long cl, long cu)
static boolean
isInBlockRange(MatrixIndexes ix, int blen, IndexRange ixrange)
static boolean
isInFrameBlockRange(Long ix, int blen, long rl, long ru)
static boolean
isInFrameBlockRange(Long ix, int blen, IndexRange ixrange)
static boolean
isIntegerNumber(String str)
static boolean
isNonZero(Object obj)
static boolean
isSpecial(double value)
static double
jaccardSim(String x, String y)
static int
longHashCode(long key1)
static int
longHashCode(long key1, long key2)
Returns the hash code for a long-long pair.static int
longHashCode(long key1, long key2, long key3)
Returns the hash code for a long-long-long triple.static byte
max(byte[] array)
static Types.ValueType[]
nCopies(int n, Types.ValueType vt)
static int
nextIntPow2(int in)
static boolean
objectToBoolean(Types.ValueType vt, Object in)
static char
objectToCharacter(Types.ValueType vt, Object in)
static double
objectToDouble(Types.ValueType vt, Object in)
static double
objectToDoubleSafe(Types.ValueType vt, Object in)
static float
objectToFloat(Types.ValueType vt, Object in)
static int
objectToInteger(Types.ValueType vt, Object in)
static long
objectToLong(Types.ValueType vt, Object in)
static Object
objectToObject(Types.ValueType vt, Object in)
static Object
objectToObject(Types.ValueType vt, Object in, boolean ignoreNull)
static String
objectToString(Object in)
static String
objectToString(Object in, boolean ignoreNull)
Convert object to stringstatic long
parseMemorySize(String arg)
Parses a memory size with optional g/m/k quantifiers into its number representation.static double
parseToDouble(String str, Set<String> isNan)
Safe double parsing including handling of NAs.static int
parseToInt(String str)
static long
parseToLong(String str)
static long
pow(int base, int exp)
static long
prod(int[] arr)
static long
prod(int[] arr, int off)
static long
prod(long[] arr)
static String
quote(String s)
static int
roundToNext(int val, int factor)
static String[]
splitRecodeEntry(String s)
static Object
stringToObject(Types.ValueType vt, String in)
static Types.ValueType[]
stringToValueType(String[] schemaValues)
Generates a ValueType array from a String arraystatic int
toInt(double val)
static int
toInt(Object obj)
static long
toLong(double val)
static long
toMillis(String dateString)
static long
toMillis(String dateString, String dateFormat)
static String[]
toStringArray(Object[] original)
static String
unquote(String s)
-
-
-
Field Detail
-
DOUBLE_EPS
public static final double DOUBLE_EPS
-
ADD_PRIME1
public static final long ADD_PRIME1
- See Also:
- Constant Field Values
-
DIVIDE_PRIME
public static final int DIVIDE_PRIME
- See Also:
- Constant Field Values
-
-
Method Detail
-
intHashCode
public static int intHashCode(int key1, int key2)
-
intHashCodeRobust
public static int intHashCodeRobust(int key1, int key2)
-
longHashCode
public static int longHashCode(long key1)
-
longHashCode
public static int longHashCode(long key1, long key2)
Returns the hash code for a long-long pair. This is the default hash function for the keys of a distributed matrix in MR/Spark.- Parameters:
key1
- first long keykey2
- second long key- Returns:
- hash code
-
longHashCode
public static int longHashCode(long key1, long key2, long key3)
Returns the hash code for a long-long-long triple. This is the default hash function for the keys of a distributed matrix in MR/Spark.- Parameters:
key1
- first long keykey2
- second long keykey3
- third long key- Returns:
- hash code
-
nextIntPow2
public static int nextIntPow2(int in)
-
pow
public static long pow(int base, int exp)
-
computeBlockIndex
public static long computeBlockIndex(long cellIndex, int blockSize)
Computes the 1-based block index based on the global cell index and block size meta data. See computeCellIndex for the inverse operation.- Parameters:
cellIndex
- global cell indexblockSize
- block size- Returns:
- 1-based block index
-
computeCellInBlock
public static int computeCellInBlock(long cellIndex, int blockSize)
Computes the 0-based cell-in-block index based on the global cell index and block size meta data. See computeCellIndex for the inverse operation.- Parameters:
cellIndex
- global cell indexblockSize
- block size- Returns:
- 0-based cell-in-block index
-
computeCellIndex
public static long computeCellIndex(long blockIndex, int blockSize, int cellInBlock)
Computes the global 1-based cell index based on the block index, block size meta data, and specific 0-based in-block cell index. NOTE: this is equivalent to cellIndexCalculation.- Parameters:
blockIndex
- block indexblockSize
- block sizecellInBlock
- 0-based cell-in-block index- Returns:
- global 1-based cell index
-
computeBlockSize
public static int computeBlockSize(long len, long blockIndex, long blockSize)
Computes the actual block size based on matrix dimension, block index, and block size meta data. For boundary blocks, the actual block size is less than or equal to the block size meta data; otherwise they are identical.- Parameters:
len
- matrix dimensionblockIndex
- block indexblockSize
- block size metadata- Returns:
- actual block size
-
computeNextTensorIndexes
public static long[] computeNextTensorIndexes(TensorCharacteristics tc, long[] ix)
Computes the next tensor indexes array.- Parameters:
tc
- the tensor characteristicsix
- the tensor indexes array (will be changed)- Returns:
- the tensor indexes array (changed)
-
computeTensorIndexes
public static long[] computeTensorIndexes(TensorCharacteristics tc, long blockIndex)
Computes the tensor indexes array given a blockIndex we want to compute. Note that if a sequence of tensor indexes arrays will be computed, it is faster to use UtilFunctions.computeNextTensorIndexes(TensorCharacteristics,long[])
.- Parameters:
tc
- the tensor characteristicsblockIndex
- the number of the block (valid range: 0 inclusive to tc.getNumBlocks()
exclusive)- Returns:
- the tensor index array
-
computeSliceInfo
public static void computeSliceInfo(TensorCharacteristics tc, long[] blockIx, int[] outDims, int[] offset)
Computes the slice dimensions and offsets for the block slice of another tensor with the size given by TensorCharacteristics
.- Parameters:
tc
- tensor characteristics of the block to sliceblockIx
- the tensor block indexoutDims
- the slice dimension sizeoffset
- the offset where the slice should start
-
computeBlockNumber
public static long computeBlockNumber(int[] ix, long[] dims, int blen)
Calculates the number of the block this index refers to (basically a linearisation).- Parameters:
ix
- the dimensional indexesdims
- length of dimensionsblen
- length of blocks- Returns:
- the number of the block
-
getTaskRangesDefault
public static List<Pair<Integer,Integer>> getTaskRangesDefault(int len, int k)
-
getBalancedBlockSizesDefault
public static ArrayList<Integer> getBalancedBlockSizesDefault(int len, int k, boolean constK)
-
getAlignedBlockSizes
public static ArrayList<Integer> getAlignedBlockSizes(int len, int k, int align)
-
isInBlockRange
public static boolean isInBlockRange(MatrixIndexes ix, int blen, long rl, long ru, long cl, long cu)
-
isInFrameBlockRange
public static boolean isInFrameBlockRange(Long ix, int blen, long rl, long ru)
-
isInBlockRange
public static boolean isInBlockRange(MatrixIndexes ix, int blen, IndexRange ixrange)
-
isInFrameBlockRange
public static boolean isInFrameBlockRange(Long ix, int blen, IndexRange ixrange)
-
getSelectedRangeForZeroOut
public static IndexRange getSelectedRangeForZeroOut(IndexedMatrixValue in, int blen, IndexRange indexRange)
-
getSelectedRangeForZeroOut
public static IndexRange getSelectedRangeForZeroOut(Pair<Long,FrameBlock> in, int blen, IndexRange indexRange, long lSrcRowIndex, long lDestRowIndex)
-
parseToDouble
public static double parseToDouble(String str, Set<String> isNan)
Safe double parsing including handling of NAs. Previously, we also used this wrapper for handling thread contention in multi-threaded environments because Double.parseDouble relied on a synchronized cache (which was replaced with thread-local caches in JDK8).- Parameters:
str
- string to parse to doubleisNan
- collection of NaN strings which, if encountered, should be parsed to the NaN value- Returns:
- double value
-
parseToInt
public static int parseToInt(String str)
-
parseToLong
public static long parseToLong(String str)
-
toInt
public static int toInt(double val)
-
toLong
public static long toLong(double val)
-
toInt
public static int toInt(Object obj)
-
getSeqLength
public static long getSeqLength(double from, double to, double incr)
-
getSeqLength
public static long getSeqLength(double from, double to, double incr, boolean check)
-
getSeqList
public static List<Integer> getSeqList(int low, int up, int incr)
Obtain sequence list- Parameters:
low
- lower bound (inclusive)up
- upper bound (inclusive)incr
- increment- Returns:
- list of integers
-
getSeqArray
public static int[] getSeqArray(int low, int up, int incr)
Obtain sequence array- Parameters:
low
- lower bound (inclusive)up
- upper bound (inclusive)incr
- increment- Returns:
- array of integers
-
roundToNext
public static int roundToNext(int val, int factor)
-
doubleToObject
public static Object doubleToObject(Types.ValueType vt, double in)
-
doubleToObject
public static Object doubleToObject(Types.ValueType vt, double in, boolean sparse)
-
stringToObject
public static Object stringToObject(Types.ValueType vt, String in)
-
objectToDoubleSafe
public static double objectToDoubleSafe(Types.ValueType vt, Object in)
-
objectToDouble
public static double objectToDouble(Types.ValueType vt, Object in)
-
objectToFloat
public static float objectToFloat(Types.ValueType vt, Object in)
-
objectToCharacter
public static char objectToCharacter(Types.ValueType vt, Object in)
-
objectToInteger
public static int objectToInteger(Types.ValueType vt, Object in)
-
objectToLong
public static long objectToLong(Types.ValueType vt, Object in)
-
objectToBoolean
public static boolean objectToBoolean(Types.ValueType vt, Object in)
-
objectToString
public static String objectToString(Object in, boolean ignoreNull)
Convert object to string- Parameters:
in
- objectignoreNull
- If this flag is set, it will ignore null. This flag is mainly used in merge functionality to override data with "null" data.- Returns:
- string representation of object
-
objectToObject
public static Object objectToObject(Types.ValueType vt, Object in)
-
objectToObject
public static Object objectToObject(Types.ValueType vt, Object in, boolean ignoreNull)
-
compareTo
public static int compareTo(Types.ValueType vt, Object in1, Object in2)
-
compareVersion
public static int compareVersion(String version1, String version2)
Compares two version strings of format x.y.z, where x is major, y is minor, and z is maintenance release.- Parameters:
version1
- first version stringversion2
- second version string- Returns:
- 1 if version1 greater, -1 if version2 greater, 0 if equal
-
isBoolean
public static boolean isBoolean(String str)
-
isIntegerNumber
public static boolean isIntegerNumber(String str)
-
isSpecial
public static boolean isSpecial(double value)
-
getSortedSampleIndexes
public static int[] getSortedSampleIndexes(int range, int sampleSize)
-
getSortedSampleIndexes
public static int[] getSortedSampleIndexes(int range, int sampleSize, long seed)
-
max
public static byte max(byte[] array)
-
getAsciiAtIdx
public static int getAsciiAtIdx(String s, int idx)
-
parseMemorySize
public static long parseMemorySize(String arg)
Parses a memory size with optional g/m/k quantifiers into its number representation.- Parameters:
arg
- memory size as readable string- Returns:
- byte count of memory size
-
formatMemorySize
public static String formatMemorySize(long arg)
Formats a byte count as a readable memory size string with g/m/k quantifiers.- Parameters:
arg
- byte count of memory size- Returns:
- memory size as readable string
-
getDouble
public static double getDouble(Object obj)
-
isNonZero
public static boolean isNonZero(Object obj)
-
computeNnz
public static int computeNnz(double[] a, int ai, int len)
-
computeNnz
public static int computeNnz(float[] a, int ai, int len)
-
computeNnz
public static int computeNnz(long[] a, int ai, int len)
-
computeNnz
public static int computeNnz(int[] a, int ai, int len)
-
computeNnz
public static int computeNnz(BitSet a, int ai, int len)
-
computeNnz
public static int computeNnz(String[] a, int ai, int len)
-
computeNnz
public static long computeNnz(SparseBlock a, int[] aix, int ai, int alen)
-
nCopies
public static Types.ValueType[] nCopies(int n, Types.ValueType vt)
-
frequency
public static int frequency(Types.ValueType[] schema, Types.ValueType vt)
-
copyOf
public static Types.ValueType[] copyOf(Types.ValueType[] schema1, Types.ValueType[] schema2)
-
countNonZeros
public static int countNonZeros(double[] data, int pos, int len)
-
containsZero
public static boolean containsZero(double[] data, int pos, int len)
-
prod
public static long prod(long[] arr)
-
prod
public static long prod(int[] arr)
-
prod
public static long prod(int[] arr, int off)
-
getBlockBounds
public static void getBlockBounds(TensorIndexes ix, long[] dims, int blen, int[] lower, int[] upper)
-
toMillis
public static long toMillis(String dateString)
-
dateFormat
public static String dateFormat(String dateString, String inputFormat, String outputFormat)
-
addTimeToDate
public static String addTimeToDate(String dateString, int amountToAdd, String timeformat)
-
columnStringToCSVString
public static String columnStringToCSVString(String input, String separator)
-
generateRandomFrameBlock
public static FrameBlock generateRandomFrameBlock(int rows, int cols, Types.ValueType[] schema, Random random)
Generates a random FrameBlock with given parameters.- Parameters:
rows
- frame rowscols
- frame colsschema
- frame schemarandom
- random number generator- Returns:
- FrameBlock
-
generateRandomValueFromValueType
public static Object generateRandomValueFromValueType(Types.ValueType valueType, Random random)
Generates a random value for a given Value Type- Parameters:
valueType
- the ValueType of which to generate the valuerandom
- random number generator- Returns:
- Object
-
stringToValueType
public static Types.ValueType[] stringToValueType(String[] schemaValues)
Generates a ValueType array from a String array- Parameters:
schemaValues
- the string schema of which to generate the ValueType- Returns:
- ValueType[]
-
getEndIndex
public static int getEndIndex(int arrayLength, int startIndex, int blockSize)
-
getBlockSizes
public static int[] getBlockSizes(int num, int numBlocks)
-
getSafe
public static <T> T getSafe(Future<T> task)
-
convertStringToDoubleArray
public static double[] convertStringToDoubleArray(String[] original)
-
getWordErrorRate
public static double getWordErrorRate(String r, String h)
Computes the word error rate (Levenshtein distance at word level): wer = (numSubst + numDel + numIns) / length(r). This code has been adapted from Apache Commons Lang 3.12 (getLevenshteinDistance, but for words instead of characters).- Parameters:
r
- reference stringh
- hypothesis string- Returns:
- word error rate (WER)
-
-