public final class HadoopUtil extends Object
Modifier and Type | Method and Description |
---|---|
static String |
buildDirList(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.FileStatus fileStatus)
Builds a comma-separated list of input splits
|
static String |
buildDirList(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.FileStatus fileStatus,
org.apache.hadoop.fs.PathFilter pathFilter)
Builds a comma-separated list of input splits
|
static void |
cacheFiles(org.apache.hadoop.fs.Path fileToCache,
org.apache.hadoop.conf.Configuration conf) |
static String |
calcRelativeFilePath(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path filePath) |
static long |
countRecords(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration conf) |
static long |
countRecords(org.apache.hadoop.fs.Path path,
PathType pt,
org.apache.hadoop.fs.PathFilter filter,
org.apache.hadoop.conf.Configuration conf)
Count all the records in a directory using a
SequenceFileDirValueIterator |
static void |
delete(org.apache.hadoop.conf.Configuration conf,
Iterable<org.apache.hadoop.fs.Path> paths) |
static void |
delete(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path... paths) |
static org.apache.hadoop.fs.Path |
findInCacheByPartOfFilename(String partOfFilename,
URI[] localFiles)
Finds a file in the DistributedCache
|
static org.apache.hadoop.fs.Path[] |
getCachedFiles(org.apache.hadoop.conf.Configuration conf)
Retrieves paths to cached files.
|
static String |
getCustomJobName(String className,
org.apache.hadoop.mapreduce.JobContext job,
Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer) |
static org.apache.hadoop.fs.FileStatus[] |
getFileStatus(org.apache.hadoop.fs.Path path,
PathType pathType,
org.apache.hadoop.fs.PathFilter filter,
Comparator<org.apache.hadoop.fs.FileStatus> ordering,
org.apache.hadoop.conf.Configuration conf) |
static org.apache.hadoop.fs.Path |
getSingleCachedFile(org.apache.hadoop.conf.Configuration conf)
Return the first cached file in the list, else null if there are no cached files.
|
static org.apache.hadoop.fs.FileStatus[] |
listStatus(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path) |
static org.apache.hadoop.fs.FileStatus[] |
listStatus(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
org.apache.hadoop.fs.PathFilter filter) |
static InputStream |
openStream(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration conf) |
static org.apache.hadoop.mapreduce.Job |
prepareJob(org.apache.hadoop.fs.Path inputPath,
org.apache.hadoop.fs.Path outputPath,
Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat,
Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
Class<? extends org.apache.hadoop.io.Writable> mapperKey,
Class<? extends org.apache.hadoop.io.Writable> mapperValue,
Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat,
org.apache.hadoop.conf.Configuration conf)
Create a map-only Hadoop Job out of the passed in parameters.
|
static org.apache.hadoop.mapreduce.Job |
prepareJob(org.apache.hadoop.fs.Path inputPath,
org.apache.hadoop.fs.Path outputPath,
Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat,
Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
Class<? extends org.apache.hadoop.io.Writable> mapperKey,
Class<? extends org.apache.hadoop.io.Writable> mapperValue,
Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer,
Class<? extends org.apache.hadoop.io.Writable> reducerKey,
Class<? extends org.apache.hadoop.io.Writable> reducerValue,
Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat,
org.apache.hadoop.conf.Configuration conf)
Create a map and reduce Hadoop job.
|
static int |
readInt(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration configuration) |
static void |
setSerializations(org.apache.hadoop.conf.Configuration configuration) |
static void |
writeInt(int value,
org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration configuration) |
public static org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputPath, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.io.Writable> mapperKey, Class<? extends org.apache.hadoop.io.Writable> mapperValue, Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat, org.apache.hadoop.conf.Configuration conf) throws IOException
public static org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputPath, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.io.Writable> mapperKey, Class<? extends org.apache.hadoop.io.Writable> mapperValue, Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer, Class<? extends org.apache.hadoop.io.Writable> reducerKey, Class<? extends org.apache.hadoop.io.Writable> reducerValue, Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat, org.apache.hadoop.conf.Configuration conf) throws IOException
inputPath
- The input Path
outputPath
- The output Path
inputFormat
- The InputFormat
mapper
- The Mapper class to use
mapperKey
- The Writable key class. If the Mapper is a no-op, this value may be null
mapperValue
- The Writable value class. If the Mapper is a no-op, this value may be null
reducer
- The Reducer to use
reducerKey
- The reducer key class.
reducerValue
- The reducer value class.
outputFormat
- The OutputFormat.
conf
- The Configuration to use.
Returns: the Job
Throws: IOException - if there is a problem with the IO.
See Also: getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class),
prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
org.apache.hadoop.conf.Configuration)
public static String getCustomJobName(String className, org.apache.hadoop.mapreduce.JobContext job, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer)
public static void delete(org.apache.hadoop.conf.Configuration conf, Iterable<org.apache.hadoop.fs.Path> paths) throws IOException
IOException
public static void delete(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path... paths) throws IOException
IOException
public static long countRecords(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public static long countRecords(org.apache.hadoop.fs.Path path, PathType pt, org.apache.hadoop.fs.PathFilter filter, org.apache.hadoop.conf.Configuration conf) throws IOException
SequenceFileDirValueIterator
path
- The Path to count
pt
- The PathType
filter
- Apply the PathFilter. May be null
conf
- The Hadoop Configuration
Throws: IOException - if there was an IO error
public static InputStream openStream(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public static org.apache.hadoop.fs.FileStatus[] getFileStatus(org.apache.hadoop.fs.Path path, PathType pathType, org.apache.hadoop.fs.PathFilter filter, Comparator<org.apache.hadoop.fs.FileStatus> ordering, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public static org.apache.hadoop.fs.FileStatus[] listStatus(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws IOException
IOException
public static org.apache.hadoop.fs.FileStatus[] listStatus(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, org.apache.hadoop.fs.PathFilter filter) throws IOException
IOException
public static void cacheFiles(org.apache.hadoop.fs.Path fileToCache, org.apache.hadoop.conf.Configuration conf)
public static org.apache.hadoop.fs.Path getSingleCachedFile(org.apache.hadoop.conf.Configuration conf) throws IOException
conf
- MapReduce Configuration
Throws: IOException - IO Exception
public static org.apache.hadoop.fs.Path[] getCachedFiles(org.apache.hadoop.conf.Configuration conf) throws IOException
conf
- MapReduce Configuration
Throws: IOException - IO Exception
IllegalStateException - if no cache files are found
public static void setSerializations(org.apache.hadoop.conf.Configuration configuration)
public static void writeInt(int value, org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration configuration) throws IOException
IOException
public static int readInt(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration configuration) throws IOException
IOException
public static String buildDirList(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.FileStatus fileStatus) throws IOException
fs
- File System
fileStatus
- File Status
Throws: IOException - IO Exception
public static String buildDirList(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.FileStatus fileStatus, org.apache.hadoop.fs.PathFilter pathFilter) throws IOException
fs
- File System
fileStatus
- File Status
pathFilter
- path filter
Throws: IOException - IO Exception
public static String calcRelativeFilePath(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path filePath) throws IOException
configuration
- configuration
filePath
- Input File Path
Throws: IOException - IO Exception
public static org.apache.hadoop.fs.Path findInCacheByPartOfFilename(String partOfFilename, URI[] localFiles)
partOfFilename
- a substring of the file name
localFiles
- holds references to files stored in the distributed cache
Copyright © 2008–2015 The Apache Software Foundation. All rights reserved.