@InterfaceAudience.Public
public class LoadIncrementalHFiles
extends org.apache.hadoop.conf.Configured
implements org.apache.hadoop.util.Tool
Modifier and Type | Class and Description |
---|---|
static class |
LoadIncrementalHFiles.LoadQueueItem
Represents an HFile waiting to be loaded.
|
Modifier and Type | Field and Description |
---|---|
static String |
ALWAYS_COPY_FILES |
static String |
CREATE_TABLE_CONF_KEY |
static String |
IGNORE_UNMATCHED_CF_CONF_KEY |
static String |
MAX_FILES_PER_REGION_PER_FAMILY |
static String |
NAME |
Constructor and Description |
---|
LoadIncrementalHFiles(org.apache.hadoop.conf.Configuration conf) |
Modifier and Type | Method and Description |
---|---|
protected ClientServiceCallable<byte[]> |
buildClientServiceCallable(Connection conn,
TableName tableName,
byte[] first,
Collection<LoadIncrementalHFiles.LoadQueueItem> lqis,
boolean copyFile) |
protected void |
bulkLoadPhase(Table table,
Connection conn,
ExecutorService pool,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups,
boolean copyFile,
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> item2RegionMap)
This takes the LQI's grouped by likely regions and attempts to bulk load them.
|
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
doBulkLoad(Map<byte[],List<org.apache.hadoop.fs.Path>> map,
Admin admin,
Table table,
RegionLocator regionLocator,
boolean silence,
boolean copyFile)
Perform a bulk load of the given directory into the given pre-existing table.
|
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
doBulkLoad(org.apache.hadoop.fs.Path hfofDir,
Admin admin,
Table table,
RegionLocator regionLocator)
Perform a bulk load of the given directory into the given pre-existing table.
|
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
doBulkLoad(org.apache.hadoop.fs.Path hfofDir,
Admin admin,
Table table,
RegionLocator regionLocator,
boolean silence,
boolean copyFile)
Perform a bulk load of the given directory into the given pre-existing table.
|
protected Pair<List<LoadIncrementalHFiles.LoadQueueItem>,String> |
groupOrSplit(org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups,
LoadIncrementalHFiles.LoadQueueItem item,
Table table,
Pair<byte[][],byte[][]> startEndKeys)
Attempt to assign the given load queue item into its target region group.
|
static byte[][] |
inferBoundaries(SortedMap<byte[],Integer> bdryMap)
Infers region boundaries for a new table.
|
void |
loadHFileQueue(Table table,
Connection conn,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
Pair<byte[][],byte[][]> startEndKeys)
Used by the replication sink to load the hfiles from the source cluster.
|
void |
loadHFileQueue(Table table,
Connection conn,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
Pair<byte[][],byte[][]> startEndKeys,
boolean copyFile)
Used by the replication sink to load the hfiles from the source cluster.
|
static void |
main(String[] args) |
void |
prepareHFileQueue(Map<byte[],List<org.apache.hadoop.fs.Path>> map,
Table table,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
boolean silence)
Prepare a collection of
LoadIncrementalHFiles.LoadQueueItem from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it. |
void |
prepareHFileQueue(org.apache.hadoop.fs.Path hfilesDir,
Table table,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
boolean validateHFile)
Prepare a collection of
LoadIncrementalHFiles.LoadQueueItem from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it. |
void |
prepareHFileQueue(org.apache.hadoop.fs.Path hfilesDir,
Table table,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
boolean validateHFile,
boolean silence)
Prepare a collection of
LoadIncrementalHFiles.LoadQueueItem from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it. |
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
run(Map<byte[],List<org.apache.hadoop.fs.Path>> family2Files,
TableName tableName)
Perform bulk load on the given table.
|
int |
run(String[] args) |
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
run(String hfofDir,
TableName tableName)
Perform bulk load on the given table.
|
void |
setBulkToken(String stagingDir)
Called from replication sink, where it manages bulkToken(staging directory) by itself.
|
protected List<LoadIncrementalHFiles.LoadQueueItem> |
tryAtomicRegionLoad(ClientServiceCallable<byte[]> serviceCallable,
TableName tableName,
byte[] first,
Collection<LoadIncrementalHFiles.LoadQueueItem> lqis)
Attempts to do an atomic load of many hfiles into a region.
|
public static final String NAME
public static final String MAX_FILES_PER_REGION_PER_FAMILY
public static final String CREATE_TABLE_CONF_KEY
public static final String IGNORE_UNMATCHED_CF_CONF_KEY
public static final String ALWAYS_COPY_FILES
public LoadIncrementalHFiles(org.apache.hadoop.conf.Configuration conf)
public void prepareHFileQueue(org.apache.hadoop.fs.Path hfilesDir, Table table, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, boolean validateHFile) throws IOException
LoadIncrementalHFiles.LoadQueueItem
from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it.hfilesDir
- directory containing list of hfiles to be loaded into the tabletable
- table to which hfiles should be loadedqueue
- queue which needs to be loaded into the tablevalidateHFile
- if true hfiles will be validated for its formatIOException
- If any I/O or network error occurredpublic void prepareHFileQueue(org.apache.hadoop.fs.Path hfilesDir, Table table, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, boolean validateHFile, boolean silence) throws IOException
LoadIncrementalHFiles.LoadQueueItem
from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it.hfilesDir
- directory containing list of hfiles to be loaded into the tabletable
- table to which hfiles should be loadedqueue
- queue which needs to be loaded into the tablevalidateHFile
- if true hfiles will be validated for its formatsilence
- true to ignore unmatched column familiesIOException
- If any I/O or network error occurredpublic void prepareHFileQueue(Map<byte[],List<org.apache.hadoop.fs.Path>> map, Table table, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, boolean silence) throws IOException
LoadIncrementalHFiles.LoadQueueItem
from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it.map
- map of family to List of hfilestable
- table to which hfiles should be loadedqueue
- queue which needs to be loaded into the tablesilence
- true to ignore unmatched column familiesIOException
- If any I/O or network error occurredpublic Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> doBulkLoad(org.apache.hadoop.fs.Path hfofDir, Admin admin, Table table, RegionLocator regionLocator) throws TableNotFoundException, IOException
hfofDir
- the directory that was provided as the output path of a job using
HFileOutputFormatadmin
- the Admintable
- the table to load intoregionLocator
- region locatorTableNotFoundException
- if table does not yet existIOException
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> doBulkLoad(Map<byte[],List<org.apache.hadoop.fs.Path>> map, Admin admin, Table table, RegionLocator regionLocator, boolean silence, boolean copyFile) throws TableNotFoundException, IOException
map
- map of family to List of hfilesadmin
- the Admintable
- the table to load intoregionLocator
- region locatorsilence
- true to ignore unmatched column familiescopyFile
- always copy hfiles if trueTableNotFoundException
- if table does not yet existIOException
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> doBulkLoad(org.apache.hadoop.fs.Path hfofDir, Admin admin, Table table, RegionLocator regionLocator, boolean silence, boolean copyFile) throws TableNotFoundException, IOException
hfofDir
- the directory that was provided as the output path of a job using
HFileOutputFormatadmin
- the Admintable
- the table to load intoregionLocator
- region locatorsilence
- true to ignore unmatched column familiescopyFile
- always copy hfiles if trueTableNotFoundException
- if table does not yet existIOException
public void loadHFileQueue(Table table, Connection conn, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, Pair<byte[][],byte[][]> startEndKeys) throws IOException
table
- Table to which these hfiles should be loaded toconn
- Connection to usequeue
- LoadIncrementalHFiles.LoadQueueItem
has hfiles yet to be loadedstartEndKeys
- starting and ending row keys of the regionIOException
public void loadHFileQueue(Table table, Connection conn, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, Pair<byte[][],byte[][]> startEndKeys, boolean copyFile) throws IOException
table
- Table to which these hfiles should be loaded toconn
- Connection to usequeue
- LoadIncrementalHFiles.LoadQueueItem
has hfiles yet to be loadedstartEndKeys
- starting and ending row keys of the regionIOException
protected void bulkLoadPhase(Table table, Connection conn, ExecutorService pool, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups, boolean copyFile, Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> item2RegionMap) throws IOException
protected for testing.
IOException
protected ClientServiceCallable<byte[]> buildClientServiceCallable(Connection conn, TableName tableName, byte[] first, Collection<LoadIncrementalHFiles.LoadQueueItem> lqis, boolean copyFile)
protected Pair<List<LoadIncrementalHFiles.LoadQueueItem>,String> groupOrSplit(org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups, LoadIncrementalHFiles.LoadQueueItem item, Table table, Pair<byte[][],byte[][]> startEndKeys) throws IOException
protected for testing
IOException
- if an IO failure is encounteredprotected List<LoadIncrementalHFiles.LoadQueueItem> tryAtomicRegionLoad(ClientServiceCallable<byte[]> serviceCallable, TableName tableName, byte[] first, Collection<LoadIncrementalHFiles.LoadQueueItem> lqis) throws IOException
NOTE: To maintain row atomicity guarantees, region server callable should succeed atomically and fails atomically.
Protected for testing.
IOException
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> run(String hfofDir, TableName tableName) throws IOException
hfofDir
- the directory that was provided as the output path of a job using
HFileOutputFormattableName
- the table to load intoIOException
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> run(Map<byte[],List<org.apache.hadoop.fs.Path>> family2Files, TableName tableName) throws IOException
family2Files
- map of family to List of hfilestableName
- the table to load intoIOException
public int run(String[] args) throws Exception
run
in interface org.apache.hadoop.util.Tool
Exception
public void setBulkToken(String stagingDir)
stagingDir
- staging directory pathpublic static byte[][] inferBoundaries(SortedMap<byte[],Integer> bdryMap)
Parameter:
bdryMap is a map between keys to an integer belonging to {+1, -1}
Algo:
Copyright © 2007–2019 Cloudera. All rights reserved.