|
@@ -76,9 +76,11 @@ void CombineRunnerWrapper::combine(CombineContext type, KVIterator * kvIterator,
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
MapOutputCollector::MapOutputCollector(uint32_t numberPartitions, SpillOutputService * spillService)
|
|
MapOutputCollector::MapOutputCollector(uint32_t numberPartitions, SpillOutputService * spillService)
|
|
- : _config(NULL), _numPartitions(numberPartitions), _buckets(NULL), _keyComparator(NULL),
|
|
|
|
- _combineRunner(NULL), _spilledRecords(NULL), _spillOutput(spillService),
|
|
|
|
- _defaultBlockSize(0), _pool(NULL) {
|
|
|
|
|
|
+ : _config(NULL), _numPartitions(numberPartitions), _buckets(NULL),
|
|
|
|
+ _keyComparator(NULL), _combineRunner(NULL),
|
|
|
|
+ _mapOutputRecords(NULL), _mapOutputBytes(NULL),
|
|
|
|
+ _mapOutputMaterializedBytes(NULL), _spilledRecords(NULL),
|
|
|
|
+ _spillOutput(spillService), _defaultBlockSize(0), _pool(NULL) {
|
|
_pool = new MemoryPool();
|
|
_pool = new MemoryPool();
|
|
}
|
|
}
|
|
|
|
|
|
@@ -108,7 +110,7 @@ MapOutputCollector::~MapOutputCollector() {
|
|
}
|
|
}
|
|
|
|
|
|
void MapOutputCollector::init(uint32_t defaultBlockSize, uint32_t memoryCapacity,
|
|
void MapOutputCollector::init(uint32_t defaultBlockSize, uint32_t memoryCapacity,
|
|
- ComparatorPtr keyComparator, Counter * spilledRecords, ICombineRunner * combiner) {
|
|
|
|
|
|
+ ComparatorPtr keyComparator, ICombineRunner * combiner) {
|
|
|
|
|
|
this->_combineRunner = combiner;
|
|
this->_combineRunner = combiner;
|
|
|
|
|
|
@@ -128,7 +130,15 @@ void MapOutputCollector::init(uint32_t defaultBlockSize, uint32_t memoryCapacity
|
|
_buckets[partitionId] = pb;
|
|
_buckets[partitionId] = pb;
|
|
}
|
|
}
|
|
|
|
|
|
- _spilledRecords = spilledRecords;
|
|
|
|
|
|
+ _mapOutputRecords = NativeObjectFactory::GetCounter(
|
|
|
|
+ TaskCounters::TASK_COUNTER_GROUP, TaskCounters::MAP_OUTPUT_RECORDS);
|
|
|
|
+ _mapOutputBytes = NativeObjectFactory::GetCounter(
|
|
|
|
+ TaskCounters::TASK_COUNTER_GROUP, TaskCounters::MAP_OUTPUT_BYTES);
|
|
|
|
+ _mapOutputMaterializedBytes = NativeObjectFactory::GetCounter(
|
|
|
|
+ TaskCounters::TASK_COUNTER_GROUP,
|
|
|
|
+ TaskCounters::MAP_OUTPUT_MATERIALIZED_BYTES);
|
|
|
|
+ _spilledRecords = NativeObjectFactory::GetCounter(
|
|
|
|
+ TaskCounters::TASK_COUNTER_GROUP, TaskCounters::SPILLED_RECORDS);
|
|
|
|
|
|
_collectTimer.reset();
|
|
_collectTimer.reset();
|
|
}
|
|
}
|
|
@@ -155,9 +165,6 @@ void MapOutputCollector::configure(Config * config) {
|
|
|
|
|
|
ComparatorPtr comparator = getComparator(config, _spec);
|
|
ComparatorPtr comparator = getComparator(config, _spec);
|
|
|
|
|
|
- Counter * spilledRecord = NativeObjectFactory::GetCounter(TaskCounters::TASK_COUNTER_GROUP,
|
|
|
|
- TaskCounters::SPILLED_RECORDS);
|
|
|
|
-
|
|
|
|
ICombineRunner * combiner = NULL;
|
|
ICombineRunner * combiner = NULL;
|
|
if (NULL != config->get(NATIVE_COMBINER)
|
|
if (NULL != config->get(NATIVE_COMBINER)
|
|
// config name for old api and new api
|
|
// config name for old api and new api
|
|
@@ -166,7 +173,7 @@ void MapOutputCollector::configure(Config * config) {
|
|
combiner = new CombineRunnerWrapper(config, _spillOutput);
|
|
combiner = new CombineRunnerWrapper(config, _spillOutput);
|
|
}
|
|
}
|
|
|
|
|
|
- init(defaultBlockSize, capacity, comparator, spilledRecord, combiner);
|
|
|
|
|
|
+ init(defaultBlockSize, capacity, comparator, combiner);
|
|
}
|
|
}
|
|
|
|
|
|
KVBuffer * MapOutputCollector::allocateKVBuffer(uint32_t partitionId, uint32_t kvlength) {
|
|
KVBuffer * MapOutputCollector::allocateKVBuffer(uint32_t partitionId, uint32_t kvlength) {
|
|
@@ -182,7 +189,7 @@ KVBuffer * MapOutputCollector::allocateKVBuffer(uint32_t partitionId, uint32_t k
|
|
if (NULL == spillpath || spillpath->length() == 0) {
|
|
if (NULL == spillpath || spillpath->length() == 0) {
|
|
THROW_EXCEPTION(IOException, "Illegal(empty) spill files path");
|
|
THROW_EXCEPTION(IOException, "Illegal(empty) spill files path");
|
|
} else {
|
|
} else {
|
|
- middleSpill(*spillpath, "");
|
|
|
|
|
|
+ middleSpill(*spillpath, "", false);
|
|
delete spillpath;
|
|
delete spillpath;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -193,6 +200,8 @@ KVBuffer * MapOutputCollector::allocateKVBuffer(uint32_t partitionId, uint32_t k
|
|
THROW_EXCEPTION(OutOfMemoryException, "key/value pair larger than io.sort.mb");
|
|
THROW_EXCEPTION(OutOfMemoryException, "key/value pair larger than io.sort.mb");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+ _mapOutputRecords->increase();
|
|
|
|
+ _mapOutputBytes->increase(kvlength - KVBuffer::headerLength());
|
|
return dest;
|
|
return dest;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -272,10 +281,9 @@ void MapOutputCollector::sortPartitions(SortOrder orderType, SortAlgorithm sortT
|
|
}
|
|
}
|
|
|
|
|
|
void MapOutputCollector::middleSpill(const std::string & spillOutput,
|
|
void MapOutputCollector::middleSpill(const std::string & spillOutput,
|
|
- const std::string & indexFilePath) {
|
|
|
|
|
|
+ const std::string & indexFilePath, bool final) {
|
|
|
|
|
|
uint64_t collecttime = _collectTimer.now() - _collectTimer.last();
|
|
uint64_t collecttime = _collectTimer.now() - _collectTimer.last();
|
|
- const uint64_t M = 1000000; //million
|
|
|
|
|
|
|
|
if (spillOutput.empty()) {
|
|
if (spillOutput.empty()) {
|
|
THROW_EXCEPTION(IOException, "MapOutputCollector: Spill file path empty");
|
|
THROW_EXCEPTION(IOException, "MapOutputCollector: Spill file path empty");
|
|
@@ -293,10 +301,24 @@ void MapOutputCollector::middleSpill(const std::string & spillOutput,
|
|
info->path = spillOutput;
|
|
info->path = spillOutput;
|
|
uint64_t spillTime = timer.now() - timer.last() - metrics.sortTime;
|
|
uint64_t spillTime = timer.now() - timer.last() - metrics.sortTime;
|
|
|
|
|
|
- LOG(
|
|
|
|
- "[MapOutputCollector::mid_spill] Sort and spill: {spilled file path: %s, id: %d, collect: %"PRIu64" ms, sort: %"PRIu64" ms, spill: %"PRIu64" ms, records: %"PRIu64", uncompressed total bytes: %"PRIu64", compressed total bytes: %"PRIu64"}",
|
|
|
|
- info->path.c_str(), _spillInfos.getSpillCount(), collecttime / M, metrics.sortTime / M, spillTime / M,
|
|
|
|
- metrics.recordCount, info->getEndPosition(), info->getRealEndPosition());
|
|
|
|
|
|
+ const uint64_t M = 1000000; //million
|
|
|
|
+ LOG("%s-spill: { id: %d, collect: %"PRIu64" ms, "
|
|
|
|
+ "in-memory sort: %"PRIu64" ms, in-memory records: %"PRIu64", "
|
|
|
|
+ "merge&spill: %"PRIu64" ms, uncompressed size: %"PRIu64", "
|
|
|
|
+ "real size: %"PRIu64" path: %s }",
|
|
|
|
+ final ? "Final" : "Mid",
|
|
|
|
+ _spillInfos.getSpillCount(),
|
|
|
|
+ collecttime / M,
|
|
|
|
+ metrics.sortTime / M,
|
|
|
|
+ metrics.recordCount,
|
|
|
|
+ spillTime / M,
|
|
|
|
+ info->getEndPosition(),
|
|
|
|
+ info->getRealEndPosition(),
|
|
|
|
+ spillOutput.c_str());
|
|
|
|
+
|
|
|
|
+ if (final) {
|
|
|
|
+ _mapOutputMaterializedBytes->increase(info->getRealEndPosition());
|
|
|
|
+ }
|
|
|
|
|
|
if (indexFilePath.length() > 0) {
|
|
if (indexFilePath.length() > 0) {
|
|
info->writeSpillInfo(indexFilePath);
|
|
info->writeSpillInfo(indexFilePath);
|
|
@@ -320,11 +342,8 @@ void MapOutputCollector::middleSpill(const std::string & spillOutput,
|
|
void MapOutputCollector::finalSpill(const std::string & filepath,
|
|
void MapOutputCollector::finalSpill(const std::string & filepath,
|
|
const std::string & idx_file_path) {
|
|
const std::string & idx_file_path) {
|
|
|
|
|
|
- const uint64_t M = 1000000; //million
|
|
|
|
- LOG("[MapOutputCollector::final_merge_and_spill] Spilling file path: %s", filepath.c_str());
|
|
|
|
-
|
|
|
|
if (_spillInfos.getSpillCount() == 0) {
|
|
if (_spillInfos.getSpillCount() == 0) {
|
|
- middleSpill(filepath, idx_file_path);
|
|
|
|
|
|
+ middleSpill(filepath, idx_file_path, true);
|
|
return;
|
|
return;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -339,16 +358,32 @@ void MapOutputCollector::finalSpill(const std::string & filepath,
|
|
|
|
|
|
SortMetrics metrics;
|
|
SortMetrics metrics;
|
|
sortPartitions(_spec.sortOrder, _spec.sortAlgorithm, NULL, metrics);
|
|
sortPartitions(_spec.sortOrder, _spec.sortAlgorithm, NULL, metrics);
|
|
- LOG("[MapOutputCollector::mid_spill] Sort final in memory kvs: {sort: %"PRIu64" ms, records: %"PRIu64"}",
|
|
|
|
- metrics.sortTime / M, metrics.recordCount);
|
|
|
|
|
|
|
|
merger->addMergeEntry(new MemoryMergeEntry(_buckets, _numPartitions));
|
|
merger->addMergeEntry(new MemoryMergeEntry(_buckets, _numPartitions));
|
|
|
|
|
|
Timer timer;
|
|
Timer timer;
|
|
merger->merge();
|
|
merger->merge();
|
|
- LOG(
|
|
|
|
- "[MapOutputCollector::final_merge_and_spill] Merge and Spill:{spilled file id: %d, merge and spill time: %"PRIu64" ms}",
|
|
|
|
- _spillInfos.getSpillCount(), (timer.now() - timer.last()) / M);
|
|
|
|
|
|
+
|
|
|
|
+ uint64_t outputSize;
|
|
|
|
+ uint64_t realOutputSize;
|
|
|
|
+ uint64_t recordCount;
|
|
|
|
+ writer->getStatistics(outputSize, realOutputSize, recordCount);
|
|
|
|
+
|
|
|
|
+ const uint64_t M = 1000000; //million
|
|
|
|
+ LOG("Final-merge-spill: { id: %d, in-memory sort: %"PRIu64" ms, "
|
|
|
|
+ "in-memory records: %"PRIu64", merge&spill: %"PRIu64" ms, "
|
|
|
|
+ "records: %"PRIu64", uncompressed size: %"PRIu64", "
|
|
|
|
+ "real size: %"PRIu64" path: %s }",
|
|
|
|
+ _spillInfos.getSpillCount(),
|
|
|
|
+ metrics.sortTime / M,
|
|
|
|
+ metrics.recordCount,
|
|
|
|
+ (timer.now() - timer.last()) / M,
|
|
|
|
+ recordCount,
|
|
|
|
+ outputSize,
|
|
|
|
+ realOutputSize,
|
|
|
|
+ filepath.c_str());
|
|
|
|
+
|
|
|
|
+ _mapOutputMaterializedBytes->increase(realOutputSize);
|
|
|
|
|
|
delete merger;
|
|
delete merger;
|
|
|
|
|