db.user.find({age:28}) 命令QueryPlanner::plan()只会生成一个执行计划,_pickBestPlan直接选择这个计划。
db.user.find({age:28}).sort({wages:1}) 命令QueryPlanner::plan()会生成两个执行计划,_pickBestPlan循环执行两个计划,根据返回结果等参数进行评分,选择评分高的计划执行。
如果db.user.find({age:28}).sort({wages:1}) 命令每次都执行这个复杂的过程,mongodb的性能肯定会打折扣,因此mongodb针对此类情况增加了执行计划缓存。
本篇主要讲讲db.user.find({age:28}).sort({wages:1})的缓存key生成策略和put到缓存的流程。
mongo/db/exec/multi_plan.cpp的pickBestPlan方法选出评分最高的计划;选出计划之后,再判断这个query命令是否可以放入缓存中,关键代码是: if (PlanCache::shouldCacheQuery(*_query) && canCache) ,请看下面代码:
/**
 * Selects the winning candidate plan and, when caching is permitted, stores the ranking
 * decision in the collection's plan cache.
 *
 * The visible portion always returns Status::OK().
 *
 * NOTE(review): this listing is an excerpt. The statement that logs 'numWorks' is truncated,
 * and the code that produces 'ranking', 'bestCandidate', 'bestSolution', 'alreadyProduced',
 * 'candidateOrder' and 'failedCandidates' is not shown here.
 */
Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
// Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
// execution work that happens here, so this is needed for the time accounting to
// make sense.
ScopedTimer timer(getClock(), &_commonStats.executionTimeMillis);
size_t numWorks = getTrialPeriodWorks(getOpCtx(), collection());
size_t numResults = getTrialPeriodNumToReturn(*_query);
LOG(3) << "conca pickBestPlan numWorks: " <toString());
LOG(2) << "Winning plan: " << Explain::getPlanSummary(bestCandidate.root);
// Start with no backup plan. One is picked below only when the winner has a blocking stage
// and has not produced any results yet: the first candidate in ranking order that has no
// blocking stage becomes the backup.
_backupPlanIdx = kNoSuchPlan;
if (bestSolution->hasBlockingStage && (0 == alreadyProduced.size())) {
LOG(3) << "Winner has blocking stage, looking for backup plan...";
for (auto&& ix : candidateOrder) {
if (!_candidates[ix].solution->hasBlockingStage) {
LOG(3) << "Candidate " << ix << " is backup child";
_backupPlanIdx = ix;
break;
}
}
}
// Even if the query is of a cacheable shape, the caller might have indicated that we shouldn't
// write to the plan cache.
//
// TODO: We can remove this if we introduce replanning logic to the SubplanStage.
bool canCache = (_cachingMode == CachingMode::AlwaysCache);
if (_cachingMode == CachingMode::SometimesCache) {
// In "sometimes cache" mode, we cache unless we hit one of the special cases below.
canCache = true;
if (ranking->tieForBest) {
// The winning plan tied with the runner-up and we're using "sometimes cache" mode. We
// will not write a plan cache entry.
canCache = false;
// These arrays having two or more entries is implied by 'tieForBest'.
invariant(ranking->scores.size() > 1U);
invariant(ranking->candidateOrder.size() > 1U);
size_t winnerIdx = ranking->candidateOrder[0];
size_t runnerUpIdx = ranking->candidateOrder[1];
LOG(1) << "Winning plan tied with runner-up. Not caching."
<< " query: " << redact(_query->toStringShort())
<< " winner score: " << ranking->scores[0]
<< " winner summary: " << Explain::getPlanSummary(_candidates[winnerIdx].root)
<< " runner-up score: " << ranking->scores[1] << " runner-up summary: "
<< Explain::getPlanSummary(_candidates[runnerUpIdx].root);
}
if (alreadyProduced.empty()) {
// We're using the "sometimes cache" mode, and the winning plan produced no results
// during the plan ranking trial period. We will not write a plan cache entry.
canCache = false;
size_t winnerIdx = ranking->candidateOrder[0];
LOG(1) << "Winning plan had zero results. Not caching."
<< " query: " << redact(_query->toStringShort())
<< " winner score: " << ranking->scores[0]
<< " winner summary: " << Explain::getPlanSummary(_candidates[winnerIdx].root);
}
}
// Store the choice we just made in the cache, if the query is of a type that is safe to
// cache.
if (PlanCache::shouldCacheQuery(*_query) && canCache) {
// Create list of candidate solutions for the cache with
// the best solution at the front.
std::vector solutions;
// Generate solutions and ranking decisions sorted by score.
for (auto&& ix : candidateOrder) {
solutions.push_back(_candidates[ix].solution.get());
}
// Insert the failed plans in the back.
for (auto&& ix : failedCandidates) {
solutions.push_back(_candidates[ix].solution.get());
}
// Check solution cache data. Do not add to cache if
// we have any invalid SolutionCacheData data.
// XXX: One known example is 2D queries
bool validSolutions = true;
for (size_t ix = 0; ix < solutions.size(); ++ix) {
if (nullptr == solutions[ix]->cacheData.get()) {
LOG(3) << "Not caching query because this solution has no cache data: "
<< redact(solutions[ix]->toString());
validSolutions = false;
break;
}
}
if (validSolutions) {
// Ownership of 'ranking' is handed to the plan cache here, so it must not be used
// afterwards. NOTE(review): transitional_ignore() presumably discards the Status that
// set() returns - confirm.
CollectionQueryInfo::get(collection())
.getPlanCache()
->set(*_query,
solutions,
std::move(ranking),
getOpCtx()->getServiceContext()->getPreciseClockSource()->now())
.transitional_ignore();
}
}
return Status::OK();
}
通过CollectionQueryInfo::get(collection()).getPlanCache()获取PlanCache对象,user表有单独的PlanCache对象;PlanCache的set方法主要将query、解决方案和评分都缓存起来。
mongo/db/query/plan_cache.cpp中set代码:
/**
 * Records the outcome of plan ranking for 'query' in this plan cache.
 *
 * @param query the canonical query the cache key is computed from.
 * @param solns the candidate solutions; must be non-empty.
 * @param why the ranking decision; must be non-null. Its 'stats' must have one element per
 *     solution, its 'scores' one element per entry in 'candidateOrder', and
 *     'candidateOrder' plus 'failedCandidates' must together account for every solution.
 * @param now timestamp passed through to the new cache entry.
 * @param worksGrowthCoefficient optional override; when unset,
 *     internalQueryCacheWorksGrowthCoefficient is used.
 * @return BadValue when any of the size checks above fails; otherwise Status::OK(), including
 *     the case where getNewEntryState() reports that no entry should be created.
 */
Status PlanCache::set(const CanonicalQuery& query,
const std::vector& solns,
std::unique_ptr why,
Date_t now,
boost::optional worksGrowthCoefficient) {
invariant(why);
if (solns.empty()) {
return Status(ErrorCodes::BadValue, "no solutions provided");
}
if (why->stats.size() != solns.size()) {
return Status(ErrorCodes::BadValue, "number of stats in decision must match solutions");
}
if (why->scores.size() != why->candidateOrder.size()) {
return Status(ErrorCodes::BadValue,
"number of scores in decision must match viable candidates");
}
if (why->candidateOrder.size() + why->failedCandidates.size() != solns.size()) {
return Status(ErrorCodes::BadValue,
"the number of viable candidates plus the number of failed candidates must "
"match the number of solutions");
}
// The key and the works value of the first stats entry are computed before the cache mutex
// is taken.
const auto key = computeKey(query);
const size_t newWorks = why->stats[0]->common.works;
stdx::lock_guard cacheLock(_cacheMutex);
bool isNewEntryActive = false;
uint32_t queryHash;
uint32_t planCacheKey;
if (internalQueryCacheDisableInactiveEntries.load()) {
// All entries are always active.
isNewEntryActive = true;
planCacheKey = canonical_query_encoder::computeHash(key.stringData());
queryHash = canonical_query_encoder::computeHash(key.getStableKeyStringData());
} else {
// Look up any existing entry for this key; NoSuchKey is the only tolerated failure.
PlanCacheEntry* oldEntry = nullptr;
Status cacheStatus = _cache.get(key, &oldEntry);
invariant(cacheStatus.isOK() || cacheStatus == ErrorCodes::NoSuchKey);
if (oldEntry) {
// Reuse the hashes already stored on the existing entry instead of recomputing them.
queryHash = oldEntry->queryHash;
planCacheKey = oldEntry->planCacheKey;
} else {
planCacheKey = canonical_query_encoder::computeHash(key.stringData());
queryHash = canonical_query_encoder::computeHash(key.getStableKeyStringData());
}
const auto newState = getNewEntryState(
query,
queryHash,
planCacheKey,
oldEntry,
newWorks,
worksGrowthCoefficient.get_value_or(internalQueryCacheWorksGrowthCoefficient));
if (!newState.shouldBeCreated) {
return Status::OK();
}
isNewEntryActive = newState.shouldBeActive;
}
auto newEntry(PlanCacheEntry::create(
solns, std::move(why), query, queryHash, planCacheKey, now, isNewEntryActive, newWorks));
// The raw pointer released here is handed to _cache.add(); whatever add() returns is held in
// 'evictedEntry' and only used for logging.
std::unique_ptr evictedEntry = _cache.add(key, newEntry.release());
if (nullptr != evictedEntry.get()) {
LOG(1) << query.nss() << ": plan cache maximum size exceeded - "
<< "removed least recently used entry " << redact(evictedEntry->toString());
}
return Status::OK();
}
const auto key = computeKey(query)将query命令转换成key;
PlanCache::getNewEntryState判断这个key是否需要创建缓存;
创建缓存对象 auto newEntry(PlanCacheEntry::create(solns, std::move(why), query, queryHash, planCacheKey, now, isNewEntryActive, newWorks))
_cache.add放入key和newEntry对象;_cache在plan_cache.h声明代码:
LRUKeyValue _cache;
user表能缓存多少个命令的计划呢?_cache大小默认是多少呢?看看PlanCache构造函数代码:
// Default constructor: delegates to the sized constructor using the value currently loaded
// from the internalQueryCacheSize knob.
PlanCache::PlanCache() : PlanCache(internalQueryCacheSize.load()) {}
// Constructs the cache with '_cache' initialised from 'size'.
// NOTE(review): 'size' is presumably the maximum number of entries - confirm against LRUKeyValue.
PlanCache::PlanCache(size_t size) : _cache(size) {}
// Empty destructor body; no explicit cleanup is performed here.
PlanCache::~PlanCache() {}
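build/debug/mongo/db/query/query_knobs_gen.h中internalQueryCacheSize的声明如下。注意:下面列出的常量kInternalQueryCacheFeedbacksStoredDefault = 20是另一个参数internalQueryCacheFeedbacksStored的默认值,并不是internalQueryCacheSize的默认值,因此不能据此得出"默认缓存20个"的结论;internalQueryCacheSize的默认值应以query_knobs.idl中该参数的default(生成后对应kInternalQueryCacheSizeDefault,通常为5000)为准。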
extern AtomicWord internalQueryCacheSize;
constexpr auto kInternalQueryCacheFeedbacksStoredDefault = 20;
query_knobs_gen.h并不在源码仓库中,它是构建时根据query_knobs.idl生成的。
query命令转换成key,跟哪些参数有关系呢?
mongo/db/query/plan_cache.cpp的computeKey方法代码:
/**
 * Builds the plan cache key for 'cq'.
 *
 * The key is made of two parts: the query shape string returned by cq.encodeKey(), and the
 * indexability discriminators that encodeIndexability() appends for the query's match tree
 * using this cache's '_indexabilityState'.
 *
 * @param cq the canonical query to compute the key for.
 * @return a PlanCacheKey constructed from the shape string and the indexability string.
 */
PlanCacheKey PlanCache::computeKey(const CanonicalQuery& cq) const {
    // Intentionally not const: a const local cannot be moved from, so the std::move() in the
    // return statement would silently fall back to a copy.
    auto shapeString = cq.encodeKey();

    StringBuilder indexabilityKeyBuilder;
    encodeIndexability(cq.root(), _indexabilityState, &indexabilityKeyBuilder);

    // Debug trace of both key parts; must run before 'shapeString' is moved from below.
    LOG(1) << "conca " << "computeKey :" << shapeString << "||"<< indexabilityKeyBuilder.str()<<" end";

    return PlanCacheKey(std::move(shapeString), indexabilityKeyBuilder.str());
}
打印结果如下。从打印结果可以看出,key跟过滤器和排序有关系,也跟过滤器中的字段能否使用索引有关系。
conn1] conca computeKey :eqage~awages||<1> end
cq.encodeKey()输出的内容是eqage~awages
encodeIndexability输出的内容是<1>
encodeIndexability主要是判断过滤器中的索引是否起作用,<1>起作用,<0>不起作用,一个索引一个标记位,核心代码如下:
const char kEncodeDiscriminatorsBegin = '<';
const char kEncodeDiscriminatorsEnd = '>';
...
void encodeIndexability(const MatchExpression* tree,
const PlanCacheIndexabilityState& indexabilityState,
StringBuilder* keyBuilder) {
if (!tree->path().empty()) {
const IndexToDiscriminatorMap& discriminators =
indexabilityState.getDiscriminators(tree->path());
IndexToDiscriminatorMap wildcardDiscriminators =
indexabilityState.buildWildcardDiscriminators(tree->path());
if (!discriminators.empty() || !wildcardDiscriminators.empty()) {
*keyBuilder << kEncodeDiscriminatorsBegin;
// For each discriminator on this path, append the character '0' or '1'.
encodeIndexabilityForDiscriminators(tree, discriminators, keyBuilder);
encodeIndexabilityForDiscriminators(tree, wildcardDiscriminators, keyBuilder);
*keyBuilder << kEncodeDiscriminatorsEnd;
}
} else if (tree->matchType() == MatchExpression::MatchType::NOT) {
// If the node is not compatible with any type of index, add a single '0' discriminator
// here. Otherwise add a '1'.
*keyBuilder << kEncodeDiscriminatorsBegin;
*keyBuilder << QueryPlannerIXSelect::logicalNodeMayBeSupportedByAnIndex(tree);
*keyBuilder << kEncodeDiscriminatorsEnd;
}
for (size_t i = 0; i < tree->numChildren(); ++i) {
encodeIndexability(tree->getChild(i), indexabilityState, keyBuilder);
}
}
db.user.find({age:28}).sort({wages:1}) 输出的编码信息是eqage~awages,mongo/db/query/canonical_query.cpp关键代码如下:
// Returns the query shape string for this canonical query by delegating to
// canonical_query_encoder::encode().
CanonicalQuery::QueryShapeString CanonicalQuery::encodeKey() const {
return canonical_query_encoder::encode(*this);
}
mongo/db/query/canonical_query_encoder.cpp的encode方法从query命令中提取关键信息:提取过滤器(eq比较age字段),提取排序字段(a表示正序,字段为wages),提取投影(无),提取排序规则collation(无)
关键代码如下:
/**
 * Produces the query shape string for 'cq'.
 *
 * The string is assembled from four sections written in a fixed order: the match expression,
 * the sort specification, the projection and the collation.
 */
CanonicalQuery::QueryShapeString encode(const CanonicalQuery& cq) {
    StringBuilder shapeBuilder;
    StringBuilder* const out = &shapeBuilder;

    // The order of the sections defines the layout of the resulting key.
    encodeKeyForMatch(cq.root(), out);
    encodeKeyForSort(cq.getQueryRequest().getSort(), out);
    encodeKeyForProj(cq.getProj(), out);
    encodeCollation(cq.getCollator(), out);

    return shapeBuilder.str();
}
// Hashes 'key' with SimpleStringDataComparator::kInstance and returns the result as a 32-bit
// unsigned value.
uint32_t computeHash(StringData key) {
return SimpleStringDataComparator::kInstance.hash(key);
}
/**
 * String encoding of MatchExpression::MatchType.
 *
 * Returns a short mnemonic for the given match type; in the cases shown, LTE maps to "le",
 * LT to "lt" and EQ to "eq".
 *
 * NOTE(review): this listing is an excerpt - the remaining cases are elided and the
 * function's closing brace is not shown.
 */
const char* encodeMatchType(MatchExpression::MatchType mt) {
switch (mt) {
...
case MatchExpression::LTE:
return "le";
case MatchExpression::LT:
return "lt";
case MatchExpression::EQ:
return "eq";
}
void encodeKeyForSort(const BSONObj& sortObj, StringBuilder* keyBuilder) {
if (sortObj.isEmpty()) {
return;
}
*keyBuilder << kEncodeSortSection;
BSONObjIterator it(sortObj);
while (it.more()) {
BSONElement elt = it.next();
// $meta text score
if (QueryRequest::isTextScoreMeta(elt)) {
*keyBuilder << "t";
}
// Ascending
else if (elt.numberInt() == 1) {
*keyBuilder << "a";
}
// Descending
else {
*keyBuilder << "d";
}
encodeUserString(elt.fieldName(), keyBuilder);
// Sort argument separator
if (it.more()) {
*keyBuilder << ",";
}
}
}
mongo/db/query/plan_cache.cpp代码:
/**
 * Decides whether set() should create a cache entry and whether that entry should be active.
 *
 * When there is no existing entry ('oldEntry' is null), the decision is logged and the result
 * asks for a new, inactive entry.
 *
 * NOTE(review): this listing is an excerpt - the handling of an existing entry, which is
 * where 'newWorks' and 'growthCoefficient' would be used, is elided.
 */
PlanCache::NewEntryState PlanCache::getNewEntryState(const CanonicalQuery& query,
uint32_t queryHash,
uint32_t planCacheKey,
PlanCacheEntry* oldEntry,
size_t newWorks,
double growthCoefficient) {
NewEntryState res;
if (!oldEntry) {
// First time this key is seen: create the entry, but leave it inactive.
LOG(1) << "Creating inactive cache entry for query shape " << redact(query.toStringShort())
<< " queryHash " << unsignedIntToFixedLengthHex(queryHash) << " planCacheKey "
<< unsignedIntToFixedLengthHex(planCacheKey) << " with works value " << newWorks;
res.shouldBeCreated = true;
res.shouldBeActive = false;
return res;
}
...
return res;
}
打印结果:
conn1] Creating inactive cache entry for query shape ns: db.user query: { age: 28.0 } sort: { wages: 1.0 } projection: {} queryHash A13CF6D8 planCacheKey FD90BAA0 with works value 6
db.user.find({age:28}).sort({wages:1})的query输出的内容是eqage~awages;索引信息输出的内容是<1>;
key.stringData()结果是:eqage~awages<1>
key.getStableKeyStringData()结果是:eqage~awages
planCacheKey = canonical_query_encoder::computeHash(key.stringData());结果是:4254120608
queryHash = canonical_query_encoder::computeHash(key.getStableKeyStringData());结果是:2705127128
unsignedIntToFixedLengthHex(planCacheKey)结果是:FD90BAA0
unsignedIntToFixedLengthHex(queryHash)结果是:A13CF6D8
总结:query的key和参数名字有关系,和参数值没有关系。key的组成部分有:过滤器(eq比较age字段)、排序字段(a表示正序,字段为wages)、投影(无)、排序规则collation(无),最后再加上<索引是否起作用>的标记位。
db.user.find({age:28}).sort({wages:1})的key是:eqage~awages<1>
db.user.find({age:1008}).sort({wages:1})的key是:eqage~awages<1>
db.user.find({age:28}) 的key是:eqage<1>
db.user.find({age:668}) 的key是:eqage<1>
下面用实际命令验证query的key和参数名字有关系,和参数值没有关系:
db.user.explain().find({age:28}).sort({wages:1})输出的"planCacheKey" : "FD90BAA0",
> db.user.explain().find({age:28}).sort({wages:1})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "db.user",
"indexFilterSet" : false,
"parsedQuery" : {
"age" : {
"$eq" : 28
}
},
"queryHash" : "A13CF6D8",
"planCacheKey" : "FD90BAA0",
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"wages" : 1
},
"memLimit" : 104857600,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"age" : 1
},
"indexName" : "age_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"age" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"age" : [
"[28.0, 28.0]"
]
}
}
}
}
}
...
db.user.explain().find({age:1008}).sort({wages:1})输出的"planCacheKey" : "FD90BAA0",
> db.user.explain().find({age:1008}).sort({wages:1})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "db.user",
"indexFilterSet" : false,
"parsedQuery" : {
"age" : {
"$eq" : 1008
}
},
"queryHash" : "A13CF6D8",
"planCacheKey" : "FD90BAA0",
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"wages" : 1
},
"memLimit" : 104857600,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"age" : 1
},
"indexName" : "age_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"age" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"age" : [
"[1008.0, 1008.0]"
]
}
}
...