ES节点简单监控

本文通过ES提供的RESTful API来简单监控各节点的内存及负载。

#!/usr/bin/python
#coding:utf8

import sys, re, time, datetime, socket, smtplib
import os, urllib, urllib2, json, string

# Host name of the Elasticsearch cluster that is polled.
DOMAIN_NAME = "es.op.xxx.com"
# Common scheme/host/port prefix shared by both endpoints below.
_ES_BASE_URL = "http://{0}:9200".format(DOMAIN_NAME)
# Cluster-state endpoint queried by get_nodes().
NODES_URL = _ES_BASE_URL + "/_cluster/state/nodes,master_node"
# Per-node stats endpoint; the %s placeholder is filled in with a node id.
NODE_STAT_URL = _ES_BASE_URL + "/_nodes/%s/stats?human=true"
# Alert when a node's heap_used_percent reaches this value.
HEAP_THRESHOLD = 90
# Alert when a node's load average reaches this value.
LOAD_THRESHOLD = 30

def main():
    """Check every cluster node and mail an alert for each overloaded one.

    A node is reported when its JVM heap usage percentage is at or above
    HEAP_THRESHOLD, or when the third entry of its load-average list is at
    or above LOAD_THRESHOLD. For each such node a summary line is printed
    and the same details are sent through mailSender().
    """
    nodes_name = get_nodes()
    for node_id in nodes_name:
        node_info = get_node_info(node_id)
        indices, jvm, load = extract_node_info(node_info)

        # extract_node_info() yields an empty list when the node reports no
        # load average; treat that as "no load" instead of raising IndexError.
        # NOTE(review): index 2 is presumably the 15-minute average - confirm
        # against the Elasticsearch version in use.
        load_value = load[2] if len(load) > 2 else 0
        if jvm['heap_used_percent'] < HEAP_THRESHOLD and load_value < LOAD_THRESHOLD:
            continue

        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s %s %s %s" % (
            time.ctime(), nodes_name[node_id], jvm, load, indices))

        lines = [
            u"机器名: %s" % nodes_name[node_id],
            u"当前索引: %s" % indices['current_index'],
            u"当前查询: %s" % indices['current_query'],
            u"当前合并: %s" % indices['current_merge'],
            u"堆使用: %s (%s%%)" % (jvm['heap_used'], jvm['heap_used_percent']),
            u"Old GC: %s" % jvm['old_gc_count'],
            u"Young GC: %s" % jvm['young_gc_count'],
            u"OS Load: " + ','.join(str(x) for x in load),
        ]
        # Every line, including the last, is terminated by a newline.
        content = u"\n".join(lines) + u"\n"
        mailSender(content)

########################################
# Fetch all nodes of the ES cluster: node_id -> node name
########################################
def get_nodes():
    """Return a dict mapping each cluster node id to its node name.

    The data is read from NODES_URL. A node without a name is listed under
    its own id so that callers always get a usable string.

    Returns:
        dict: node id -> node name (text). Empty when the response carries
        no "nodes" section.
    """
    page = urllib2.urlopen(NODES_URL)
    try:
        res = json.loads(page.read())
    finally:
        # Release the HTTP connection even if reading or parsing fails.
        page.close()

    nodes = res.get("nodes") or {}
    nodes_name = {}
    for node_id, node_info in nodes.items():
        name = (node_info or {}).get('name') or node_id
        # json.loads already yields text; only raw byte strings need decoding.
        # Decoding text again would fail on non-ASCII names under Python 2.
        if isinstance(name, bytes):
            name = name.decode('utf-8')
        nodes_name[node_id] = name
    return nodes_name

#########################################
# Fetch the status of one ES cluster node
#########################################
def get_node_info(node_id):
    """Fetch the stats document of a single node.

    Args:
        node_id: Node id substituted into NODE_STAT_URL.

    Returns:
        The entry for ``node_id`` from the response's "nodes" section, or
        None when the response does not contain that node.
    """
    node_info_url = NODE_STAT_URL % node_id
    page = urllib2.urlopen(node_info_url)
    try:
        res = json.loads(page.read())
    finally:
        # Release the HTTP connection even if reading or parsing fails.
        page.close()
    # "or {}" also covers a response whose "nodes" value is null.
    return (res.get("nodes") or {}).get(node_id)

##########################################
# Extract the status information of a single node
##########################################
def _nested_get(mapping, path, default=0):
    """Walk ``path`` through nested dicts, returning ``default`` on any gap."""
    current = mapping
    for key in path:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    # A null leaf is treated like a missing one.
    return default if current is None else current


def extract_node_info(node_info):
    """Pick the monitored indexing, JVM and load figures out of a node's stats.

    Args:
        node_info: Stats dict for one node as returned by get_node_info().
            None, or a dict with missing or null sections, is accepted and
            produces the default values.

    Returns:
        tuple: ``(indices, jvm, load)`` where

        * ``indices`` has the keys current_index, current_query and
          current_merge,
        * ``jvm`` has the keys heap_used, heap_used_percent, non_heap_used,
          old_used, young_used, survivor_used, young_gc_count and
          old_gc_count,
        * ``load`` is the value of os.load_average, or an empty list when
          it is absent.

        Every missing numeric field defaults to 0.
    """
    indices = {
        'current_index': _nested_get(node_info, ('indices', 'indexing', 'index_current')),
        'current_query': _nested_get(node_info, ('indices', 'search', 'query_current')),
        'current_merge': _nested_get(node_info, ('indices', 'merges', 'current')),
    }

    mem = ('jvm', 'mem')
    pools = mem + ('pools',)
    collectors = ('jvm', 'gc', 'collectors')
    jvm = {
        'heap_used': _nested_get(node_info, mem + ('heap_used',)),
        'heap_used_percent': _nested_get(node_info, mem + ('heap_used_percent',)),
        'non_heap_used': _nested_get(node_info, mem + ('non_heap_used',)),
        'old_used': _nested_get(node_info, pools + ('old', 'used')),
        'young_used': _nested_get(node_info, pools + ('young', 'used')),
        'survivor_used': _nested_get(node_info, pools + ('survivor', 'used')),
        'young_gc_count': _nested_get(node_info, collectors + ('young', 'collection_count')),
        'old_gc_count': _nested_get(node_info, collectors + ('old', 'collection_count')),
    }

    # A fresh list is created per call so callers never share the default.
    load = _nested_get(node_info, ('os', 'load_average'), [])
    return indices, jvm, load

# Run the monitor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

对于堆内存使用率或Load超过阈值的机器,发送报警邮件:

from email.mime.text import MIMEText
from email.header import Header

# Alert mail settings read by mailSender().
# NOTE(review): the addresses and password look like redacted placeholders -
# replace them with real values before use.
sender = '[email protected]'
receiver = [
    '[email protected]',
    '[email protected]',
    '[email protected]',
]
# The subject line carries the thresholds that triggered the alert.
subject = 'ES Cluster Monitor: Heap(%s%%) Load(%s)' % (HEAP_THRESHOLD, LOAD_THRESHOLD)
smtpServer = 'smtp.exmail.qq.com'
userName = '[email protected]'
password = 'xxx'
# Character set used for both the message body and the subject header.
charset = 'gb2312'

def mailSender(content):
    """Send ``content`` as a plain-text alert mail.

    The sender, recipients, subject, SMTP server, credentials and charset
    come from the module-level mail settings.

    Args:
        content: Text of the mail body.

    Raises:
        Whatever smtplib raises when connecting, logging in or sending
        fails; the connection is closed before the error propagates.
    """
    msg = MIMEText(content, 'plain', charset)
    msg['From'] = sender
    msg['To'] = ','.join(receiver)
    msg['Subject'] = Header(subject, charset)

    smtp = smtplib.SMTP()
    try:
        smtp.connect(smtpServer)
        smtp.login(userName, password)
        smtp.sendmail(sender, receiver, msg.as_string())
        smtp.quit()
    finally:
        # close() is safe on an unconnected or already-quit session, unlike
        # quit(), so it cannot mask the original error.
        smtp.close()

报警邮件如下:

机器名: idc02-xxx-es-06
当前索引: 2360563
当前查询: 0
当前合并: 2
堆使用: 29.1gb (92%)
Old GC: 5523
Young GC: 1360285
OS Load: 2.95,2.82,2.21

你可能感兴趣的:(Elasticsearch)