Sample script showing a Python REST client for Commander that demonstrates the monitoring-and-repair workflow. Simply add it to crontab to run it on a schedule.
We define monitoring as periodically checking the health status of a large number of web servers by obtaining health-related metrics. Here we present a sample REST client for Commander. It performs a one-time health check and then runs the remediation (repair) workflow. Simply put this script into crontab to enable scheduled monitoring and remediation.
#!/usr/bin/env python
# --------------------------------------
# Author : yangli8@ebay.com
# Date : 01/11/2014
# Description :
# This script is a sample basic workflow to check the health status for a list of servers,
# returns the aggregated health report. For those unhealthy servers, send server restart command
# to bring them back to normal status.
# This script can be easily configured as a crontab job to be executed regularly.
# --------------------------------------
import sys
import json
import urllib
import urllib2
import time
if sys.version_info < (3, 0):
from urllib2 import urlopen
else:
from urllib.request import urlopen
def main():
    """Run one monitoring pass: check health, find unhealthy nodes, restart them.

    The monitored hosts and the Commander base URL are hard-coded in this
    sample.
    """
    commander_base_url = "http://localhost:9000/"
    # Every server that should receive the health-check request.
    monitored_hosts = [
        "www.restcommander.com",
        "www.yangli907.com",
        "www.jeffpei.com",
    ]
    # URL of the REST Commander API that receives the health-check command.
    health_check_url = (
        commander_base_url
        + "commands/genUpdateSendCommandWithReplaceVarMapNodeSpecificAdhocJson"
    )

    # Step 1: ask Commander for the aggregated health status JSON.
    health_report = getServerHealthStatus(health_check_url, monitored_hosts)
    # Step 2: pick the unhealthy nodes out of the aggregated response.
    bad_nodes = getUnhealthyNodeList(health_report)
    # Step 3: hand the unhealthy nodes to the restart helper, which is given
    # the Commander base URL rather than the command endpoint.
    restartBadNodeList(bad_nodes, commander_base_url)
def getServerHealthStatus(server, targets):
    """Ask REST Commander for the aggregated health status of the target nodes.

    Builds the JSON command body, POSTs it to ``server`` and returns the
    decoded JSON reply. The request body is also printed for inspection.

    Args:
        server: Full URL of the Commander command endpoint to POST to.
        targets: List of host names that make up the target group.

    Returns:
        The response body parsed from JSON.

    Raises:
        urllib2.URLError: If the request cannot be completed (``HTTPError``
            for HTTP error statuses).
        ValueError: If the response body is not valid JSON.
    """
    headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}

    # Raw string: "\s" and "\S" are regex character classes, not Python
    # string escapes. The value is unchanged from the non-raw literal.
    raw_regex = r".*<td>Server-Is-Healthy</td>\s*<td>(.*?)</td>[\s\S]*"

    data = {}
    data["targetNodes"] = targets  # define the target group
    # The following attributes define the agent command parameters.
    data["useNewAgentCommand"] = "true"
    data["newAgentCommandLine"] = "GET_VALIDATE_INTERNALS GET http 80 /validateInternals.html 0 0 5000 SUPERMAN_GLOBAL"
    data["newAgentCommandContentTemplate"] = "$AM_FULL_CONTENT"
    # The following attributes define the response aggregation preference.
    data["willAggregateResponse"] = "true"  # aggregate the raw responses
    data["aggregationType"] = "PATTERN_SERVER_HEALTH"
    data["useNewAggregation"] = "true"
    # The aggregation regex is sent URL-encoded.
    data["newAggregationExpression"] = urllib.quote_plus(raw_regex)

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid syntax on Python 3.
    print("The json body for request is: \n %s" % json.dumps(data, sort_keys=True, indent=3, separators=(',', ':')))

    # Supplying a body makes this a POST request.
    req = urllib2.Request(server, json.dumps(data), headers)
    response = urllib2.urlopen(req)
    try:
        # Parse the server response as JSON.
        return json.load(response)
    finally:
        # Always release the connection, even if the body is not valid JSON.
        response.close()
def getUnhealthyNodeList(response):
    """Collect the unhealthy nodes from Commander's aggregated health response.

    Prints the response, then the healthy and unhealthy node lists.

    Args:
        response: Parsed JSON response. It is read as a mapping with an
            "aggregationValueToNodesList" list whose entries each carry a
            "nodeList" and a "value" key.

    Returns:
        List of nodes from every group whose "value" is the string "False".
        Groups whose value is neither "True" nor "False" are ignored.

    Exits:
        Prints an error and calls ``sys.exit(1)`` when the response does not
        have the expected structure (missing key, non-mapping response, or a
        non-iterable node list).
    """
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid syntax on Python 3.
    print("The aggregated json response returned from Commander is \n %s" % json.dumps(response, sort_keys=True, indent=3, separators=(',', ':')))
    healthyNodeList = []
    unhealthyNodeList = []
    try:
        for group in response["aggregationValueToNodesList"]:
            hosts = group["nodeList"]
            status = group["value"]
            # The aggregated value is compared as a string, not a boolean.
            if status == "False":
                unhealthyNodeList.extend(hosts)
            elif status == "True":
                healthyNodeList.extend(hosts)
    except (KeyError, TypeError):
        # KeyError: an expected key is missing.
        # TypeError: the response or a group is not a mapping, or a node list
        # is not iterable.
        print("ERROR: unable to parse the response")
        sys.exit(1)
    print("The healthy nodes are: %s" % healthyNodeList)
    print("The unhealthy nodes are: %s" % unhealthyNodeList)
    return unhealthyNodeList
def restartBadNodeList(hosts, servers):
    """Demonstrate sending a restart command to unhealthy servers via Commander.

    Demo only: the request object is built but never posted, so no server is
    actually restarted. A three-second pause stands in for the call.

    Args:
        hosts: Nodes used as the target group of the restart command.
        servers: URL the request object is built against.
    """
    print("*******starting restart server********")

    request_headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    payload = {
        "targetNodes": hosts,  # define the target group
        "useNewAgentCommand": "true",
        "newAgentCommandLine": "POST_RESTART_SERVER POST http 80 /restart 0 0 5000 SUPERMAN_GLOBAL",
        "newAgentCommandContentTemplate": "$AM_FULL_CONTENT",
    }
    # Built for illustration only and intentionally never sent. To actually
    # post it, open this request with urllib2.urlopen and decode the reply
    # with json.load.
    unsent_request = urllib2.Request(servers, json.dumps(payload), request_headers)

    time.sleep(3)
    print("*******finished restart server********")
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
The output of the script is as follows:
The json body for request is:
{
"aggregationType":"PATTERN_SERVER_HEALTH",
"newAgentCommandContentTemplate":"$AM_FULL_CONTENT",
"newAgentCommandLine":"GET_VALIDATE_INTERNALS GET http 80 /validateInternals.html 0 0 5000 SUPERMAN_GLOBAL",
"newAggregationExpression":".%2A%3Ctd%3EServer-Is-Healthy%3C%2Ftd%3E%5Cs%2A%3Ctd%3E%28.%2A%3F%29%3C%2Ftd%3E%5B%5Cs%5CS%5D%2A",
"targetNodes":[
"www.restcommander.com",
"www.yangli907.com",
"www.jeffpei.com"
],
"useNewAgentCommand":"true",
"useNewAggregation":"true",
"willAggregateResponse":"true"
}
The aggregated json response returned from Commander is
{
"aggregationMap":{
"False":"1",
"True":"2"
},
"aggregationValueToNodesList":[
{
"isError":false,
"nodeList":[
"www.yangli907.com"
],
"value":"False"
},
{
"isError":false,
"nodeList":[
"www.jeffpei.com",
"www.restcommander.com"
],
"value":"True"
}
]
}
The healthy nodes are: [u'www.jeffpei.com', u'www.restcommander.com']
The unhealthy nodes are: [u'www.yangli907.com']
*******starting restart server********
*******finished restart server********
In the example above, you first need to define the following parameters in the HTTP request sent to the REST Commander web service:
1. The target server group, which contains all the machines to be monitored;
POST URL: http://localhost:9000/commands/generateUpdateSendAgentCommandToAdhocNodeGroup
POST BODY:
{
"targetNodes":[
"www.restcommander.com",
"www.jeffpei.com",
"www.yangli907.com"
],
"willAggregateResponse":true,
"useNewAggregation":true,
"agentCommandType":"GET_VALIDATE_INTERNAL",
"aggregationType":"PATTERN_PARSE_MONITOR_HEALTH",
"newAgentCommandLine":"GET_VALIDATE_INTERNAL GET http 80 /validateInternals.html 0 0 5000 SUPERMAN_GLOBAL",
"newAggregationExpression":".%2A%3Ctd%3EServer-Is-Healthy%3C%2Ftd%3E%5Cs%2A%3Ctd%3E%28.%2A%3F%29%3C%2Ftd%3E%5B%5Cs%5CS%5D%2A",
"useNewAgentCommand":"true"}
{
"aggregationMap":{
"False":"1",
"True":"2"
},
"aggregationValueToNodesList":[
{
"isError":false,
"nodeList":[
"www.yangli907.com"
],
"value":"False"
},
{
"isError":false,
"nodeList":[
"www.jeffpei.com",
"www.restcommander.com"
],
"value":"True"
}
]
}