Sample script showing a Python REST client for Commander that demonstrates the monitoring and repair workflow. Simply add it to crontab to run it on a schedule.
We define monitoring as an operation that periodically checks the health status of a large number of web servers by obtaining health-related metrics. Here we present a sample REST client for Commander. It performs a single health-status check and then runs the remediation (repair) workflow. Simply put this script into crontab to enable scheduled monitoring and remediation.
#!/usr/bin/env python
# --------------------------------------
# Author : yangli8@ebay.com
# Date : 01/11/2014
# Description :
# This script is a sample basic workflow to check the health status for a list of servers,
# returns the aggregated health report. For those unhealthy servers, send server restart command
# to bring them back to normal status.
# This script can be easily configured as a crontab job to be executed regularly.
# --------------------------------------
import json
import sys
import time

# Pick the URL helpers that exist on the running interpreter so the script
# works on both Python 2 and Python 3.
if sys.version_info < (3, 0):
    from urllib import quote_plus
    from urllib2 import Request, urlopen
else:
    from urllib.parse import quote_plus
    from urllib.request import Request, urlopen

# Headers sent with every JSON request to the Commander API.
JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'}


def _postJson(url, payload):
    """POST *payload* as a JSON body to *url* and return the decoded JSON reply."""
    # Python 3 requires the request body to be bytes.
    body = json.dumps(payload).encode("utf-8")
    response = urlopen(Request(url, body, JSON_HEADERS))
    try:
        return json.loads(response.read().decode("utf-8"))
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        response.close()


def main():
    """Run one pass: query health, collect unhealthy nodes, restart them."""
    # All the servers the health-check request is sent to.
    hosts = ["www.restcommander.com", "www.yangli907.com", "www.jeffpei.com"]
    superman_server = "http://localhost:9000/"
    # URL of the REST Commander API used for the health check.
    complete_uri = superman_server + "commands/genUpdateSendCommandWithReplaceVarMapNodeSpecificAdhocJson"

    # Call the REST Commander API to get the aggregated health status.
    serverHealthJson = getServerHealthStatus(complete_uri, hosts)
    # Parse the aggregated JSON response to get the unhealthy node list.
    unhealthyNodeList = getUnhealthyNodeList(serverHealthJson)
    # Call the REST Commander API to restart the unhealthy servers.
    restartBadNodeList(unhealthyNodeList, superman_server)


def getServerHealthStatus(server, targets):
    """Ask Commander for the aggregated health status of *targets*.

    Args:
        server: full URL of the Commander command endpoint to POST to.
        targets: list of host names to check.

    Returns:
        The Commander response decoded from JSON.
    """
    # Raw string: the pattern contains regex escapes such as \s that must
    # reach the server unchanged.
    raw_regex = r".*<td>Server-Is-Healthy</td>\s*<td>(.*?)</td>[\s\S]*"
    data = {
        # The target group.
        "targetNodes": targets,
        # Agent command parameters.
        "useNewAgentCommand": "true",
        "newAgentCommandLine": "GET_VALIDATE_INTERNALS GET http 80 /validateInternals.html 0 0 5000 SUPERMAN_GLOBAL",
        "newAgentCommandContentTemplate": "$AM_FULL_CONTENT",
        # Response aggregation preferences.
        "willAggregateResponse": "true",
        "aggregationType": "PATTERN_SERVER_HEALTH",
        "useNewAggregation": "true",
        "newAggregationExpression": quote_plus(raw_regex),
    }
    print("The json body for request is: \n %s"
          % json.dumps(data, sort_keys=True, indent=3, separators=(',', ':')))
    return _postJson(server, data)


def getUnhealthyNodeList(response):
    """Return the hosts that Commander reported as unhealthy.

    Args:
        response: decoded aggregated JSON response from Commander.

    Returns:
        List of host names from every group whose value is "False".

    Exits the process with status 1 when the response does not have the
    expected structure.
    """
    print("The aggregated json response returned from Commander is \n %s"
          % json.dumps(response, sort_keys=True, indent=3, separators=(',', ':')))
    healthyNodeList = []
    unhealthyNodeList = []
    try:
        for group in response["aggregationValueToNodesList"]:
            hosts = group["nodeList"]
            status = group["value"]
            if status == "False":
                unhealthyNodeList.extend(hosts)
            elif status == "True":
                healthyNodeList.extend(hosts)
    except (KeyError, TypeError):
        # TypeError covers a response or group that is not a mapping.
        print("ERROR: unable to parse the response")
        sys.exit(1)
    print("The healthy nodes are: %s" % healthyNodeList)
    print("The unhealthy nodes are: %s" % unhealthyNodeList)
    return unhealthyNodeList


def restartBadNodeList(hosts, servers):
    """Demonstrate sending a restart command to the unhealthy servers.

    The restart request is only built, never posted: this is for demo
    purposes and no server is actually restarted.

    Args:
        hosts: list of unhealthy host names to restart.
        servers: URL the restart request is addressed to.
    """
    if not hosts:
        # Nothing is unhealthy, so skip the restart flow and its delay.
        print("No unhealthy nodes found, skipping restart")
        return
    print("*******starting restart server********")
    data = {
        # The target group.
        "targetNodes": hosts,
        "useNewAgentCommand": "true",
        "newAgentCommandLine": "POST_RESTART_SERVER POST http 80 /restart 0 0 5000 SUPERMAN_GLOBAL",
        "newAgentCommandContentTemplate": "$AM_FULL_CONTENT",
    }
    # Built but deliberately not sent; pass it to urlopen() to really restart.
    # NOTE(review): main() passes the base server URL here rather than a
    # command endpoint - confirm the intended URL before enabling the post.
    req = Request(servers, json.dumps(data).encode("utf-8"), JSON_HEADERS)
    time.sleep(3)
    print("*******finished restart server********")


if __name__ == "__main__":
    main()
The output of the script is as follows:
The json body for request is: { "aggregationType":"PATTERN_SERVER_HEALTH", "newAgentCommandContentTemplate":"$AM_FULL_CONTENT", "newAgentCommandLine":"GET_VALIDATE_INTERNALS GET http 80 /validateInternals.html 0 0 5000 SUPERMAN_GLOBAL", "newAggregationExpression":".%2A%3Ctd%3EServer-Is-Healthy%3C%2Ftd%3E%5Cs%2A%3Ctd%3E%28.%2A%3F%29%3C%2Ftd%3E%5B%5Cs%5CS%5D%2A", "targetNodes":[ "www.restcommander.com", "www.yangli907.com", "www.jeffpei.com" ], "useNewAgentCommand":"true", "useNewAggregation":"true", "willAggregateResponse":"true" } The aggregated json response returned from Commander is { "aggregationMap":{ "False":"1", "True":"2" }, "aggregationValueToNodesList":[ { "isError":false, "nodeList":[ "www.yangli907.com" ], "value":"False" }, { "isError":false, "nodeList":[ "www.jeffpei.com", "www.restcommander.com" ], "value":"True" } ] } The healthy nodes are: [u'www.jeffpei.com', u'www.restcommander.com'] The unhealthy nodes are: [u'www.yangli907.com'] *******starting restart server******** *******finished restart server********
In the example above, you first need to define the following parameters in the HTTP request sent to the REST Commander web service:
1. The target server group, which contains all the machines to be monitored. POST URL: http://localhost:9000/commands/generateUpdateSendAgentCommandToAdhocNodeGroup POST BODY: { "targetNodes":[ "www.restcommander.com", "www.jeffpei.com", "www.yangli907.com" ], "willAggregateResponse":true, "useNewAggregation":true, "agentCommandType":"GET_VALIDATE_INTERNAL", "aggregationType":"PATTERN_PARSE_MONITOR_HEALTH", "newAgentCommandLine":"GET_VALIDATE_INTERNAL GET http 80 /validateInternals.html 0 0 5000 SUPERMAN_GLOBAL", "newAggregationExpression":".%2A%3Ctd%3EServer-Is-Healthy%3C%2Ftd%3E%5Cs%2A%3Ctd%3E%28.%2A%3F%29%3C%2Ftd%3E%5B%5Cs%5CS%5D%2A", "useNewAgentCommand":"true"}
{ "aggregationMap":{ "False":"1", "True":"2" }, "aggregationValueToNodesList":[ { "isError":false, "nodeList":[ "www.yangli907.com" ], "value":"False" }, { "isError":false, "nodeList":[ "www.jeffpei.com", "www.restcommander.com" ], "value":"True" } ] }