Skip to content

Commit d08e7e7

Browse files
authored
Merge pull request IBM#170 from etsai7/master
Python Log Analysis for API Run Times
2 parents 71758b5 + a7cd361 commit d08e7e7

File tree

19 files changed

+1046
-0
lines changed

19 files changed

+1046
-0
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,6 @@ report.html
3131

3232
#CSM test config
3333
csm_test.cfg
34+
35+
#Mac files
36+
*.DS_Store

csm_big_data/Use-Cases/elasticTest.py

+158
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
#!/bin/python
2+
# encoding: utf-8
3+
#================================================================================
4+
#
5+
# elasticTest.py
6+
#
7+
# © Copyright IBM Corporation 2015-2018. All Rights Reserved
8+
#
9+
# This program is licensed under the terms of the Eclipse Public License
10+
# v1.0 as published by the Eclipse Foundation and available at
11+
# http://www.eclipse.org/legal/epl-v10.html
12+
#
13+
# U.S. Government Users Restricted Rights: Use, duplication or disclosure
14+
# restricted by GSA ADP Schedule Contract with IBM Corp.
15+
#
16+
#================================================================================
17+
from datetime import datetime
18+
from elasticsearch import Elasticsearch
19+
from elasticsearch.serializer import JSONSerializer
20+
21+
# Options
22+
import getopt
23+
24+
# Load the csm component:
25+
import sys
26+
sys.path.append('/u/jdunham/bluecoral/bluecoral/work/csm/lib')
27+
import lib_csm_py as csm
28+
import lib_csm_wm_py as wm
29+
30+
# getopt specification: "-h" takes no value, every other option expects one.
short_opts = "ha:j:J:H:k:"
long_opts = [
    "help",
    "allocation_id=",
    "primary_job_id=",
    "secondary_job_id=",
    "hostnames=",
    "key=",
]

# Elasticsearch nodes handed to the client when the connection is opened.
cluster = ['10.7.4.15:9200']
33+
34+
# TODO HELP
35+
36+
37+
def _to_query_timestamp(timestamp):
    ''' Reformats a timestamp for the range query: spaces become 'T' and a 'Z' suffix is added. '''
    return timestamp.replace(' ', 'T') + "Z"


def _build_hostname_clause(allocation, requested_hosts):
    ''' Builds the syslogHostname clause, or returns None when no compute node is selected. '''
    nodes = [allocation.get_compute_nodes(i) for i in range(allocation.num_nodes)]
    if requested_hosts is not None:
        # Only keep nodes of the allocation that the caller explicitly asked for.
        nodes = [node for node in nodes if node in requested_hosts]

    if not nodes:
        return None
    return 'syslogHostname:({0})'.format(" OR ".join("{0}".format(node) for node in nodes))


def _build_message_clause(key):
    ''' Builds the message clause; the splat key is passed through unquoted. '''
    # Compare by value: a "*" coming from the command line is not guaranteed to
    # be the same object as a "*" literal, so an identity test is unreliable.
    if key == "*":
        return "message:*"
    return "message:" + ",".join('"{0}"'.format(word) for word in key.split(','))


def query( input_details ):
    ''' Queries CSM and Elastic to get a listing of records with a matching key.

    input_details is a dictionary providing "allocation_id", "primary_job_id",
    "secondary_job_id", "hostnames" (a list, or None for every node of the
    allocation) and "key" (comma separated keywords, or "*" for any message).

    The generated query and the hit count are printed. Returns 1 if no host of
    the allocation was selected, the CSM return code if the allocation query
    failed, and None otherwise.
    '''
    # Query CSM for allocation details
    csm.init_lib()
    try:
        alloc_input = wm.allocation_query_input_t()
        alloc_input.allocation_id    = input_details["allocation_id"]
        alloc_input.primary_job_id   = input_details["primary_job_id"]
        alloc_input.secondary_job_id = input_details["secondary_job_id"]

        rc, handler, alloc_output = wm.allocation_query(alloc_input)
        try:
            if rc != 0:
                print("Allocation query failed with return code {0}.".format(rc))
                return rc

            allocation = alloc_output.allocation

            # The time range: open ended unless the allocation has a history.
            end_time = "*"
            if allocation.history is not None:
                end_time = _to_query_timestamp(allocation.history.end_time)
            timerange = '''@timestamp:[{0} TO {1}]'''.format(
                _to_query_timestamp(allocation.begin_time), end_time)

            # The hostname query.
            hostnames = _build_hostname_clause(allocation, input_details["hostnames"])
            if hostnames is None:
                print("No hosts found matching query.")
                return 1

            # The message portion of the query, splat query needs special TLC.
            message = _build_message_clause(input_details["key"])

            # Open a connection to the elastic cluster.
            es = Elasticsearch(
                cluster,
                sniff_on_start=True,
                sniff_on_connection_fail=True,
                sniffer_timeout=60
            )
            search_query = '{0} AND {1} AND {2}'.format(message, timerange, hostnames)
            print(search_query)
            res = es.search(
                index="_all",
                q=search_query
            )

            print("Got %d Hits:" % res['hits']['total'])
        finally:
            # Release the CSM query handle on every exit path, including early returns.
            csm.api_object_destroy(handler)
    finally:
        csm.term_lib()
#end query(input_details)
112+
113+
def main(args):
    ''' Parses the command line and runs the query.

    args is the full argument vector (args[0] is the program name and is
    skipped). Exits with status 1 on an invalid option or a non-integer id,
    and exits after printing a placeholder when help is requested. Returns
    whatever query() returns so the caller can use it as the exit status.
    '''
    # The configuration variables
    input_details={
        "allocation_id"    : -1,
        "primary_job_id"   : -1,
        "secondary_job_id" : 0,
        "hostnames"        : None,
        "key"              : "*"
    }

    # Options whose value must be an integer, mapped to the setting they fill in.
    int_options = {
        "-a" : "allocation_id",    "--allocation_id"    : "allocation_id",
        "-j" : "primary_job_id",   "--primary_job_id"   : "primary_job_id",
        "-J" : "secondary_job_id", "--secondary_job_id" : "secondary_job_id",
    }

    # Load the configuration options TODO may want to use the code we had.
    try:
        opts, _ = getopt.getopt(args[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        print("Invalid option detected: %s" % err)
        sys.exit(1)

    for opt, value in opts:
        if opt in ("-h", "--help"):
            print("help not implemented")
            sys.exit()

        elif opt in int_options:
            try:
                input_details[int_options[opt]] = int(value)
            except ValueError:
                print("Option %s requires an integer, got '%s'" % (opt, value))
                sys.exit(1)

        elif opt in ("-H", "--hostnames"):
            input_details["hostnames"] = value.split(',')

        elif opt in ("-k", "--key"):
            input_details["key"] = value

        else :
            # getopt only returns options from the specification, so reaching
            # this branch means the specification and this loop are out of sync.
            assert False, "Option %s unhandled" % opt

    return query(input_details)
# End main(args)
155+
156+
# Script entry point: pass the raw argument vector to main() and exit with its result.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
158+

csm_big_data/Use-Cases/findJobKeys.py

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#!/bin/python
2+
# encoding: utf-8
3+
#================================================================================
4+
#
5+
# findJobKeys.py
6+
#
7+
# © Copyright IBM Corporation 2015-2018. All Rights Reserved
8+
#
9+
# This program is licensed under the terms of the Eclipse Public License
10+
# v1.0 as published by the Eclipse Foundation and available at
11+
# http://www.eclipse.org/legal/epl-v10.html
12+
#
13+
# U.S. Government Users Restricted Rights: Use, duplication or disclosure
14+
# restricted by GSA ADP Schedule Contract with IBM Corp.
15+
#
16+
#================================================================================
17+
18+
import argparse
19+
import sys
20+
import os
21+
from elasticsearch import Elasticsearch
22+
from elasticsearch.serializer import JSONSerializer
23+
24+
25+
# Environment variable consulted when no Elasticsearch target is given on the command line.
TARGET_ENV='CAST_ELASTIC'

def main(args):
    ''' Finds keywords logged during the run time of a job.

    args is the full argument vector (args[0] is the program name and is
    skipped). The job is looked up in the "cast-allocation" index, then all
    indices are searched for the keywords within the job's time range and
    hosts; the query and the hit counts are printed.

    Returns 2 if no Elasticsearch target could be determined, 3 if the job
    lookup did not produce exactly one hit, and 0 otherwise.
    '''
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding keywords during the run time of a job.''')

    # The ids are parsed as integers so the comparison and the match clauses below see numbers.
    parser.add_argument( '-a', '--allocationid', metavar='int', dest='allocation_id', default=-1, type=int,
        help='The allocation ID of the job.')
    parser.add_argument( '-j', '--jobid', metavar='int', dest='job_id', default=-1, type=int,
        help='The job ID of the job.')
    parser.add_argument( '-s', '--jobidsecondary', metavar='int', dest='job_id_secondary', default=0, type=int,
        help='The secondary job ID of the job (default : 0).')
    parser.add_argument( '-t', '--target', metavar='hostname:port', dest='target', default=None,
        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument( '-k', '--keywords', metavar='key', dest='keywords', nargs='*', default=['*'],
        help='A list of keywords to search for in the Big Data Store (default : *).')
    parser.add_argument( '-H', '--hostnames', metavar='host', dest='hosts', nargs='*', default=None,
        help='A list of hostnames to filter the results to ')

    # Parse the vector that was passed in rather than silently re-reading sys.argv.
    options = parser.parse_args(args[1:])

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    target = options.target
    if target is None:
        if TARGET_ENV in os.environ:
            target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Open a connection to the elastic cluster.
    es = Elasticsearch(
        target,
        sniff_on_start=True,
        sniff_on_connection_fail=True,
        sniffer_timeout=60
    )

    # Build the query to get the time range.
    if options.allocation_id > 0 :
        should_clauses = [ {"match": {"data.allocation_id": options.allocation_id}} ]
    else :
        # NOTE(review): "should" matches documents satisfying either clause, not
        # both - confirm that is intended for the primary/secondary job id pair.
        should_clauses = [
            {"match": {"data.primary_job_id": options.job_id}},
            {"match": {"data.secondary_job_id": options.job_id_secondary}},
        ]
    tr_query = {"query": {"bool": {"should": should_clauses}}}

    # Execute the query on the cast-allocation index.
    tr_res = es.search(
        index="cast-allocation",
        body=tr_query
    )
    total_hits = tr_res["hits"]["total"]

    print("Got {0} Hit(s) for specified job, searching for keywords.".format(total_hits))
    if total_hits != 1:
        print("This implementation only supports queries where the hit count is equal to 1.")
        return 3

    # TODO make this code more fault tolerant
    tr_data = tr_res["hits"]["hits"][0]["_source"]["data"]

    # ---------------------------------------------------------------------------------------------

    # Build the hostnames string, an absent or empty -H falls back to the job's own nodes.
    # Assumes "compute_nodes" is a list of hostname strings - TODO confirm against the index.
    hosts = options.hosts
    if not hosts:
        hosts = tr_data["compute_nodes"]
    hostnames="hostname:({0})".format(" OR ".join(hosts))

    # ---------------------------------------------------------------------------------------------

    # Determine the timerange:
    start_time='"{0}Z"'.format(tr_data["begin_time"])
    # If a history is present end_time is end_time, otherwise it's now.
    if "history" in tr_data:
        end_time='"{0}Z"'.format(tr_data["history"]["end_time"])
    else:
        end_time="*"
    timerange='''@timestamp:[{0} TO {1}]'''.format(start_time, end_time)

    # ---------------------------------------------------------------------------------------------

    # Build the message query, an empty -k falls back to the splat default.
    keywords = options.keywords or ['*']
    message="message:{0}".format(",".join(keywords))

    # ---------------------------------------------------------------------------------------------

    # Submit the query, this is lucene syntax.
    keyword_query="{0} AND {1} AND {2}".format(message, timerange, hostnames)
    print(keyword_query)
    key_res = es.search(
        index="_all",
        q=keyword_query
    )

    print("Got %d keyword hits." % key_res['hits']['total'])
    return 0
129+
130+
131+
# Script entry point: pass the raw argument vector to main() and exit with its result.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)

0 commit comments

Comments
 (0)