mew2057
diff --git a/‎.gitignore
+3 b/‎.gitignore
+3
diff --git a/‎csm_big_data/Use-Cases/elasticTest.py
+158 b/‎csm_big_data/Use-Cases/elasticTest.py
+158
diff --git a/‎csm_big_data/Use-Cases/findJobKeys.py
+132 b/‎csm_big_data/Use-Cases/findJobKeys.py
+132
@@ -31,3 +31,6 @@ report.html
 
 #CSM test config
 csm_test.cfg
+
+#Mac files
+*.DS_Store
@@ -0,0 +1,158 @@
+#!/bin/python
+# encoding: utf-8
+#================================================================================
+#
+#    elasticTest.py
+#
+#  © Copyright IBM Corporation 2015-2018. All Rights Reserved
+#
+#    This program is licensed under the terms of the Eclipse Public License
+#    v1.0 as published by the Eclipse Foundation and available at
+#    http://www.eclipse.org/legal/epl-v10.html
+#
+#    U.S. Government Users Restricted Rights:  Use, duplication or disclosure
+#    restricted by GSA ADP Schedule Contract with IBM Corp.
+#
+#================================================================================
+from datetime import datetime
+from elasticsearch import Elasticsearch
+from elasticsearch.serializer import JSONSerializer
+
+# Options
+import getopt
+
+# Load the csm component:
+import sys
+# NOTE(review): hard-coded, developer-specific directory; the two CSM python
+# bindings imported below are looked up through this path entry.
+sys.path.append('/u/jdunham/bluecoral/bluecoral/work/csm/lib')
+import lib_csm_py as csm
+import lib_csm_wm_py as wm
+
+# getopt specifications consumed by main(): a trailing ':' (short form) or
+# '=' (long form) marks an option that takes a value.
+short_opts = "ha:j:J:H:k:"
+long_opts  = ["help","allocation_id=","primary_job_id=", "secondary_job_id=", "hostnames=", "key="]
+# Elasticsearch node list handed to the Elasticsearch() client in query().
+# NOTE(review): hard-coded address - TODO make this configurable.
+cluster=['10.7.4.15:9200']
+
+# TODO HELP
+
+
+def query( input_details ):
+    ''' Queries CSM and Elastic to get a listing of records with a matching key. '''
+    # Query CSM for allocation details
+    csm.init_lib()
+    alloc_input=wm.allocation_query_input_t()
+    alloc_input.allocation_id    = input_details["allocation_id"]
+    alloc_input.primary_job_id   = input_details["primary_job_id"]
+    alloc_input.secondary_job_id = input_details["secondary_job_id"] 
+
+    rc,handler,alloc_output=wm.allocation_query(alloc_input)
+    
+    if rc is 0:
+        allocation = alloc_output.allocation
+
+        # Build some of the searches.
+        end_time = "*"
+        if allocation.history is not None:
+            end_time = allocation.history.end_time.replace(' ','T')  + "Z"
+
+        timerange='''@timestamp:[{0}Z TO {1}]'''.format(allocation.begin_time.replace(' ','T'), end_time)
+
+        # The hostname query.
+        hostnames='syslogHostname:('
+        host_count=0
+        if input_details["hostnames"] is not None:
+            for i in range (0, allocation.num_nodes):
+                host=allocation.get_compute_nodes(i)
+                if host in input_details["hostnames"]:
+                    host_count += 1
+                    hostnames += "{0} OR ".format(host)
+        else:
+            for i in range (0, allocation.num_nodes):
+                host_count += 1
+                hostnames += "{0} OR ".format(allocation.get_compute_nodes(i))
+            
+        hostnames = hostnames[:-4]
+
+        if host_count is 0:
+            print("No hosts found matching query.")
+            return 1;
+        
+        # The message portion of the query, splat query needs special TLC.
+        message="message:"
+        keys = input_details["key"].split(',')
+        if input_details["key"] is not "*":
+            for key in keys:
+                message += '"{0}",'.format(key)
+            message=message[:-1]
+        else:
+            message += "*"
+
+        aggregation='aggs:{ keys: { filter key_count : { value_count: { "field" : " "} }'
+
+        # Open a connection to the elastic cluster.
+        es = Elasticsearch(
+            cluster,
+            sniff_on_start=True,
+            sniff_on_connection_fail=True,
+            sniffer_timeout=60
+        )
+        query='{0} AND {1} AND {2})'.format(message, timerange, hostnames)
+        #query='message:"{0}" AND {1}'.format(input_details["key"], timerange)
+        print(query)
+        res = es.search( 
+            index="_all",
+            q=query
+        )
+        
+        print("Got %d Hits:" % res['hits']['total'])
+        #if res['hits']['total'] > 0:
+        #    print(res['hits']['hits'][0])
+    
+    csm.api_object_destroy(handler)
+    csm.term_lib()
+#end query(input_details)
+
+def main(args):
+    # The configuration variables
+    input_details={
+        "allocation_id"    : -1,
+        "primary_job_id"   : -1,
+        "secondary_job_id" : 0,
+        "hostnames"        : None,
+        "key"              : "*"
+    }
+    
+    # Load the configuration options TODO may want to use the code we had.
+    try:
+        opts, optargs = getopt.getopt(args[1:], short_opts, long_opts)
+    except getopt.GetoptError as err:
+        print("Invalid option detected: %s", err)
+        sys.exit(1)
+    
+    for o,a in opts:
+        if o in ("-h", "--help"):
+            print("help not implemented")
+            sys.exit()
+        
+        elif o in ("-a", "--allocation_id"):
+            input_details["allocation_id"]    = int(a)
+
+        elif o in ("-j", "--primary_job_id"):
+            input_details["primary_job_id"]   = int(a)
+        
+        elif o in ("-J", "--secondary_job_id"):
+            input_details["secondary_job_id"] = int(a)
+        
+        elif o in ("-H", "--hostnames"):
+            input_details["hostnames"]        = a.split(',')
+        
+        elif o in ("-k", "--key"):
+            input_details["key"]              = a
+        
+        else :
+            assert False, "Option %s unhandled" % o
+
+    query(input_details)
+# End main(args)
+
+# Run main() only when executed as a script; its return value is passed to
+# sys.exit() as the exit status.
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
+
@@ -0,0 +1,132 @@
+#!/bin/python
+# encoding: utf-8
+#================================================================================
+#
+#    findJobKeys.py
+#
+#    © Copyright IBM Corporation 2015-2018. All Rights Reserved
+#
+#    This program is licensed under the terms of the Eclipse Public License
+#    v1.0 as published by the Eclipse Foundation and available at
+#    http://www.eclipse.org/legal/epl-v10.html
+#
+#    U.S. Government Users Restricted Rights:  Use, duplication or disclosure
+#    restricted by GSA ADP Schedule Contract with IBM Corp.
+#
+#================================================================================
+
+import argparse
+import sys
+import os
+from elasticsearch import Elasticsearch
+from elasticsearch.serializer import JSONSerializer
+
+
+# Name of the environment variable main() reads the Elasticsearch target
+# from when no -t/--target option is given.
+TARGET_ENV='CAST_ELASTIC'
+
+def main(args):
+
+    # Specify the arguments.
+    parser = argparse.ArgumentParser(
+        description='''A tool for finding keywords during the run time of a job.''')
+    
+    parser.add_argument( '-a', '--allocationid', metavar='int', dest='allocation_id', default=-1,
+        help='The allocation ID of the job.')
+    parser.add_argument( '-j', '--jobid', metavar='int', dest='job_id', default=-1,
+        help='The job ID of the job.')
+    parser.add_argument( '-s', '--jobidsecondary', metavar='int', dest='job_id_secondary', default=0,
+        help='The secondary job ID of the job (default : 0).')
+    parser.add_argument( '-t', '--target', metavar='hostname:port', dest='target', default=None, 
+        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
+    parser.add_argument( '-k', '--keywords', metavar='key', dest='keywords', nargs='*', default=['*'],
+        help='A list of keywords to search for in the Big Data Store (default : *).')
+    parser.add_argument( '-H', '--hostnames', metavar='host', dest='hosts', nargs='*', default=None,
+        help='A list of hostnames to filter the results to ')
+
+    args = parser.parse_args()
+
+    # If the target wasn't specified check the environment for the target value, printing help on failure.
+    if args.target == None:
+        if TARGET_ENV in os.environ:
+            args.target = os.environ[TARGET_ENV]
+        else:
+            parser.print_help()
+            print("Missing target, '%s' was not set." % TARGET_ENV)
+            return 2
+
+    
+    # Open a connection to the elastic cluster, if this fails is wrong on the server.
+    es = Elasticsearch(
+        args.target, 
+        sniff_on_start=True,
+        sniff_on_connection_fail=True,
+        sniffer_timeout=60
+    )
+
+    # Build the query to get the time range.
+    should_query='{{"query":{{"bool":{{"should":[{0}]}}}}}}'
+    match_clause= '{{"match":{{"{0}":{1}}}}}'
+
+    if args.allocation_id > 0 :
+        tr_query = should_query.format(
+            match_clause.format("data.allocation_id", args.allocation_id))
+    else : 
+        tr_query = should_query.format(
+            "{0},{1}".format(
+                match_clause.format("data.primary_job_id", args.job_id ),
+                match_clause.format("data.secondary_job_id", args.job_id_secondary )))
+            
+    # Execute the query on the cast-allocation index.
+    tr_res = es.search(
+        index="cast-allocation",
+        body=tr_query
+    )
+    total_hits = tr_res["hits"]["total"]
+
+    print("Got {0} Hit(s) for specified job, searching for keywords.".format(total_hits))
+    if total_hits != 1:
+        print("This implementation only supports queries where the hit count is equal to 1.")
+        return 3
+
+    # TODO make this code more fault tolerant
+    tr_data = tr_res["hits"]["hits"][0]["_source"]["data"]
+
+    # ---------------------------------------------------------------------------------------------
+    
+    # Build the hostnames string:
+    if args.hosts is None: 
+        args.hosts = tr_data["compute_nodes"]
+    hostnames="hostname:({0})".format(" OR ".join(args.hosts))
+    
+    # ---------------------------------------------------------------------------------------------
+
+    # Determine the timerange:
+    start_time='"{0}Z"'.format(tr_data["begin_time"])
+    # If a history is present end_time is end_time, otherwise it's now.
+    if "history" in tr_data:
+        end_time='"{0}Z"'.format(tr_data["history"]["end_time"])
+    else:
+        end_time="*"
+    timerange='''@timestamp:[{0} TO {1}]'''.format(start_time, end_time)
+    
+    # ---------------------------------------------------------------------------------------------
+
+    # Build the message query.
+    message="message:{0}".format(",".join(args.keywords))
+
+    # ---------------------------------------------------------------------------------------------
+
+    # Submit the query, this is lucene syntax.
+    keyword_query="{0} AND {1} AND {2}".format(message, timerange, hostnames)
+    print keyword_query
+    key_res = es.search(
+        index="_all",
+        q=keyword_query
+    )
+    
+    print("Got %d keyword hits." % key_res['hits']['total'])
+
+    
+
+# Run main() only when executed as a script; its return value is passed to
+# sys.exit() as the exit status.
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))