#!/usr/bin/env python3
# ----------------------------------------------------------------------------
# communication_timeout --collect edm timeout info
#
# January 2022, Anand Kumar, Akshatha
#
# Copyright (c) 2022-2023 by cisco Systems, Inc.
# All rights reserved.
# ----------------------------------------------------------------------------

import subprocess
import re
import json
from datetime import datetime
import os

import signal

# When this file exists, func_communication_timeout() skips the ltrace query
# and reports the fixed entries from
# parse_dummy_communication_timeout_process() instead.
COMM_STUB_FILE = "/tmp/comm_timeout_stub.txt"

# Status tokens that lead the "$"-delimited line this script prints on stdout.
SUCCESS = "success"
FAILURE = "failure"

def handler(signum, frame):
    """Signal handler: print the failure token and terminate the script.

    Installed for SIGALRM by the ``__main__`` section, so it runs when the
    collection does not finish before the alarm fires.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit status 0, after printing ``failure$``.
    """
    # Local import keeps the handler self-contained; without ``sys`` in scope
    # the exit call below would raise NameError instead of exiting.
    import sys

    print(FAILURE + "$")
    sys.exit(0)

def parse_dummy_communication_timeout_process(metric):
    """Populate *metric* with two fixed, fake timed-out processes.

    Args:
        metric: Dict that already holds
            ``metric['communication_timeout_metric']['metric_info']`` (a
            dict). It is updated in place; each stub process name maps to
            ``{'loc': <node location>}``.

    Returns:
        None.
    """
    stub_entries = (
        ("hcmgr_dummy", "0_RP0"),
        ("sdr_instmgrdummy", "0_RP1"),
    )
    info = metric['communication_timeout_metric']['metric_info']
    for stub_name, stub_loc in stub_entries:
        info[stub_name] = {'loc': stub_loc}

def get_vm_time(cmd):
    """Run *cmd* through the shell and return what it printed.

    Args:
        cmd: Shell command line, e.g. a ``date`` invocation.

    Returns:
        The command's output string as produced by
        ``subprocess.getstatusoutput``; the exit status is discarded.
    """
    return subprocess.getstatusoutput(cmd)[1]

def is_process_timestamp_need_to_consider(op):
    """Tell whether a timeout trace line was logged less than an hour ago.

    The text in front of the first "Request" in *op* must start with a
    timestamp of the form ``Mon DD HH:MM:SS.fraction``. The stamp carries no
    year, so the current local year is assumed; if that would place the entry
    more than a day ahead of the local clock, the entry is attributed to the
    previous year instead (a line logged on Dec 31 and read on Jan 1).

    Args:
        op: One line of trace output.

    Returns:
        True if the entry is less than 3600 seconds old, otherwise False.

    Raises:
        ValueError: If the timestamp prefix cannot be parsed.
    """
    max_age_seconds = 3600
    # Stamps that are only slightly ahead of the clock are still treated as
    # recent; anything further ahead than this cannot be from this year.
    future_tolerance_seconds = 86400

    prefix = op.split("Request", 1)[0]
    stamp, dot, remainder = prefix.partition(".")
    if not dot:
        raise ValueError("timestamp has no fractional seconds: %r" % prefix)

    # Only the text up to the next "." (if any) is the fraction of a second.
    fraction = remainder.split(".", 1)[0].strip()
    if not fraction.isdigit():
        raise ValueError("bad fractional seconds in timestamp: %r" % prefix)
    # ".123" means 123 ms, so right-pad to microsecond precision.
    microsecond = int(fraction.ljust(6, "0")[:6])

    now = datetime.now()
    # Parse together with the year: a year-less parse defaults to 1900, which
    # rejects "Feb 29".
    logged = datetime.strptime(
        "%d %s" % (now.year, stamp), "%Y %b %d %H:%M:%S"
    ).replace(microsecond=microsecond)

    age = (now - logged).total_seconds()
    if age < -future_tolerance_seconds:
        try:
            logged = logged.replace(year=now.year - 1)
        except ValueError:
            # Feb 29 does not exist in the previous year, so the entry is
            # from an older leap year and certainly not recent.
            return False
        age = (now - logged).total_seconds()

    return age < max_age_seconds

def parse_communication_timeout_process(output, metric):
    """Record recently timed-out EDM processes from trace output in *metric*.

    Lines are scanned from last to first. For each line containing
    "No response from EDM ... within ... seconds", the fifth
    whitespace-separated word of the match (quotes removed) is taken as the
    process name and the ninth (")" removed) as the node location. Only the
    first accepted line per process name is recorded, and only if
    ``is_process_timestamp_need_to_consider`` accepts the line.

    Args:
        output: Multi-line text to scan.
        metric: Dict that already holds
            ``metric['communication_timeout_metric']['metric_info']`` (a
            dict). Updated in place with
            ``{process_name: {'loc': node_loc}}``, where "/" in the location
            is replaced by "_".

    Returns:
        None.
    """
    pattern = re.compile("No response from EDM .* within .* seconds")
    info = metric['communication_timeout_metric']['metric_info']
    recorded = set()

    for line in reversed(output.splitlines()):
        match = pattern.search(line)
        if match is None:
            continue
        words = match.group(0).split()
        # Only malformed lines are skipped here. A bare ``except`` would also
        # swallow SystemExit raised by a signal handler while this loop runs.
        try:
            process_name = words[4].replace("'", "")
            node_loc = words[8].replace(")", "")
            if process_name in recorded:
                continue
            if not is_process_timestamp_need_to_consider(line):
                continue
        except (IndexError, ValueError):
            continue

        info[process_name] = {'loc': node_loc.replace("/", "_")}
        recorded.add(process_name)

def get_communication_timeout_process(data):
    """Flatten the string form of the metric-info dict into text lines.

    Braces and quote characters are removed from *data*, the remainder is
    split on commas, and for every piece the text before the first ":" and
    the text after the last ":" are emitted as ``first:last`` followed by a
    newline. Whitespace around the pieces is kept as is.

    Args:
        data: String such as ``str({'proc': {'loc': '0_RP0'}})``.

    Returns:
        The concatenated ``first:last\\n`` lines as one string.
    """
    strip_table = str.maketrans("", "", "{}\"'")
    rows = []
    for item in data.translate(strip_table).split(","):
        fields = item.split(":")
        rows.append(fields[0] + ":" + fields[-1] + "\n")
    return "".join(rows)

def func_communication_timeout():
    """Collect communication-timeout data and print one status line.

    If COMM_STUB_FILE exists, the fixed dummy entries are used; otherwise
    ``sysdb_show_ltrace`` is run through the shell and its "timed out" lines
    are parsed. The printed line is ``$``-delimited and ``$``-terminated:
    status token, metric name, health state, health message, last-update
    timestamp, and the per-process details (``{}`` when there are none).

    Returns:
        None; the result is written to stdout.
    """
    metric_name = "communication-timeout"
    metric = {'communication_timeout_metric': {'metric_info': {}}}

    if os.path.exists(COMM_STUB_FILE):
        parse_dummy_communication_timeout_process(metric)
    else:
        cmd = "sysdb_show_ltrace -T client_error -s active | grep 'timed out'"
        _status, trace_text = subprocess.getstatusoutput(cmd)
        parse_communication_timeout_process(trace_text, metric)

    # The parsers update this dict in place, so it reflects what they found.
    timed_out = metric['communication_timeout_metric']['metric_info']

    if timed_out:
        health_state, health_msg = 'Warning', 'Communication Timeout detected'
    else:
        health_state, health_msg = 'Normal', 'No Communication Timeout detected'

    last_update = datetime.now().strftime('%-d %b %H:%M:%S.%f')

    metric_info = "{}"
    if timed_out:
        metric_info = get_communication_timeout_process(str(timed_out))

    fields = [SUCCESS, metric_name, health_state, health_msg, last_update, metric_info]
    print("$".join(fields) + "$")

# Script entry point: run the collection once under a watchdog alarm.
if __name__ == '__main__':
    # Install handler() for SIGALRM before arming the alarm.
    signal.signal(signal.SIGALRM, handler)
    # Deliver SIGALRM after 7 seconds if the script is still running.
    signal.alarm(7)
    func_communication_timeout()

