Monitoring the System Programmatically

For an overview of the SDK monitoring functionality, see Data-Plane Redundancy.

Overview of APIs

The APIs that allow you to monitor and handle failure states programmatically are in three locations:

KCOM Monitoring Example

The following test code provides a basic template for setting up handlers to obtain information about interfaces through KCOM.

#include <stdio.h>
#include <net/if_dl.h>
#include <jnx/jnx_types.h>
#include <jnx/junos_kcom.h>
#include <isc/eventlib.h>
#include <errno.h>
#include <jnx/if_pub.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <sys/queue.h>

void test_ifd_apis(void);


/*
 * test_ifd_print_op()
 *
 * Translate a KCOM operation code into a printable name.
 * Any code other than ADD, DELETE, CHANGE or GET yields "UNKNOWN".
 */
static const char *
test_ifd_print_op (int op)
{
    const char *op_name = "UNKNOWN";

    if (op == KCOM_ADD) {
        op_name = "ADD";
    } else if (op == KCOM_DELETE) {
        op_name = "DELETE";
    } else if (op == KCOM_CHANGE) {
        op_name = "CHANGE";
    } else if (op == KCOM_GET) {
        op_name = "GET";
    }

    return op_name;
}
/*
 * print_redundancy_state
 *
 * Translate an rms redundancy state code into a human-readable
 * description.  Unrecognised codes yield "UNKNOWN".
 */
static const char *
print_redundancy_state (u_int8_t state)
{
    const char *description = "UNKNOWN";

    if (state == KCOM_IFDEV_REDUNDANCY_STATE_WAITING_FOR_PRIMARY) {
        description = "Waiting for primary";
    } else if (state == KCOM_IFDEV_REDUNDANCY_STATE_PRIMARY_ACTIVE) {
        description = "Primary active";
    } else if (state == KCOM_IFDEV_REDUNDANCY_STATE_SECONDARY_ACTIVE) {
        description = "Secondary active";
    } else if (state == KCOM_IFDEV_REDUNDANCY_STATE_NONE_ACTIVE) {
        description = "Both primary and secondary are down";
    }

    return description;
}

/*
 * print_redundancy_cmd
 *
 * Translate an rms redundancy command code into a human-readable
 * description.
 *
 * cmd - one of the KCOM_IFDEV_REDUNDANCY_CMD_* values.
 *
 * Returns a pointer to a string literal; the caller must not free it.
 * Unrecognised codes yield "UNKNOWN".
 */
static const char *
print_redundancy_cmd (u_int8_t cmd)
{
    switch (cmd) {
    case KCOM_IFDEV_REDUNDANCY_CMD_NONE:
        return "Command invalid";
    case KCOM_IFDEV_REDUNDANCY_CMD_SWITCH:
        return "Switch to secondary";
    case KCOM_IFDEV_REDUNDANCY_CMD_REVERT:
        return "Revert to primary";
    default:
        return "UNKNOWN";
    }
}
    
/*
 * test_ifd_print_ifdev_info()
 *
 * Print the fields of one interface device (ifd) record to stdout,
 * followed by its redundancy details when
 * junos_kcom_ifd_retrieve_redundancy_info() returns any.
 *
 * ifd - device record to print.  The record itself is not freed here;
 *       only the redundancy info retrieved inside this function is.
 */
static void
test_ifd_print_ifdev_info (kcom_ifdev_t *ifd)
{
    kcom_ifdev_redundancy_info_t *ifd_redundancy_info;

    printf("\nDEVICE Information:\n");

    printf("name:         %s\n", ifd->ifdev_name);

    if (junos_kcom_ifd_down(ifd)) {
        printf("Device Down\n");
    } else {
        printf("Device Up\n");
    }

    printf("Device index: %d\n", ifd->ifdev_index);
    printf("Dev flags:    %d\n", ifd->ifdev_devflags);
    printf("Flags:        %d\n", ifd->ifdev_flags);
    printf("Snmp id:      %d\n", ifd->ifdev_snmp_id);
    printf("Speed:        %d\n", ifd->ifdev_media_speed);
    printf("MTU:          %d\n", ifd->ifdev_media_mtu);
    printf("Nic:          %d\n", ifd->ifdev_media_nic);
    printf("Pic:          %d\n", ifd->ifdev_media_pic);
    printf("Pic port:     %d\n", ifd->ifdev_media_picport);
    printf("Operation:    %s\n", test_ifd_print_op(ifd->ifdev_op));

    /* Only these two media types are labelled; others print nothing. */
    if (ifd->ifdev_media_type == IFMT_ETHER) {
        printf("Ethernet device\n");
    } else if (ifd->ifdev_media_type == IFMT_SONET) {
        printf("SONET device\n");
    }

    ifd_redundancy_info =
        junos_kcom_ifd_retrieve_redundancy_info(&ifd->ifdev_detail_info_list);
    if (ifd_redundancy_info) {
        printf("Extensive info state: %s\n",
               print_redundancy_state(ifd_redundancy_info->state));
        printf("Extensive info cmd: %s\n",
               print_redundancy_cmd(ifd_redundancy_info->cmd));
        /*
         * The widths of tv_sec and tv_usec vary between platforms, so
         * convert explicitly to match the %ld conversions.
         */
        printf("Extensive info last update: %ld %ld\n",
               (long)(ifd_redundancy_info->lastupdate).tv_sec,
               (long)(ifd_redundancy_info->lastupdate).tv_usec);
        printf("Extensive info primary: %s\n",
               ifd_redundancy_info->primary_ifdev_name);
        printf("Extensive info secondary: %s\n",
               ifd_redundancy_info->secondary_ifdev_name);
        printf("Extensive info active_ifd: %s\n",
               ifd_redundancy_info->active_ifdev_name);
        junos_kcom_msg_free(ifd_redundancy_info);
    }
}

/*
 * test_ifd_handler()
 *
 * Handler for asynchronous ifd messages, i.e. notifications that an
 * ifd has changed.  Announces the message, prints the record and then
 * releases it with junos_kcom_msg_free().  Always returns 0.
 */
static int
test_ifd_handler(kcom_ifdev_t *msg, void *userinfo __unused)
{
    puts("Got Async IFD message");

    /* Dump the record before it is released. */
    test_ifd_print_ifdev_info(msg);
    junos_kcom_msg_free(msg);

    return 0;
}


/*
 * test_ifd_get_handler()
 *
 * Handler used for the synchronous "get all ifds" request: invoked
 * with each retrieved record, which it prints and then releases with
 * junos_kcom_msg_free().  Always returns 0.
 */
static int
test_ifd_get_handler(kcom_ifdev_t *msg, void *user_info __unused)
{
    test_ifd_print_ifdev_info(msg);    /* show what KCOM returned */
    junos_kcom_msg_free(msg);          /* the record is released here */

    return 0;
}


/*
 * test_ifd_apis()
 *
 * Function to handle registering for ifd notification, 
 * making synchronous calls to retrieve ifd information.
 */

void
test_ifd_apis(void)
{
    kcom_ifdev_t ifd;
    ifl_idx_t idx;
    int error;
   
    junos_kcom_ifd_set_detail_info(KCOM_IFDEV_REDUNDANCY_INFO);

    printf("Verifying IFD APIs\n");
    junos_kcom_register_ifd_handler(NULL, test_ifd_handler);
    
    printf("Verifying kcom_ifd_get_by_index\n");
    error = junos_kcom_ifd_get_by_index(170, &ifd);
    if (!error)
        test_ifd_print_ifdev_info(&ifd);
    
    printf("Verifying kcom_ifd_get_by_snmp_index\n");
    idx.x = 156;
    error = junos_kcom_ifd_get_by_snmp_index(idx, &ifd);
    if (!error)
        test_ifd_print_ifdev_info(&ifd);

    printf("Verifying kcom_ifd_get_by_name\n");
    error = junos_kcom_ifd_get_by_name("rms0", &ifd);
    if (!error)
        test_ifd_print_ifdev_info(&ifd);

    printf("Print All interface information\n");
    junos_kcom_ifd_get_all(test_ifd_get_handler, NULL, NULL);
   
}
/*
 * main()
 *
 * Create an event context, initialize KCOM on it, run the ifd API
 * tests and then enter the event loop so the registered handler can
 * receive asynchronous messages.
 *
 * Returns 0 after the event loop exits, or -1 if the event context or
 * KCOM could not be set up.
 */
int
main (int argc __unused , char *argv[] __unused)
{
    evContext context;

    /* Without a valid context neither KCOM nor the main loop can run. */
    if (evCreate(&context) != 0) {
        printf("Failed to create event context\n");
        return -1;
    }

    if (junos_kcom_init(0, context) != KCOM_OK) {
        printf("Failed to initialize KCOM\n");
        evDestroy(context);
        return -1;
    }

    test_ifd_apis();

    evMainLoopSyncSighdl(context);

    junos_kcom_shutdown();
    evDestroy(context);

    return 0;
}

Health Monitoring Example

The dpm-ctrl_main.c code in the Dynamic Policy Manager sample application performs health monitoring as follows. (This code is in your development sandbox at sandbox/src/sbin/dpm-ctrl.)

The application defines a handle for the Health Monitor API.

/* Health Monitor API handle: assigned from msvcspmon_register() and passed to the other msvcspmon_*() calls. */
static struct msvcspmon_client_info * mci;

msvcspmon_client_info is defined as follows in sandbox/src/junos/lib/libmsvcs-pmon/h/jnx/msvcs-pmon_lib.h:

/*
 * Client-side state for the Health Monitor API.  An application holds a
 * pointer to one of these as its handle.
 *
 * NOTE(review): this excerpt does not document the members.  The
 * *_status, hb_monitor and get_service members are presumably callback
 * slots - confirm against msvcs-pmon_lib.h.
 */
struct msvcspmon_client_info {
         pconn_client_t *client;
         char location[MSVCSPMON_LOC_NAME_LEN];
         struct msvcspmon_hb_oper_info *hb_oper_info;
         msvcspmon_connection_t connection_status;
         msvcspmon_heartbeat_setup_status_t hb_setup_status;
         msvcspmon_heartbeat_monitor_setup_status_t hb_mon_setup_status;
         msvcspmon_heartbeat_monitor_failed_t hb_monitor;
         msvcspmon_add_serice_status_t add_service_status;
         msvcspmon_delete_serice_status_t delete_service_status;
         msvcspmon_get_serice_t get_service;
     };

Next, the application registers with the health monitor process:

    /* Register with the health monitor; hm_connection_status is passed as the callback. */
    mci = msvcspmon_register(ctx, hm_connection_status);
    
    /* A NULL handle means registration failed: log it and take the failure path. */
    if(mci == NULL) {
        LOG(LOG_ERR, "%s: Registering for health monitoring failed",
                __func__);
        goto failed;
    }

The mhsi variable used in the connection status callback shown below is a struct msvcspmon_hb_setup_info, which is defined as follows:

 /*
  * Heartbeat parameters handed to msvcspmon_setup_heartbeat().
  *
  * NOTE(review): the sample assigns DPM_CTRL_HB_INTERVAL to
  * keepalive_interval and also passes it as the first (seconds) argument
  * of evConsTime() - confirm the unit documented below.
  */
 struct msvcspmon_hb_setup_info {
          pid_t pid;                          /* pid of the process */
          int thread_id;                      /* Thread id if applicable */
          int keepalive_interval;             /* Keepalive heartbeat interval in milliseconds */
          int no_of_missed_keepalive;         /* Number of missed keepalive intervals that will trigger action */
          int action;                         /* Action code */
          char name[MSVCSPMON_PROC_NAME_LEN]; /* Unique process/thread name */
      };

 

The connection status callback reports the status of the connection between the health monitor process and the Routing Engine, and is defined in the following code.

/*
 * hm_connection_status()
 *
 * Connection-status callback passed to msvcspmon_register().  When the
 * status is MSVCSPMON_REGISTER_SUCCESS it fills in the heartbeat
 * parameters and requests a heartbeat via msvcspmon_setup_heartbeat();
 * every other status is only logged.
 *
 * status - connection status reported by the health-monitor library.
 */
static void
hm_connection_status(msvcspmon_connection_status_t status)
{
    struct msvcspmon_hb_setup_info mhsi;
    
    switch(status)
    {
    case MSVCSPMON_REGISTER_SUCCESS:
        LOG(LOG_INFO, "%s: Connection status reported as up", __func__);
        
        bzero(&mhsi, sizeof(mhsi));
        // mhsi.pid will be setup by API

        /* Heartbeat interval; earlier code sets DPM_CTRL_HB_INTERVAL to 3. */
        mhsi.keepalive_interval = DPM_CTRL_HB_INTERVAL;

        /*
         * Earlier code sets DPM_CTRL_SICK_INTERVALS to 3.  If the process
         * misses this many heartbeats it is considered "sick", and the
         * health monitoring infrastructure handles it (usually by
         * restarting the process and logging the event).
         */
        mhsi.no_of_missed_keepalive = DPM_CTRL_SICK_INTERVALS;

        mhsi.action = MSVCSPMON_ACTION_DEFAULT;

        /*
         * Copy at most LEN - 1 bytes: the bzero() above already cleared
         * the final byte, so the name stays NUL-terminated even when the
         * identifier is as long as the buffer.
         */
        strncpy(mhsi.name, DPM_CTRL_HM_IDENTIFIER,
                MSVCSPMON_PROC_NAME_LEN - 1);
        msvcspmon_setup_heartbeat(mci, &mhsi, heartbeat_setup_status);
        
        break;
        
    case MSVCSPMON_CLOSED:
        LOG(LOG_INFO, "%s: Connection status reported as closed", __func__);
        break;
        
    case MSVCSPMON_FAILED: 
        LOG(LOG_ERR, "%s: Connection status reported as failed", __func__);
        break;
    
    default:
        LOG(LOG_ERR, "%s: Connection status reported as unknown", __func__);
        break;
    }
}

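The call to msvcspmon_setup_heartbeat() in the previous code passes heartbeat_setup_status() as its callback. When setup succeeds, the callback turns the heartbeat on, sends the first heartbeat with msvcspmon_heartbeat(), and starts a repeating timer that calls send_hb() to send each subsequent heartbeat: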

/*
 * heartbeat_setup_status()
 *
 * Callback passed to msvcspmon_setup_heartbeat().  When setup succeeded
 * it switches the heartbeat on, sends one heartbeat right away and arms
 * a repeating timer that calls send_hb() every DPM_CTRL_HB_INTERVAL.
 * Otherwise it only logs the failure.
 */
static void
heartbeat_setup_status(msvcspmon_hb_setup_status_t status)
{
    struct timespec period;
    struct timespec first_due;

    if (status == MSVCSPMON_HB_SETUP_SUCCESS) {
        LOG(LOG_INFO, "%s: msvcs-pmon infrastructure setup "
                "a heartbeat monitor for this daemon", __func__);

        /* Enable heartbeats and send the first one immediately. */
        msvcspmon_heartbeat_state(mci, MSVCSPMON_HEARTBEAT_ON);
        msvcspmon_heartbeat(mci);

        /* The first expiry is one period from now, then every period. */
        evInitID(&hb_timer);
        period = evConsTime(DPM_CTRL_HB_INTERVAL, 0);
        first_due = evAddTime(evNowTime(), period);

        if (evSetTimer(ev_ctx, send_hb, NULL, first_due, period,
                       &hb_timer) != 0) {
            LOG(LOG_EMERG, "%s: Failed to initialize a timer to send heartbeats. "
                    "(Error: %m)", __func__);
        }
    } else {
        LOG(LOG_ERR, "%s: msvcs-pmon infrastructure could not "
                "setup a heartbeat monitor", __func__);
    }
}

The send_hb() function is defined as follows:

/*
 * send_hb()
 *
 * Timer callback: sends one heartbeat through the global health-monitor
 * handle.  None of the timer arguments are used.
 */
static void
send_hb(evContext ctx __unused, void * uap __unused,
        struct timespec due __unused, struct timespec inter __unused)
{
    msvcspmon_heartbeat(mci);
}

2007-2009 Juniper Networks, Inc. All rights reserved. The information contained herein is confidential information of Juniper Networks, Inc., and may not be used, disclosed, distributed, modified, or copied without the prior written consent of Juniper Networks, Inc. in an express license. This information is subject to change by Juniper Networks, Inc. Juniper Networks, the Juniper Networks logo, and JUNOS are registered trademarks of Juniper Networks, Inc. in the United States and other countries. All other trademarks, service marks, registered trademarks, or registered service marks are the property of their respective owners.
Generated on Sun May 30 20:26:47 2010 for Juniper Networks Partner Solution Development Platform JUNOS SDK 10.2R1 by Doxygen 1.4.5