new event SYS_NODE_UNREACHABLE; node unreachable because of intermediate network...
author Victor Kirhenshtein <victor@netxms.org>
Fri, 28 Dec 2012 13:48:58 +0000 (13:48 +0000)
committer Victor Kirhenshtein <victor@netxms.org>
Fri, 28 Dec 2012 13:48:58 +0000 (13:48 +0000)
12 files changed:
ChangeLog
include/netxmsdb.h
include/nxevent.h
sql/events.in
src/libnxlp/parser.cpp
src/server/core/correlate.cpp
src/server/core/events.cpp
src/server/core/interface.cpp
src/server/core/netsrv.cpp
src/server/core/node.cpp
src/server/include/nms_objects.h
src/server/tools/nxdbmgr/upgrade.cpp

index 4490391..1603901 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -14,7 +14,8 @@
        - Select all/unselect all in alarms list
        - Multipliers for graphs and last values: binary (power of two) and decimal (power of ten)
 - API for creating embedded application agents
-- Fixed issues: #23, #47, #49, #162, #163, #177, #178, #186, #188, #189, #192, #196
+- Fixed issues: #23, #47, #49, #162, #163, #177, #178, #186, #188, #189, #192,
+                #196
 
 
 *
index 4840d78..4c63ee0 100644 (file)
@@ -23,6 +23,6 @@
 #ifndef _netxmsdb_h
 #define _netxmsdb_h
 
-#define DB_FORMAT_VERSION   266
+#define DB_FORMAT_VERSION   267
 
 #endif
index b47192a..d0176ca 100644 (file)
 #define EVENT_CONTAINER_AUTOUNBIND        65
 #define EVENT_TEMPLATE_AUTOAPPLY          66
 #define EVENT_TEMPLATE_AUTOREMOVE         67
+#define EVENT_NODE_UNREACHABLE            68
 
 #define EVENT_SNMP_UNMATCHED_TRAP         500
 #define EVENT_SNMP_COLD_START             501
index 06ebdc9..48c64e9 100644 (file)
@@ -217,7 +217,7 @@ INSERT INTO event_cfg (event_code,event_name,severity,flags,message,description)
        );
 INSERT INTO event_cfg (event_code,event_name,severity,flags,message,description) VALUES
        (
-               EVENT_SNMP_FAIL, 'SYS_SNMP_UNREACHEABLE', 
+               EVENT_SNMP_FAIL, 'SYS_SNMP_UNREACHABLE', 
                EVENT_SEVERITY_WARNING, 1,
                'SNMP agent is not responding',
                'Generated when node#27s SNMP agent is not responding.#0D#0A' CONCAT
@@ -226,7 +226,7 @@ INSERT INTO event_cfg (event_code,event_name,severity,flags,message,description)
        );
 INSERT INTO event_cfg (event_code,event_name,severity,flags,message,description) VALUES
        (
-               EVENT_AGENT_FAIL, 'SYS_AGENT_UNREACHEABLE',
+               EVENT_AGENT_FAIL, 'SYS_AGENT_UNREACHABLE',
                EVENT_SEVERITY_WARNING, 1,
                'Native agent is not responding',
                'Generated when node#27s native agent is not responding.#0D#0A' CONCAT
@@ -753,6 +753,15 @@ INSERT INTO event_cfg (event_code,event_name,severity,flags,message,description)
                '   3) Template ID#0D#0A' CONCAT
                '   4) Template name'
        );
+INSERT INTO event_cfg (event_code,event_name,severity,flags,message,description) VALUES
+       (
+               EVENT_NODE_UNREACHABLE, 'SYS_NODE_UNREACHABLE',
+               EVENT_SEVERITY_CRITICAL, 1,
+               'Node unreachable because of network failure',
+               'Generated when node is unreachable by management server because of network failure.#0D#0A' CONCAT
+               'Parameters:#0D#0A' CONCAT
+               '   No event-specific parameters'
+       );
 
 
 /*
index 63235fe..62b640b 100644 (file)
 #include "libnxlp.h"
 #include <expat.h>
 
-
-//
-// Context state texts
-//
-
+/**
+ * Context state texts
+ */
 static const TCHAR *s_states[] = { _T("MANUAL"), _T("AUTO"), _T("INACTIVE") };
 
-
-//
-// XML parser state for creating LogParser object from XML
-//
-
+/**
+ * XML parser state for creating LogParser object from XML
+ */
 #define XML_STATE_INIT        -1
 #define XML_STATE_END         -2
 #define XML_STATE_ERROR       -255
@@ -53,7 +49,6 @@ static const TCHAR *s_states[] = { _T("MANUAL"), _T("AUTO"), _T("INACTIVE") };
 #define XML_STATE_MACRO       11
 #define XML_STATE_DESCRIPTION 12
 
-
 struct XML_PARSER_STATE
 {
        LogParser *parser;
index 363744e..8e3002a 100644 (file)
@@ -135,6 +135,7 @@ void CorrelateEvent(Event *pEvent)
          }
          break;
       case EVENT_NODE_DOWN:
+               case EVENT_NODE_UNREACHABLE:
          node->setLastEventId(LAST_EVENT_NODE_DOWN, pEvent->getId());
          C_SysNodeDown(node, pEvent);
          break;
index 2a83299..b91ab09 100644 (file)
@@ -870,29 +870,23 @@ BOOL PostEventEx(Queue *pQueue, DWORD dwEventCode, DWORD dwSourceId, const char
    return bResult;
 }
 
-
-//
-// Resend events from specific queue to system event queue
-//
-
+/**
+ * Resend events from specific queue to system event queue
+ */
 void ResendEvents(Queue *pQueue)
 {
-   void *pEvent;
-
    while(1)
    {
-      pEvent = pQueue->Get();
+      void *pEvent = pQueue->Get();
       if (pEvent == NULL)
          break;
       g_pEventQueue->Put(pEvent);
    }
 }
 
-
-//
-// Create NXMP record for event
-//
-
+/**
+ * Create NXMP record for event
+ */
 void CreateNXMPEventRecord(String &str, DWORD dwCode)
 {
    EVENT_TEMPLATE *p;
@@ -923,11 +917,9 @@ void CreateNXMPEventRecord(String &str, DWORD dwCode)
    RWLockUnlock(m_rwlockTemplateAccess);
 }
 
-
-//
-// Resolve event name
-//
-
+/**
+ * Resolve event name
+ */
 BOOL EventNameFromCode(DWORD dwCode, TCHAR *pszBuffer)
 {
    EVENT_TEMPLATE *p;
index ed9672a..0436baa 100644 (file)
@@ -512,6 +512,13 @@ void Interface::StatusPoll(ClientSession *pSession, DWORD dwRqId,  Queue *pEventQ
                if ((m_dot1xPaeAuthState == PAE_STATE_FORCE_UNAUTH) && (newStatus < STATUS_MAJOR))
                        newStatus = STATUS_MAJOR;
        }
+
+       // Reset status to unknown if node has known network connectivity problems
+       if ((newStatus == STATUS_CRITICAL) && (pNode->getRuntimeFlags() & NDF_NETWORK_PATH_PROBLEM))
+       {
+               newStatus = STATUS_UNKNOWN;
+               DbgPrintf(6, _T("StatusPoll(%s): Status for interface %s reset to UNKNOWN"), pNode->Name(), m_szName);
+       }
    
        if (newStatus == m_iPendingStatus)
        {
index 79837d4..46835db 100644 (file)
 
 #include "nxcore.h"
 
-
-//
-// Default constructor
-//
-
-NetworkService::NetworkService()
-               :NetObj()
+/**
+ * Default constructor
+ */
+NetworkService::NetworkService() : NetObj()
 {
    m_iServiceType = NETSRV_HTTP;
    m_pHostNode = NULL;
@@ -49,7 +46,7 @@ NetworkService::NetworkService()
  */
 NetworkService::NetworkService(int iServiceType, WORD wProto, WORD wPort,
                                TCHAR *pszRequest, TCHAR *pszResponse,
-                               Node *pHostNode, DWORD dwPollerNode)
+                                                                                Node *pHostNode, DWORD dwPollerNode) : NetObj()
 {
    m_iServiceType = iServiceType;
    m_pHostNode = pHostNode;
@@ -64,22 +61,18 @@ NetworkService::NetworkService(int iServiceType, WORD wProto, WORD wPort,
    m_bIsHidden = TRUE;
 }
 
-
-//
-// Destructor
-//
-
+/**
+ * Destructor
+ */
 NetworkService::~NetworkService()
 {
    safe_free(m_pszRequest);
    safe_free(m_pszResponse);
 }
 
-
-//
-// Save object to database
-//
-
+/**
+ * Save object to database
+ */
 BOOL NetworkService::SaveToDB(DB_HANDLE hdb)
 {
    TCHAR *pszEscRequest, *pszEscResponse, szQuery[16384], szIpAddr[32];
@@ -138,11 +131,9 @@ BOOL NetworkService::SaveToDB(DB_HANDLE hdb)
    return TRUE;
 }
 
-
-//
-// Load properties from database
-//
-
+/**
+ * Load properties from database
+ */
 BOOL NetworkService::CreateFromDB(DWORD dwId)
 {
    TCHAR szQuery[256];
@@ -229,11 +220,9 @@ BOOL NetworkService::CreateFromDB(DWORD dwId)
    return bResult;
 }
 
-
-//
-// Delete object from database
-//
-
+/**
+ * Delete object from database
+ */
 BOOL NetworkService::DeleteFromDB()
 {
    TCHAR szQuery[128];
@@ -248,11 +237,9 @@ BOOL NetworkService::DeleteFromDB()
    return bSuccess;
 }
 
-
-//
-// Create CSCP message with object's data
-//
-
+/**
+ * Create NXCP message with object's data
+ */
 void NetworkService::CreateMessage(CSCPMessage *pMsg)
 {
    NetObj::CreateMessage(pMsg);
@@ -265,11 +252,9 @@ void NetworkService::CreateMessage(CSCPMessage *pMsg)
        pMsg->SetVariable(VID_REQUIRED_POLLS, (WORD)m_iRequiredPollCount);
 }
 
-
-//
-// Modify object from message
-//
-
+/**
+ * Modify object from message
+ */
 DWORD NetworkService::ModifyFromMessage(CSCPMessage *pRequest, BOOL bAlreadyLocked)
 {
    if (!bAlreadyLocked)
@@ -347,13 +332,10 @@ DWORD NetworkService::ModifyFromMessage(CSCPMessage *pRequest, BOOL bAlreadyLock
    return NetObj::ModifyFromMessage(pRequest, TRUE);
 }
 
-
-//
-// Perform status poll on network service
-//
-
-void NetworkService::StatusPoll(ClientSession *pSession, DWORD dwRqId,
-                                Node *pPollerNode, Queue *pEventQueue)
+/**
+ * Perform status poll on network service
+ */
+void NetworkService::StatusPoll(ClientSession *pSession, DWORD dwRqId, Node *pPollerNode, Queue *pEventQueue)
 {
    int oldStatus = m_iStatus, newStatus;
    Node *pNode;
@@ -411,6 +393,13 @@ void NetworkService::StatusPoll(ClientSession *pSession, DWORD dwRqId,
       newStatus = STATUS_UNKNOWN;
    }
 
+       // Reset status to unknown if node has known network connectivity problems
+       if ((newStatus == STATUS_CRITICAL) && (pNode->getRuntimeFlags() & NDF_NETWORK_PATH_PROBLEM))
+       {
+               newStatus = STATUS_UNKNOWN;
+               DbgPrintf(6, _T("StatusPoll(%s): Status for network service %s reset to UNKNOWN"), pNode->Name(), m_szName);
+       }
+   
    if (newStatus != oldStatus)
    {
                if (newStatus == m_iPendingStatus)
@@ -439,11 +428,9 @@ void NetworkService::StatusPoll(ClientSession *pSession, DWORD dwRqId,
    SendPollerMsg(dwRqId, _T("   Finished status poll on network service %s\r\n"), m_szName);
 }
 
-
-//
-// Handler for object deletion
-//
-
+/**
+ * Handler for object deletion
+ */
 void NetworkService::OnObjectDelete(DWORD dwObjectId)
 {
        LockData();
index a075a45..4cb2ad0 100644 (file)
@@ -988,12 +988,11 @@ void Node::statusPoll(ClientSession *pSession, DWORD dwRqId, int nPoller)
    DWORD i, dwPollListSize, dwOldFlags = m_dwFlags;
    NetObj *pPollerNode = NULL, **ppPollList;
    BOOL bAllDown;
-   Queue *pQueue;    // Delayed event queue
        SNMP_Transport *pTransport;
        Cluster *pCluster;
    time_t tNow, tExpire;
 
-   pQueue = new Queue;
+   Queue *pQueue = new Queue;     // Delayed event queue
    SetPollerInfo(nPoller, _T("wait for lock"));
    pollerLock();
    m_pPollRequestor = pSession;
@@ -1206,6 +1205,8 @@ skip_snmp_check:
                    (!(m_dwFlags & NF_DISABLE_SNMP)))
                   if (!(m_dwDynamicFlags & NDF_SNMP_UNREACHABLE))
                      bAllDown = FALSE;
+
+               DbgPrintf(6, _T("StatusPoll(%s): bAllDown=%s, dynFlags=0x%08X"), m_szName, bAllDown ? _T("true") : _T("false"), m_dwDynamicFlags);
                if (bAllDown)
                {
                   if (!(m_dwDynamicFlags & NDF_UNREACHABLE))
@@ -1213,8 +1214,36 @@ skip_snmp_check:
                      m_dwDynamicFlags |= NDF_UNREACHABLE;
                                m_tDownSince = time(NULL);
                           SetPollerInfo(nPoller, _T("check network path"));
-                               checkNetworkPath(dwRqId);
-                     PostEvent(EVENT_NODE_DOWN, m_dwId, NULL);
+                               if (checkNetworkPath(dwRqId))
+                               {
+                             m_dwDynamicFlags |= NDF_NETWORK_PATH_PROBLEM;
+
+                                       // Set interfaces and network services to UNKNOWN state
+                                       LockChildList(FALSE);
+                                       for(i = 0, bAllDown = TRUE; i < m_dwChildCount; i++)
+                                               if (((m_pChildList[i]->Type() == OBJECT_INTERFACE) || (m_pChildList[i]->Type() == OBJECT_NETWORKSERVICE)) &&
+                                                        (m_pChildList[i]->Status() == STATUS_CRITICAL))
+                                               {
+                                                       m_pChildList[i]->resetStatus();
+                                               }
+                                       UnlockChildList();
+
+                                       // Clear delayed event queue
+                                       while(1)
+                                       {
+                                               Event *pEvent = (Event *)pQueue->Get();
+                                               if (pEvent == NULL)
+                                                       break;
+                                               delete pEvent;
+                                       }
+                                       delete_and_null(pQueue);
+
+                                       PostEvent(EVENT_NODE_UNREACHABLE, m_dwId, NULL);
+                               }
+                               else
+                               {
+                                       PostEvent(EVENT_NODE_DOWN, m_dwId, NULL);
+                               }
                      SendPollerMsg(dwRqId, POLLER_ERROR _T("Node is unreachable\r\n"));
                   }
                   else
@@ -1227,7 +1256,7 @@ skip_snmp_check:
                        m_tDownSince = 0;
                   if (m_dwDynamicFlags & NDF_UNREACHABLE)
                   {
-                     m_dwDynamicFlags &= ~(NDF_UNREACHABLE | NDF_SNMP_UNREACHABLE | NDF_AGENT_UNREACHABLE);
+                     m_dwDynamicFlags &= ~(NDF_UNREACHABLE | NDF_SNMP_UNREACHABLE | NDF_AGENT_UNREACHABLE | NDF_NETWORK_PATH_PROBLEM);
                      PostEvent(EVENT_NODE_UP, m_dwId, NULL);
                      SendPollerMsg(dwRqId, POLLER_INFO _T("Node recovered from unreachable state\r\n"));
                                goto restart_agent_check;
@@ -1240,8 +1269,11 @@ skip_snmp_check:
        }
 
    // Send delayed events and destroy delayed event queue
-   ResendEvents(pQueue);
-   delete pQueue;
+       if (pQueue != NULL)
+       {
+               ResendEvents(pQueue);
+               delete pQueue;
+       }
 
    // Call hooks in loaded modules
    for(DWORD i = 0; i < g_dwNumModules; i++)
@@ -1282,21 +1314,23 @@ skip_snmp_check:
 
 /**
  * Check network path between node and management server to detect possible intermediate node failure
+ *
+ * @return true if network path problems found
  */
-void Node::checkNetworkPath(DWORD dwRqId)
+bool Node::checkNetworkPath(DWORD dwRqId)
 {
    Node *mgmtNode = (Node *)FindObjectById(g_dwMgmtNode);
    if (mgmtNode == NULL)
        {
                DbgPrintf(5, _T("Node::checkNetworkPath(%s [%d]): cannot find management node"), m_szName, m_dwId);
-               return;
+               return false;
        }
 
        NetworkPath *trace = TraceRoute(mgmtNode, this);
    if (trace == NULL)
        {
                DbgPrintf(5, _T("Node::checkNetworkPath(%s [%d]): trace not available"), m_szName, m_dwId);
-               return;
+               return false;
        }
        DbgPrintf(5, _T("Node::checkNetworkPath(%s [%d]): trace available, %d hops, %s"),
                  m_szName, m_dwId, trace->getHopCount(), trace->isComplete() ? _T("complete") : _T("incomplete"));
@@ -1341,6 +1375,7 @@ restart:
                goto restart;
        }
    delete trace;
+       return pathProblemFound;
 }
 
 /**
index 11bd5bc..7473761 100644 (file)
@@ -101,8 +101,9 @@ extern Node a;
 #define NDF_CONFIGURATION_POLL_PASSED  0x1000
 #define NDF_QUEUED_FOR_TOPOLOGY_POLL   0x2000
 #define NDF_DELETE_IN_PROGRESS         0x4000
+#define NDF_NETWORK_PATH_PROBLEM       0x8000
 
-#define NDF_PERSISTENT (NDF_UNREACHABLE | NDF_AGENT_UNREACHABLE | NDF_SNMP_UNREACHABLE | NDF_CPSNMP_UNREACHABLE)
+#define NDF_PERSISTENT (NDF_UNREACHABLE | NDF_NETWORK_PATH_PROBLEM | NDF_AGENT_UNREACHABLE | NDF_SNMP_UNREACHABLE | NDF_CPSNMP_UNREACHABLE)
 
 #define __NDF_FLAGS_DEFINED
 
@@ -878,7 +879,7 @@ protected:
        void checkBridgeMib(SNMP_Transport *pTransport);
        void checkIfXTable(SNMP_Transport *pTransport);
        void executeHookScript(const TCHAR *hookName);
-       void checkNetworkPath(DWORD dwRqId);
+       bool checkNetworkPath(DWORD dwRqId);
 
        void applyUserTemplates();
 
index 49cad39..4cb483f 100644 (file)
@@ -262,6 +262,18 @@ static BOOL CreateEventTemplate(int code, const TCHAR *name, int severity, int f
 }
 
 /**
+ * Upgrade from V266 to V267
+ */
+static BOOL H_UpgradeFromV266(int currVersion, int newVersion)
+{
+       CHK_EXEC(CreateEventTemplate(EVENT_NODE_UNREACHABLE, _T("SYS_NODE_UNREACHABLE"), EVENT_SEVERITY_CRITICAL,
+                                    EF_LOG, _T("Node unreachable because of network failure"),
+                                                                                 _T("Generated when node is unreachable by management server because of network failure.\r\nParameters:\r\n   No event-specific parameters")));
+       CHK_EXEC(SQLQuery(_T("UPDATE metadata SET var_value='267' WHERE var_name='SchemaVersion'")));
+       return TRUE;
+}
+
+/**
  * Upgrade from V265 to V266
  */
 static BOOL H_UpgradeFromV265(int currVersion, int newVersion)
@@ -6541,6 +6553,7 @@ static struct
        { 263, 264, H_UpgradeFromV263 },
        { 264, 265, H_UpgradeFromV264 },
        { 265, 266, H_UpgradeFromV265 },
+       { 266, 267, H_UpgradeFromV266 },
    { 0, 0, NULL }
 };