added check for stalled peer node DB lock in active/passive cluster (issue #185)
author Victor Kirhenshtein <victor@netxms.org>
Thu, 3 Nov 2016 07:42:49 +0000 (09:42 +0200)
committer Victor Kirhenshtein <victor@netxms.org>
Thu, 3 Nov 2016 07:43:36 +0000 (09:43 +0200)
ChangeLog
src/server/core/config.cpp
src/server/core/main.cpp

index 26a3de4..ebefa41 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -15,7 +15,7 @@
 - SMS driver for SMSEagle gateway
 - Fixed FDB handling on Cisco switches
 - Added support for CHAP, MS-CHAPv1, and MS-CHAPv2 in RADIUS authentication
-- Fixed issues: #130, #876, #953, #1043, #1097, #1192, #1206, #1285, #1312, #1318, #1320, #1326
+- Fixed issues: #130, #185, #876, #953, #1043, #1097, #1192, #1206, #1285, #1312, #1318, #1320, #1326
 
 
 *
index 9016ce4..e3fdc1b 100644 (file)
@@ -29,6 +29,7 @@
 extern char g_szCodePage[];
 extern TCHAR *g_moduleLoadList;
 extern TCHAR *g_pdsLoadList;
+extern InetAddressList g_peerNodeAddrList;
 
 /**
  * database connection parameters
@@ -46,6 +47,11 @@ TCHAR g_szDbSchema[MAX_DB_NAME] = _T("");
  */
 static UINT32 s_debugLevel = (UINT32)NXCONFIG_UNINITIALIZED_VALUE;
 
+/**
+ * Peer node information
+ */
+static TCHAR s_peerNode[MAX_DB_STRING];
+
 /**
  * Config file template
  */
@@ -75,6 +81,7 @@ static NX_CFG_TEMPLATE m_cfgTemplate[] =
    { _T("LogRotationMode"), CT_LONG, 0, 0, 0, 0, &g_logRotationMode, NULL },
    { _T("MaxLogSize"), CT_SIZE_BYTES, 0, 0, 0, 0, &g_maxLogSize, NULL },
    { _T("Module"), CT_STRING_LIST, '\n', 0, 0, 0, &g_moduleLoadList, NULL },
+   { _T("PeerNode"), CT_STRING, 0, 0, MAX_DB_STRING, 0, s_peerNode, NULL },
    { _T("PerfDataStorageDriver"), CT_STRING_LIST, '\n', 0, 0, 0, &g_pdsLoadList, NULL },
    { _T("ProcessAffinityMask"), CT_LONG, 0, 0, 0, 0, &g_processAffinityMask, NULL },
    { _T(""), CT_END_OF_LIST, 0, 0, 0, 0, NULL, NULL }
@@ -158,6 +165,21 @@ stop_search:
 
        // Decrypt password
    DecryptPassword(g_szDbLogin, g_szDbPassword, g_szDbPassword, MAX_PASSWORD);
+
+   // Parse peer node information
+   if (s_peerNode[0] != 0)
+   {
+      int count = 0;
+      TCHAR **list = SplitString(s_peerNode, _T(','), &count);
+      for(int i = 0; i < count; i++)
+      {
+         InetAddress a = InetAddress::resolveHostName(list[i]);
+         if (a.isValidUnicast())
+            g_peerNodeAddrList.add(a);
+         free(list[i]);
+      }
+      free(list);
+   }
    return bSuccess;
 }
 
index 957eca7..1c7db72 100644 (file)
@@ -156,6 +156,7 @@ int g_requiredPolls = 1;
 DB_DRIVER g_dbDriver = NULL;
 ThreadPool NXCORE_EXPORTABLE *g_mainThreadPool = NULL;
 INT16 g_defaultAgentCacheMode = AGENT_CACHE_OFF;
+InetAddressList g_peerNodeAddrList;
 
 /**
  * Static data
@@ -444,34 +445,79 @@ static BOOL InitCryptografy()
 /**
  * Check if process with given PID exists and is a NetXMS server process
+ *
+ * On Windows the base module name of the target process is compared
+ * (case-insensitively) with the base module name of the current process.
+ * On other platforms only the kill(pid, 0) probe is performed; the process
+ * name is not examined there.
+ *
+ * @param pid process ID to check
+ * @return true if the process is considered to be a NetXMS server process
  */
-static BOOL IsNetxmsdProcess(UINT32 dwPID)
+static bool IsNetxmsdProcess(UINT32 pid)
 {
 #ifdef _WIN32
-       HANDLE hProcess;
-       TCHAR szExtModule[MAX_PATH], szIntModule[MAX_PATH];
-       BOOL bRet = FALSE;
-
-       hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, dwPID);
+       // Stays false when the process cannot be opened
+       bool result = false;
+       HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid);
        if (hProcess != NULL)
        {
+          TCHAR szExtModule[MAX_PATH], szIntModule[MAX_PATH];
                if ((GetModuleBaseName(hProcess, NULL, szExtModule, MAX_PATH) > 0) &&
-                               (GetModuleBaseName(GetCurrentProcess(), NULL, szIntModule, MAX_PATH) > 0))
+                        (GetModuleBaseName(GetCurrentProcess(), NULL, szIntModule, MAX_PATH) > 0))
                {
-                       bRet = !_tcsicmp(szExtModule, szIntModule);
+                       // Same executable name as this process means it is another server instance
+                       result = (_tcsicmp(szExtModule, szIntModule) == 0);
                }
                else
                {
                        // Cannot read process name, for safety assume that it's a server process
-                       bRet = TRUE;
+                       result = true;
                }
                CloseHandle(hProcess);
        }
-       return bRet;
+       return result;
 #else
-       return (kill((pid_t)dwPID, 0) != -1);
+       // Signal 0 sends nothing; kill() only performs its error checking.
+       // NOTE(review): any kill() failure, including a permission error, is treated as "no such process" - confirm this is intended
+       return kill((pid_t)pid, 0) != -1;
 #endif
 }
 
+/**
+ * Check if remote netxmsd is running
+ *
+ * The agent at the given address is asked for the number of netxmsd
+ * processes. If that request succeeds, its answer is final. If the agent
+ * cannot be connected or the request fails, a TCP connection attempt to the
+ * client listener port (configuration variable ClientListenerPort) is used
+ * as a fallback check.
+ *
+ * @param addr address of the peer node
+ * @return true if the peer server process appears to be running
+ */
+static bool PeerNodeIsRunning(const InetAddress& addr)
+{
+   // Build key file name only if it fits into the buffer; otherwise connect without a key
+   RSA *key = NULL;
+   TCHAR keyFile[MAX_PATH];
+   if (_tcslen(g_netxmsdDataDir) + _tcslen(DFILE_KEYS) < MAX_PATH)
+   {
+      _tcscpy(keyFile, g_netxmsdDataDir);
+      _tcscat(keyFile, DFILE_KEYS);
+      key = LoadRSAKeys(keyFile);
+   }
+
+   bool agentAnswered = false;
+   bool running = false;
+
+   AgentConnection *ac = new AgentConnection(addr);
+   if (ac->connect(key))
+   {
+      TCHAR value[MAX_RESULT_LENGTH];
+      // NOTE(review): process name is selected by the local platform - assumes peer runs the same OS family
+#ifdef _WIN32
+      UINT32 rcc = ac->getParameter(_T("Process.Count(netxmsd.exe)"), MAX_RESULT_LENGTH, value);
+#else
+      UINT32 rcc = ac->getParameter(_T("Process.Count(netxmsd)"), MAX_RESULT_LENGTH, value);
+#endif
+      if (rcc == ERR_SUCCESS)
+      {
+         agentAnswered = true;
+         running = _tcstol(value, NULL, 10) > 0;
+      }
+   }
+
+   // Single release point for the connection and the key, whatever the outcome above
+   ac->decRefCount();
+   if (key != NULL)
+      RSA_free(key);
+
+   if (agentAnswered)
+      return running;
+
+   // No usable answer from agent - try to connect to the client listener port instead
+   UINT16 port = (UINT16)ConfigReadInt(_T("ClientListenerPort"), SERVER_LISTEN_PORT_FOR_CLIENTS);
+   SOCKET s = ConnectToHost(addr, port, 5000);   // presumably timeout in milliseconds - confirm
+   if (s != INVALID_SOCKET)
+   {
+      shutdown(s, SHUT_RDWR);
+      closesocket(s);
+      running = true;
+   }
+   return running;
+}
+
 /**
  * Database event handler
  */
@@ -701,27 +747,34 @@ retry_db_lock:
    InetAddress addr;
        if (!InitLocks(&addr, buffer))
        {
-      if (!addr.isValid())    // Some SQL problems
-               {
-                       nxlog_write(MSG_INIT_LOCKS_FAILED, EVENTLOG_ERROR_TYPE, NULL);
-               }
-               else     // Database already locked by another server instance
+               if (addr.isValidUnicast())     // Database already locked by another server instance
                {
                        // Check for lock from crashed/terminated local process
                        if (GetLocalIpAddr().equals(addr))
                        {
-                               UINT32 dwPID;
-
-                               dwPID = ConfigReadULong(_T("DBLockPID"), 0);
-                               if (!IsNetxmsdProcess(dwPID) || (dwPID == GetCurrentProcessId()))
+                               UINT32 pid = ConfigReadULong(_T("DBLockPID"), 0);
+                               if (!IsNetxmsdProcess(pid) || (pid == GetCurrentProcessId()))
                                {
                                        UnlockDB();
                                        nxlog_write(MSG_DB_LOCK_REMOVED, EVENTLOG_INFORMATION_TYPE, NULL);
                                        goto retry_db_lock;
                                }
                        }
+                       else if (g_peerNodeAddrList.hasAddress(addr))
+                       {
+                          if (!PeerNodeIsRunning(addr))
+                          {
+               UnlockDB();
+               nxlog_write(MSG_DB_LOCK_REMOVED, EVENTLOG_INFORMATION_TYPE, NULL);
+               goto retry_db_lock;
+                          }
+                       }
                        nxlog_write(MSG_DB_LOCKED, EVENTLOG_ERROR_TYPE, "As", &addr, buffer);
                }
+               else
+      {
+         nxlog_write(MSG_INIT_LOCKS_FAILED, EVENTLOG_ERROR_TYPE, NULL);
+      }
                return FALSE;
        }
        g_flags |= AF_DB_LOCKED;