- few more performance parameters added
[public/netxms.git] / src / agent / subagents / oracle / main.cpp
1 /*
2 ** NetXMS subagent for Oracle monitoring
3 ** Copyright (C) 2009-2012 Raden Solutions
4 **/
5
6 #include "oracle_subagent.h"
7
8 CONDITION g_shutdownCondition;
9 MUTEX g_paramAccessMutex;
10 int g_dbCount;
11 DB_DRIVER g_driverHandle = NULL;
12 DatabaseInfo g_dbInfo[MAX_DATABASES];
13 DatabaseData g_dbData[MAX_DATABASES];
14
15 THREAD_RESULT THREAD_CALL queryThread(void *arg);
16
17 DBParameterGroup g_paramGroup[] = {
18 {
19 700, _T("Oracle.Sessions."),
20 _T("select ") DB_NULLARG_MAGIC _T(" ValueName, count(*) Count from v$session"),
21 2, { NULL }, 0
22 },
23 {
24 700, _T("Oracle.Cursors."),
25 _T("select ") DB_NULLARG_MAGIC _T(" ValueName, sum(a.value) Count from v$sesstat a, v$statname b, v$session s where a.statistic# = b.statistic# and s.sid=a.sid and b.name = 'opened cursors current'"),
26 2, { NULL }, 0
27 },
28 {
29 700, _T("Oracle.Objects."),
30 _T("select ") DB_NULLARG_MAGIC _T(" ValueName, count(*) InvalidCount from dba_objects where status!='VALID'"),
31 2, { NULL }, 0
32 },
33 {
34 700, _T("Oracle.DBInfo."),
35 _T("select ") DB_NULLARG_MAGIC _T(" ValueName, name Name, to_char(created) CreateDate, log_mode LogMode, open_mode OpenMode from v$database"),
36 5, { NULL }, 0
37 },
38 {
39 700, _T("Oracle.Instance."),
40 _T("select ") DB_NULLARG_MAGIC _T(" ValueName, version Version, status Status, archiver ArchiverStatus, shutdown_pending ShutdownPending from v$instance"),
41 5, { NULL }, 0
42 },
43 {
44 1000, _T("Oracle.TableSpaces."),
45 _T("select d.tablespace_name ValueName, d.status Status, d.contents Type, to_char(round(used_percent,2)) UsedPct from dba_tablespaces d, dba_tablespace_usage_metrics m where d.tablespace_name=m.tablespace_name"),
46 3, { NULL }, 0
47 },
48 {
49 700, _T("Oracle.Dual."),
50 _T("select ") DB_NULLARG_MAGIC _T(" ValueName, decode(count(*),1,0,1) ExcessRows from dual"),
51 1, { NULL }, 0
52 },
53 {
54 700, _T("Oracle.Performance."),
55 _T("select ") DB_NULLARG_MAGIC _T(" ValueName, (select s.value PhysReads from v$sysstat s, v$statname n where n.name='physical reads' and n.statistic#=s.statistic#) PhysReads, ")
56 _T("(select s.value LogicReads from v$sysstat s, v$statname n where n.name='session logical reads' and n.statistic#=s.statistic#) LogicReads, ")
57 _T("(select round((sum(decode(name,'consistent gets',value,0))+sum(decode(name,'db block gets',value,0))-sum(decode(name,'physical reads',value, 0)))/(sum(decode(name,'consistent gets',value,0))+sum(decode(name,'db block gets',value,0)))*100,2) from v$sysstat) CacheHitRatio, ")
58 _T("(select round(sum(waits)*100/sum(gets),2) from v$rollstat) RollbackWaitRatio, ")
59 _T("(select round((1-(sum(getmisses)/sum(gets)))*100,2) from v$rowcache) DictCacheHitRatio, ")
60 _T("(select round(sum(pins)/(sum(pins)+sum(reloads))*100,2) from v$librarycache) LibCacheHitRatio, ")
61 _T("(select round((100*b.value)/decode((a.value+b.value),0,1,(a.value+b.value)),2) from v$sysstat a,v$sysstat b where a.name='sorts (disk)' and b.name='sorts (memory)') MemorySortRatio, ")
62 _T("(select round(nvl((sum(busy)/(sum(busy)+sum(idle)))*100,0),2) from v$dispatcher) DispatcherWorkload, ")
63 _T("(select bytes from v$sgastat where name='free memory' and pool='shared pool') FreeSharedPool ")
64 _T("from DUAL "),
65 3, { NULL }, 0
66 },
67 {
68 700, _T("Oracle.CriticalStats."),
69 _T("select ") DB_NULLARG_MAGIC _T(" ValueName, (select count(*) TSOFF from dba_tablespaces where status <> 'ONLINE') TSOffCount, ")
70 _T("(select count(*) DFOFF from V$DATAFILE where status not in ('ONLINE','SYSTEM')) DFOffCount, ")
71 _T("(select count(*) from dba_segments where max_extents = extents) FullSegmentsCount, ")
72 _T("(select count(*) from dba_rollback_segs where status <> 'ONLINE') RBSegsNotOnlineCount, ")
73 _T("decode(sign(decode((select upper(log_mode) from v$database),'ARCHIVELOG',1,0)-")
74 _T("decode((select upper(value) from v$parameter where upper(name)='LOG_ARCHIVE_START'),'TRUE',1,0)),1, 1, 0) AutoArchivingOff, ")
75 _T("(select count(file#) from v$datafile_header where recover ='YES') DatafilesNeedMediaRecovery, ")
76 _T("(select count(*) FROM dba_jobs where NVL(failures,0) <> 0) FailedJobs ")
77 _T("from DUAL"),
78 5, { NULL }, 0
79 },
80 0
81 };
82
83 //
84 // Handler functions
85 //
86
87 LONG getParameters(const TCHAR *parameter, const TCHAR *argument, TCHAR *value)
88 {
89 LONG ret = SYSINFO_RC_UNSUPPORTED;
90 TCHAR dbId[MAX_STR];
91 TCHAR entity[MAX_STR];
92
93 // Get id of the database requested
94 if (!AgentGetParameterArg(parameter, 1, dbId, MAX_STR))
95 return ret;
96 if (!AgentGetParameterArg(parameter, 2, entity, MAX_STR) || entity[0] == _T('\0'))
97 nx_strncpy(entity, DB_NULLARG_MAGIC, MAX_STR);
98
99 AgentWriteDebugLog(7, _T("%s: got request for params: dbid='%s', param='%s'"), MYNAMESTR, dbId, parameter);
100
101 // Loop through databases and find an entry in g_dbInfo[] for this id
102 for (int i = 0; i <= g_dbCount; i++)
103 {
104 if (!_tcsnicmp(g_dbInfo[i].id, dbId, MAX_STR)) // found DB
105 {
106 if (argument[0] == _T('R'))
107 {
108 ret_string(value, g_dbInfo[i].connected ? _T("YES") : _T("NO"));
109 ret = SYSINFO_RC_SUCCESS;
110 }
111 // Loop through parameter groups and check whose prefix matches the parameter requested
112 for (int k = 0; argument[0] == _T('X') && g_paramGroup[k].prefix; k++)
113 {
114 if (!_tcsnicmp(g_paramGroup[k].prefix, parameter, _tcslen(g_paramGroup[k].prefix))) // found prefix
115 {
116 MutexLock(g_dbInfo[i].accessMutex);
117 // Loop through the values
118 AgentWriteDebugLog(9, _T("%s: valuecount %d"), MYNAMESTR, g_paramGroup[k].valueCount[i]);
119 for (int j = 0; j < g_paramGroup[k].valueCount[i]; j++)
120 {
121 StringMap* map = (g_paramGroup[k].values[i])[j].attrs;
122 TCHAR* name = (g_paramGroup[k].values[i])[j].name;
123 if (!_tcsnicmp(name, entity, MAX_STR)) // found value which matches the parameters argument
124 {
125 TCHAR key[MAX_STR];
126 nx_strncpy(key, parameter + _tcslen(g_paramGroup[k].prefix), MAX_STR);
127 TCHAR* place = _tcschr(key, _T('('));
128 if (place != NULL)
129 {
130 *place = _T('\0');
131 const TCHAR* dbval = map->get(key);
132 ret_string(value, dbval);
133 ret = SYSINFO_RC_SUCCESS;
134 }
135 break;
136 }
137 }
138 MutexUnlock(g_dbInfo[i].accessMutex);
139
140 break;
141 }
142 }
143 break;
144 }
145 }
146
147 return ret;
148 }
149
150
151 //
152 // Subagent initialization
153 //
154
155 static BOOL SubAgentInit(Config *config)
156 {
157 BOOL result = TRUE;
158 static DatabaseInfo info;
159 int i;
160 static NX_CFG_TEMPLATE configTemplate[] =
161 {
162 { _T("Id"), CT_STRING, 0, 0, MAX_STR, 0, info.id },
163 { _T("Name"), CT_STRING, 0, 0, MAX_STR, 0, info.name },
164 { _T("TnsName"), CT_STRING, 0, 0, MAX_STR, 0, info.name },
165 { _T("UserName"), CT_STRING, 0, 0, MAX_USERNAME, 0, info.username },
166 { _T("Password"), CT_STRING, 0, 0, MAX_PASSWORD, 0, info.password },
167 { _T(""), CT_END_OF_LIST, 0, 0, 0, 0, NULL }
168 };
169
170 // Init db driver
171 #ifdef _WIN32
172 g_driverHandle = DBLoadDriver(_T("oracle.ddr"), NULL, TRUE, NULL, NULL);
173 #else
174 g_driverHandle = DBLoadDriver(LIBDIR "/libnxddr_oracle.so", NULL, TRUE, NULL, NULL);
175 #endif
176 if (g_driverHandle == NULL)
177 {
178 AgentWriteLog(EVENTLOG_ERROR_TYPE, _T("%s: failed to load db driver"), MYNAMESTR);
179 result = FALSE;
180 }
181
182 if (result)
183 {
184 g_shutdownCondition = ConditionCreate(TRUE);
185 }
186
187 // Load configuration from "oracle" section to allow simple configuration
188 // of one database without XML includes
189 memset(&info, 0, sizeof(info));
190 g_dbCount = -1;
191 if (config->parseTemplate(_T("ORACLE"), configTemplate))
192 {
193 if (info.name[0] != 0)
194 {
195 if (info.id[0] == 0)
196 _tcscpy(info.id, info.name);
197 memcpy(&g_dbInfo[++g_dbCount], &info, sizeof(DatabaseInfo));
198 g_dbInfo[g_dbCount].accessMutex = MutexCreate();
199 }
200 }
201
202 // Load full-featured XML configuration
203 if (g_dbCount == -1) // Didn't load anything from the .conf file
204 {
205 for (i = 1; result && i <= MAX_DATABASES; i++)
206 {
207 TCHAR section[MAX_STR];
208 memset((void*)&info, 0, sizeof(info));
209 _sntprintf(section, MAX_STR, _T("oracle/databases/database#%d"), i);
210 if ((result = config->parseTemplate(section, configTemplate)) != TRUE)
211 {
212 AgentWriteLog(EVENTLOG_ERROR_TYPE, _T("%s: error parsing configuration template"), MYNAMESTR);
213 return FALSE;
214 }
215 if (info.name[0] != _T('\0'))
216 memcpy((void*)&g_dbInfo[++g_dbCount], (void*)&info, sizeof(info));
217 else
218 continue;
219 if (info.username[0] == '\0' || info.password[0] == '\0')
220 {
221 AgentWriteLog(EVENTLOG_ERROR_TYPE, _T("%s: error getting username and/or password for "), MYNAMESTR);
222 result = FALSE;
223 }
224 if (result && (g_dbInfo[g_dbCount].accessMutex = MutexCreate()) == NULL)
225 {
226 AgentWriteLog(EVENTLOG_ERROR_TYPE, _T("%s: failed to create mutex (%d)"), MYNAMESTR, i);
227 result = FALSE;
228 }
229 }
230 }
231
232 // Exit if no usable configuration found
233 if (result && g_dbCount < 0)
234 {
235 AgentWriteLog(EVENTLOG_ERROR_TYPE, _T("%s: no databases to monitor"), MYNAMESTR);
236 result = FALSE;
237 }
238
239 // Run query thread for each database configured
240 for (i = 0; result && i <= g_dbCount; i++)
241 {
242 g_dbInfo[i].queryThreadHandle = ThreadCreateEx(queryThread, 0, CAST_TO_POINTER(i, void *));
243 }
244
245 return result;
246 }
247
248
249 //
250 // Shutdown handler
251 //
252
253 static void SubAgentShutdown(void)
254 {
255 AgentWriteLog(EVENTLOG_INFORMATION_TYPE, _T("%s: shutting down"), MYNAMESTR);
256 ConditionSet(g_shutdownCondition);
257 for (int i = 0; i <= g_dbCount; i++)
258 {
259 ThreadJoin(g_dbInfo[i].queryThreadHandle);
260 MutexDestroy(g_dbInfo[i].accessMutex);
261 }
262 ConditionDestroy(g_shutdownCondition);
263 }
264
265 //
266 // Figure out Oracle DBMS version
267 //
268
269 static int getOracleVersion(DB_HANDLE handle)
270 {
271 TCHAR versionString[32];
272
273 DB_RESULT result = DBSelect(handle,_T("select version from v$instance"));
274 if (result == NULL)
275 {
276 AgentWriteLog(EVENTLOG_WARNING_TYPE, _T("%s: query from v$instance failed"), MYNAMESTR);
277 return 700; // assume Oracle 7.0 by default
278 }
279
280 DBGetField(result, 0, 0, versionString, 32);
281 int major = 0, minor = 0;
282 _stscanf(versionString, _T("%d.%d"), &major, &minor);
283 DBFreeResult(result);
284
285 return major * 100 + minor * 10;
286 }
287
288 //
289 // Thread for SQL queries
290 //
291
292 THREAD_RESULT THREAD_CALL queryThread(void* arg)
293 {
294 int dbIndex = CAST_FROM_POINTER(arg, int);
295 DatabaseInfo& db = g_dbInfo[dbIndex];
296 const DWORD pollInterval = 60 * 1000L; // 1 minute
297 int waitTimeout;
298 QWORD startTimeMs;
299 TCHAR errorText[DBDRV_MAX_ERROR_TEXT];
300
301 while (true)
302 {
303 db.handle = DBConnect(g_driverHandle, db.name, NULL /* db.server */, db.username, db.password, NULL, errorText);
304 DBEnableReconnect(db.handle, false);
305 if (db.handle != NULL)
306 {
307 AgentWriteLog(EVENTLOG_INFORMATION_TYPE, _T("%s: connected to DB '%s'"), MYNAMESTR, db.name);
308 db.connected = true;
309 db.version = getOracleVersion(db.handle);
310 }
311
312 while (db.connected)
313 {
314 startTimeMs = GetCurrentTimeMs();
315
316 // Do queries
317 if (!(db.connected = getParametersFromDB(dbIndex)))
318 {
319 break;
320 }
321
322 waitTimeout = pollInterval - DWORD(GetCurrentTimeMs() - startTimeMs);
323 if (ConditionWait(g_shutdownCondition, waitTimeout < 0 ? 1 : waitTimeout))
324 goto finish;
325 }
326
327 // Try to reconnect every 30 secs
328 if (ConditionWait(g_shutdownCondition, DWORD(30 * 1000)))
329 break;
330 }
331
332 finish:
333 if (db.connected && db.handle != NULL)
334 {
335 DBDisconnect(db.handle);
336 }
337
338 return THREAD_OK;
339 }
340
341
342 bool getParametersFromDB( int dbIndex )
343 {
344 bool ret = true;
345 DatabaseInfo& info = g_dbInfo[dbIndex];
346
347 if (!info.connected)
348 {
349 return false;
350 }
351
352 MutexLock(info.accessMutex);
353
354 for (int i = 0; g_paramGroup[i].prefix; i++)
355 {
356 AgentWriteDebugLog(7, _T("%s: got entry for '%s'"), MYNAMESTR, g_paramGroup[i].prefix);
357
358 if (g_paramGroup[i].version > info.version) // this parameter group is not supported for this DB
359 continue;
360
361 // Release previously allocated array of values for this group
362 for (int j = 0; g_paramGroup[i].values[dbIndex] && j < g_paramGroup[i].valueCount[dbIndex]; j++)
363 delete (g_paramGroup[i].values[dbIndex])[j].attrs;
364 safe_free_and_null(g_paramGroup[i].values[dbIndex]);
365
366 DB_RESULT queryResult = DBSelect(info.handle, g_paramGroup[i].query);
367 if (queryResult == NULL)
368 {
369 ret = false;
370 break;
371 }
372
373 int rows = DBGetNumRows(queryResult);
374 g_paramGroup[i].values[dbIndex] = (DBParameter*)malloc(sizeof(DBParameter) * rows);
375 g_paramGroup[i].valueCount[dbIndex] = rows;
376 for (int j = 0; j < rows; j++)
377 {
378 TCHAR colname[MAX_STR];
379 DBGetField(queryResult, j, 0, (g_paramGroup[i].values[dbIndex])[j].name, MAX_STR);
380 (g_paramGroup[i].values[dbIndex])[j].attrs = new StringMap;
381 for (int k = 1; DBGetColumnName(queryResult, k, colname, MAX_STR); k++)
382 {
383 TCHAR colval[MAX_STR];
384 DBGetField(queryResult, j, k, colval, MAX_STR);
385 // AgentWriteDebugLog(9, _T("%s: getParamsFromDB: colname '%s' ::: colval '%s'"), MYNAMESTR, colname, colval);
386 (g_paramGroup[i].values[dbIndex])[j].attrs->set(colname, colval);
387 }
388 }
389
390 DBFreeResult(queryResult);
391 }
392
393 MutexUnlock(info.accessMutex);
394
395 return ret;
396 }
397
398 //
399 // Subagent information
400 //
401
402
403 static NETXMS_SUBAGENT_PARAM m_parameters[] =
404 {
405 { _T("Oracle.Sessions.Count(*)"), getParameters, "X", DCI_DT_INT, _T("Oracle/Sessions: Number of sessions opened") },
406 { _T("Oracle.Cursors.Count(*)"), getParameters, "X", DCI_DT_INT, _T("Oracle/Cursors: Current number of opened cursors systemwide") },
407 { _T("Oracle.DBInfo.IsReachable(*)"), getParameters, "R", DCI_DT_STRING, _T("Oracle/Info: Database is reachable") },
408 { _T("Oracle.DBInfo.Name(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Info: Database name") },
409 { _T("Oracle.DBInfo.CreateDate(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Info: Database creation date") },
410 { _T("Oracle.DBInfo.LogMode(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Info: Database log mode") },
411 { _T("Oracle.DBInfo.OpenMode(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Info: Database open mode") },
412 { _T("Oracle.TableSpaces.Status(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Tablespaces: Status") },
413 { _T("Oracle.TableSpaces.Type(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Tablespaces: Type") },
414 { _T("Oracle.TableSpaces.UsedPct(*)"), getParameters, "X", DCI_DT_INT, _T("Oracle/Tablespaces: Percentage used") },
415 { _T("Oracle.Instance.Version(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Instance: DBMS Version") },
416 { _T("Oracle.Instance.Status(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Instance: Status") },
417 { _T("Oracle.Instance.ArchiverStatus(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Instance: Archiver status") },
418 { _T("Oracle.Instance.ShutdownPending(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/Instance: Is shutdown pending") },
419 { _T("Oracle.CriticalStats.TSOffCount(*)"), getParameters, "X", DCI_DT_INT, _T("Oracle/CriticalStats: Number of offline tablespaces") },
420 { _T("Oracle.CriticalStats.DFOffCount(*)"), getParameters, "X", DCI_DT_INT, _T("Oracle/CriticalStats: Number of offline datafiles") },
421 { _T("Oracle.CriticalStats.FullSegmentsCount(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/CriticalStats: Number of segments that cannot extend") },
422 { _T("Oracle.CriticalStats.RBSegsNotOnlineCount(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/CriticalStats: Number of rollback segments not online") },
423 { _T("Oracle.CriticalStats.AutoArchivingOff(*)"), getParameters, "X", DCI_DT_STRING, _T("Oracle/CriticalStats: Archive logs enabled but auto archiving off ") },
424 { _T("Oracle.CriticalStats.DatafilesNeedMediaRecovery(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/CriticalStats: Number of datafiles that need media recovery") },
425 { _T("Oracle.CriticalStats.FailedJobs(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/CriticalStats: Number of failed jobs") },
426 { _T("Oracle.Dual.ExcessRows(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Dual: Excessive rows") },
427 { _T("Oracle.Performance.PhysReads(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: Number of physical reads") },
428 { _T("Oracle.Performance.LogicReads(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: Number of logical reads") },
429 { _T("Oracle.Performance.CacheHitRatio(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: Data buffer cache hit ratio") },
430 { _T("Oracle.Performance.LibCacheHitRatio(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: Library cache hit ratio") },
431 { _T("Oracle.Performance.DictCacheHitRatio(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: Dictionary cache hit ratio") },
432 { _T("Oracle.Performance.RollbackWaitRatio(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: Ratio of waits for requests to rollback segments") },
433 { _T("Oracle.Performance.MemorySortRatio(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: PGA memory sort ratio") },
434 { _T("Oracle.Performance.DispatcherWorkload(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: Dispatcher workload (percentage)") },
435 { _T("Oracle.Performance.FreeSharedPool(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Performance: Free space in shared pool (bytes)") },
436 { _T("Oracle.Objects.InvalidCount(*)"), getParameters, "X", DCI_DT_INT64, _T("Oracle/Objects: Number of invalid objects in DB") }
437 };
438
439 /*
440 static NETXMS_SUBAGENT_ENUM m_enums[] =
441 {
442 };
443 */
444
445 static NETXMS_SUBAGENT_INFO m_info =
446 {
447 NETXMS_SUBAGENT_INFO_MAGIC,
448 _T("ORACLE"), NETXMS_VERSION_STRING,
449 SubAgentInit, SubAgentShutdown, NULL,
450 sizeof(m_parameters) / sizeof(NETXMS_SUBAGENT_PARAM), m_parameters,
451 0, NULL,
452 /*sizeof(m_parameters) / sizeof(NETXMS_SUBAGENT_PARAM),
453 m_parameters,
454 sizeof(m_enums) / sizeof(NETXMS_SUBAGENT_ENUM),
455 m_enums,*/
456 0, NULL
457 };
458
459
460 //
461 // Entry point for NetXMS agent
462 //
463
464 DECLARE_SUBAGENT_ENTRY_POINT(ORACLE)
465 {
466 *ppInfo = &m_info;
467 return TRUE;
468 }
469
470
471 //
472 // DLL entry point
473 //
474
475 #ifdef _WIN32
476
477 BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved)
478 {
479 if (dwReason == DLL_PROCESS_ATTACH)
480 DisableThreadLibraryCalls(hInstance);
481 return TRUE;
482 }
483
484 #endif