Handle hostnames with upper-case letters
[webmin.git] / status / monitor.pl
#!/usr/local/bin/perl
# monitor.pl
# Check all the monitors and send email if something is down

# Skip ACL checks and drop these environment variables before the module
# library is loaded
$no_acl_check++;
foreach my $var ('FOREIGN_MODULE_NAME', 'SCRIPT_NAME', 'SERVER_ROOT') {
        delete($ENV{$var});
        }
require './status-lib.pl';

# Unless --force was given, only carry on if the current hour and weekday
# are in the configured schedule (an empty list allows everything)
@tm = localtime(time());
if ($ARGV[0] eq "--force") {
        shift(@ARGV);
        }
else {
        @hours = split(/\s+/, $config{'sched_hours'});
        exit if (@hours && &indexof($tm[2], @hours) < 0);
        @days = split(/\s+/, $config{'sched_days'});
        exit if (@days && &indexof($tm[6], @days) < 0);
        }

# Any remaining arguments are IDs of the only monitors to refresh
%onlycheck = ( );
@onlycheck{@ARGV} = (1) x scalar(@ARGV);

# Lock and load the previous status and failure count files
&lock_file($oldstatus_file);
&read_file($oldstatus_file, \%oldstatus);
&lock_file($fails_file);
&read_file($fails_file, \%fails);

# Get the list of services, ordered by sort_func so that services which
# depend on another monitor come after those that do not
@services = sort { &sort_func($a, $b) } &list_services();
@services = grep { $onlycheck{$_->{'id'}} } @services if (%onlycheck);

# Work out the timestamp and hostname used in all notifications
$nowunix = time();
$now = &make_date($nowunix);
($nowdate, $nowtime) = split(/\s+/, $now);
$thishost = &get_system_hostname();
$ecount = 0;
# Check every service on every host it is monitored on, run any configured
# commands, build up the notification messages and record the new status.
# Status codes used below : 1 = up, 0 = down, -1 = not installed,
# -2 = remote Webmin unreachable, -3 = timed out, -4 = failed dependency
foreach $serv (@services) {
        if ($serv->{'nosched'} == 1) {
                # Scheduled checking totally disabled
                delete($oldstatus{$serv->{'id'}});
                next;
                }
        @remotes = &expand_remotes($serv);

        # Check if we depend on something that is down
        if ($serv->{'depend'} && defined($oldstatus{$serv->{'depend'}})) {
                $depend = &get_service($serv->{'depend'});
                $depstats = &expand_oldstatus($oldstatus{$serv->{'depend'}},
                                              $depend);
                @depremotes = split(/\s+/, $depend->{'remote'});
                if ($depstats->{$depremotes[0]} != 1) {
                        # It is .. mark all as failed dependencies
                        $oldstatus{$serv->{'id'}} =
                                join(" ", map { "$_=-4" } @remotes);
                        next;
                        }
                }

        # Find the current status. The warning mode comes from the module
        # config when nosched is 0, otherwise from the service itself :
        # 0 = notify when going down, 1 = notify on any change,
        # 2 = notify whenever down
        $warn = $serv->{'nosched'} == 0 ? $config{'sched_warn'} :
                $serv->{'nosched'} - 2;
        @stats = &service_status($serv);
        $oldstats = &expand_oldstatus($oldstatus{$serv->{'id'}}, $serv);

        # Find the notification modes
        %notify = map { $_, 1 } split(/\s+/, $serv->{'notify'});

        # NOTE: a service-wide failure counter used to be updated here from
        # $stat->{'up'}, but $stat is only set inside the per-host loop below,
        # so it just wrote a meaningless $fails{<id>} entry. Failures are
        # counted per host using the $fid key instead.

        # Check for a status change or failure on each monitored host,
        # and perform the appropriate action
        $newstats = { };
        foreach $r (@remotes) {
                # Work out the hostname
                local $host = $r eq "*" ? $thishost : $r;
                $o = $oldstats->{$r};

                # Get the up/down status
                local ($stat) = grep { $_->{'remote'} eq $r } @stats;
                if (!$stat) {
                        # NOTE(review): no new status is recorded for this
                        # host, so it is saved below with an empty value
                        print STDERR "Failed to find status for $r!\n";
                        next;
                        }

                # If the number of fails before warning is > 1, then the status
                # may still be considered OK even if it is down right now
                local $up = $stat->{'up'};
                local $fid = $serv->{'id'}."-".$r;
                if ($up != 1 && $serv->{'fails'} > 1) {
                        # Not up, but more than one failure is needed for it to
                        # be considered down for alerting purposes.
                        $fails{$fid}++;
                        if ($fails{$fid} < $serv->{'fails'}) {
                                # Not really down yet
                                $up = 1;
                                }
                        }
                else {
                        $fails{$fid} = 0;
                        }

                # Decide which event (if any) happened, and run the command
                # configured for it
                $thisemail = undef;
                $suffix = undef;
                $out = undef;
                if ($warn == 0 && $up == 0 && $o) {
                        # Service has just gone down
                        $suffix = "down";
                        $out = &run_on_command($serv, $serv->{'ondown'});
                        }
                elsif ($warn == 1 && $up != $o &&
                       (defined($o) || $up == 0)) {
                        # Service has changed status
                        if ($up == 0) {
                                # A monitor has gone down
                                $suffix = "down";
                                $out = &run_on_command($serv, $serv->{'ondown'});
                                }
                        elsif ($up == 1 && $o != -4) {
                                # A monitor has come back up after being down
                                $suffix = "up";
                                $out = &run_on_command($serv, $serv->{'onup'});
                                }
                        elsif ($up == -1) {
                                # Detected that a program the monitor depends on
                                # is not installed
                                $suffix = "un";
                                }
                        elsif ($up == -2) {
                                # Cannot contact remote Webmin
                                $suffix = "webmin";
                                }
                        elsif ($up == -3) {
                                # Monitor function timed out
                                $suffix = "timed";
                                $out = &run_on_command($serv,
                                                       $serv->{'ontimeout'});
                                }
                        }
                elsif ($warn == 2 && $up == 0) {
                        # Service is down now
                        $suffix = "isdown";
                        $out = &run_on_command($serv, $serv->{'ondown'});
                        }

                # If something happened, notify people
                if ($suffix) {
                        $subj = &text('monitor_sub_'.$suffix,
                                      $serv->{'desc'}, $host);
                        if ($notify{'pager'}) {
                                $pager_msg .= &make_message($suffix, $host,
                                                            $serv, 'pager');
                                }
                        if ($notify{'sms'}) {
                                $sms_msg .= &make_message($suffix, $host,
                                                          $serv, 'sms');
                                }
                        if ($notify{'snmp'}) {
                                push(@snmp_msg, &make_message($suffix, $host,
                                                              $serv, 'snmp'));
                                }
                        if ($notify{'email'}) {
                                $thisemail .= &make_message($suffix, $host,
                                                              $serv, 'email');
                                if ($out) {
                                        $thisemail .= $out;
                                        }
                                $thisemail .= "\n";
                                $ecount++;
                                }
                        }
                $newstats->{$r} = $up;

                if ($serv->{'email'} && $thisemail) {
                        # If this service has an extra email address specified,
                        # send to it
                        &send_status_email($thisemail,
                          $config{'subject_mode'} ? $subj : &text('monitor_sub', $subj),
                          $serv->{'email'});
                        }

                $email .= $thisemail;
                if ($config{'sched_single'} && $email) {
                        # Force the sending of one email and page per report
                        &send_status_email($email,
                          $config{'subject_mode'} ? $subj : &text('monitor_sub', $subj),
                          $config{'sched_email'});
                        undef($email);
                        if ($pager_msg) {
                                &send_status_pager($pager_msg);
                                undef($pager_msg);
                                }
                        if ($sms_msg) {
                                &send_status_sms($sms_msg);
                                undef($sms_msg);
                                }
                        }

                # If any SNMP messages are defined, send them
                if (@snmp_msg) {
                        &send_status_trap(@snmp_msg);
                        undef(@snmp_msg);
                        }
                }

        # Update old status hash
        $oldstatus{$serv->{'id'}} =
                join(" ", map { "$_=$newstats->{$_}" } @remotes);
        }
229
# Save and unlock the status file, then the fails file
foreach my $saved ([ $oldstatus_file, \%oldstatus ],
                   [ $fails_file, \%fails ]) {
        my ($path, $data) = @$saved;
        &write_file($path, $data);
        &unlock_file($path);
        }

# When not sending one message per report, send everything collected above
# as a single email, page and SMS
if (!$config{'sched_single'}) {
        if ($ecount) {
                # Pick the subject based on config and the number of events
                my $subject;
                if ($config{'subject_mode'}) {
                        $subject = $text{'monitor_sub2'};
                        }
                elsif ($ecount == 1) {
                        $subject = &text('monitor_sub', $subj);
                        }
                else {
                        $subject = &text('monitor_sub3', $ecount);
                        }
                &send_status_email($email, $subject, $config{'sched_email'});
                }
        &send_status_pager($pager_msg) if ($pager_msg);
        &send_status_sms($sms_msg) if ($sms_msg);
        }
250
# send_status_email(text, subject, email-to)
# Sends a plain text message to the given address using the mailboxes
# module. Does nothing if no destination address was supplied.
sub send_status_email
{
my ($body, $subject, $to) = @_;
return if (!$to);
&foreign_require("mailboxes", "mailboxes-lib.pl");

# Use the configured sender if there is one, otherwise the mailboxes
# module's default from address
local $from = $config{'sched_from'} || &mailboxes::get_from_address();
&mailboxes::send_text_mail($from, $to, undef, $subject, $body,
                           $config{'sched_smtp'});
}
263
# send_status_pager(text)
# Send some message with the pager command, if configured. The command from
# the pager_cmd config option is run via the shell with the sched_pager
# destination and the message text as its final two arguments.
sub send_status_pager
{
local ($text) = @_;
return if (!$config{'sched_pager'});
return if (!$config{'pager_cmd'});
system("$config{'pager_cmd'} ".
       &_pager_shell_arg($config{'sched_pager'})." ".
       &_pager_shell_arg($text)." >/dev/null 2>&1 </dev/null");
}

# _pager_shell_arg(string)
# Returns a string wrapped in single quotes so that the shell passes it on
# unchanged as one argument. quotemeta is not suitable for this, as it turns
# a newline into backslash-newline, which the shell removes as a line
# continuation.
sub _pager_shell_arg
{
my ($arg) = @_;
$arg = "" if (!defined($arg));
$arg =~ s/'/'\\''/g;
return "'".$arg."'";
}
274
# send_status_sms(text)
# Sends a message by email to the configured SMS number at the domain of the
# configured carrier. Does nothing if the text, carrier or number is missing,
# or if the carrier is not in the list from list_sms_carriers.
sub send_status_sms
{
local ($text) = @_;
return if (!$text);
return if (!$config{'sched_carrier'});
return if (!$config{'sched_sms'});
&foreign_require("mailboxes", "mailboxes-lib.pl");

local $from = $config{'sched_from'} || &mailboxes::get_from_address();

# Find the first carrier whose ID matches the configured one
local $carrier;
foreach my $candidate (&list_sms_carriers()) {
        if ($candidate->{'id'} eq $config{'sched_carrier'}) {
                $carrier = $candidate;
                last;
                }
        }
return if (!$carrier);

# The message is sent with no subject
local $email = $config{'sched_sms'}."\@".$carrier->{'domain'};
&mailboxes::send_text_mail($from, $email, undef, undef, $text,
                           $config{'sched_smtp'});
}
291
# send_status_trap(msg, ...)
# Send an SNMP trap for some messages, if an SNMP server is configured. Each
# message is sent as a string value bound to the OID from the snmp_trap
# config option, to port 162 on the server. The Net::SNMP module is used if
# it can be loaded, otherwise SNMP_Session. Failures are reported on STDERR.
sub send_status_trap
{
return if (!$config{'snmp_server'});

# Connect to SNMP server
eval "use Net::SNMP qw(OCTET_STRING)";
if (!$@) {
        # Using the Net::SNMP module
        my ($session, $error) = Net::SNMP->session(
                "-hostname" => $config{'snmp_server'},
                "-port" => 162,
                "-version" => $config{'snmp_version'},
                "-community" => $config{'snmp_community'},
                );
        if ($error) {
                print STDERR "SNMP connect failed : $error\n";
                return;
                }

        # Build OIDs list, as (oid, type, value) triplets. Type 4 is an
        # octet string.
        my @oids;
        foreach my $m (@_) {
                push(@oids, $config{'snmp_trap'}, 4, $m);
                }

        # Send off a trap
        my $rv;
        if ($config{'snmp_version'} == 1) {
                $rv = $session->trap(
                        "-varbindlist" => \@oids);
                }
        elsif ($config{'snmp_version'} >= 2) {
                # v2 traps must start with sysUpTime.0 (type 67, timeticks)
                # and snmpTrapOID.0 (type 6, object identifier)
                @oids = ( "1.3.6.1.2.1.1.3.0", 67, 0,
                          "1.3.6.1.6.3.1.1.4.1.0", 6, $config{'snmp_trap'},
                          @oids );
                $rv = $session->snmpv2_trap(
                        "-varbindlist" => \@oids);
                }
        if (!$rv) {
                print STDERR "trap failed! : ",$session->error(),"\n";
                }

        # This function can be called once per monitored host, so don't
        # leave the session open
        $session->close();
        return;
        }
eval "use SNMP_Session";
if (!$@) {
        # Using the SNMP::Session module
        eval "use BER";
        my $session = $config{'snmp_version'} == 1 ?
                        SNMP_Session->open($config{'snmp_server'},
                                           $config{'snmp_community'}, 162) :
                        SNMPv2c_Session->open($config{'snmp_server'},
                                           $config{'snmp_community'}, 162);
        if (!$session) {
                print STDERR "SNMP connect to $config{'snmp_server'} failed\n";
                return;
                }

        # Build the [ oid, value ] pairs that both trap versions send. Empty
        # components (from a leading dot in the OID) are skipped.
        my @trapoid = grep { $_ ne "" } split(/\./, $config{'snmp_trap'});
        my @pairs;
        foreach my $m (@_) {
                push(@pairs, [ encode_oid(@trapoid), encode_string($m) ]);
                }

        my $rv;
        if ($config{'snmp_version'} == 1) {
                # Sent with enterprise OID 1.3.6.1.4.1, generic trap 2 and
                # specific trap 0
                my @myoid = ( 1,3,6,1,4,1 );
                $rv = $session->trap_request_send(
                        encode_oid(@myoid),
                        encode_ip_address(&to_ipaddress(&get_system_hostname())),
                        encode_int(2),
                        encode_int(0),
                        encode_timeticks(0),
                        @pairs
                        );
                }
        elsif ($config{'snmp_version'} == 2) {
                # Takes the trap OID as an array ref, the uptime, and then
                # the pairs to send. Previously no message pairs were built
                # for this case, so the trap carried no message at all.
                $rv = $session->v2_trap_request_send(\@trapoid, 0, @pairs);
                }
        if (!$rv) {
                print STDERR "trap failed!\n";
                }

        $session->close();
        return;
        }
print STDERR "No SNMP perl module found\n";
}
384
# run_on_command(&serv, command)
# Runs the command configured for some event, either on the service's remote
# host (if its runon flag and remote host are set) or on this system. Returns
# undef if no command was given, otherwise text describing what was run,
# followed by the command's output if the output config option is set.
sub run_on_command
{
my ($service, $command) = @_;
return undef if (!$command);
my $out;
if ($service->{'runon'} && $service->{'remote'}) {
        # Run on the remote host. The command is embedded in Perl code that
        # the remote side evaluates, so it is escaped with quotemeta and
        # placed only inside interpolating quotes (backticks or double
        # quotes), which turn the escapes back into the original characters.
        # Inside single quotes the backslashes would be kept, and the shell
        # would see the whole command as a single escaped word.
        my $quoted = quotemeta($command);
        $remote_error_msg = undef;
        &remote_error_setup(\&remote_error_callback);
        if ($config{'output'}) {
                $out = &remote_eval($service->{'remote'}, "status",
                             "`($quoted) 2>&1 </dev/null`");
                }
        else {
                &remote_eval($service->{'remote'}, "status",
                             "system(\"($quoted) >/dev/null 2>&1 </dev/null\")");
                }
        &remote_error_setup(undef);
        if ($remote_error_msg) {
                return &text('monitor_runerr', $command, $service->{'remote'},
                             $remote_error_msg);
                }
        my $rv = &text('monitor_run1', $command, $service->{'remote'})."\n";

        # Include the captured output, as is done for local commands
        $rv .= $out if (defined($out));
        return $rv;
        }
else {
        # Just run locally
        if ($config{'output'}) {
                $out = `($command) 2>&1 </dev/null`;
                return &text('monitor_run2', $command)."\n".
                       $out;
                }
        else {
                system("($command) >/dev/null 2>&1 </dev/null");
                return &text('monitor_run2', $command)."\n";
                }
        }
}
423
# remote_error_callback(message)
# Records an error message in the $remote_error_msg global. Installed as the
# error handler by run_on_command while it runs a remote command.
sub remote_error_callback
{
$remote_error_msg = shift(@_);
}
428
# sort_func(&service-a, &service-b)
# Comparison function for ordering services. Returns 1 if b should be first,
# -1 if a should be first, 0 if same. Services with no dependency come before
# those with one, a service comes after the one it depends on, and anything
# else is ordered by description.
sub sort_func
{
my ($first, $second) = @_;
return 0 if ($first->{'id'} eq $second->{'id'});

my $first_dep = $first->{'depend'};
my $second_dep = $second->{'depend'};

# Only one of them has a dependency
return 1 if ($first_dep && !$second_dep);
return -1 if (!$first_dep && $second_dep);

if ($first_dep && $second_dep) {
        # Both have one, so check if either depends on the other
        return 1 if ($first_dep eq $second->{'id'});
        return -1 if ($second_dep eq $first->{'id'});
        }
return $first->{'desc'} cmp $second->{'desc'};
}
451
# quoted_encode(text)
# Returns a copy of the text in which each = character and each character in
# the range \177 to \377 is replaced by = followed by its two-digit
# upper-case hex code.
sub quoted_encode
{
(my $encoded = $_[0]) =~ s{([=\177-\377])}{sprintf("=%02X", ord($1))}ge;
return $encoded;
}
459
# make_message(status, host, &server, type)
# Returns the message for some email, pager, SMS or SNMP notification. If the
# service has a template with text for the given type it is used, otherwise
# the built-in message for the status is returned.
sub make_message
{
local ($suffix, $host, $serv, $type) = @_;
local $tmpl;
$tmpl = &get_template($serv->{'tmpl'}) if ($serv->{'tmpl'});

if ($tmpl && $tmpl->{$type}) {
        # Construct from template. The current status name is set to 1 and
        # every other name in @monitor_statuses defaults to 0.
        local %hash;
        $hash{'DESC'} = $serv->{'desc'};
        $hash{'HOST'} = $host;
        $hash{'DATE'} = $nowdate;
        $hash{'TIME'} = $nowtime;
        $hash{'STATUS'} = $text{'mon_'.$suffix};
        $hash{uc($suffix)} = 1;
        $hash{uc($_)} ||= 0 foreach (@monitor_statuses);

        # All service attributes are available as SERVICE_ variables
        $hash{'SERVICE_'.uc($_)} = $serv->{$_} foreach (keys %$serv);

        # Make sure the result ends with exactly one newline
        local $rv = &substitute_template($tmpl->{$type}, \%hash);
        $rv =~ s/[\r\n]+$//;
        return $rv."\n";
        }

# Use built-in. SMS and pager messages share the same text.
if ($type eq 'sms' || $type eq 'pager') {
        return &text('monitor_pager_'.$suffix,
                     $host, $serv->{'desc'}, $now);
        }
if ($type eq 'snmp') {
        return &text('monitor_snmp_'.$suffix,
                     $host, $serv->{'desc'});
        }
if ($type eq 'email') {
        return &text('monitor_email_'.$suffix,
                     $host, $serv->{'desc'}, $now)."\n";
        }
}
507