%PDF- %PDF-
Direktori : /usr/share/perl5/vendor_perl/Munin/Master/ |
Current File : //usr/share/perl5/vendor_perl/Munin/Master/ProcessManager.pm |
package Munin::Master::ProcessManager; # $Id$ use warnings; use strict; use Carp; use English qw(-no_match_vars); use IO::Socket; use POSIX qw(:sys_wait_h); use Storable qw(nstore_fd fd_retrieve); use Time::HiRes; use Munin::Common::Timeout; use Munin::Master::Config; use Munin::Master::Logger; use Log::Log4perl qw( :easy ); my $E_DIED = 18; my $E_TIMED_OUT = 19; my $E_CONNECT = 20; my $config = Munin::Master::Config->instance()->{config}; sub new { my ($class, $result_callback, $error_callback) = @_; croak "Argument exception: result_callback" unless ref $result_callback eq 'CODE'; $error_callback ||= sub { warn "Worker failed: @_" }; my $self = { max_concurrent => $config->{max_processes}, socket_file => "$config->{rundir}/munin-master-processmanager-$$.sock", result_callback => $result_callback, error_callback => $error_callback, worker_timeout => 180, timeout => 240, accept_timeout => 10, active_workers => {}, result_queue => {}, pid2worker => {}, }; return bless $self, $class; } sub add_workers { my ($self, @workers) = @_; for my $worker (@workers) { croak "Argument exception: \@workers" unless $worker->isa('Munin::Master::Worker'); } $self->{workers} = \@workers; } sub start_work { my ($self) = @_; DEBUG "[DEBUG] Starting work"; my $sock = $self->_prepare_unix_socket(); $self->_start_waiting_workers(); do_with_timeout($self->{timeout}, sub { $self->_collect_results($sock); return 1; }) or croak "Work timed out before all workers finished"; $self->{workers} = []; DEBUG "[DEBUG] Work done"; $self->_free_socket($sock); } sub _prepare_unix_socket { my ($self) = @_; if ( -e $self->{socket_file} ) { unlink $self->{socket_file} or croak "unlink failed: $!"; } socket my $sock, PF_UNIX, SOCK_STREAM, 0 or croak "socket failed: $!"; bind $sock, sockaddr_un($self->{socket_file}) or croak "bind failed: $!"; chmod oct(700), $self->{socket_file} or croak "chmod failed: $!"; listen $sock, SOMAXCONN or croak "listen failed: $!"; return $sock; } sub _start_waiting_workers { my ($self) = @_; while (@{$self->{workers}}) { DEBUG "[DEBUG] Active workers: " . (scalar keys %{$self->{active_workers}}) . "/" . $self->{max_concurrent}; last if scalar keys %{$self->{active_workers}} == $self->{max_concurrent}; # Here we just start'em, check results in _collect_results $self->_start_next_worker(); } continue { # Sleep for a random time, to avoid too much process creation all at once sleep ( rand ($config->{worker_start_delay}) ) if $config->{worker_start_delay}; } } sub _start_next_worker { my ($self) = @_; my $worker = pop @{$self->{workers}}; my $pid = fork; if (!defined $pid) { LOGCROAK "Forking worker failed: $!"; } elsif ($pid) { $self->{active_workers}{$pid} = $worker; $self->{result_queue}{$worker->{ID}} = $worker; } else { $0 .= " [$worker]"; my $res = $self->_do_work($worker); DEBUG "[DEBUG] Exit status $res for worker $worker"; exit $res; } } sub _collect_results { my ($self, $sock) = @_; while (%{$self->{result_queue}} || @{$self->{workers}}) { do { $self->_vet_finished_workers(); $self->_start_waiting_workers(); } while (!%{$self->{result_queue}} && @{$self->{workers}}); my $worker_sock; DEBUG "[DEBUG] Active workers: " . (scalar keys %{$self->{active_workers}}) . "/" . $self->{max_concurrent}; if (not %{$self->{active_workers}}) { # Nothing left do do last; } my $timed_out = !do_with_timeout($self->{accept_timeout}, sub { accept $worker_sock, $sock; return 1; }); if ($timed_out) { if (defined($self->{host})) { my $nodedesignation = $self->{host}."/". $self->{host}{address}.":".$self->{host}{port}; WARN "[WARNING] Calling accept to collect results from " . $nodedesignation . " failed. This could be due to network problems/firewalled munin-node. Remaining workers: " . join(", ", keys %{$self->{result_queue}}); } else { INFO "[INFO] Remaining workers: " . join(", ", keys %{$self->{result_queue}}); } next; } # if timed_out next unless fileno $worker_sock; my $res = fd_retrieve($worker_sock); my ($worker_id, $real_res) = @$res; delete $self->{result_queue}{$worker_id}; $self->{result_callback}($res) if defined $real_res; } while (%{$self->{active_workers}}) { $self->_vet_finished_workers(); } } sub _free_socket { my ($self, $sock) = @_; unlink $self->{socket_file} or $! ne 'No such file or directory' && croak "unlink failed: $!"; close $sock or croak "socket close failed: $!"; } sub _vet_finished_workers { my ($self) = @_; while ((my $worker_pid = waitpid(-1, WNOHANG)) > 0) { if ($CHILD_ERROR) { DEBUG "[DEBUG] Got error from $worker_pid: $CHILD_ERROR. Handling error"; $self->_handle_worker_error($worker_pid); } my $child_exit = $CHILD_ERROR >> 8; my $child_signal = $CHILD_ERROR & 127; INFO "[INFO] Reaping $self->{active_workers}{$worker_pid}. Exit value/signal: $child_exit/$child_signal"; delete $self->{active_workers}{$worker_pid}; } } sub _handle_worker_error { my ($self, $worker_pid) = @_; my %code2msg = ( $E_TIMED_OUT => 'Timed out', $E_DIED => 'Died', $E_CONNECT => 'Connection failed', ); my $worker_id = $self->{active_workers}{$worker_pid}{ID}; my $exit_code = $CHILD_ERROR >> 8; $self->{error_callback}($self->{active_workers}{$worker_pid}, $code2msg{$exit_code} || $exit_code); } sub _do_work { my ($self, $worker) = @_; DEBUG "[DEBUG] Starting $worker"; my $retval = 0; my $res; eval { my $timed_out = !do_with_timeout($self->{worker_timeout}, sub { $res = $worker->do_work(); return 1; }); if ($timed_out) { ERROR "[ERROR] $worker timed out"; $res = undef; $retval = $E_TIMED_OUT; } elsif (!defined($res)) { ERROR "[ERROR] $worker failed to connect to node"; $retval = $E_CONNECT; } }; if ($EVAL_ERROR) { $EVAL_ERROR =~ s/\R//; ERROR "[ERROR] $worker died with '$EVAL_ERROR'"; $res = undef; $retval = $E_DIED; } # Stop trying if we already erred. return $retval if $retval; my $sock; unless (socket $sock, PF_UNIX, SOCK_STREAM, 0) { ERROR "[ERROR] Unable to create socket: $!"; return $E_DIED; } unless (connect $sock, sockaddr_un($self->{socket_file})) { ERROR "[ERROR] Unable to connect to socket: $!"; return $E_DIED; } nstore_fd([ $worker->{ID}, $res], $sock); close $sock; return $retval; } 1; __END__ =head1 NAME Munin::Master::ProcessManager - Manager for parallel execution of Workers. =head1 SYNOPSIS use Munin::Master::ProcessManager; my $pm = Munin::Master::ProcessManager->new(sub { my ($res) = @_; # Do something with $res ... }); $pm->add_workers(...); $pm->start_work(); =head1 DESCRIPTION FIX =head1 METHODS =over =item B<new> FIX =item B<add_workers> FIX =item B<start_work> FIX =back