1 package Onis::Data::Core;
5 Onis::Data::Core - User management
9 Store data to the internal structure, care about users, nicks and idents and
10 dispatch to plugins. The core of the data even..
18 use Onis::Config qw(get_config);
19 use Onis::Users qw(chatter_to_name);
20 use Onis::Data::Persistent;
21 use Onis::Parser::Persistent qw(get_absolute_time);
23 =head1 NAMING CONVENTION
25 Each and every person in the IRC can be identified by a three-tupel: B<nick>,
26 B<user> and B<host>, most often seen as I<nick!user@host>.
28 The combination of B<user> and B<host> is called an B<ident> here and written
29 I<user@host>. The combination of all three parts is called a B<chatter> here,
30 though it's rarely used.
32 A B<name> is the name of the "user" as defined in the F<users.conf>. Therefore,
33 the F<users.conf> defines a mapping of B<chatter> -E<gt> B<name>.
37 our $GeneralCounters = Onis::Data::Persistent->new ('GeneralCounters', 'key', 'value');
38 our $NickToIdentCache = Onis::Data::Persistent->new ('NickToIdentCache', 'nick', 'ident');
39 our $ChatterList = Onis::Data::Persistent->new ('ChatterList', 'chatter', 'counter');
40 our $ChannelNames = Onis::Data::Persistent->new ('ChannelNames', 'channel', 'counter');
42 @Onis::Data::Core::EXPORT_OK =
44 store unsharp calculate_nicks
46 get_all_nicks get_channel get_main_nick
47 nick_to_ident ident_to_nick
48 nick_to_name ident_to_name
49 get_total_lines get_most_recent_time nick_rename print_output register_plugin
51 @Onis::Data::Core::ISA = ('Exporter');
53 our $LinesThisRun = 0;
55 our $PluginCallbacks = {};
56 our $OutputCallbacks = [];
60 our %NickToIdent = ();
61 our %IdentToNick = ();
63 =head1 CONFIGURATION OPTIONS
67 =item B<unsharp>: I<medium>;
69 Sets the amount of unsharping onis should do. Valid options are I<none>,
70 I<light>, I<medium> and I<hard>.
76 does not do any unsharping.
80 Leaves IP-addresses as they are. The deepest subdomains containing numbers have
81 those numbers removed. So C<dsl-084-056-107-131.arcor-ip.net> becomes
82 C<dsl-*-*-*-*.arcor-ip.net>.
86 Removes the last byte from IP-adresses. So C<84.56.107.131> becomes
87 C<84.56.107.*>. Hostnames have the deepest subdomains removed if they contain
88 numers, so C<dsl-084-056-107-131.arcor-ip.net> becomes C<*.arcor-ip.net> while
89 C<shell.franken.de> is not modified. This is the default and recommended
94 Handles IP-addresses as I<medium>. Hostnames have all subdomains removed, so
95 C<p5493EC60.dip.t-dialin.net> becomes C<*.t-dialin.net> and C<shell.franken.de>
96 becomes C<*.franken.de>.
102 our $UNSHARP = 'MEDIUM';
103 if (get_config ('unsharp'))
105 my $tmp = get_config ('unsharp');
109 if ($tmp eq 'NONE' or $tmp eq 'LIGHT'
117 print STDERR $/, __FILE__, ": ``$tmp'' is not a valid value for config option ``unsharp''.",
118 $/, __FILE__, ": Using standard value ``MEDIUM''.";
122 =item B<channel>: I<name>;
124 Sets the name of the channel. This is mostly automatically figured out, use
125 this if onis doesn't get it right or you want another name..
134 my $VERSION = '$Id$';
135 print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
139 =head1 EXPORTED FUNCTIONS
143 =item B<store> (I<$type>, I<$data>)
145 Passes I<$data> (a hashref) to all plugins which registered for I<$type>. This
146 is the actual workhorse when parsing the file since it will be called once for
149 It will fill I<$data> with I<host>, I<user> and I<ident> if these fields are
150 missing but have been seen for this nick before.
157 my $type = $data->{'type'};
158 my ($nick, $user, $host);
161 if (!defined ($type))
163 print STDERR $/, __FILE__, ": Plugin data did not include a type. This line will be skipped." if ($::DEBUG & 0x20);
167 if (!defined ($data->{'nick'}))
169 print STDERR $/, __FILE__, ": Plugin data did not include a nick. This line will be skipped." if ($::DEBUG & 0x20);
173 $nick = $data->{'nick'};
175 if (defined ($data->{'host'}))
180 ($user, $host) = unsharp ($data->{'host'});
181 $ident = "$user\@$host";
183 $data->{'host'} = $host;
184 $data->{'user'} = $user;
185 $data->{'ident'} = $ident;
187 $NickToIdentCache->put ($nick, $ident);
189 $chatter = "$nick!$ident";
190 ($counter) = $ChatterList->get ($chatter);
191 $counter ||= 0; $counter++;
192 $ChatterList->put ($chatter, $counter);
194 elsif (($ident) = $NickToIdentCache->get ($nick))
196 my $chatter = "$nick!$ident";
198 ($user, $host) = split (m/@/, $ident);
200 $data->{'host'} = $host;
201 $data->{'user'} = $user;
202 $data->{'ident'} = $ident;
204 ($counter) = $ChatterList->get ($chatter);
205 $counter ||= 0; $counter++;
206 $ChatterList->put ($chatter, $counter);
210 $data->{'host'} = $host = '';
211 $data->{'user'} = $user = '';
212 $data->{'ident'} = $ident = '';
215 if ($::DEBUG & 0x0100)
217 print STDERR $/, __FILE__, ": id ($nick) = ", $ident;
220 if (defined ($data->{'channel'}))
222 my $chan = lc ($data->{'channel'});
223 my ($count) = $ChannelNames->get ($chan);
224 $count ||= 0; $count++;
225 $ChannelNames->put ($chan, $count);
228 if (!defined ($data->{'epoch'}))
230 $data->{'epoch'} = get_absolute_time ();
233 if ($::DEBUG & 0x400)
235 my @keys = keys (%$data);
239 my $val = $data->{$key};
240 print STDERR $/, __FILE__, ': ';
241 printf STDERR ("%10s: %s", $key, $val);
246 my ($counter) = $GeneralCounters->get ('lines_total');
249 $GeneralCounters->put ('lines_total', $counter);
251 my ($time) = $GeneralCounters->get ('most_recent_time');
253 $time = $data->{'epoch'} if ($time < $data->{'epoch'});
254 $GeneralCounters->put ('most_recent_time', $time);
259 if (defined ($PluginCallbacks->{$type}))
261 for (@{$PluginCallbacks->{$type}})
270 =item (I<$user>, I<$host>) = B<unsharp> (I<$ident>)
272 Takes an ident (i.e. a user-host-pair, e.g. I<user@host.domain.com> or
273 I<login@123.123.123.123>) and "unsharps it". The unsharp version is then
288 print STDERR $/, __FILE__, ": Unsharp ``$ident''" if ($::DEBUG & 0x100);
290 ($user, $host) = split (m/@/, $ident, 2);
292 @parts = split (m/\./, $host);
293 $num_parts = scalar (@parts);
295 if (($UNSHARP ne 'NONE')
296 and ($user =~ m/^[\~\^\-\+\=](.+)$/))
301 if ($UNSHARP eq 'NONE')
303 return ($user, $host);
305 elsif ($host =~ m/^[\d\.]{7,15}$/)
307 if ($UNSHARP ne 'LIGHT')
314 for ($i = 0; $i < ($num_parts - 2); $i++)
316 if ($UNSHARP eq 'LIGHT')
318 if ($parts[$i] !~ s/\d+/*/g)
323 elsif ($UNSHARP eq 'MEDIUM')
325 if ($parts[$i] =~ m/\d/)
334 else # ($UNSHARP eq 'HARD')
341 $host = lc (join ('.', @parts));
342 $host =~ s/\*(?:\.\*)+/*/;
344 print STDERR " -> ``$user\@$host''" if ($::DEBUG & 0x100);
345 return ($user, $host);
348 =item B<calculate_nicks> ()
350 Iterates over all chatters found so far, trying to figure out which belong to
351 the same person. This function has to be called before any calls to
352 B<get_all_nicks>, B<get_main_nick>, B<get_print_name> and B<nick_to_ident>.
354 This is normally the step after having parsed all files and before doing any
355 output. After this function has been run all the other informative functions
356 return actually usefull information..
358 It does the following: First, it iterates over all chatters and splits them up
359 into nicks and idents. If a (user)name is found for the ident it (the ident) is
360 replaced with it (the name).
362 In the second step we iterate over all nicks that have been found and
363 determines the most active ident for each nick. After this has been done each
364 nick is associated with exactly one ident, but B<not> vice versa.
366 The final step is to iterate over all idents and determine the most active nick
367 for each ident. After some thought you will agree that now each ident exists
368 only once and so does every nick.
379 for ($ChatterList->keys ())
382 my ($nick, $ident) = split (m/!/, $chatter);
383 my ($counter) = $ChatterList->get ($chatter);
385 $nicks->{$nick}{$ident} = 0 unless (defined ($nicks->{$nick}{$ident}));
386 $nicks->{$nick}{$ident} += $counter;
392 my $this_ident = 'unidentified';
397 for (keys %{$nicks->{$this_nick}})
400 my $name = chatter_to_name ("$this_nick!$ident");
401 my $num = $nicks->{$this_nick}{$ident};
407 if (($num >= $this_max) or !$this_name)
410 $this_ident = $ident;
416 if (($num >= $this_max) and !$this_name)
419 $this_ident = $ident;
424 print $/, __FILE__, ": max_ident ($this_nick) = $this_ident" if ($::DEBUG & 0x100);
426 if ($this_ident ne 'unidentified')
430 $name2nick->{$this_name}{$this_nick} = 0 unless (defined ($name2nick->{$this_name}{$this_nick}));
431 $name2nick->{$this_name}{$this_nick} += $this_total;
433 $name2ident->{$this_name}{$this_ident} = 0 unless (defined ($name2nick->{$this_name}{$this_ident}));
434 $name2ident->{$this_name}{$this_ident} += $this_total;
438 $idents->{$this_ident}{$this_nick} = 0 unless (defined ($idents->{$this_ident}{$this_nick}));
439 $idents->{$this_ident}{$this_nick} += $this_total;
442 elsif ($::DEBUG & 0x100)
444 print STDERR $/, __FILE__, ": Ignoring unidentified nick ``$this_nick''";
453 my @other_nicks = ();
455 my @nicks = keys (%{$idents->{$this_ident}});
460 my $num = $idents->{$this_ident}{$nick};
462 if ($num > $this_max)
464 if ($this_nick) { push (@other_nicks, $this_nick); }
470 push (@other_nicks, $nick);
474 print STDERR $/, __FILE__, ": max_nick ($this_ident) = $this_nick" if ($::DEBUG & 0x100);
476 for (@other_nicks, $this_nick)
478 push (@AllNicks, $_);
479 $NickToNick{$_} = $this_nick;
480 $NickToIdent{$_} = $this_ident;
483 $IdentToNick{$this_ident} = $this_nick;
486 for (keys %$name2nick)
493 my @other_nicks = ();
494 my @other_idents = ();
496 for (keys %{$name2nick->{$name}})
499 my $num = $name2nick->{$name}{$nick};
503 push (@other_nicks, $max_nick) if ($max_nick);
509 push (@other_nicks, $nick);
514 for (keys %{$name2ident->{$name}})
517 my $num = $name2ident->{$name}{$ident};
521 push (@other_idents, $max_ident) if ($max_ident);
527 push (@other_idents, $ident);
531 for (@other_nicks, $max_nick)
533 push (@AllNicks, $_);
534 $NickToNick{$_} = $max_nick;
535 $NickToIdent{$_} = $max_ident;
538 for (@other_idents, $max_ident)
540 $IdentToNick{$_} = $max_nick;
545 =item I<@nicks> = B<get_all_nicks> ()
547 Returns an array of all seen nicks.
556 =item I<$channel> = B<get_channel> ()
558 Returns the name of the channel we're generating stats for.
564 my $chan = '#unknown';
565 if (get_config ('channel'))
567 $chan = get_config ('channel');
572 for ($ChannelNames->keys ())
575 my ($num) = $ChannelNames->get ($c);
576 if (defined ($num) and ($num > $max))
584 # Fix network-safe channel named (RFC 2811)
585 if ($chan =~ m/^![A-Z0-9]{5}(.+)/)
593 =item I<$main> = B<get_main_nick> (I<$nick>)
595 Returns the main nick for I<$nick> or an empty string if the nick is unknown..
602 if (defined ($NickToNick{$nick}))
604 return ($NickToNick{$nick});
612 =item I<$ident> = B<nick_to_ident> (I<$nick>)
614 Returns the ident for this nick or an empty string if unknown. Before
615 B<calculate_nicks> is run it will use the database to find the most recent
616 mapping. After B<calculate_nicks> is run the calculated mapping will be used.
627 if (defined ($NickToIdent{$nick}))
629 $ident = $NickToIdent{$nick};
634 ($ident) = $NickToIdentCache->get ($nick);
641 =item I<$nick> = B<ident_to_nick> (I<$ident>)
643 Returns the nick for the given ident or an empty string if unknown.
651 if (defined ($IdentToNick{$ident}))
653 return ($IdentToNick{$ident});
661 =item I<$name> = B<nick_to_name> (I<$nick>)
663 Return the name associated with I<$nick>.
670 my $ident = nick_to_ident ($nick);
674 return (chatter_to_name ("$nick!$ident"));
682 =item I<$name> = B<ident_to_name> (I<$ident>)
684 Returns the name associated with I<$ident>.
691 my $nick = ident_to_nick ($ident);
695 return (chatter_to_name ("$nick!$ident"));
703 =item I<$lines> = B<get_total_lines> ()
705 Returns the total number of lines parsed so far.
711 my ($total) = $GeneralCounters->get ('lines_total');
713 return (qw()) unless ($total);
715 return ($total, $LinesThisRun);
718 =item I<$epoch> = B<get_most_recent_time> ()
720 Returns the epoch of the most recent line received from the parser.
724 sub get_most_recent_time
726 my ($time) = $GeneralCounters->get ('most_recent_time');
732 =item B<nick_rename> (I<$old_nick>, I<$new_nick>)
734 Keeps track of a nick's hostname if the nick changes.
740 my $old_nick = shift;
741 my $new_nick = shift;
744 ($ident) = $NickToIdentCache->get ($old_nick);
746 if (defined ($ident) and ($ident))
748 $NickToIdentCache->put ($new_nick, $ident);
752 =item B<print_output> ()
754 Print the output. Should be called only once..
761 if (!get_total_lines ())
763 print STDERR <<'MESSAGE';
767 The most common reasons for this are:
768 - The logfile used was empty.
769 - The ``logtype'' setting did not match the logfile.
770 - The logfile did not include a date.
778 for (@$OutputCallbacks)
784 =item I<$data> = B<register_plugin> (I<$type>, I<$sub_ref>)
786 Register a subroutine for the given type. Returns a reference to the internal
787 data object. This will change soon, don't use it anymore if possible.
797 if (ref ($sub_ref) ne "CODE")
799 print STDERR $/, __FILE__, ": Plugin tried to register a non-code reference. Ignoring it.";
803 if ($type eq 'OUTPUT')
805 push (@$OutputCallbacks, $sub_ref);
809 if (!defined ($PluginCallbacks->{$type}))
811 $PluginCallbacks->{$type} = [];
815 push (@{$PluginCallbacks->{$type}}, $sub_ref);
817 print STDERR $/, __FILE__, ': ', scalar (caller ()), " registered for ``$type''." if ($::DEBUG & 0x800);
824 Florian octo Forster E<lt>octo at verplant.orgE<gt>