123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283 |
- #!/usr/bin/perl
- # update channel metadata, etc
- use local::lib;
- use v5.10;
- use strict;
- use warnings;
- use FindBin qw( $RealBin );
- use lib "$RealBin/lib";
- BEGIN {
- $ENV{TZ} = 'UTC';
- $ENV{CRON} = !$ENV{DEBUG};
- }
- use Cpanel::JSON::XS qw( encode_json );
- use DateTime ();
- use List::Util qw( any );
- use LogBot::Config qw( find_config load_all_configs load_config reload_config save_config );
- use LogBot::Database qw( dbh replace_sql_placeholders );
- use LogBot::Util qw( file_for logbot_init nick_is_bot round spurt );
- use LogBot::Web::Colour qw( nick_hash );
- use Mojo::Log ();
- use Readonly;
- # channels with activity are archived after 6 months.
- Readonly::Scalar my $ARCHIVE_TIME => 60 * 60 * 24 * (365 / 2);
- # newly logged channels have 5 days for activity before they are deleted.
- # a channel is considered empty if there's fewer than EMPTY_THRESHOLD messages.
- Readonly::Scalar my $EMPTY_THRESHOLD => 5;
- Readonly::Scalar my $DELETE_TIME => 60 * 60 * 24 * 5;
- my @configs;
- if (@ARGV && $ARGV[0] eq '--all') {
- shift;
- @configs = values %{ load_all_configs() };
- } else {
- push @configs, load_config(find_config(shift));
- }
- @configs || die "syntax: syntax: logbot-nightly <config file|--all>\n";
- my $log = Mojo::Log->new(path => "$RealBin/log/nightly.log");
- foreach my $config (sort { $a->{name} cmp $b->{name} } @configs) {
- logbot_init($config, name => 'logbot-nightly', quiet => 1);
- # generate meta data
- generate_meta($config) unless $ENV{NO_META};
- # stop logging and leave stale channels
- say $config->{name}, ': archiving stale channels' if $ENV{DEBUG};
- archive_stale_channels($config);
- # remove orphaned data
- say $config->{name}, ': removing orphaned entries' if $ENV{DEBUG};
- remove_orphaned_entries($config);
- }
- sub generate_meta {
- my ($config) = @_;
- # generate network meta (all channels)
- generate_channel_meta($config, undef);
- generate_hours_meta($config, undef);
- foreach my $channel ('_empty', sort keys %{ $config->{channels} }) {
- next if $config->{channels}->{$channel}->{no_logs};
- say $config->{name}, ': ', $channel if $ENV{DEBUG};
- # generate data for about #channel
- generate_channel_meta($config, $channel);
- generate_hours_meta($config, $channel);
- generate_nicks_meta($config, $channel);
- }
- delete $config->{channels}->{_empty};
- }
- sub generate_channel_meta {
- my ($config, $channel) = @_;
- my $dbh = dbh($config);
- my $now = DateTime->now();
- my $output_file = file_for($config, 'meta', $channel, 'meta');
- my $six_months = $now->clone()->subtract(months => 6);
- my $one_month = $now->clone()->subtract(months => 1);
- my $meta = {};
- if ($channel) {
- #<<<
- $meta->{first_time} = $dbh->selectrow_array(
- 'SELECT time FROM logs WHERE channel = ? ORDER BY time ASC LIMIT 1',
- undef, $channel
- );
- $meta->{active_events} = $dbh->selectrow_array(
- 'SELECT COUNT(*) FROM logs WHERE channel = ? AND time >= ' . $six_months->epoch,
- undef, $channel
- );
- $meta->{active_nicks} = $dbh->selectrow_array(
- 'SELECT COUNT(*) FROM ('
- . 'SELECT DISTINCT nick FROM logs WHERE channel = ? AND time >= ' . $six_months->epoch . ')',
- undef, $channel
- );
- $meta->{event_count} = $dbh->selectrow_array(
- 'SELECT COUNT(*) FROM logs WHERE channel = ?',
- undef, $channel
- );
- #>>>
- } else {
- #<<<
- $meta->{first_time} = $dbh->selectrow_array(
- 'SELECT time FROM logs ORDER BY time ASC LIMIT 1'
- );
- $meta->{active_events} = $dbh->selectrow_array(
- 'SELECT COUNT(*) FROM logs WHERE time >= ' . $six_months->epoch
- );
- $meta->{active_nicks} = $dbh->selectrow_array(
- 'SELECT COUNT(*) FROM ('
- . 'SELECT DISTINCT nick FROM logs WHERE time >= ' . $six_months->epoch . ')'
- );
- $meta->{event_count} = $dbh->selectrow_array(
- 'SELECT COUNT(*) FROM logs'
- );
- #>>>
- }
- $meta->{active_events_days} = $six_months->delta_days($now)->in_units('days');
- # because we calculate the event count nightly for the network-wide stats,
- # round to nearest 1000 to make it clear it's an approximation
- if (!$channel) {
- $meta->{event_count} = round($meta->{event_count} / 1000) * 1000;
- }
- say ' ', encode_json($meta) if $ENV{DEBUG};
- spurt($output_file, encode_json($meta));
- }
- sub generate_hours_meta {
- my ($config, $channel) = @_;
- my $dbh = dbh($config);
- my $output_file = file_for($config, 'meta', $channel, 'hours');
- my $sql = 'SELECT CAST(time % (24 * 3600) AS INT), COUNT(*) FROM logs';
- $sql .= ' WHERE channel = ?' if $channel;
- $sql .= ' GROUP BY CAST(time % (24 * 3600) AS INT)';
- my @values;
- push @values, $channel if $channel;
- say ' ', replace_sql_placeholders($dbh, $sql, \@values) if $ENV{DEBUG};
- my %values;
- foreach my $row (@{ $dbh->selectall_arrayref($sql, undef, @values) }) {
- my ($ss, $value) = @{$row};
- my $hh = sprintf('%.1f', $ss / (60 * 60));
- $hh = '0.0' if $hh == 24;
- $values{$hh} += $value;
- }
- my @data;
- for (my $hh = 0; $hh < 24; $hh += 0.1) {
- $hh = sprintf('%.1f', $hh);
- push @data, [$hh * 1.0, ($values{$hh} || 0) * 1];
- }
- my $json = {
- data => \@data,
- lines => {
- show => \1,
- fill => \1,
- },
- };
- spurt($output_file, encode_json($json));
- }
- sub generate_nicks_meta {
- my ($config, $channel) = @_;
- my $dbh = dbh($config);
- my $output_file = file_for($config, 'meta', $channel, 'nicks');
- my $sql = 'SELECT nick, COUNT(*) AS count FROM logs';
- $sql .= ' WHERE channel = ?' if $channel;
- $sql .= ' GROUP BY nick';
- $sql .= ' ORDER BY COUNT(*) DESC, nick ASC';
- $sql .= ' LIMIT 20';
- my @values;
- push @values, $channel if $channel;
- say ' ', replace_sql_placeholders($dbh, $sql, \@values) if $ENV{DEBUG};
- my $data = $dbh->selectall_arrayref($sql, { Slice => {} }, @values);
- my $json = [];
- foreach my $event (@{$data}) {
- $event->{hash} = nick_hash($event->{nick});
- $event->{bot} = nick_is_bot($config, $event->{nick});
- push @{$json}, $event;
- }
- spurt($output_file, encode_json($json));
- }
- sub archive_stale_channels {
- my ($config) = @_;
- my $now = time();
- my $reload = 0;
- foreach my $channel (sort keys %{ $config->{channels} }) {
- next if $config->{channels}->{$channel}->{disabled};
- next if $config->{channels}->{$channel}->{archived};
- my $dbh = dbh($config);
- # some messages are always ignored when determining channel activity
- my @ignore;
- # notices from the irc network itself
- # just stripping ^irc. isn't ideal but works for most networks.
- (my $network = $config->{irc}->{host}) =~ s/^irc\.//;
- $network =~ s/:\d+$//;
- push @ignore, "type = 2 AND nick LIKE '%.$network'";
- # ignore our own messages
- push @ignore, 'nick = ' . $dbh->quote($config->{irc}->{nick});
- my $where = 'channel = ' . $dbh->quote($channel) . ' AND NOT ((' . join(') OR (', @ignore) . '))';
- # empty channels
- my $count = $dbh->selectrow_array('SELECT COUNT(*) FROM logs WHERE ' . $where);
- if ($count < $EMPTY_THRESHOLD) {
- my $first_time =
- $dbh->selectrow_array('SELECT time FROM logs WHERE channel = ? ORDER BY time LIMIT 1', undef, $channel);
- next if $first_time && $now - $first_time <= $DELETE_TIME;
- my $msg = $config->{name} . ": channel $channel is empty ($count) and stale, deleting";
- $log->info($msg);
- say $msg if $ENV{DEBUG};
- $config = reload_config($config);
- delete $config->{channels}->{$channel};
- save_config($config);
- next;
- }
- # stale channels
- my $last_time = $dbh->selectrow_array('SELECT time FROM logs WHERE ' . $where . ' ORDER BY time DESC LIMIT 1');
- next unless $last_time;
- if ($now - $last_time > $ARCHIVE_TIME) {
- my $msg = $config->{name} . ": $channel is stale, archiving";
- $log->info($msg);
- say $msg if $ENV{DEBUG};
- $config = reload_config($config);
- $config->{channels}->{$channel}->{archived} = 1;
- save_config($config);
- next;
- }
- }
- if ($reload) {
- system("$RealBin/logbot-irc", $config->{_derived}->{file}, '--reload');
- }
- }
- sub remove_orphaned_entries {
- my ($config) = @_;
- my $dbh = dbh($config, read_write => 1);
- my @channels = sort keys %{ $config->{channels} };
- my $db_channels = $dbh->selectcol_arrayref('SELECT DISTINCT channel FROM logs ORDER BY channel');
- my @orphans;
- foreach my $channel (@{$db_channels}) {
- next if any { $_ eq $channel } @channels;
- push @orphans, $channel;
- }
- if (@orphans) {
- say 'deleting orphaned channels: ', join(', ', @orphans) if $ENV{DEBUG};
- $dbh->do('DELETE FROM logs WHERE channel in (' . join(',', ('?') x scalar(@orphans)) . ')', undef, @orphans);
- }
- $dbh->do('DELETE FROM topics WHERE NOT (channel in (' . join(',', ('?') x scalar(@channels)) . '))',
- undef, @channels);
- }
|