logbot-nightly 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. #!/usr/bin/perl
  2. # update channel metadata, etc
  3. use local::lib;
  4. use v5.10;
  5. use strict;
  6. use warnings;
  7. use FindBin qw( $RealBin );
  8. use lib "$RealBin/lib";
  9. BEGIN {
  10. $ENV{TZ} = 'UTC';
  11. $ENV{CRON} = !$ENV{DEBUG};
  12. }
  13. use Cpanel::JSON::XS qw( encode_json );
  14. use DateTime ();
  15. use List::Util qw( any );
  16. use LogBot::Config qw( find_config load_all_configs load_config reload_config save_config );
  17. use LogBot::Database qw( dbh replace_sql_placeholders );
  18. use LogBot::Util qw( file_for logbot_init nick_is_bot round spurt );
  19. use LogBot::Web::Colour qw( nick_hash );
  20. use Mojo::Log ();
  21. use Readonly;
  # A channel whose most recent (non-ignored) message is older than this many
  # seconds -- half a year -- is archived by archive_stale_channels().
  Readonly::Scalar my $ARCHIVE_TIME => 60 * 60 * 24 * (365 / 2);

  # A channel counts as empty while it has fewer than EMPTY_THRESHOLD
  # (non-ignored) messages.  An empty channel is deleted once its first logged
  # message is more than DELETE_TIME seconds (five days) old.
  Readonly::Scalar my $EMPTY_THRESHOLD => 5;
  Readonly::Scalar my $DELETE_TIME     => 60 * 60 * 24 * 5;

  # Decide which configs to process: all of them with --all, otherwise the
  # single config identified by the first command line argument.
  my @configs;
  if (@ARGV && $ARGV[0] eq '--all') {
      shift;
      @configs = values %{ load_all_configs() };
  } else {
      push @configs, load_config(find_config(shift));
  }
  @configs || die "syntax: logbot-nightly <config file|--all>\n";

  # shared by the subs below to record deletions and archivals
  my $log = Mojo::Log->new(path => "$RealBin/log/nightly.log");

  # process the configs in a stable (name) order
  foreach my $config (sort { $a->{name} cmp $b->{name} } @configs) {
      logbot_init($config, name => 'logbot-nightly', quiet => 1);

      # generate meta data (can be skipped by setting NO_META)
      generate_meta($config) unless $ENV{NO_META};

      # stop logging and leave stale channels
      say $config->{name}, ': archiving stale channels' if $ENV{DEBUG};
      archive_stale_channels($config);

      # remove orphaned data
      say $config->{name}, ': removing orphaned entries' if $ENV{DEBUG};
      remove_orphaned_entries($config);
  }
  # Generate every metadata file for one config: first the network-wide
  # files, then the per-channel files for each channel that keeps logs.
  #
  #   $config - loaded config hashref; channels are read from $config->{channels}
  sub generate_meta {
      my ($config) = @_;

      # an undefined channel selects the network-wide (all channels) variant
      generate_channel_meta($config, undef);
      generate_hours_meta($config, undef);

      # '_empty' is processed ahead of the configured channels
      my @names = ('_empty', sort keys %{ $config->{channels} });
      for my $name (@names) {
          # NOTE: for '_empty' this lookup autovivifies a placeholder entry in
          # $config->{channels}; it is removed again after the loop.
          my $skip = $config->{channels}->{$name}->{no_logs};
          unless ($skip) {
              say $config->{name}, ': ', $name if $ENV{DEBUG};

              # data for the "about #channel" view
              generate_channel_meta($config, $name);
              generate_hours_meta($config, $name);
              generate_nicks_meta($config, $name);
          }
      }

      # drop the placeholder created by the loop above
      delete $config->{channels}->{_empty};
  }
  # Write the summary statistics ("meta") file for a channel or the network.
  #
  #   $config  - loaded config hashref
  #   $channel - channel name, or undef/false for network-wide statistics
  #
  # The JSON object written to file_for($config, 'meta', $channel, 'meta') has:
  #   first_time         - smallest `time` value in the matching log rows
  #   active_events      - number of rows from the last six months
  #   active_nicks       - number of distinct nicks in the last six months
  #   event_count        - total number of rows (rounded to the nearest 1000
  #                        for the network-wide file)
  #   active_events_days - length of the six month window, in days
  sub generate_channel_meta {
      my ($config, $channel) = @_;

      my $dbh         = dbh($config);
      my $now         = DateTime->now();
      my $output_file = file_for($config, 'meta', $channel, 'meta');
      my $six_months  = $now->clone()->subtract(months => 6);

      # The per-channel and network-wide queries differ only by the
      # "channel = ?" condition, so build the WHERE clause in one place.
      # The cutoff is an integer epoch computed here, so it is embedded in
      # the SQL text as-is.
      my @bind   = $channel ? ($channel) : ();
      my $recent = 'time >= ' . $six_months->epoch;
      my $where  = sub {
          my @conditions = @_;
          unshift @conditions, 'channel = ?' if $channel;
          return @conditions ? ' WHERE ' . join(' AND ', @conditions) : '';
      };

      # key => SQL pairs, kept as a list so the queries run in a fixed order
      my @queries = (
          first_time    => 'SELECT time FROM logs' . $where->() . ' ORDER BY time ASC LIMIT 1',
          active_events => 'SELECT COUNT(*) FROM logs' . $where->($recent),
          active_nicks  => 'SELECT COUNT(*) FROM (SELECT DISTINCT nick FROM logs' . $where->($recent) . ')',
          event_count   => 'SELECT COUNT(*) FROM logs' . $where->(),
      );

      my $meta = {};
      while (my ($key, $sql) = splice(@queries, 0, 2)) {
          # scalar assignment: selectrow_array returns the single selected column
          $meta->{$key} = $dbh->selectrow_array($sql, undef, @bind);
      }

      $meta->{active_events_days} = $six_months->delta_days($now)->in_units('days');

      # because we calculate the event count nightly for the network-wide stats,
      # round to nearest 1000 to make it clear it's an approximation
      if (!$channel) {
          $meta->{event_count} = round($meta->{event_count} / 1000) * 1000;
      }

      my $json = encode_json($meta);
      say ' ', $json if $ENV{DEBUG};
      spurt($output_file, $json);
  }
  # Write the time-of-day activity histogram for a channel or the network.
  #
  #   $config  - loaded config hashref
  #   $channel - channel name, or undef/false for all channels
  #
  # Log rows are counted by second-of-day, then folded into tenth-of-an-hour
  # buckets.  The JSON written to file_for($config, 'meta', $channel, 'hours')
  # holds one [hour, count] pair per bucket from 0.0 to 23.9 together with
  # the `lines` display options.
  sub generate_hours_meta {
      my ($config, $channel) = @_;
      my $dbh         = dbh($config);
      my $output_file = file_for($config, 'meta', $channel, 'hours');

      # seconds since midnight, truncated to an integer
      my $second_of_day = 'CAST(time % (24 * 3600) AS INT)';

      my $sql = 'SELECT ' . $second_of_day . ', COUNT(*) FROM logs';
      if ($channel) {
          $sql .= ' WHERE channel = ?';
      }
      $sql .= ' GROUP BY ' . $second_of_day;

      my @values = $channel ? ($channel) : ();
      say ' ', replace_sql_placeholders($dbh, $sql, \@values) if $ENV{DEBUG};

      # accumulate the per-second counts into buckets keyed '0.0' .. '23.9'
      my %count_for;
      my $rows = $dbh->selectall_arrayref($sql, undef, @values);
      foreach my $row (@{$rows}) {
          my ($seconds, $events) = @{$row};
          my $bucket = sprintf('%.1f', $seconds / (60 * 60));

          # the last seconds of the day round up to 24.0; wrap them to midnight
          $bucket = '0.0' if $bucket == 24;
          $count_for{$bucket} += $events;
      }

      # emit every bucket, including the empty ones, in ascending order
      my @data;
      foreach my $tenth (0 .. 239) {
          my $bucket = sprintf('%.1f', $tenth / 10);
          push @data, [$bucket * 1.0, ($count_for{$bucket} || 0) * 1];
      }

      my $json = {
          data  => \@data,
          lines => {
              show => \1,
              fill => \1,
          },
      };
      spurt($output_file, encode_json($json));
  }
  # Write the "top nicks" file for a channel: the 20 nicks with the most log
  # rows, most active first (ties broken by nick).
  #
  #   $config  - loaded config hashref
  #   $channel - channel name; when false the query covers all channels
  #
  # Each entry written to file_for($config, 'meta', $channel, 'nicks') has the
  # `nick` and `count` columns plus `hash` (from nick_hash) and `bot` (from
  # nick_is_bot).
  sub generate_nicks_meta {
      my ($config, $channel) = @_;
      my $dbh         = dbh($config);
      my $output_file = file_for($config, 'meta', $channel, 'nicks');

      # assemble the statement from its clauses
      my @clauses = ('SELECT nick, COUNT(*) AS count FROM logs');
      push @clauses, 'WHERE channel = ?' if $channel;
      push @clauses, 'GROUP BY nick', 'ORDER BY COUNT(*) DESC, nick ASC', 'LIMIT 20';
      my $sql = join(' ', @clauses);

      my @values = $channel ? ($channel) : ();
      say ' ', replace_sql_placeholders($dbh, $sql, \@values) if $ENV{DEBUG};

      # one hashref per row, keyed by column name
      my $rows = $dbh->selectall_arrayref($sql, { Slice => {} }, @values);

      my @ranked;
      foreach my $row (@{$rows}) {
          $row->{hash} = nick_hash($row->{nick});
          $row->{bot}  = nick_is_bot($config, $row->{nick});
          push @ranked, $row;
      }

      spurt($output_file, encode_json(\@ranked));
  }
  # Delete channels that never saw real activity and archive channels that
  # have gone quiet, updating the saved config accordingly.
  #
  #   $config - loaded config hashref
  #
  # For every channel that is neither disabled nor already archived:
  #   * fewer than $EMPTY_THRESHOLD counted messages, and either nothing
  #     logged at all or a first message older than $DELETE_TIME: the channel
  #     is removed from the config;
  #   * most recent counted message older than $ARCHIVE_TIME: the channel is
  #     flagged as archived.
  # Notices from the IRC network itself and the bot's own messages are not
  # counted.  If any channel was changed, logbot-irc is asked to reload its
  # config.
  sub archive_stale_channels {
      my ($config) = @_;
      my $now      = time();
      my $reload   = 0;

      foreach my $channel (sort keys %{ $config->{channels} }) {
          next if $config->{channels}->{$channel}->{disabled};
          next if $config->{channels}->{$channel}->{archived};

          my $dbh = dbh($config);

          # some messages are always ignored when determining channel activity
          my @ignore;

          # notices from the irc network itself
          # just stripping ^irc. isn't ideal but works for most networks.
          (my $network = $config->{irc}->{host}) =~ s/^irc\.//;
          $network =~ s/:\d+$//;

          # quote the pattern so an unusual host name cannot break the SQL
          push @ignore, 'type = 2 AND nick LIKE ' . $dbh->quote('%.' . $network);

          # ignore our own messages
          push @ignore, 'nick = ' . $dbh->quote($config->{irc}->{nick});

          my $where = 'channel = ' . $dbh->quote($channel) . ' AND NOT ((' . join(') OR (', @ignore) . '))';

          # empty channels
          my $count = $dbh->selectrow_array('SELECT COUNT(*) FROM logs WHERE ' . $where);
          if ($count < $EMPTY_THRESHOLD) {
              # the age check uses the very first row, ignored messages included
              my $first_time =
                  $dbh->selectrow_array('SELECT time FROM logs WHERE channel = ? ORDER BY time LIMIT 1', undef, $channel);
              next if $first_time && $now - $first_time <= $DELETE_TIME;

              my $msg = $config->{name} . ": channel $channel is empty ($count) and stale, deleting";
              $log->info($msg);
              say $msg if $ENV{DEBUG};

              # re-read the config right before changing and saving it
              $config = reload_config($config);
              delete $config->{channels}->{$channel};
              save_config($config);
              $reload = 1;
              next;
          }

          # stale channels
          my $last_time = $dbh->selectrow_array('SELECT time FROM logs WHERE ' . $where . ' ORDER BY time DESC LIMIT 1');
          next unless $last_time;
          if ($now - $last_time > $ARCHIVE_TIME) {
              my $msg = $config->{name} . ": $channel is stale, archiving";
              $log->info($msg);
              say $msg if $ENV{DEBUG};

              # re-read the config right before changing and saving it
              $config = reload_config($config);
              $config->{channels}->{$channel}->{archived} = 1;
              save_config($config);
              $reload = 1;
          }
      }

      # tell the irc bot about the changed channel list
      if ($reload) {
          # list form of system(): no shell is involved
          my $status = system("$RealBin/logbot-irc", $config->{_derived}->{file}, '--reload');
          if ($status != 0) {
              $log->error($config->{name} . ": logbot-irc --reload failed (status $status)");
          }
      }
  }
  # Delete database rows that belong to channels no longer present in the
  # config.
  #
  #   $config - loaded config hashref; the keys of $config->{channels} are the
  #             channels to keep
  #
  # Rows in `logs` and `topics` whose channel is not configured are deleted.
  sub remove_orphaned_entries {
      my ($config) = @_;
      my $dbh = dbh($config, read_write => 1);

      my @channels = sort keys %{ $config->{channels} };

      # hash lookup instead of scanning @channels once per database channel
      my %is_configured = map { $_ => 1 } @channels;

      my $db_channels = $dbh->selectcol_arrayref('SELECT DISTINCT channel FROM logs ORDER BY channel');
      my @orphans     = grep { !$is_configured{$_} } @{ $db_channels || [] };

      if (@orphans) {
          say 'deleting orphaned channels: ', join(', ', @orphans) if $ENV{DEBUG};
          my $placeholders = join(',', ('?') x scalar(@orphans));
          $dbh->do('DELETE FROM logs WHERE channel in (' . $placeholders . ')', undef, @orphans);
      }

      if (@channels) {
          my $placeholders = join(',', ('?') x scalar(@channels));
          $dbh->do('DELETE FROM topics WHERE NOT (channel in (' . $placeholders . '))', undef, @channels);
      } else {
          # With no configured channels every topic is orphaned.  An empty
          # "IN ()" list is a SQLite extension, so use the plain form, which
          # deletes the same rows.
          $dbh->do('DELETE FROM topics');
      }
  }
  234. }