#!/usr/bin/perl =head1 SYNOPSIS remove_session [--age time] [session_ids ...] =head1 DESCRIPTION The C script removes the specified sessions from the simba database and then cleans up orphaned entries. The sessions can be specified by minimum age or by listing their ids (or both, but that is probably not very useful). The age is specified as a fractional number followed by a unit: "y", "m", "w", or "d". So C removes all sessions older than 2.5 years (actually 2.5 * 365 * 86400 seconds -- leap years and DST are ignored), and C removes all sessions older than 52 weeks. If neither the age nor a list of sessions is specified, no sessions are removed, but the cleanup phase is still run, which may take considerable time (and might be considered a bug). =cut # This script removes all data associated with the given sessions. # For each session it first removes all instances of that session and # then cleans up any orphans. use warnings; use strict; use Simba::CA; use Bit::Vector::Judy; use Getopt::Long; use Pod::Usage; $| = 1; my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba", }); my $dbh = $ca->{dbh}; my $partition_size = $ca->{instances_part_size}; my %opt; GetOptions( \%opt, "age=s", "help", ) or pod2usage(verbose => 0); if ($opt{help}) { pod2usage(verbose => 2); } if ($opt{age}) { my ($num, $unit) = $opt{age} =~ /(\d(?:.\d+)?)(y|m|w|d)/; my $scale = { y => 365 * 86400, m => 30 * 86400, w => 7 * 86400, d => 1 * 86400 }->{$unit}; die "unknown time unit $unit" unless $scale; my $expired_sessions = $dbh->selectcol_arrayref("select id from sessions where start_date < ? order by id", {}, time() - $num * $scale); push @ARGV, @$expired_sessions; } for my $session (@ARGV) { print "deleting instances of session $session\n"; my $old_min_id = $dbh->selectrow_array("select min(id) from instances"); my $n_instances = $dbh->do("delete from instances where session=?", {}, $session); print "\t$n_instances instances deleted\n"; # Check if we just crossed into a new partition, if so, the old one is empty # and should be shrunk to minimum size. # # Note: This will not shrink partitions if we delete a session somewhere # in the middle, but I expect to do that rarely, and rebuilding a partition # after expiring a single session isn't worthwhile anyway. If I delete lots # of instances in the middle, I can always rebuild the affected partitions # manually. my $new_min_id = $dbh->selectrow_array("select min(id) from instances"); if (int($new_min_id/$partition_size) > int($old_min_id/$partition_size)) { my $partition = sprintf("p%03d", int($new_min_id/$partition_size)); $dbh->do("alter table instances rebuild partition $partition"); print "\trebuilt partition $partition\n"; } $dbh->commit(); } remove_orphaned_sessions(); remove_orphaned_files(); remove_orphaned_versions(); $dbh->disconnect(); exit(); sub remove_orphaned_sessions { print "deleting orphaned sessions\n"; my $sessions = $dbh->selectcol_arrayref( q{select s.id from instances i right outer join sessions s on i.session=s.id where i.id is null} ); for my $session (@$sessions) { $dbh->do(q{delete from sessions where id=?}, {}, $session); print "\tsession $session deleted\n"; } $dbh->commit(); } sub remove_orphaned_files { print "deleting orphaned files\n"; my $files = $dbh->selectcol_arrayref( q{select f.id from instances i right outer join files f on i.file=f.id where i.id is null} ); for my $file (@$files) { $dbh->do(q{delete from files where id=?}, {}, $file); print "\tfile $file deleted\n"; } $dbh->commit(); } sub remove_orphaned_versions { # This differs from the other two because mysql doesn't find a good plan for # the outer join: It does an index lookup on instances for every row of # versions2. For the other tables that's good because sessions and files are # much smaller than instances, but there is only about a factor of 10 # between versions2 and instances, so reading both sequentally is much # better. Surprisingly, perl is also faster at eliminating duplicates than # mysql, so just doing two selects and doing all the work in perl is faster # than “select distinct … minus …” though not much. print "deleting orphaned versions\n"; my $sth; $dbh->{'mysql_use_result'} = 1; my $versions = Bit::Vector::Judy->new; $sth = $dbh->prepare("select id from versions2"); $sth->execute; my $i = 0; while (my $version = $sth->fetchrow_array) { if ($i % 1_000_000 == 0) { print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n"; } $versions->set($version); $i++; } $sth = $dbh->prepare("select version from instances"); $sth->execute; $i = 0; while (my $version = $sth->fetchrow_array) { if ($i % 1_000_000 == 0) { print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n"; } $versions->unset($version); $i++; } $dbh->{'mysql_use_result'} = 0; print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n"; for (my $version = $versions->first(0); $version; $version = $versions->next($version)) { $dbh->do(q{delete from versions2 where id=?}, {}, $version); print "\tversion $version deleted\n"; } $dbh->commit(); } # vim: tw=132