pitagora-network.github.io

ヒストリーのインポート

https://usegalaxy.org/history/export_archive?id=35f5425fcc2feb0f

/home/galaxy/galaxy-dist/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py

python /home/galaxy/galaxy-dist/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py "aHR0cHM6Ly91c2VnYWxheHkub3JnL2hpc3RvcnkvZXhwb3J0X2FyY2hpdmU/aWQ9MzVmNTQyNWZj YzJmZWIwZg== " "L2hvbWUvZ2FsYXh5L2dhbGF4eS1kaXN0L2RhdGFiYXNlL3RtcC90bXA0UmQ3TWg= " --url --encoded

archive_file:  /home/galaxy/galaxy-dist/database/tmp/tmp4Rd7Mh/tmpOtoovr

Error unpacking tar/gz archive: not a gzip file <open file '<stderr>', mode 'w' at 0x7f6e729291e0>

/home/galaxy/galaxy-dist/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py

       if is_url:            #archive_file = url_to_file( archive_source, tempfile.NamedTemporaryFile( dir=dest_dir ).name )            archive_file = archive_source

/home/galaxy/Galaxy-History-imported-RNA-seq-exercise-datasets.tar.gz

不要なデータの削除

誰がたくさん使っているか確認

-- Per-user total size of datasets that are flagged deleted but not yet purged,
-- largest first. size_gb is in GiB assuming dataset.file_size is in bytes.
-- A dataset referenced by several history_dataset_association rows is counted
-- once per row.
SELECT
    u.email,
    SUM(d.file_size) / 1024 / 1024 / 1024 AS size_gb
FROM galaxy_user u
JOIN history h
  ON h.user_id = u.id
JOIN history_dataset_association a
  ON a.history_id = h.id
JOIN dataset d
  ON d.id = a.dataset_id
WHERE d.purged = 0
  AND d.deleted = 1
GROUP BY u.email
ORDER BY size_gb DESC
;

あるユーザーのシェアされていないヒストリー全て

-- List the histories of one user (selected by e-mail) that are neither deleted
-- nor purged and have no row in history_user_share_association, most recently
-- updated first.
SELECT
  h.name
, h.create_time
, h.update_time
FROM
  history h
, galaxy_user u
WHERE
    h.user_id = u.id
AND h.purged = 0
AND h.deleted = 0
-- NOT EXISTS instead of NOT IN: with NOT IN, a single NULL history_id in the
-- share table would make the predicate reject every history.
AND NOT EXISTS (
      SELECT 1
      FROM history_user_share_association s
      WHERE s.history_id = h.id
    )
AND u.email = 'yamanaka@genome.rcast.u-tokyo.ac.jp'
ORDER BY
  h.update_time DESC
;

これらに deleted & purged フラグを付けてしまおう(メンバーがいなくなったときなどに便利?)

-- Flag every active, unshared history of one user (selected by e-mail) as
-- deleted and purged.
--
-- NOTE(review): this only flips the flags in the database. Histories that
-- already carry purged = 1 are presumably skipped by the purge scripts, which
-- may be why the files stay on disk afterwards. Consider setting only
-- deleted = 1 and letting the scripts do the purge; confirm against the
-- Galaxy cleanup scripts.
UPDATE history
SET deleted = 1, purged = 1
WHERE
    user_id = (SELECT id FROM galaxy_user WHERE email = 'yamanaka@genome.rcast.u-tokyo.ac.jp')
AND purged = 0
AND deleted = 0
-- NOT EXISTS instead of NOT IN: with NOT IN, a single NULL history_id in the
-- share table would make the predicate match no history at all.
AND NOT EXISTS (
      SELECT 1
      FROM history_user_share_association s
      WHERE s.history_id = history.id
    )
;

フラグを付けたデータを削除するスクリプト実行、、、でデータが消えない。。。

$ sh delete_userless_histories.sh
$ purge_histories.sh

ジョブの実行時間測定

cd galaxy-dist perl time

# Report how long each job ran, based on the "Job dispatched" and
# "job N ended" lines of a paster log.
#
# Usage: perl SCRIPT [LOGFILE]        (LOGFILE defaults to paster.log)
#
# Output: every matching log line is echoed, followed by one tab-separated
# row per finished job: job id, start time, end time, elapsed seconds.
# Jobs without an "ended" line are reported on STDERR and skipped.
use strict;
use warnings;

use Time::Local 'timegm';
use POSIX 'strftime';

my $file = @ARGV ? $ARGV[0] : "paster.log";
open(my $fh, "<", $file)
  or die "Cannot open $file: $!";

# Both hashes map a job id to the value timegm() computes from the logged
# timestamp. If a job id occurs more than once, the last occurrence wins.
my %dic_sta;
my %dic_end;

while (my $line = readline $fh) {
  if ($line =~ /([0-9]+)-([0-9]+)-([0-9]+) ([0-9]+):([0-9]+):([0-9]+),[0-9]+ \(([0-9]+)\) Job dispatched/) {
    print $line;
    # Captures: year, month, day, hour, minute, second, job id.
    $dic_sta{$7} = timegm($6, $5, $4, $3, $2 - 1, $1);
  }
  if ($line =~ /galaxy.jobs DEBUG ([0-9]+)-([0-9]+)-([0-9]+) ([0-9]+):([0-9]+):([0-9]+),[0-9]+ job ([0-9]+) ended/) {
    print $line;
    $dic_end{$7} = timegm($6, $5, $4, $3, $2 - 1, $1);
  }
}

close($fh);

# Job ids are all digits (see the regexes), so sort them numerically.
foreach my $job_id (sort { $a <=> $b } keys %dic_sta) {
  my $time_sta = $dic_sta{$job_id};
  my $time_end = $dic_end{$job_id};

  # A dispatched job without an end line has no duration to report.
  if (!defined $time_end) {
    warn "job $job_id: no end line found, skipped\n";
    next;
  }

  # The values were built with timegm(), so format them with gmtime() to get
  # back exactly the wall-clock time written in the log; localtime() would
  # shift the displayed times by the local UTC offset.
  print $job_id, "\t",
    strftime("%Y-%m-%d %H:%M:%S", gmtime($time_sta)), "\t",
    strftime("%Y-%m-%d %H:%M:%S", gmtime($time_end)), "\t",
    $time_end - $time_sta, "\n";
}

I/O量測定

cd iostat.sh

#!/bin/bash
# Append CPU (-c) and extended device (-x) statistics in kilobytes (-k) to
# iostat_out.txt, one report every 2 seconds, until interrupted.
#
# iostat runs once in interval mode rather than being restarted in a loop.
# A report from a fresh iostat invocation covers the time since boot, so a
# restart loop keeps logging nearly identical since-boot averages. In
# interval mode only the first report is since boot; each later report covers
# the preceding 2-second interval.
iostat -cxk 2 >> iostat_out.txt