|
|
|
APPENDIX G Generating the percentile rank for each
repeat (p-values) We
were interested in knowing how significant a repeat length was relative to all
other repeats of the same class in the human genome. In the case of ####################### # generate_pvalues.pl # ####################### #!/usr/bin/perl # generate_pvalues.pl # # This script loops through the distinct repeat
classes in Satellog and collects counts of all the lengths for all repeat
classes # Results are stored in the class_stats table in
Satellog use strict; use DBI; # DBI my ($dsn) =
"DBI:mysql:schz_db:athena.bcgsc.ca"; my ($user_name) = "schz_rw"; my ($password) = "repeat"; my ($dbh, $sth); my (@ary); ####################### # Connect to Database # ####################### my $dbh = DBI->connect ($dsn, $user_name,
$password, my $sth1 = $dbh->prepare ("SELECT COUNT(*)
AS count FROM repeats WHERE class = ?"); my $sth2 = $dbh->prepare ("SELECT length,
COUNT(length) AS count FROM repeats WHERE class = ? GROUP BY length;"); my $sth3 = $dbh->prepare ("INSERT INTO
class_stats VALUES(?,?,?)"); ##################### # Extract for query # ##################### my $i = 1; print "i is $i\n\n"; # 90700 while ($i < 90700) { print "i is $i\n\n";
$sth1->execute($i); my
$count; while
( my $href = $sth1->fetchrow_hashref ) {
$count = $href-> print
"$i\t$count\n"; }
$sth2->execute($i); my
$fraction_larger = $count; my $pvalue; while
( my $href = $sth2->fetchrow_hashref ) { my $length = $href-> my
$length_count = $href-> $pvalue =
$fraction_larger / $count;
print "$i\t$length\t$length_count\t$fraction_larger\t$pvalue\n";
$sth3->execute($i,$length,$pvalue); $fraction_larger =
($fraction_larger - $length_count); } $i++; } print "done\n\n"; Lastly, each rep_id in the repeats table had its
class and length queried against the class_stats table to extract a p-value for
each repeat. ^ top
|
|
|