Satellog Resources
Query Database
Tutorial
Documentation
Downloads

Contact Us

Acknowledgements
Publications

GSC
UBiC

2nd place BC Net's Coolest Application Contest, 2005

Satellog Database Documentation

APPENDIX A

Satellog MySQL Database commands

########################################

# MySQL Commands for Sattelog Database #

########################################

The following document has all of the commands needed to recreate the Satellog database from scratch. Broadly speaking, the tables can be divided into two classes: those that must be populated manually prior to running repeatalyzer.pl and those populated automatically when repeatalyzer.pl is run. Some comments are provided after each create table command to briefly expain what data the table holds. See the Satellog manuscript for further details.

#######################

# DROP TABLE COMMANDS #

#######################

DROP TABLE affy;

DROP TABLE build_info;

DROP TABLE ugcount;

DROP TABLE ugstats;

DROP TABLE ens_db;

DROP TABLE gc;

DROP TABLE go;

DROP TABLE mim;

DROP TABLE pdb;

DROP TABLE repeats;

DROP TABLE transcripts;

# Following tables are not automatically generated

# Are you sure you need to drop them?

DROP TABLE linkage;

DROP TABLE rep_stats;

DROP TABLE rep_class;

DROP TABLE GeneNote;

DROP TABLE disease;

DROP TABLE unigene;

#########################

# CREATE TABLE COMMANDS #

#########################

CREATE TABLE build_info

(

ens_db VARCHAR(50) NOT NULL PRIMARY KEY,

db_name VARCHAR(50) NOT NULL,

date_run DATETIME

);

CREATE TABLE repeats

(

rep_id INT auto_increment NOT NULL PRIMARY KEY,

chr VARCHAR(4),

start INT UNSIGNED,

end INT UNSIGNED,

period TINYINT UNSIGNED,

unit VARCHAR(16),

class_id INT,

seq VARCHAR(255),

length INT UNSIGNED,

pvalue DECIMAL(8,6) NULL

);

CREATE INDEX r_total ON repeats (rep_id,chr,start,end,period,unit,seq,length);

CREATE INDEX r_common ON repeats (rep_id,period,unit,length);

CREATE INDEX r_chr ON repeats (chr,start,end,period,unit,length);

CREATE INDEX r_total_period ON repeats (rep_id,period);

CREATE INDEX r_total_unit ON repeats (rep_id,unit);

CREATE INDEX r_total_length ON repeats (rep_id,length);

CREATE INDEX r_co_ords ON repeats (rep_id,chr,start,end);

CREATE INDEX rapid ON repeats (period, length);

CREATE INDEX r_length_class ON repeats (class, length);

###########

# ugcount #

###########

CREATE TABLE ugcount

(

count_id INT auto_increment NOT NULL PRIMARY KEY,

rep_id INT NOT NULL,

cluster VARCHAR(20),

sequence VARCHAR(20),

length INT

);

CREATE INDEX ug_hits ON ugcount (rep_id,cluster,sequence,length);

CREATE INDEX length ON count (rep_id,length);

CREATE TABLE ugstats

(

count_id INT auto_increment NOT NULL PRIMARY KEY,

rep_id INT NOT NULL,

count INT NOT NULL,

min INT NOT NULL,

max INT NOT NULL,

mean DECIMAL(8,2) NOT NULL,

sd DECIMAL(8,2) NULL

);

CREATE INDEX ug_stats ON ugstats (rep_id,count,min,max,mean,sd);

CREATE INDEX sd ON ugstats (rep_id,sd);

CREATE TABLE transcripts

(

ts_id INT auto_increment NOT NULL PRIMARY KEY,

rep_id INT NOT NULL,

ens_ts VARCHAR(15),

gene_location VARCHAR(20),

pep VARCHAR(150) NULL,

ens_id INT NOT NULL

);

CREATE INDEX t_common ON transcripts (rep_id,gene_location,pep,ens_id);

CREATE INDEX t_pep ON transcripts (rep_id,gene_location); CREATE INDEX t_gene ON transcripts (rep_id,ens_id);

CREATE TABLE gc

(

rep_id INT NOT NULL PRIMARY KEY,

100_bp DECIMAL(7,6) NOT NULL,

500_bp DECIMAL(7,6) NOT NULL,

1000_bp DECIMAL(7,6) NOT NULL

);

CREATE INDEX gc_100 ON gc (rep_id,100_bp);

CREATE INDEX gc_500 ON gc (rep_id,500_bp);

CREATE INDEX gc_1000 ON gc (rep_id,1000_bp);

CREATE TABLE ens_db

(

ens_id INT auto_increment NOT NULL PRIMARY KEY,

ens_name VARCHAR(15) NOT NULL,

name VARCHAR(15) NULL,

description TEXT NULL,

chr CHAR(2),

start INT UNSIGNED,

end INT UNSIGNED,

strand CHAR(1)

);

CREATE INDEX ens_common ON ens_db (ens_id,ens_name,name); CREATE INDEX ens_lookup ON ens_db (ens_name,ens_id);

CREATE TABLE go

(

go_id INT auto_increment NOT NULL PRIMARY KEY,

ens_id INT NOT NULL,

go_value VARCHAR(50) NOT NULL

);

CREATE INDEX go_go_value ON go (ens_id,go_value);

CREATE TABLE pdb

(

pdb_id INT auto_increment NOT NULL PRIMARY KEY,

ens_id INT NOT NULL,

domain VARCHAR(20) NOT NULL

);

CREATE INDEX pdb_domain ON pdb (ens_id,domain);

CREATE TABLE mim

(

mim_id INT auto_increment NOT NULL PRIMARY KEY,

ens_id INT NOT NULL,

mim_value VARCHAR(20) NOT NULL

);

CREATE INDEX mim_mim_value ON mim (ens_id,mim_value);

CREATE TABLE affy

(

affy_id INT auto_increment NOT NULL PRIMARY KEY,

ens_id INT NOT NULL,

g_id INT NOT NULL

);

CREATE INDEX affy_id_ref ON affy (ens_id,g_id);

# Following tables are not automatically generated

CREATE TABLE linkage

(

link_id INT auto_increment NOT NULL PRIMARY KEY,

disease VARCHAR(10) NOT NULL,

band VARCHAR(25),

marker VARCHAR(50),

chr CHAR(2),

pstart INT,

start INT,

end INT,

qend INT,

ref INT,

score DECIMAL(3,2),

type VARCHAR(10),

p_value DECIMAL(11,8),

notes TEXT

);

CREATE TABLE rep_stats

(

class_id INT NOT NULL,

chr VARCHAR(4),

length INT UNSIGNED

);

CREATE INDEX stats_search ON rep_stats (class_id,length);

CREATE INDEX stats_search_chr ON rep_stats (chr,class_id,length);

CREATE TABLE GeneNote

(

g_id INT auto_increment NOT NULL PRIMARY KEY,

id_ref VARCHAR(15) NOT NULL,

value DECIMAL(10,1) NOT NULL,

call CHAR(1) NOT NULL,

tissue VARCHAR(15) NOT NULL,

array CHAR(1) NOT NULL,

number CHAR(4) NOT NULL

);

CREATE INDEX g_lookup ON GeneNote (id_ref,g_id);

CREATE INDEX g_all ON GeneNote (g_id,id_ref,value,call,tissue,array,number);

CREATE INDEX g_common ON GeneNote (g_id,call,tissue);

CREATE TABLE rep_class

(

class_id INT auto_increment NOT NULL PRIMARY KEY,

class TEXT

);

CREATE INDEX rc_search ON rep_class (class (20) ASC,rep_class_id);

CREATE TABLE disease

(

disease_id INT auto_increment NOT NULL PRIMARY KEY,

rep_id INT,

short_name VARCHAR(15),

full_name VARCHAR(100),

name VARCHAR(15),

ens_name VARCHAR(15) NOT NULL,

norm_min INT,

norm_max INT,

dis_min INT,

dis_max INT,

locus CHAR,

anticipation VARCHAR(2)

);

CREATE TABLE unigene

(

cluster_id INT auto_increment NOT NULL PRIMARY KEY,

cluster VARCHAR(20),

chr VARCHAR(4),

start INT UNSIGNED,

end INT UNSIGNED,

blatscore INT,

identity DECIMAL(4,1)

);

CREATE INDEX unigene_look_up ON unigene (cluster_id,cluster,chr,start,end);

CREATE TABLE class_stats

(

class INT NOT NULL,

length INT,

pvalue DECIMAL(9,8)

);

CREATE INDEX class_stats_look_up ON class_stats (class,length,pvalue);

CREATE TABLE repeats_in_linkage

(

rep_id INT,

disease VARCHAR(10) NOT NULL,

link_id INT

);

CREATE INDEX rep_link ON repeats_in_linkage (rep_id,link_id);

CREATE INDEX rep_link_disease ON repeats_in_linkage (rep_id,disease,link_id);

LOAD DATA INFILE '/home/perseusm/My_Documents/Publications/Satellog/Results/repeats_in_linkage.txt'

INTO TABLE repeats_in_linkage

IGNORE 1 LINES;

CREATE TABLE go_terms

(

go_value VARCHAR(50) NOT NULL PRIMARY KEY,

go_term VARCHAR(200),

go_class VARCHAR(1)

);

LOAD DATA INFILE '/home/perseusm/GO/go_terms.txt'

INTO TABLE go_terms;

^ top

Satellog

W3C: XHTML, CSS