Skip to content

Commit

Permalink
Adding schema folder that hold all schemas for 4 databases
Browse files Browse the repository at this point in the history
ntran18 committed Jan 22, 2025
1 parent 84bdcc4 commit e5af8cb
Showing 5 changed files with 252 additions and 0 deletions.
116 changes: 116 additions & 0 deletions database2/schema/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Schema Setup for GRNsight

This directory contains all the necessary schemas for the databases required by GRNsight.

## Load Database

For Mac:
```
psql <address to database>
```

For Windows:
```
psql -U postgres <address to database>
```
When prompted for the password, use the password you specified earlier during the installation process. For all future commands requiring you to access postgres, you will need to add `-U postgres `

For example, to access your local PostgreSQL database, use the following command:

```
psql postgresql://localhost/postgres
```

## Creating Schemas

GRNsight requires four schemas, one for each of the following tables:

1. `grnsettings`
2. `gene_expression`
3. `gene_regulatory_network`
4. `protein_protein_interactions`

To create these schemas, run the following commands:

```
CREATE SCHEMA <database_namespace>;
```

Since we have 4 tables, using the following commands to create them:

```
CREATE SCHEMA settings;
```

```
CREATE SCHEMA gene_expression;
```

```
CREATE SCHEMA gene_regulatory_network;
```

```
CREATE SCHEMA protein_protein_interactions;
```

## Adding Table Specifications

Each schema requires a set of table definitions. You can add these by running the following commands, each corresponding to an SQL file that defines the structure for each schema:

```
psql -f expression_schema.sql postgresql://localhost/postgres
```

```
psql -f gene_regulatory_network_schema.sql postgresql://localhost/postgres
```

```
psql -f protein_protein_interactions_schema.sql postgresql://localhost/postgres
```

```
psql -f grnsettings_schema.sql postgresql://localhost/postgres
```

Once these steps are completed, your database will be set up and ready to accept expression and network data.

## Populating Data into the Database

### 1. Settings Database

The `settings` table stores the default database name.

To change the default database name, follow these steps:

1. **Log in to the Database**

For instructions on how to load the database, refer to the [Load Database](#load-database) section.

2. **Set the Search Path**

Set your search path to the `settings` schema with the following command:

```
SET SEARCH_PATH TO settings;
```
3. **Delete the Current Default Database Name**
Delete the existing database name with this command:
```
DELETE FROM grnsettings;
```
4. **Insert the New Default Database Name**
Insert the new default database name with the following command:
```
INSERT INTO grnsettings(expression_dataset) VALUES ('<new default database name>');
```
_Note: The current default database is `dahlquist_2018`._
### 2. Other databases
For other databases, continue follow the instructions in the [README.md](https://github.com/dondi/GRNsight/tree/master/database) outside of this directory.
71 changes: 71 additions & 0 deletions database2/schema/expression_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
CREATE TABLE gene_expression.ref (
pubmed_id VARCHAR,
authors VARCHAR,
publication_year VARCHAR,
title VARCHAR,
doi VARCHAR,
ncbi_geo_id VARCHAR,
PRIMARY KEY(ncbi_geo_id, pubmed_id)
);

CREATE TABLE gene_expression.gene (
gene_id VARCHAR, -- systematic like name
display_gene_id VARCHAR, -- standard like name
species VARCHAR,
taxon_id VARCHAR,
PRIMARY KEY(gene_id, taxon_id)
);

CREATE TABLE gene_expression.expression_metadata (
ncbi_geo_id VARCHAR,
pubmed_id VARCHAR,
FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES gene_expression.ref(ncbi_geo_id, pubmed_id),
control_yeast_strain VARCHAR,
treatment_yeast_strain VARCHAR,
control VARCHAR,
treatment VARCHAR,
concentration_value FLOAT,
concentration_unit VARCHAR,
time_value FLOAT,
time_unit VARCHAR,
number_of_replicates INT,
expression_table VARCHAR,
display_expression_table VARCHAR,
PRIMARY KEY(ncbi_geo_id, pubmed_id, time_value)
);
CREATE TABLE gene_expression.expression (
gene_id VARCHAR,
taxon_id VARCHAR,
FOREIGN KEY (gene_id, taxon_id) REFERENCES gene_expression.gene(gene_id, taxon_id),
-- ncbi_geo_id VARCHAR,
-- pubmed_id VARCHAR,
sort_index INT,
sample_id VARCHAR,
expression FLOAT,
time_point FLOAT,
dataset VARCHAR,
PRIMARY KEY(gene_id, sample_id)
-- FOREIGN KEY (ncbi_geo_id, pubmed_id, time_point) REFERENCES gene_expression.expression_metadata(ncbi_geo_id, pubmed_id, time_value)
);
CREATE TABLE gene_expression.degradation_rate (
gene_id VARCHAR,
taxon_id VARCHAR,
FOREIGN KEY (gene_id, taxon_id) REFERENCES gene_expression.gene(gene_id, taxon_id),
ncbi_geo_id VARCHAR,
pubmed_id VARCHAR,
FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES gene_expression.ref(ncbi_geo_id, pubmed_id),
PRIMARY KEY(gene_id, ncbi_geo_id, pubmed_id),
degradation_rate FLOAT
);

CREATE TABLE gene_expression.production_rate (
gene_id VARCHAR,
taxon_id VARCHAR,
FOREIGN KEY (gene_id, taxon_id) REFERENCES gene_expression.gene(gene_id, taxon_id),
ncbi_geo_id VARCHAR,
pubmed_id VARCHAR,
FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES gene_expression.ref(ncbi_geo_id, pubmed_id),
PRIMARY KEY(gene_id, ncbi_geo_id, pubmed_id),
production_rate FLOAT
-- FOREIGN KEY (gene_id, ncbi_geo_id, pubmed_id) REFERENCES gene_expression.degradation_rate(gene_id, ncbi_geo_id, pubmed_id) -- not sure if we want to link the generated production rate to it's original degradation rate
);
25 changes: 25 additions & 0 deletions database2/schema/gene_regulatory_network_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
CREATE TABLE gene_regulatory_network.source (
time_stamp TIMESTAMP WITH TIME ZONE,
source VARCHAR,
display_name VARCHAR,
PRIMARY KEY(time_stamp, source)
);

CREATE TABLE gene_regulatory_network.gene (
gene_id VARCHAR, -- systematic like name
display_gene_id VARCHAR, -- standard like name
species VARCHAR,
taxon_id VARCHAR,
regulator BOOLEAN,
PRIMARY KEY(gene_id, taxon_id)
);
CREATE TABLE gene_regulatory_network.network (
regulator_gene_id VARCHAR,
target_gene_id VARCHAR,
taxon_id VARCHAR,
time_stamp TIMESTAMP WITH TIME ZONE,
source VARCHAR,
FOREIGN KEY (regulator_gene_id, taxon_id) REFERENCES gene_regulatory_network.gene(gene_id, taxon_id),
FOREIGN KEY (target_gene_id, taxon_id) REFERENCES gene_regulatory_network.gene(gene_id, taxon_id),
FOREIGN KEY (time_stamp, source) REFERENCES gene_regulatory_network.source(time_stamp, source)
);
3 changes: 3 additions & 0 deletions database2/schema/grnsettings_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
CREATE TABLE settings.grnsettings (
expression_dataset VARCHAR PRIMARY KEY
);
37 changes: 37 additions & 0 deletions database2/schema/protein_protein_interactions.schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
CREATE TABLE protein_protein_interactions.source (
time_stamp TIMESTAMP WITH TIME ZONE,
source VARCHAR,
display_name VARCHAR,
PRIMARY KEY(time_stamp, source)
);

CREATE TABLE protein_protein_interactions.gene (
gene_id VARCHAR, -- systematic like name
display_gene_id VARCHAR, -- standard like name
species VARCHAR,
taxon_id VARCHAR,
PRIMARY KEY(gene_id, taxon_id)
);

CREATE TABLE protein_protein_interactions.protein (
standard_name VARCHAR PRIMARY KEY,
gene_systematic_name VARCHAR,
length FLOAT,
molecular_weight FLOAT,
PI FLOAT,
taxon_id VARCHAR,
FOREIGN KEY (gene_systematic_name, taxon_id) REFERENCES protein_protein_interactions.gene(gene_id, taxon_id)
);

CREATE TABLE protein_protein_interactions.physical_interactions (
protein1 VARCHAR,
protein2 VARCHAR,
interaction_detection_methods_identifier VARCHAR,
experiment_name VARCHAR,
time_stamp TIMESTAMP WITH TIME ZONE,
source VARCHAR,
FOREIGN KEY (protein1) REFERENCES protein_protein_interactions.protein(standard_name),
FOREIGN KEY (protein2) REFERENCES protein_protein_interactions.protein(standard_name),
FOREIGN KEY (time_stamp, source) REFERENCES protein_protein_interactions.source(time_stamp, source),
CONSTRAINT unique_physical_interaction UNIQUE (protein1, protein2, interaction_detection_methods_identifier, experiment_name, time_stamp, source)
);

0 comments on commit e5af8cb

Please sign in to comment.