Skip to content

Commit 63baa11

Browse files
committed
#1120 #1164 Change the database schema so that the same gene or protein data can be added to the database again under a different timestamp
1 parent 170670f commit 63baa11

File tree

4 files changed

+13
-32
lines changed

4 files changed

+13
-32
lines changed

database/network-database/database_services/populator.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def process_file(self, conn, cursor, data_filepath, copy_statement):
3232
"""
3333

3434
# Determine if we need to drop the last column (PPI network type)
35-
if self.network_mode == Constants.PPI_NETWORK_MODE and data_filepath == Constants.MISSING_PPI_GENE_DATA_FILEPATH:
35+
if self.network_mode == Constants.PPI_NETWORK_MODE and data_filepath == Constants.GENE_DATA_FILEPATH:
3636
print("Dropping the regulator column from the input data...")
3737
processed_rows = []
3838

@@ -72,12 +72,11 @@ class GeneDataPopulator(DataPopulator):
7272
def __init__(self, db_url, network_mode):
7373
super().__init__(db_url)
7474
self.network_mode = network_mode
75+
self.filepath = Constants.GENE_DATA_FILEPATH
7576
if network_mode == Constants.GRN_NETWORK_MODE:
7677
self.database_namespace = Constants.GRN_DATABASE_NAMESPACE
77-
self.filepath = Constants.MISSING_GRN_GENE_DATA_FILEPATH
7878
elif network_mode == Constants.PPI_NETWORK_MODE:
7979
self.database_namespace = Constants.PPI_DATABASE_NAMESPACE
80-
self.filepath = Constants.MISSING_PPI_GENE_DATA_FILEPATH
8180
else:
8281
raise ValueError(f"Unknown network type: {network_mode}")
8382

@@ -92,7 +91,7 @@ def get_copy_statement(self):
9291
class ProteinDataPopulator(DataPopulator):
9392
def __init__(self, db_url):
9493
super().__init__(db_url)
95-
self.filepath = Constants.MISSING_PROTEIN_DATA_FILEPATH
94+
self.filepath = Constants.PROTEIN_DATA_FILEPATH
9695

9796
def get_copy_statement(self):
9897
return f"COPY {Constants.PPI_DATABASE_NAMESPACE}.protein (standard_name, gene_systematic_name, length, molecular_weight, PI, taxon_id, time_stamp, source) FROM stdin WITH CSV DELIMITER E'\\t' HEADER;"

database/network-database/main.py

+1-15
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from data_services.data_generator import *
33
from data_services.save_service import *
44
from database_services.filter import *
5-
from database_services.updater import *
65
from database_services.populator import *
76
import argparse
87
from datetime import datetime, timezone, timedelta
@@ -13,6 +12,7 @@ def load_data(network_option):
1312
print("Generating data.................................................")
1413
time_stamp = datetime.now(timezone(timedelta(hours=-8)))
1514
formatted_time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S%z")
15+
print("Formatted_time_stamp: ", formatted_time_stamp)
1616
if network_option in ['all', Constants.GRN_NETWORK_MODE]:
1717
grnDataGenerator = GeneRegulatoryNetworkDataGenerator(GeneRegulatoryNetworkFetcherService(), GeneRegulatoryNetworkProcessor(formatted_time_stamp), save_service)
1818

@@ -27,40 +27,26 @@ def load_data(network_option):
2727

2828
SourceDataGenerator(SourceProcessor(formatted_time_stamp), save_service)
2929

30-
def filter_data(network_option, db_url):
31-
print("Filtering data.................................................")
32-
if network_option in ['all', Constants.GRN_NETWORK_MODE]:
33-
GeneFilter(db_url, save_service, network_mode="grn").filter_data()
34-
35-
if network_option in ['all', Constants.PPI_NETWORK_MODE]:
36-
GeneFilter(db_url, save_service, network_mode="ppi").filter_data()
37-
ProteinFilter(db_url, save_service).filter_data()
38-
3930
def adding_data_to_databse(network_option, db_url):
4031
print("Adding data to database.................................................")
4132
if network_option in ['all', Constants.GRN_NETWORK_MODE]:
4233
network_mode = Constants.GRN_NETWORK_MODE
4334
SourceDataPopulator(db_url, network_mode).populate_data()
4435
GeneDataPopulator(db_url, network_mode).populate_data()
45-
GeneUpdater(db_url, network_mode).update_data()
4636
GeneRegulatoryNetworkDataPopulator(db_url).populate_data()
4737

4838
if network_option in ['all', Constants.PPI_NETWORK_MODE]:
4939
network_mode = Constants.PPI_NETWORK_MODE
5040
SourceDataPopulator(db_url, network_mode).populate_data()
5141

5242
GeneDataPopulator(db_url, network_mode).populate_data()
53-
GeneUpdater(db_url, network_mode).update_data()
5443

5544
ProteinDataPopulator(db_url).populate_data()
56-
ProteinProteinInteractionsUpdater(db_url).update_data()
57-
ProteinUpdater(db_url).update_data()
5845

5946
ProteinProteinInteractionsDataPopulator(db_url).populate_data()
6047

6148
def main(network_option, db_url):
6249
load_data(network_option)
63-
filter_data(network_option, db_url)
6450
adding_data_to_databse(network_option, db_url)
6551

6652
if __name__ == "__main__":

database/schema/gene_regulatory_network_schema.sql

+3-4
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ CREATE TABLE gene_regulatory_network_with_timestamp.gene (
1515
regulator BOOLEAN,
1616
time_stamp TIMESTAMP WITH TIME ZONE,
1717
source VARCHAR,
18-
PRIMARY KEY(gene_id, taxon_id),
18+
PRIMARY KEY(gene_id, taxon_id, time_stamp, source),
1919
FOREIGN KEY (time_stamp, source) REFERENCES gene_regulatory_network_with_timestamp.source(time_stamp, source)
2020
);
2121

@@ -26,8 +26,7 @@ CREATE TABLE gene_regulatory_network_with_timestamp.network (
2626
annotation_type VARCHAR,
2727
time_stamp TIMESTAMP WITH TIME ZONE,
2828
source VARCHAR,
29-
FOREIGN KEY (regulator_gene_id, taxon_id) REFERENCES gene_regulatory_network_with_timestamp.gene(gene_id, taxon_id),
30-
FOREIGN KEY (target_gene_id, taxon_id) REFERENCES gene_regulatory_network_with_timestamp.gene(gene_id, taxon_id),
31-
FOREIGN KEY (time_stamp, source) REFERENCES gene_regulatory_network_with_timestamp.source(time_stamp, source),
29+
FOREIGN KEY (regulator_gene_id, taxon_id, time_stamp, source) REFERENCES gene_regulatory_network_with_timestamp.gene(gene_id, taxon_id, time_stamp, source),
30+
FOREIGN KEY (target_gene_id, taxon_id, time_stamp, source) REFERENCES gene_regulatory_network_with_timestamp.gene(gene_id, taxon_id, time_stamp, source),
3231
CONSTRAINT unique_network UNIQUE (regulator_gene_id, target_gene_id, taxon_id, time_stamp, source, annotation_type)
3332
);

database/schema/protein_protein_interactions_schema.sql

+6-9
Original file line numberDiff line numberDiff line change
@@ -14,35 +14,32 @@ CREATE TABLE protein_protein_interactions_with_timestamp.gene (
1414
taxon_id VARCHAR,
1515
time_stamp TIMESTAMP WITH TIME ZONE,
1616
source VARCHAR,
17-
PRIMARY KEY(gene_id, taxon_id),
17+
PRIMARY KEY(gene_id, taxon_id, time_stamp, source),
1818
FOREIGN KEY (time_stamp, source) REFERENCES protein_protein_interactions_with_timestamp.source(time_stamp, source)
1919
);
2020

2121
CREATE TABLE protein_protein_interactions_with_timestamp.protein (
22-
standard_name VARCHAR PRIMARY KEY,
22+
standard_name VARCHAR,
2323
gene_systematic_name VARCHAR,
2424
length FLOAT,
2525
molecular_weight FLOAT,
2626
PI FLOAT,
2727
taxon_id VARCHAR,
2828
time_stamp TIMESTAMP WITH TIME ZONE,
2929
source VARCHAR,
30-
FOREIGN KEY (gene_systematic_name, taxon_id) REFERENCES protein_protein_interactions_with_timestamp.gene(gene_id, taxon_id),
31-
FOREIGN KEY (time_stamp, source) REFERENCES protein_protein_interactions_with_timestamp.source(time_stamp, source)
30+
PRIMARY KEY(standard_name, time_stamp, source),
31+
FOREIGN KEY (gene_systematic_name, taxon_id, time_stamp, source) REFERENCES protein_protein_interactions_with_timestamp.gene(gene_id, taxon_id, time_stamp, source)
3232
);
3333

3434
CREATE TABLE protein_protein_interactions_with_timestamp.physical_interactions (
3535
protein1 VARCHAR,
3636
protein2 VARCHAR,
37-
gene_systematic_name1 VARCHAR,
38-
gene_systematic_name2 VARCHAR,
3937
interaction_detection_methods_identifier VARCHAR,
4038
annotation_type VARCHAR,
4139
experiment_name VARCHAR,
4240
time_stamp TIMESTAMP WITH TIME ZONE,
4341
source VARCHAR,
44-
FOREIGN KEY (protein1) REFERENCES protein_protein_interactions_with_timestamp.protein(standard_name),
45-
FOREIGN KEY (protein2) REFERENCES protein_protein_interactions_with_timestamp.protein(standard_name),
46-
FOREIGN KEY (time_stamp, source) REFERENCES protein_protein_interactions_with_timestamp.source(time_stamp, source),
42+
FOREIGN KEY (protein1, time_stamp, source) REFERENCES protein_protein_interactions_with_timestamp.protein(standard_name, time_stamp, source),
43+
FOREIGN KEY (protein2, time_stamp, source) REFERENCES protein_protein_interactions_with_timestamp.protein(standard_name, time_stamp, source),
4744
CONSTRAINT unique_physical_interaction UNIQUE (protein1, protein2, interaction_detection_methods_identifier, annotation_type, experiment_name, time_stamp, source)
4845
);

0 commit comments

Comments
 (0)