Skip to content

Commit 499803c

Browse files
committed
basic config file
1 parent afcadc5 commit 499803c

File tree

1 file changed

+120
-0
lines changed

1 file changed

+120
-0
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# NOTE: All paths should be fully qualified paths
2+
3+
# Path to raw ligand data | DATABASE/DATASET/file
4+
INPUT_DIR: "/lustre/project/m2_jgu-smitt/data/raw"
5+
6+
# If you want to provide target PDB files manually, place them in a subfolder of the input dir called "/PDB/receptor"
7+
8+
9+
# Path to output prepared target proteins
10+
PREPARED_DATA_DIR: "/lustre/project/m2_jgu-smitt/data/prepared"
11+
12+
# Path to energy minimized ligand files
13+
PREPARED_LIGAND_DIR: "/lustre/project/m2_jgu-smitt/data/minimized"
14+
15+
# Path to scratch directory
16+
TEMP_DATA_DIR: "/lustre/scratch/m2_jgu-smitt"
17+
18+
# Path where docking results are stored
19+
OUTPUT_DIR: "/lustre/project/m2_jgu-smitt/<FOLDER>"
20+
21+
# Number of best results to be displayed (0 < value <= 1: interpreted as a percentage)
22+
RESULT_NUMBER: "10"
23+
24+
# Specify cutoff value for rescreening
25+
CUTOFF_VALUE: "-8"
26+
27+
# Specify a name for locally uploaded data
28+
# note: this will be ignored, if a 'DATABASE' (see below) is specified
29+
LOC_DATA: ["DATASET"]
30+
31+
#Path to folder which contains compounds
32+
# A fully qualified path should be given here.
33+
# note: this will be ignored, if a 'DATABASE' (see below) is specified
34+
LOCAL_INPUT_DIR: "<LOCAL_INPUT_DIR>"
35+
36+
# Specify the database to use: "ZINC" downloads and uses compounds from the ZINC database; any other value reads local input from LOCAL_INPUT_DIR
37+
38+
DATABASE: ["ZINC"]
39+
40+
# First letter is the molecular weight bin - a measure of size - horizontal axis, left to right, online. A: 200 D, B: 250, C:300, D: 325, E:350, F: 375
41+
# Second letter is the logP bin - a measure of polarity - vertical axis, top to bottom, online.
42+
# The third letter is reactivity : A=anodyne. B=Bother (e.g. chromophores) C=clean (but pains ok), E=mild reactivity ok, G=reactive ok, I = hot chemistry ok
43+
# The fourth letter is purchasability: A and B = in stock, C = in stock via agent, D = make on demand, E = boutique (expensive), F=annotated (not for sale)
44+
# The fifth letter is pH range: R = ref (7.4), M = mid (near 7.4), L = low (around 6.4), H=high (around 8.4).
45+
# The sixth and last dimension is net molecular charge. Here we follow the convention of InChIkeys.
46+
# Thus, N = neutral, M = minus 1, L = minus 2 (or greater), O = plus 1, P = plus 2 (or greater).
47+
48+
ZINC_INPUT:
49+
WEIGHT: ["A", "B"] #["C","D","E","F","G"]
50+
LOGP: ["A"] # ,"D","E","F","G", "H","I","J"]
51+
REACT: ["A"] #,"B"] # ,"C", "E", "G"]
52+
PURCHASE: ["A"] #, "B"] #, "C", "D", "E"]
53+
PH: ["M"]
54+
CHARGE: ["N"] # ,"M","O","L","P"]
55+
56+
# In case you don't want to download tranches from ZINC based on the parameters given above, a ZINC subset can be chosen. Otherwise set SUBSET to TRANCHES
57+
# ex.
58+
SUBSET: "<SUBSET_NAME>"
59+
60+
#Specify ENAMINE collection
61+
ENAMINE_INPUT:
62+
- Advanced_Collection/200721_Enamine_advanced_collection_493968
63+
- Functional_Collection/200721_Enamine_functional_collection_55353
64+
- HTS_Collection/200721_Enamine_hts_collection_2115979
65+
- Premium_Collection/200721_Enamine_premium_collection_44685
66+
67+
ENAMINE_URL: http://www.enamine.net/files/Stock_Screening_Collections/
68+
69+
RESCREENING: "FALSE"
70+
71+
# Specify target enzyme ID and chains; format: ["PDB_ID,<CHAIN_1> <CHAIN_2>"]
72+
TARGETS: ["TARGET,A B C"]
73+
74+
# to be specified, if 'RESCREENING' is desired (RESCREENING: "TRUE")
75+
RESCREENING_TARGETS: ["TARGET1,A B C", "TARGET2,A B C", "TARGET3, A B C"]
76+
77+
78+
TARGET_URL: https://files.rcsb.org/download
79+
GRID_DIR: "/<GRID_DIRECTORY>"
80+
81+
# Name your experiment here or change it in the final JSON file
82+
83+
EXPERIMENT_NAME: "<Name>"
84+
85+
#parameters for energy minimization
86+
ENERGY_MIN_ALGORITHM: 'cg'
87+
CONVERGENCE_CRITERIA: '1e-6'
88+
STEPS: '2500'
89+
FORCEFIELD: 'MMFF94'
90+
91+
#Env. Modules
92+
OPENBABEL: "chem/OpenBabel/3.0.0-gompi-2019a-Python-3.7.4"
93+
BIOPYTHON: "bio/Biopython/1.79-foss-2021a"
94+
VINALC: "bio/VinaLC/1.3.0-gompi-2021b"
95+
PYPLOT: "vis/matplotlib/3.4.2-foss-2021b"
96+
PYTHON: "lang/Python/3.7.4-GCCcore-8.3.0"
97+
VENN: "vis/matplotlib-venn/0.11.6-foss-2020b-Python-3.8.6"
98+
99+
100+
#Cluster configuration
101+
DOCKING:
102+
mem_mb_per_cpu: 3000
103+
jobname: docking
104+
partition: covid19,parallel
105+
ntasks: 384
106+
constraint: caskadelake,skylake
107+
108+
ENERGY_MIN:
109+
jobname: energyMin
110+
mem_mb: 350
111+
partition: smp,covid19
112+
threads: 1
113+
walltime_minutes: 90
114+
115+
DOCKING_RESULTS:
116+
jobname: results
117+
mem_mb: 65000
118+
partition: smp
119+
threads: 2
120+
walltime_minutes: 300

0 commit comments

Comments
 (0)