Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 47 additions & 35 deletions workflow/rules/analyse.smk
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,15 @@ import builtins
import importlib
from urllib.parse import urlparse


# all rules in this file are local rules
localrules: dockingResults, dockingResultsTxt, bestLigands, makeHistogram, mergeDocking
localrules:
dockingResults,
dockingResultsTxt,
bestLigands,
makeHistogram,
mergeDocking,


def url_reachable(url):
"""
Expand Down Expand Up @@ -47,7 +54,9 @@ def check_zinc_url(url):
return bool(mod.zinc_available(url))
except Exception:
# if the user function errors, fall back to default checks
logger.warning(f"user zinc_available in {modname} raised an exception; falling back to default checks")
logger.warning(
f"user zinc_available in {modname} raised an exception; falling back to default checks"
)
except Exception:
continue

Expand Down Expand Up @@ -83,11 +92,13 @@ def library_files(wildcards):
out = []
# Use ZINC_MIRROR from config if available
zinc_mirror = config.get("ZINC_MIRROR", "files.docking.org")
if not zinc_mirror.startswith("http://") and not zinc_mirror.startswith("https://"):
if not zinc_mirror.startswith("http://") and not zinc_mirror.startswith(
"https://"
):
zinc_mirror = "http://" + zinc_mirror
zinc_mirror = zinc_mirror.rstrip("/")
zinc_test_url = f"{zinc_mirror}/3D/"

# test for ZINC reachability (robust):
if not check_zinc_url(zinc_test_url):
logger.info(
Expand Down Expand Up @@ -123,20 +134,27 @@ def library_files(wildcards):
)
sys.exit(1)
rawOut = expand(
path.join(
path.join(
"docking",
"{receptorID}",
"{receptorID}_{database}_{dataset}_{name}.pdbqt.gz",
),
receptorID=config["TARGETS"][0].split(",")[0],
database=config["DATABASE"],
dataset=[w+l for w in config["ZINC_INPUT"]["WEIGHT"] for l in config["ZINC_INPUT"]["LOGP"]],
name=[w+l+r+p+ph+c for w in config["ZINC_INPUT"]["WEIGHT"]
for l in config["ZINC_INPUT"]["LOGP"]
for r in config["ZINC_INPUT"]["REACT"]
for p in config["ZINC_INPUT"]["PURCHASE"]
for ph in config["ZINC_INPUT"]["PH"]
for c in config["ZINC_INPUT"]["CHARGE"]],
dataset=[
w + l
for w in config["ZINC_INPUT"]["WEIGHT"]
for l in config["ZINC_INPUT"]["LOGP"]
],
name=[
w + l + r + p + ph + c
for w in config["ZINC_INPUT"]["WEIGHT"]
for l in config["ZINC_INPUT"]["LOGP"]
for r in config["ZINC_INPUT"]["REACT"]
for p in config["ZINC_INPUT"]["PURCHASE"]
for ph in config["ZINC_INPUT"]["PH"]
for c in config["ZINC_INPUT"]["CHARGE"]
],
)
for i in rawOut:
weighLog = i.split("_")[-2]
Expand Down Expand Up @@ -183,7 +201,9 @@ def library_files(wildcards):
logger.warning(f"Could not connect to ZINC to validate subset: {e}")

try:
r_zinc = requests.get("https://zinc15.docking.org/", allow_redirects=True, timeout=10)
r_zinc = requests.get(
"https://zinc15.docking.org/", allow_redirects=True, timeout=10
)
if r_zinc.status_code != 200: # test if ZINC database is available
logger.info(
"The ZINC database is not accessible right now. Perhaps it is temporarily down?"
Expand Down Expand Up @@ -291,9 +311,7 @@ rule removeDuplicateLigands:
input:
path.join("results", "{receptorID}_{percentage}.pdbqt"),
output:
path.join(
"rescreening", "unique", "{receptorID}_{percentage}.pdbqt"
),
path.join("rescreening", "unique", "{receptorID}_{percentage}.pdbqt"),
log:
"logs/removeDuplicateLigands_{receptorID}_{percentage}.log",
shell:
Expand All @@ -302,13 +320,15 @@ rule removeDuplicateLigands:

checkpoint split2:
input:
path.join(
"rescreening", "unique", "{receptorID}_{percentage}.pdbqt"
),
path.join("rescreening", "unique", "{receptorID}_{percentage}.pdbqt"),
output:
temp(directory(
os.path.join("scratch", "rescreening_ligands_{percentage}", "{receptorID}")
)),
temp(
directory(
os.path.join(
"scratch", "rescreening_ligands_{percentage}", "{receptorID}"
)
)
),
log:
"logs/split2_{receptorID}_{percentage}.log",
script:
Expand All @@ -321,9 +341,7 @@ rule prepareLigands2:
"scratch", "rescreening_ligands_{percentage}", "{receptorID}", "{i}.pdbqt"
),
output:
ligands=path.join(
"rescreening_{percentage}", "{name}_{receptorID}", "{i}.txt"
),
ligands=path.join("rescreening_{percentage}", "{name}_{receptorID}", "{i}.txt"),
log:
"logs/prepareLigands2_{receptorID}_{percentage}_{name}_{i}.log",
shell:
Expand All @@ -335,11 +353,9 @@ rule prepareSecondDocking:
grid=path.join("grid", "{name}_grid.txt"),
receptor=path.join("prepared", "receptor", "{name}.pdbqt"),
output:
grid=path.join(
"rescreening_{percentage}", "{name}_{receptorID}", "{name}.grd"
),
grid=path.join("rescreening_{percentage}", "{name}_{receptorID}", "{name}.grd"),
receptor=path.join(
"rescreening_{percentage}", "{name}_{receptorID}", "{name}.rec"
"rescreening_{percentage}", "{name}_{receptorID}", "{name}.rec"
),
log:
"logs/prepareSecondDocking_{name}_{receptorID}_{percentage}.log",
Expand All @@ -352,12 +368,8 @@ rule prepareSecondDocking:

rule docking2:
input:
ligands=path.join(
"rescreening_{percentage}", "{name}_{receptorID}", "{i}.txt"
),
grid=path.join(
"rescreening_{percentage}", "{name}_{receptorID}", "{name}.grd"
),
ligands=path.join("rescreening_{percentage}", "{name}_{receptorID}", "{i}.txt"),
grid=path.join("rescreening_{percentage}", "{name}_{receptorID}", "{name}.grd"),
receptor=path.join(
"rescreening_{percentage}", "{name}_{receptorID}", "{name}.rec"
),
Expand Down
50 changes: 28 additions & 22 deletions workflow/rules/docking.smk
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
localrules: prepare_docking_local, prepare_docking_ligand
localrules:
prepare_docking_local,
prepare_docking_ligand,


from snakemake.exceptions import WorkflowError


def get_spacing(gridfile):
"""Return spacing as float parsed from a .gpf grid file.

Expand Down Expand Up @@ -35,24 +39,20 @@ rule prepare_docking_local:
receptor=rules.makeReceptorPDBQT.output,
geometry=path.join("grid", "{receptorID}_grid.txt"),
output:
temp(path.join(
"docking", "{receptorID}", "{dataset}", "{receptorID}.txt"
)),
temp(path.join(
"docking", "{receptorID}", "{dataset}", "{receptorID}_grid.txt"
)),
temp(path.join("docking", "{receptorID}", "{dataset}", "{receptorID}.txt")),
temp(path.join("docking", "{receptorID}", "{dataset}", "{receptorID}_grid.txt")),
message:
(
f" Copying receptor from {str(input.receptor)} to {str(output[0])}; "
f"Copying geometry from {str(input.geometry)} to {str(output[1])}"
),
)
run:
import shutil

shutil.copy(str(input.receptor), str(output[0]))
shutil.copy(str(input.geometry), str(output[1]))



rule prepare_docking_ligand:
"""Copy a single ligand-list entry into the per-job output directory.

Expand All @@ -63,11 +63,13 @@ rule prepare_docking_ligand:
input:
ligands=path.join("library", "{database}_{dataset}_{name}_{i}.txt"),
output:
lig=path.join(
"docking",
"{receptorID}",
"{dataset}",
"{database}_{dataset}_{name}_{i}.txt",
temp(
path.join(
"docking",
"{receptorID}",
"{dataset}",
"{database}_{dataset}_{name}_{i}.txt",
)
),
params:
directory=path.join("docking"),
Expand All @@ -78,7 +80,7 @@ rule prepare_docking_ligand:
outdir = os.path.join(params.directory, wildcards.receptorID, wildcards.dataset)
os.makedirs(outdir, exist_ok=True)

shutil.copy(input.ligands, output.lig)
shutil.copy(input.ligands, output[0])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Output reference change is correct, but breaks downstream rule.

The change from output.lig to output[0] is correct since temp() returns a positional output. However, this breaks the docking rule at line 94, which still references rules.prepare_docking_ligand.output.lig.

Apply this diff to fix the broken reference:

     input:
         rec=rules.prepare_docking_local.output[0],
         geo=rules.prepare_docking_local.output[1],
-        lig=rules.prepare_docking_ligand.output.lig,
+        lig=rules.prepare_docking_ligand.output[0],

Alternatively, if you prefer named outputs for clarity, modify the output definition at lines 66-71 to:

    output:
        lig=temp(
            path.join(
                "docking",
                "{receptorID}",
                "{dataset}",
                "{database}_{dataset}_{name}_{i}.txt",
            )
        ),

And keep line 94 as is. This approach provides better readability.



rule docking:
Expand All @@ -105,18 +107,20 @@ rule docking:
config["VINALC"],
params:
# get spacing from the receptor's .gpf at runtime using wildcards
space=lambda wildcards: get_spacing(os.path.join(config['GRID_DIR'], f'{wildcards.receptorID}.gpf')),
space=lambda wildcards: get_spacing(
os.path.join(config["GRID_DIR"], f"{wildcards.receptorID}.gpf")
),
log:
"logs/docking/{receptorID}_{dataset}_{database}_{name}_{i}.log",
resources:
mpi="mpiexec"
mpi="mpiexec",
shell:
(
"cd docking/{wildcards.receptorID}/{wildcards.dataset} ; "
"{resources.mpi} vinalc --recList {wildcards.receptorID}.txt "
"--ligList {wildcards.database}_{wildcards.dataset}_{wildcards.name}_{wildcards.i}.txt "
"--geoList {wildcards.receptorID}_grid.txt --granularity {params.space} "
)
)


def aggregate_in(wildcards):
Expand All @@ -137,10 +141,12 @@ rule mergeDocking:
input:
unpack(aggregate_in),
output:
path.join(
"docking",
"{receptorID}",
"{receptorID}_{database}_{dataset}_{name}.pdbqt.gz",
temp(
path.join(
"docking",
"{receptorID}",
"{receptorID}_{database}_{dataset}_{name}.pdbqt.gz",
)
),
script:
"../scripts/mergeOutput.py"
12 changes: 7 additions & 5 deletions workflow/rules/preparation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ rule targetProtein:
rule getZINCdata:
output:
temp(path.join(DATABASE, "{dataset}", "{name}.pdbqt.gz")),
log: "logs/downloadZINC/{dataset}_{name}.log",
log:
"logs/downloadZINC/{dataset}_{name}.log",
message:
"Downloading ZINC data for {wildcards.name} from ZINC database {wildcards.dataset}...",
"Downloading ZINC data for {wildcards.name} from ZINC database {wildcards.dataset}..."
script:
"../scripts/ZINCdownload.py"

Expand Down Expand Up @@ -121,7 +122,7 @@ rule makeReceptorPDBQT:
input:
path.join("scratch", "PDB", "receptor", "{receptorID}.pdb"),
output:
path.join("prepared", "receptor", "{receptorID}.pdbqt"),
temp(path.join("prepared", "receptor", "{receptorID}.pdbqt")),
conda:
"../envs/openbabel.yml"
envmodules:
Expand All @@ -147,6 +148,7 @@ rule gunzip:
"logs/gunzip/{database}_{dataset}_{name}_{filetype}.log",
run:
import gzip, shutil, os

try:
with gzip.open(input[0], "rb") as src, open(output[0], "wb") as dst:
shutil.copyfileobj(src, dst)
Expand Down Expand Up @@ -215,7 +217,7 @@ rule prepareGeometry:
input:
path.join(config["GRID_DIR"], "{receptorID}.gpf"),
output:
path.join("grid", "{receptorID}_grid.txt"),
temp(path.join("grid", "{receptorID}_grid.txt")),
log:
"logs/prepareGeometry/{receptorID}.log",
run:
Expand Down Expand Up @@ -253,7 +255,7 @@ rule prepareDocking:
input:
rules.makeReceptorPDBQT.output,
output:
path.join("receptor", "{receptorID}.txt"),
temp(path.join("receptor", "{receptorID}.txt")),
log:
"logs/prepareDocking/{receptorID}.log",
shell:
Expand Down