diff --git a/examples/pipeline/Small_Molecule_Pipeline_GAFF.ipynb b/examples/pipeline/Small_Molecule_Pipeline_GAFF.ipynb new file mode 100644 index 0000000..baf3f2f --- /dev/null +++ b/examples/pipeline/Small_Molecule_Pipeline_GAFF.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fc3ea0e8-57da-4a41-8f24-a5290beed474", + "metadata": {}, + "source": [ + "# Small Molecule Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "id": "3637a8b5-067b-4956-a7ae-a1621a51feb6", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:00.857110Z", + "iopub.status.busy": "2025-06-23T16:24:00.856906Z", + "iopub.status.idle": "2025-06-23T16:24:00.869792Z", + "shell.execute_reply": "2025-06-23T16:24:00.869257Z", + "shell.execute_reply.started": "2025-06-23T16:24:00.857092Z" + } + }, + "outputs": [], + "source": [ + "from importlib import reload\n", + "\n", + "\n", + "import openpharmmdflow\n", + "from openpharmmdflow.pipeline.sm.pipeline_settings import *\n", + "from openpharmmdflow.pipeline.sm.pipeline import SmallMoleculePipeline\n", + "import nglview\n", + "\n", + "reload(openpharmmdflow)\n", + "reload(openpharmmdflow.pipeline.sm.pipeline_settings)\n", + "reload(openpharmmdflow.pipeline.sm.pipeline)\n", + "\n", + "import openpharmmdflow\n", + "from openpharmmdflow.pipeline.sm.pipeline_settings import *\n", + "from openpharmmdflow.pipeline.sm.pipeline import SmallMoleculePipeline\n", + "import nglview" + ] + }, + { + "cell_type": "markdown", + "id": "1631890b-e4c8-4b7d-a37f-e01d52dd2bc0", + "metadata": {}, + "source": [ + "First we create the pipeline config settings object" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "id": "20c72ac0-ae52-4dd1-a9d5-c7558e83932f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:01.373848Z", + "iopub.status.busy": "2025-06-23T16:24:01.373681Z", + "iopub.status.idle": "2025-06-23T16:24:01.378571Z", + "shell.execute_reply": "2025-06-23T16:24:01.378123Z", + "shell.execute_reply.started": "2025-06-23T16:24:01.373834Z" + } + }, + "outputs": [], + "source": [ + "# Any file supported by openff-toolkit or cif\n", + "inputA = {\"name\": \"ibuprofen\", \"path\": \"ibuprofen.sdf\"}\n", + "inputB = {\"name\": \"ibuprofen1\", \"path\": \"ibuprofen.sdf\"}\n", + "\n", + "sm_inputA = SmallMoleculePipelineInputConfig(**inputA)\n", + "sm_inputB = SmallMoleculePipelineInputConfig(**inputB)\n", + "\n", + "settings = {\n", + " # Not used right now\n", + " \"work_dir\": \"/home/mmh/Projects/OpenPharmMDFlow/experiments/sm/scratch\",\n", + " \"inputs\": [sm_inputA, sm_inputB],\n", + " \"prep_config\": SmallMoleculePipelinePrepConfig(\n", + " bespokefit_config=BespokefitConfig(\n", + " bespoke_workflow_factory_config=BespokeWorkflowFactoryConfig(),\n", + " bespoke_executor_config=BespokeExecutorConfig(),\n", + " mol_to_bespoke=\"ibuprofen\",\n", + " )\n", + " ),\n", + " \"pack_config\": SmallMoleculePipelinePackConfig(\n", + " molecule_names=[\"ibuprofen\"],\n", + " number_of_copies=[1],\n", + " target_density=0.01,\n", + " ),\n", + " \"parameterize_config\": SmallMoleculePipelineParameterizeConfig(\n", + " force_field=\"gaff-2.11\"\n", + " ),\n", + " \"simulate_config\": SmallMoleculePipelineSimulateConfig(),\n", + " \"analyize_config\": SmallMoleculePipelineAnalyizeConfig(),\n", + " \"solvate_config\": SmallMoleculePipelineSolvateConfig(\n", + " nacl_conc=Quantity(0.1, \"mole / liter\"),\n", + " padding=Quantity(1.2, \"nanometer\"),\n", + " box_shape=RHOMBIC_DODECAHEDRON,\n", + " target_density=Quantity(0.9, \"gram / milliliter\"),\n", + " tolerance=Quantity(2.0, \"angstrom\"),\n", + " ),\n", + "}\n", + "sm_config = SmallMoleculePipelineConfig(**settings)" + ] + }, + { + "cell_type": "markdown", + "id": "d1ceb998-3b82-493b-a4d1-e0cc2b3a5d13", + "metadata": {}, + "source": [ + "## Now we can create our pipeline object" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "id": "b85ccc8a-7913-4fcc-83fc-d6330bd6111e", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:02.013900Z", + "iopub.status.busy": "2025-06-23T16:24:02.013733Z", + "iopub.status.idle": "2025-06-23T16:24:02.016387Z", + "shell.execute_reply": "2025-06-23T16:24:02.015950Z", + "shell.execute_reply.started": "2025-06-23T16:24:02.013886Z" + } + }, + "outputs": [], + "source": [ + "smp = SmallMoleculePipeline(sm_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "id": "3a96f1a8-1c8a-42f9-b865-d48eccb42e1a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:02.230671Z", + "iopub.status.busy": "2025-06-23T16:24:02.230476Z", + "iopub.status.idle": "2025-06-23T16:24:02.284707Z", + "shell.execute_reply": "2025-06-23T16:24:02.284252Z", + "shell.execute_reply.started": "2025-06-23T16:24:02.230658Z" + } + }, + "outputs": [], + "source": [ + "smp.load()" + ] + }, + { + "cell_type": "markdown", + "id": "94143026-84e6-4074-b635-82982b166da3", + "metadata": {}, + "source": [ + "Prep will run bespoke fit" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "id": "dc6da737-25d8-495f-94ce-fa1f7211d7f5", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:02.607806Z", + "iopub.status.busy": "2025-06-23T16:24:02.607619Z", + "iopub.status.idle": "2025-06-23T16:24:02.622277Z", + "shell.execute_reply": "2025-06-23T16:24:02.621694Z", + "shell.execute_reply.started": "2025-06-23T16:24:02.607791Z" + } + }, + "outputs": [], + "source": [ + "smp.pack()" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "id": "ffbdda53-e6aa-44c3-93f0-7214b339c186", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:02.918822Z", + "iopub.status.busy": "2025-06-23T16:24:02.918659Z", + "iopub.status.idle": "2025-06-23T16:24:02.932717Z", + "shell.execute_reply": "2025-06-23T16:24:02.932369Z", + "shell.execute_reply.started": "2025-06-23T16:24:02.918809Z" + } + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a5476071c0a34580a1706e016c5fc853", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "NGLWidget()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "smp.topology.to_file(\"system.pdb\")\n", + "nglview.show_structure_file(\"system.pdb\")" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "id": "02ee39eb-c747-4eaf-890e-35896a5d847a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:03.327068Z", + "iopub.status.busy": "2025-06-23T16:24:03.326878Z", + "iopub.status.idle": "2025-06-23T16:24:03.329730Z", + "shell.execute_reply": "2025-06-23T16:24:03.329231Z", + "shell.execute_reply.started": "2025-06-23T16:24:03.327051Z" + } + }, + "outputs": [], + "source": [ + "# smp.solvate()" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "id": "f68c7e13-e24a-4d3b-bc9d-775e7ef0bb1d", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:03.676396Z", + "iopub.status.busy": "2025-06-23T16:24:03.676209Z", + "iopub.status.idle": "2025-06-23T16:24:03.696972Z", + "shell.execute_reply": "2025-06-23T16:24:03.696397Z", + "shell.execute_reply.started": "2025-06-23T16:24:03.676380Z" + } + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'SmallMoleculePipeline' object has no attribute 'solvated_topology'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[271]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43msmp\u001b[49m\u001b[43m.\u001b[49m\u001b[43msolvated_topology\u001b[49m.to_file(\u001b[33m\"\u001b[39m\u001b[33msystem.pdb\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 2\u001b[39m nglview.show_structure_file(\u001b[33m\"\u001b[39m\u001b[33msystem.pdb\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[31mAttributeError\u001b[39m: 'SmallMoleculePipeline' object has no attribute 'solvated_topology'" + ] + } + ], + "source": [ + "# smp.solvated_topology.to_file(\"system.pdb\")\n", + "# nglview.show_structure_file(\"system.pdb\")" + ] + }, + { + "cell_type": "code", + "execution_count": 272, + "id": "5cf23d74-a55e-4d03-8a9d-511c505f1c4f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:04.071459Z", + "iopub.status.busy": "2025-06-23T16:24:04.071258Z", + "iopub.status.idle": "2025-06-23T16:24:35.934838Z", + "shell.execute_reply": "2025-06-23T16:24:35.934276Z", + "shell.execute_reply.started": "2025-06-23T16:24:04.071443Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "it worked\n" + ] + } + ], + "source": [ + "smp.parameterize()" + ] + }, + { + "cell_type": "code", + "execution_count": 273, + "id": "d7c2b727-1ef0-432a-be02-9c004c4d50c4", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:35.935771Z", + "iopub.status.busy": "2025-06-23T16:24:35.935606Z", + "iopub.status.idle": "2025-06-23T16:24:35.953656Z", + "shell.execute_reply": "2025-06-23T16:24:35.953013Z", + "shell.execute_reply.started": "2025-06-23T16:24:35.935755Z" + } + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2746daaaa54346dbadf95d38adb00994", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "NGLWidget()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "smp.interchange.visualize(\"nglview\")" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "id": "3f1fe01b-93e1-44c5-9429-ba3348d2a5af", + "metadata": { + "execution": { + "iopub.execute_input": "2025-06-23T16:24:35.954431Z", + "iopub.status.busy": "2025-06-23T16:24:35.954253Z", + "iopub.status.idle": "2025-06-23T16:24:39.615520Z", + "shell.execute_reply": "2025-06-23T16:24:39.615013Z", + "shell.execute_reply.started": "2025-06-23T16:24:35.954403Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "start minimize\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "1 warning generated.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "end minimize\n", + "Starting simulation\n", + "Step, volume (nm^3)\n", + "0 45.592\n", + "500 44.215\n", + "1000 43.176\n", + "1500 45.32\n", + "2000 42.603\n", + "2500 42.267\n", + "3000 43.459\n", + "3500 42.732\n", + "4000 40.587\n", + "4500 44.755\n", + "Elapsed time: 1.33 seconds\n" + ] + } + ], + "source": [ + "smp.simulate()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bad69046-eecf-4eeb-9996-d7a60dfe8d5e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openpharmmdflow/pipeline/sm/pipeline.py b/openpharmmdflow/pipeline/sm/pipeline.py index 9407841..5254ec2 100644 --- a/openpharmmdflow/pipeline/sm/pipeline.py +++ b/openpharmmdflow/pipeline/sm/pipeline.py @@ -42,6 +42,10 @@ def load(self): for input in self.inputs: mol = load_file(input.path) mol.name = input.name + # If it is a openff mol, making a conformer will give us 3D cords + # we can use later, needed for GAFF support + if isinstance(mol, Molecule): + mol.generate_conformers(n_conformers=1) self.loaded_mols[mol.name] = mol def prep(self): @@ -124,36 +128,111 @@ def solvate(self): ) def parameterize(self): - # TODO test to make sure we use the FF we expect to use - # Use bespoke ff if we made one - self.force_field = ( - self.bespoke_ff if self.bespoke_ff else self.parameterize_config.force_field - ) - # Now if force_field is a path or string, we need to turn it into a ForceField object - if not isinstance(self.force_field, ForceField): - self.force_field = ForceField(self.force_field) - self.components_intrcg = Interchange.from_smirnoff( - force_field=self.force_field, topology=self.topology - ) - # if there are waters built during the solvate step combine the components topology - # with the water topology - # TODO better way to check if we ran solvate? - # TODO let user specify ff for solute - if hasattr(self, "water"): - self.water_intrcg = Interchange.from_smirnoff( - force_field=ForceField("openff_unconstrained-2.0.0.offxml"), - topology=[self.water] * self.n_water - + [self.sodium_ion] * self.n_sodium_ion - + [self.chlorine_ion] * self.n_chlorine_ion, + # We can either use a openmmforcefield suported FF or a openff supported one + + # GAFF path + if self.parameterize_config.force_field in [ + "gaff-1.4", + "gaff-1.8", + "gaff-1.81", + "gaff-2.1", + "gaff-2.11", + ]: + # Keep these imports lazy since they are only needed with gaff + # if len(self.loaded_mols) > 1: + # raise RuntimeError("Can't use GAFF with more than one mol yet") + import openmm.app + from openmm.unit import Quantity + from openmm.unit import angstrom + from openmm.unit import nanometer + from openmmforcefields.generators import GAFFTemplateGenerator + + gaff = GAFFTemplateGenerator( + molecules=list(self.loaded_mols.values()), + forcefield=self.parameterize_config.force_field, + ) + forcefield_gaff = openmm.app.ForceField() + forcefield_gaff.registerTemplateGenerator(gaff.generator) + + # This will need to be modified to support more than one mol + # topology = list(self.loaded_mols.values())[0].to_topology() + # topology.box_vectors = Quantity([4, 4, 4], unit.nanometer) + + system_gaff = forcefield_gaff.createSystem( + topology=self.topology.to_openmm(), + nonbondedMethod=openmm.app.PME, + # Match interchange default + nonbondedCutoff=Quantity(value=0.9, unit=nanometer), + # SwitchingFunctionMismatchError: Switching distance(s) do not match. Found 0.09999999999999998 nanometer and 1.0 angstrom. + # switchDistance=Quantity(value=0.8, unit=nanometer), + # SwitchingFunctionMismatchError: Switching distance(s) do not match. Found 0.8 nanometer and 1.0 angstrom. + # switchDistance=Quantity(value=1.0, unit=angstrom) ) - # combine is still experimental os.environ["INTERCHANGE_EXPERIMENTAL"] = "1" - self.interchange = self.components_intrcg.combine(self.water_intrcg) - self.interchange.positions = self.solvated_topology.get_positions() - self.interchange.box = self.solvated_topology.box_vectors + # This will need to be modified to support more than one mol + self.components_intrcg = Interchange.from_openmm( + topology=self.topology.to_openmm(), + system=system_gaff, + positions=self.topology.get_positions().to_openmm(), + ) + print("it worked") + if hasattr(self, "water"): + self.water_intrcg = Interchange.from_smirnoff( + force_field=ForceField("openff_unconstrained-2.2.1.offxml"), + topology=[self.water] * self.n_water + + [self.sodium_ion] * self.n_sodium_ion + + [self.chlorine_ion] * self.n_chlorine_ion, + ) + # combine is still experimental + os.environ["INTERCHANGE_EXPERIMENTAL"] = "1" + # Match interchange default + from openff.units import unit as off_unit + + self.components_intrcg.collections["vdW"].switch_width = ( + off_unit.Quantity(1.0, off_unit.angstrom) + ) + self.interchange = self.components_intrcg.combine(self.water_intrcg) + self.interchange.positions = self.solvated_topology.get_positions() + self.interchange.box = self.solvated_topology.box_vectors + + else: + self.interchange = self.components_intrcg + + # OpenFF XML FF path else: - self.interchange = self.components_intrcg + # TODO test to make sure we use the FF we expect to use + # Use bespoke ff if we made one + self.force_field = ( + self.bespoke_ff + if self.bespoke_ff + else self.parameterize_config.force_field + ) + # Now if force_field is a path or string, we need to turn it into a ForceField object + if not isinstance(self.force_field, ForceField): + self.force_field = ForceField(self.force_field) + self.components_intrcg = Interchange.from_smirnoff( + force_field=self.force_field, topology=self.topology + ) + # if there are waters built during the solvate step combine the components topology + # with the water topology + # TODO better way to check if we ran solvate? + # TODO let user specify ff for solute + if hasattr(self, "water"): + self.water_intrcg = Interchange.from_smirnoff( + force_field=ForceField("openff_unconstrained-2.0.0.offxml"), + topology=[self.water] * self.n_water + + [self.sodium_ion] * self.n_sodium_ion + + [self.chlorine_ion] * self.n_chlorine_ion, + ) + # combine is still experimental + os.environ["INTERCHANGE_EXPERIMENTAL"] = "1" + self.interchange = self.components_intrcg.combine(self.water_intrcg) + self.interchange.positions = self.solvated_topology.get_positions() + self.interchange.box = self.solvated_topology.box_vectors + + else: + self.interchange = self.components_intrcg def simulate(self): self.simulation = create_simulation(self.simulate_config, self.interchange)