Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion package/MDAnalysis/coordinates/MMCIF.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
import numpy as np

from . import base
from ..lib import util

try:
import gemmi
Expand Down Expand Up @@ -119,7 +120,7 @@ class MMCIFReader(base.SingleFrameReaderBase):
units = {"time": None, "length": "Angstrom"}

def _read_first_frame(self):
structure = gemmi.read_structure(self.filename)
structure = self._get_structure()
cell_dims = np.array(
[
getattr(structure.cell, name)
Expand All @@ -145,3 +146,36 @@ def _read_first_frame(self):
else:
self.ts.dimensions = cell_dims
self.ts.frame = 0

def _get_structure(self):
    """Build a gemmi structure from ``self.filename``.

    The input is opened with :func:`util.openany` and read completely
    into memory; the resulting content is then handed to gemmi's
    string-based readers. It is first parsed as CIF and, if that raises
    :class:`ValueError`, parsed as PDB instead.

    Returns
    -------
    The object returned by ``gemmi.make_structure_from_block`` (CIF
    input) or by ``gemmi.read_pdb_string`` (PDB fallback).

    Raises
    ------
    ValueError
        If both the CIF and the PDB attempt raise :class:`ValueError`;
        the error from the CIF attempt is the one re-raised.

    Notes
    -----
    This helper works around gaps in the gemmi Python API (as of 0.7.3).
    In C++, ``read_structure`` accepts in-memory, string and file path
    arguments alike:
    https://github.com/project-gemmi/gemmi/blob/4416e298f204b7b57bf5b3051d7efd4fe02957cf/include/gemmi/mmread.hpp#L86

    For MDA to accept streams (open file-like objects, ``StringIO``) and
    ``pathlib.Path`` objects as well, a string is used as the common
    target for every input type. No single Python-side call handles both
    formats, hence ``gemmi.cif.read_string``
    (https://gemmi.readthedocs.io/en/latest/cif.html#reading) for CIF
    and ``gemmi.read_pdb_string`` for PDB.

    File paths deliberately go through ``openany`` too instead of being
    treated differently from streams: reading the whole file into a
    string is less efficient but easier to maintain. If the gemmi Python
    API is extended, this method can be simplified or replaced with
    something like ``gemmi.read_structure``.
    """
    with util.openany(self.filename) as stream:
        text = stream.read()

    try:
        # String -> Document -> Block -> Structure; only the first block
        # of the document is used, as gemmi does internally:
        # https://github.com/project-gemmi/gemmi/blob/4416e298f204b7b57bf5b3051d7efd4fe02957cf/include/gemmi/mmcif.hpp#L32
        document = gemmi.cif.read_string(text)
        first_block = document[0]
        return gemmi.make_structure_from_block(first_block)
    except ValueError as cif_error:
        try:
            return gemmi.read_pdb_string(text)
        except ValueError:
            # Neither parser accepted the content: report the CIF error.
            raise cif_error
36 changes: 35 additions & 1 deletion package/MDAnalysis/topology/MMCIFParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
Tempfactors,
)
from .base import TopologyReaderBase, change_squash
from ..lib import util


class MMCIFParser(TopologyReaderBase):
Expand Down Expand Up @@ -108,7 +109,7 @@ def parse(self, **kwargs) -> Topology:
-------
MDAnalysis Topology object
"""
structure = gemmi.read_structure(self.filename)
structure = self._get_structure()

if len(structure) > 1:
warnings.warn(
Expand Down Expand Up @@ -224,3 +225,36 @@ def parse(self, **kwargs) -> Topology:
atom_resindex=residx,
residue_segindex=segidx,
)

def _get_structure(self):
    """Read ``self.filename`` and turn its content into a gemmi structure.

    Whatever ``self.filename`` is, it is opened through
    :func:`util.openany`, read in full, and parsed from that in-memory
    content: CIF parsing is tried first and PDB parsing is used when the
    CIF attempt raises :class:`ValueError`.

    Returns
    -------
    The result of ``gemmi.make_structure_from_block`` when the content
    parses as CIF, otherwise the result of ``gemmi.read_pdb_string``.

    Raises
    ------
    ValueError
        The error raised by the CIF attempt, if the PDB attempt raises
        :class:`ValueError` as well.

    Notes
    -----
    The method exists because the gemmi Python API (as of 0.7.3) lacks
    an equivalent of the C++ ``read_structure``, which accepts
    in-memory, string and file path arguments:
    https://github.com/project-gemmi/gemmi/blob/4416e298f204b7b57bf5b3051d7efd4fe02957cf/include/gemmi/mmread.hpp#L86

    To let MDA accept streams (open file-like objects and ``StringIO``
    objects) and ``pathlib.Path`` objects, every input is reduced to a
    string. ``gemmi.cif.read_string``
    (https://gemmi.readthedocs.io/en/latest/cif.html#reading) handles
    CIF strings and ``gemmi.read_pdb_string`` handles PDB strings; no
    one Python-side function currently covers both formats.

    ``openany`` is used for file paths as well rather than handling them
    separately from streams. Reading the file into a string is less
    efficient, but this is easier to maintain. Should the gemmi Python
    API be extended, this method can be simplified/removed in favour of
    something like ``gemmi.read_structure``.
    """
    with util.openany(self.filename) as handle:
        raw_content = handle.read()

    try:
        # String -> Doc -> Block -> Structure. The Structure is made
        # from the first Block of the Document, as gemmi does itself:
        # https://github.com/project-gemmi/gemmi/blob/4416e298f204b7b57bf5b3051d7efd4fe02957cf/include/gemmi/mmcif.hpp#L32
        cif_doc = gemmi.cif.read_string(raw_content)
        block = cif_doc[0]
        return gemmi.make_structure_from_block(block)
    except ValueError as cif_exc:
        try:
            return gemmi.read_pdb_string(raw_content)
        except ValueError:
            # PDB parsing failed too; surface the original CIF failure.
            raise cif_exc
49 changes: 49 additions & 0 deletions testsuite/MDAnalysisTests/topology/test_mmcif.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import MDAnalysis as mda
import pytest
from pathlib import Path
from io import StringIO
import gzip
from MDAnalysis.lib import util
from MDAnalysis.coordinates.MMCIF import HAS_GEMMI

from MDAnalysisTests.datafiles import MMCIF as MMCIF_FOLDER
Expand Down Expand Up @@ -105,3 +109,48 @@ def test_multimodel_warning_msg():
mda.topology.MMCIFParser.MMCIFParser(
f"{MMCIF_FOLDER}/multimodel_warning.cif"
).parse()


# Fixture paths shared by the parametrized cases below.
_CIF_GZ = f"{MMCIF_FOLDER}/1BD2_short.cif.gz"
_PDB_GZ = f"{MMCIF_FOLDER}/1BD2_short.pdb.gz"


def _as_stringio(path):
    """Read *path* through ``util.anyopen`` and wrap the content in a StringIO."""
    return StringIO(util.anyopen(path).read())


@pytest.mark.skipif(not HAS_GEMMI, reason="gemmi not installed")
@pytest.mark.parametrize(
    "filename,fmt",
    [
        # CIF fixture: str path, Path, StringIO, gzip stream, NamedStream
        (_CIF_GZ, None),
        (Path(_CIF_GZ), None),
        (_as_stringio(_CIF_GZ), "CIF"),
        (gzip.open(_CIF_GZ), "CIF"),
        (util.NamedStream(_as_stringio(_CIF_GZ), "some_name.cif"), "CIF"),
        # PDB fixture: str path, Path, StringIO, anyopen stream, NamedStream
        (_PDB_GZ, None),
        (Path(_PDB_GZ), None),
        (_as_stringio(_PDB_GZ), "CIF"),
        (util.anyopen(_PDB_GZ), "CIF"),
        (util.NamedStream(_as_stringio(_PDB_GZ), "some_name.pdb"), "CIF"),
    ],
)
def test_input_methods(filename, fmt):
    """Build a Universe from each supported kind of input object.

    Each case passes ``filename`` (a path string, a ``Path``, or a
    stream) together with ``fmt`` as ``topology_format``; the test only
    requires that constructing the Universe does not raise.
    """
    mda.Universe(filename, topology_format=fmt)