| 
1 | 1 | import os  | 
 | 2 | +import string  | 
2 | 3 | import warnings  | 
3 | 4 | from collections import defaultdict  | 
4 | 5 | 
 
  | 
@@ -50,18 +51,16 @@ class Protein(Molecule):  | 
50 | 51 |     residue2id = {"GLY": 0, "ALA": 1, "SER": 2, "PRO": 3, "VAL": 4, "THR": 5, "CYS": 6, "ILE": 7, "LEU": 8,  | 
51 | 52 |                   "ASN": 9, "ASP": 10, "GLN": 11, "LYS": 12, "GLU": 13, "MET": 14, "HIS": 15, "PHE": 16,  | 
52 | 53 |                   "ARG": 17, "TYR": 18, "TRP": 19}  | 
 | 54 | +    residue_symbol2id = {"G": 0, "A": 1, "S": 2, "P": 3, "V": 4, "T": 5, "C": 6, "I": 7, "L": 8, "N": 9,  | 
 | 55 | +                         "D": 10, "Q": 11, "K": 12, "E": 13, "M": 14, "H": 15, "F": 16, "R": 17, "Y": 18, "W": 19}  | 
53 | 56 |     atom_name2id = {"C": 0, "CA": 1, "CB": 2, "CD": 3, "CD1": 4, "CD2": 5, "CE": 6, "CE1": 7, "CE2": 8,  | 
54 | 57 |                     "CE3": 9, "CG": 10, "CG1": 11, "CG2": 12, "CH2": 13, "CZ": 14, "CZ2": 15, "CZ3": 16,  | 
55 | 58 |                     "N": 17, "ND1": 18, "ND2": 19, "NE": 20, "NE1": 21, "NE2": 22, "NH1": 23, "NH2": 24,  | 
56 | 59 |                     "NZ": 25, "O": 26, "OD1": 27, "OD2": 28, "OE1": 29, "OE2": 30, "OG": 31, "OG1": 32,  | 
57 | 60 |                     "OH": 33, "OXT": 34, "SD": 35, "SG": 36, "UNK": 37}  | 
58 |  | -    alphabet2id = {" ": 0, "A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "H": 8, "I": 9, "J": 10,  | 
59 |  | -                   "K": 11, "L": 12, "M": 13, "N": 14, "O": 15, "P": 16, "Q": 17, "R": 18, "S": 19, "T": 20,  | 
60 |  | -                   "U": 21, "V": 22, "W": 23, "X": 24, "Y": 25, "Z": 26}  | 
 | 61 | +    alphabet2id = {c: i for i, c in enumerate(" " + string.ascii_uppercase + string.ascii_lowercase + string.digits)}  | 
61 | 62 |     id2residue = {v: k for k, v in residue2id.items()}  | 
62 |  | -    id2residue_symbol = {0: "G", 1: "A", 2: "S", 3: "P", 4: "V", 5: "T", 6: "C", 7: "I", 8: "L", 9: "N",  | 
63 |  | -                         10: "D", 11: "Q", 12: "K", 13: "E", 14: "M", 15: "H", 16: "F", 17: "R", 18: "Y", 19: "W"}  | 
64 |  | -    residue_symbol2id = {v: k for k, v in id2residue_symbol.items()}  | 
 | 63 | +    id2residue_symbol = {v: k for k, v in residue_symbol2id.items()}  | 
65 | 64 |     id2atom_name = {v: k for k, v in atom_name2id.items()}  | 
66 | 65 |     id2alphabet = {v: k for k, v in alphabet2id.items()}  | 
67 | 66 | 
 
  | 
@@ -214,7 +213,11 @@ def from_molecule(cls, mol, atom_feature="default", bond_feature="default", resi  | 
214 | 213 |                     type = "GLY"  | 
215 | 214 |                 residue_type.append(cls.residue2id[type])  | 
216 | 215 |                 residue_number.append(number)  | 
217 |  | -                if pdbinfo.GetInsertionCode() not in cls.alphabet2id or pdbinfo.GetChainId() not in cls.alphabet2id:  | 
 | 216 | +                if pdbinfo.GetInsertionCode() not in cls.alphabet2id:  | 
 | 217 | +                    warnings.warn(f"Fail to create the protein. Unknown insertion code {pdbinfo.GetInsertionCode()}.")  | 
 | 218 | +                    return None  | 
 | 219 | +                if pdbinfo.GetChainId() not in cls.alphabet2id:  | 
 | 220 | +                    warnings.warn(f"Fail to create the protein. Unknown chain id {pdbinfo.GetChainId()}.")  | 
218 | 221 |                     return None  | 
219 | 222 |                 insertion_code.append(cls.alphabet2id[pdbinfo.GetInsertionCode()])  | 
220 | 223 |                 chain_id.append(cls.alphabet2id[pdbinfo.GetChainId()])  | 
 | 
0 commit comments