Skip to content

Instantly share code, notes, and snippets.

@matteoferla
Created June 10, 2024 12:51
Show Gist options
  • Save matteoferla/b517f1a9b6c196e1312ce1511ad0407c to your computer and use it in GitHub Desktop.
Save matteoferla/b517f1a9b6c196e1312ce1511ad0407c to your computer and use it in GitHub Desktop.
MMFF94 AtomTypes from RDKit
"""
The values from https://towhee.sourceforge.net/forcefields/mmff94.html
were extracted and compared to https://raw.githubusercontent.com/rdkit/rdkit/master/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.cpp
NB. Do not ask ChatGPT-4 for the atom types, as its answers will be wrong.
"""
# Imported here because the annotations below are evaluated at module level,
# before the file's later import section is reached.
from typing import Dict, List

# MMFF94 numeric atom type -> {symbolic type name: description}.
# Keys are ints so the numbers returned by RDKit's ``GetMMFFAtomType`` can be
# used for lookup directly. The symbolic names keep their trailing padding
# here; the stripped forms are exposed through ``atomtype2shortnames``.
# NOTE(review): a few descriptions contain apparent typos (e.g. "IROM",
# "C ARBONIC", "IN IN", "TRICOODR"); they are left as-is in case they mirror
# the table they were extracted from -- confirm before correcting.
atomtype2namedex: Dict[int, Dict[str, str]] = {
    1: {"CR ": "ALKYL CARBON, SP3"},
    2: {"C=C ": "VINYLIC CARBON, SP2", "CSP2": "GENERIC SP2 CARBON"},
    3: {
        "C=O ": "GENERAL CARBONYL CARBON",
        "C=N ": "SP2 CARBON IN C=N",
        "CGD ": "GUANIDINE CARBON, DOUBLY BONDED TO N",
        "C=OR": "KETONE OR ALDEHYDE CARBONYL CARBON",
        "C=ON": "AMIDE CARBONYL CARBON",
        "CONN": "UREA CARBONYL CARBON",
        "COO ": "CARBOXYLIC ACID OR ESTER CARBONYL CARBON",
        "COON": "CARBAMATE CARBONYL CARBON",
        "COOO": "C ARBONIC ACID OR ESTER CARBONYL CARBON",
        "C=OS": "THIOESTER CARBONYL CARBON, DOUBLE BONDED TO O",
        "C=S ": "THIOESTER CARBON, DOUBLY BONDED TO S",
        "C=SN": "THIOAMIDE, CARBON, DOUBLY BONDED TO S",
        "CSO2": "CARBON IN >C=SO2",
        "CS=O": "CARBON IN >C=S=O (SULFINYL GROUP)",
        "CSS ": "THIOCARBOXYLIC ACID OR ESTER CARBONYL CARBON",
        "C=P ": "CARBON DOUBLE BONDED TO PHOSPHOROUS",
    },
    4: {"CSP ": "ACETYLENIC CARBON", "=C= ": "ALLENIC CARBON"},
    5: {"HC ": "H ATTACHED TO C", "HSI ": "H ATTACHED TO SI"},
    11: {"F ": "FLUORINE"},
    12: {"CL ": "CHLORINE"},
    13: {"BR ": "BROMINE"},
    14: {"I ": "IODINE"},
    20: {"CR4R": "CARBON IN 4-MEMBERED RINGS"},
    21: {
        "HOR ": "HYDROGEN IN ALCOHOLS",
        "HO ": "GENERAL H ON OXYGEN",
        "HOM ": "HYDROGEN IN HYDROXIDE ANION",
    },
    22: {"CR3R": "CARBON IN A 3-MEMBERED RING"},
    23: {
        "HNR ": "H-N(SP3)",
        "H3N ": "H-N(SP3), AMMONIA",
        "HPYL": "H-N IN PYRROLE",
        "HNOX": "H-N IN IN A N-OXIDE",
        "HNM ": "H ON DICOORD, NEGATIVELY CHARGED NITROGEN",
        "HN ": "GENERAL H ON NITROGEN",
    },
    24: {
        "HOCO": "H-O IN CARBOXYLIC ACIDS",
        "HOP ": "HYDROGEN ON OXYGEN ATTACHED TO PHOSPHOROUS",
    },
    27: {"HN=N": "AZO HYDROGEN", "HN=C": "IMINE HYDROGEN"},
    28: {
        "HNCO": "AMIDE HYDROGEN",
        "HNCS": "THIOAMIDE HYDROGEN",
        "HNCC": "H-N IN ENAMINES",
        "HNCN": "H-N IN H-N-C=N",
        "HNNC": "H-N IN H-N-N=C",
        "HNNN": "H-N IN H-N-N=N",
        "HNSO": "H-N IN SULFONAMIDE",
        "HNPO": "H-N IN PHOSPHONAMIDE",
        "HNC%": "HYDROGEN ON N ATTACHED TO TRIPLY BONDED CARBON",
        "HSP2": "GENERAL H ON SP2 NITROGEN",
    },
    29: {"HOCC": "H-O IN ENOLS AND PHENOLS", "HOCN": "H-O IN HO-C=N"},
    30: {"CE4R": "OLEFINIC CARBON IN 4-MEMBERED RINGS"},
    31: {"HOH ": "HYDROGEN IN H2O"},
    33: {"HOS ": "H ON OXYGEN ATTACHED TO SULFUR"},
    36: {
        "HNR+": "H ON QUATERNARY NITROGEN",
        "HIM+": "H ON IMIDAZOLIUM-TYPE NITROGEN",
        "HPD+": "H ON PROTONATED PYRIDINE NITROGEN",
        "HNN+": "H ON AMIDINIUM-TYPE NITROGEN",
        "HNC+": "H ON PROTONATED IMINE NITROGEN",
        "HGD+": "H ON GUANIDINIUM-TYPE NITROGEN",
        "HN5+": "H ON N5+, N5A+ OR N5B+",
    },
    37: {"CB ": "CARBON AS IN BENZENE, PYRROLE"},
    41: {
        "CO2M": "CARBOXYLATE ANION CARBON (base charge -0.5)",
        "CS2M": "CARBON IN THIOCARBOXYLATE ANION",
    },
    50: {"HO+ ": "HYDROGEN ON O+ OXYGEN"},
    52: {"HO=+": "HYDROGEN ON OXENIUM OXYGEN"},
    57: {
        "CGD+": "GUANIDINIUM CARBON",
        "CNN+": "C IN +N=C-N RESONANCE STRUCTURES",
    },
    60: {"C% ": "ISONITRILE CARBON"},
    63: {"C5A ": "ALPHA CARBON IN 5-MEMBERED HETEROAROMATIC RING"},
    64: {"C5B ": "BETA CARBON IN 5-MEMBERED HETEROAROMATIC RING"},
    71: {
        "HS ": "H ATTACHED TO DIVALENT, DICOORDINATE S",
        "HS=N": "H ATTACHED TO TETRAVALENT, TRICOODR S DBL BONDED",
        "HP ": "H ATTACHED TO TRI- OR TETRACOORDINATE PHOSPHORUS",
    },
    77: {"CLO4": "CHLORINE IN PERCHLORATE ANION, CLO4(-)"},
    78: {"C5 ": "GENERAL CARBON IN 5-MEMBERED HETEROAROMATIC RING"},
    80: {"CIM+": "C IN N-C-N IN IMIDAZOLIUM ION"},
    87: {"FE+2": "IRON +2 CATION"},
    88: {"FE+3": "IROM +3 CATION"},
    89: {"F- ": "FLUORIDE ANION"},
    90: {"CL- ": "CHLORIDE ANION"},
    91: {"BR- ": "BROMIDE ANION"},
    96: {"CA+2": "DIPOSITIVE CALCIUM"},
    97: {"CU+1": "MONOPOSITIVE COPPER"},
    98: {"CU+2": "DIPOSITIVE COPPER"},
}

# MMFF94 numeric atom type -> element name (full name, e.g. "Carbon").
atomtype2symbol: Dict[int, str] = {
    1: "Carbon", 2: "Carbon", 3: "Carbon", 4: "Carbon", 5: "Hydrogen",
    11: "Fluorine", 12: "Chlorine", 13: "Bromine", 14: "Iodine",
    20: "Carbon", 21: "Hydrogen", 22: "Carbon", 23: "Hydrogen",
    24: "Hydrogen", 27: "Hydrogen", 28: "Hydrogen", 29: "Hydrogen",
    30: "Carbon", 31: "Hydrogen", 33: "Hydrogen", 36: "Hydrogen",
    37: "Carbon", 41: "Carbon", 50: "Hydrogen", 52: "Hydrogen",
    57: "Carbon", 60: "Carbon", 63: "Carbon", 64: "Carbon",
    71: "Hydrogen", 77: "Chlorine", 78: "Carbon", 80: "Carbon",
    87: "Iron", 88: "Iron", 89: "Fluorine", 90: "Chlorine",
    91: "Bromine", 96: "Calcium", 97: "Copper", 98: "Copper",
}

# MMFF94 numeric atom type -> list of its symbolic names, padding stripped.
atomtype2shortnames: Dict[int, List[str]] = {
    atomtype: [name.strip() for name in names]
    for atomtype, names in atomtype2namedex.items()
}

# MMFF94 numeric atom type -> the same symbolic names joined with "|".
atomtype2shortname: Dict[int, str] = {
    atomtype: '|'.join(names)
    for atomtype, names in atomtype2shortnames.items()
}
# ------------------------------------------------------------------
from rdkit import Chem
from rdkit.Chem import AllChem
from typing import Sequence
def get_atomtypes(mol: Chem.Mol) -> Sequence[int]:
    """
    Return the MMFF94 numeric atom type of every atom in ``mol``.

    Hydrogens are added to a copy of the molecule before typing; the input
    molecule itself is left untouched. Only the types of the atoms present
    in the input are returned, in atom-index order. The numbers correspond
    to the MMFF94 type numbers tabulated in this module's lookup tables
    (e.g. ``atomtype2shortnames``).

    :param mol: the molecule whose atoms are to be typed
    :return: a tuple with one MMFF94 atom type number per atom of ``mol``
    :raises ValueError: if RDKit cannot set up MMFF94 properties for ``mol``
    """
    n_atoms = mol.GetNumAtoms()
    protonated = AllChem.AddHs(mol)
    # No conformer is embedded: the atom types are read straight from the
    # MMFF properties object and no coordinates are used by this function.
    props = AllChem.MMFFGetMoleculeProperties(protonated, 'MMFF94')
    if props is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('RDKit could not assign MMFF94 atom types to this molecule')
    # GetMMFFAtomType returns a number, not a symbolic name.
    # Only the first ``n_atoms`` indices are queried; like the original
    # slicing, this relies on AddHs appending the new hydrogens after the
    # atoms that were already present.
    return tuple(props.GetMMFFAtomType(idx) for idx in range(n_atoms))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment