RDKit
Open-source cheminformatics and machine learning.
RGroupDecompData.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2017 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef RGROUP_DECOMP_DATA
11 #define RGROUP_DECOMP_DATA
12 
13 #include "RGroupCore.h"
14 #include "RGroupDecomp.h"
15 #include "RGroupMatch.h"
16 #include "RGroupScore.h"
17 #include "RGroupFingerprintScore.h"
18 #include "RGroupGa.h"
19 #include <vector>
20 #include <map>
21 
22 // #define VERBOSE 1
23 
24 namespace RDKit {
26  // matches[mol_idx] == vector of potential matches
27  std::map<int, RCore> cores;
28  std::map<std::string, int> newCores; // new "cores" found along the way
30  // this caches the running product of permutations
31  // across calls to process()
32  size_t permutationProduct = 1;
33  // this caches the size of the previous matches vector
34  // such that the size of the current chunk can be inferred
35  size_t previousMatchSize = 0;
36  // the default for Greedy/GreedyChunks is keeping only the best
37  // permutation after each call to process()
38  bool prunePermutations = true;
40 
41  std::vector<std::vector<RGroupMatch>> matches;
42  std::set<int> labels;
43  std::vector<size_t> permutation;
44  unsigned int pruneLength = 0U;
46  std::map<int, std::vector<int>> userLabels;
47 
48  std::vector<int> processedRlabels;
49 
50  std::map<int, int> finalRlabelMapping;
52 
53  RGroupDecompData(const RWMol &inputCore,
55  : params(std::move(inputParams)) {
56  cores[0] = RCore(inputCore);
57  prepareCores();
58  }
59 
60  RGroupDecompData(const std::vector<ROMOL_SPTR> &inputCores,
62  : params(std::move(inputParams)) {
63  for (size_t i = 0; i < inputCores.size(); ++i) {
64  cores[i] = RCore(*inputCores[i]);
65  }
66  prepareCores();
67  }
68 
69  void prepareCores() {
70  for (auto &core : cores) {
71  RWMol *alignCore = core.first ? cores[0].core.get() : nullptr;
72  CHECK_INVARIANT(params.prepareCore(*core.second.core, alignCore),
73  "Could not prepare at least one core");
74  core.second.init();
75  core.second.labelledCore.reset(new RWMol(*core.second.core));
76  }
77  }
78 
79  void setRlabel(Atom *atom, int rlabel) {
80  PRECONDITION(rlabel > 0, "RLabels must be >0");
82  atom->setAtomMapNum(rlabel);
83  }
84 
86  std::string dLabel = "R" + std::to_string(rlabel);
88  setAtomRLabel(atom, rlabel);
89  }
90 
92  atom->setIsotope(rlabel);
93  }
94  }
95 
96  int getRlabel(Atom *atom) const {
98  return atom->getAtomMapNum();
99  }
101  return atom->getIsotope();
102  }
103 
105  unsigned int label = 0;
107  return label;
108  }
109  }
110 
111  CHECK_INVARIANT(0, "no valid r label found");
112  }
113 
114  double scoreFromPrunedData(const std::vector<size_t> &permutation,
115  bool reset = true) {
116  PRECONDITION(
118  "Scoring method is not fingerprint variance!");
119 
121  "Illegal permutation prune length");
122  if (permutation.size() < pruneLength * 1.5) {
123  for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
125  pos, permutation[pos], matches, labels);
126  }
127  double score =
129  if (reset) {
130  for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
132  pos, permutation[pos], matches, labels);
133  }
134  } else {
135  pruneLength = permutation.size();
136  }
137  return score;
138  } else {
139  if (reset) {
141  } else {
143  pruneLength = permutation.size();
146  }
147  }
148  }
149 
// Collapse the candidate matches of every molecule covered by `permutation`
// down to the single match that `permutation` currently selects, then reset
// `permutation` to all zeros (the only index that remains valid afterwards).
//
// `permutation` may be shorter than `matches`; in that case it is aligned
// with the *tail* of `matches` (see `offset` below), and the leading entries
// of `matches` are left untouched.
//
// NOTE(review): listing lines 162-164 are missing from this extract; the
// closing brace on listing line 165 belongs to a statement that is not
// visible here.
150  void prune() { // prune all but the current "best" permutation of matches
151  PRECONDITION(permutation.size() <= matches.size(),
152  "permutation.size() should be <= matches.size()");
// index of the first entry of `matches` that `permutation` refers to
153  size_t offset = matches.size() - permutation.size();
154  for (size_t mol_idx = 0; mol_idx < permutation.size(); ++mol_idx) {
155  std::vector<RGroupMatch> keepVector;
156  size_t mi = mol_idx + offset;
// .at() range-checks the selected match index
157  keepVector.push_back(matches[mi].at(permutation[mol_idx]));
158  matches[mi] = keepVector;
159  }
160 
// every covered molecule now has exactly one match, so index 0 selects it
161  permutation = std::vector<size_t>(permutation.size(), 0);
165  }
166  }
167 
168  // Return the RGroups with the current "best" permutation
169  // of matches.
//
// The result holds one RGroupMatch per entry of `matches`, in the same
// order. R-group labels whose substituents are hydrogen-only across all
// relevant results are erased from every returned match (details below).
//
// NOTE(review): listing lines 172-173 (the initializer of
// `removeAllHydrogenRGroups`) are missing from this extract.
170  std::vector<RGroupMatch> GetCurrentBestPermutation() const {
171  const bool removeAllHydrogenRGroups =
174 
175  std::vector<RGroupMatch> results; // std::map<int, RGroup> > result;
// When `permutation` is shorter than `matches`, match index 0 is used for
// every molecule; otherwise `permutation` supplies the index (range-checked).
176  bool isPruned = (permutation.size() < matches.size());
177  for (size_t i = 0; i < matches.size(); ++i) {
178  size_t pi = (isPruned ? 0 : permutation.at(i));
179  results.push_back(matches[i].at(pi));
180  }
181 
182  // * if a dynamically-added RGroup (i.e., when onlyMatchAtRGroups=false)
183  // is all hydrogens, remove it
184  // * if a user-defined RGroup is all hydrogens and either
185  // params.removeAllHydrogenRGroups==true or
186  // params.removeAllHydrogenRGroupsAndLabels==true, remove it
187 
188  // This logic is a bit tricky, find all labels that have common cores
189  // and analyze those sets independently.
190  // i.e. if core 1 doesn't have R1 then don't analyze it when looking
191  // at label 1
192  std::map<int, std::set<int>> labelCores; // map from label->cores
193  std::set<int> coresVisited;
// Build labelCores once per distinct core index referenced by the results.
194  for (auto &position : results) {
195  int core_idx = position.core_idx;
196  if (coresVisited.find(core_idx) == coresVisited.end()) {
197  coresVisited.insert(core_idx);
198  auto core = cores.find(core_idx);
// a core_idx with no entry in `cores` contributes no labels
199  if (core != cores.end()) {
200  for (auto rlabels : getRlabels(*core->second.core)) {
201  int rlabel = rlabels.first;
202  labelCores[rlabel].insert(core_idx);
203  }
204  }
205  }
206  }
207 
208  for (int label : labels) {
// positive labels are only examined when removeAllHydrogenRGroups is set;
// non-positive labels are always examined
209  if (label > 0 && !removeAllHydrogenRGroups) {
210  continue;
211  }
212  bool allH = true;
213  for (auto &position : results) {
214  R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
// operator[] default-inserts an empty set for a label that no visited core
// carries, so labelHasCore is false for every result in that case
215  bool labelHasCore = labelCores[label].find(position.core_idx) !=
216  labelCores[label].end();
// one non-hydrogen substituent on a core that carries the label keeps it
217  if (labelHasCore && rgroup != position.rgroups.end() &&
218  !rgroup->second->is_hydrogen) {
219  allH = false;
220  break;
221  }
222  }
223 
// drop the label from every returned match, not only from matching cores
224  if (allH) {
225  for (auto &position : results) {
226  position.rgroups.erase(label);
227  }
228  }
229  }
230  return results;
231  }
232 
// Bookkeeping for the R-label numbers that have already been handed out.
//
// `labels_used` is intentionally public; it holds every label registered via
// add() or allocated via next().
class UsedLabels {
 public:
  std::set<int> labels_used;

  // Register `rlabel` as used.
  // Returns true when the label was newly added, false when it was already
  // present (the set is left unchanged in that case).
  bool add(int rlabel) {
    // insert() already reports whether the key was new; no separate find()
    return labels_used.insert(rlabel).second;
  }

  // Allocate, record and return the smallest label >= 1 that is not in use.
  int next() {
    int candidate = 1;
    // std::set iterates in ascending order, so walking forward from the
    // first element >= 1 finds the first gap in a single pass instead of
    // restarting a lookup for every candidate.
    auto pos = labels_used.lower_bound(candidate);
    while (pos != labels_used.end() && *pos == candidate) {
      ++pos;
      ++candidate;
    }
    // `pos` is the first element greater than `candidate` (or end()), which
    // is exactly the insertion hint for the new key.
    labels_used.insert(pos, candidate);
    return candidate;
  }
};
253 
254  void addCoreUserLabels(const RWMol &core, std::set<int> &userLabels) {
255  auto atoms = getRlabels(core);
256  for (const auto &p : atoms) {
257  if (p.first > 0) {
258  userLabels.insert(p.first);
259  }
260  }
261  }
262 
263  void addAtoms(RWMol &mol,
264  const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd) {
265  for (const auto &i : atomsToAdd) {
266  mol.addAtom(i.second, false, true);
267  mol.addBond(i.first, i.second, Bond::SINGLE);
268  if (mol.getNumConformers()) {
269  MolOps::setTerminalAtomCoords(mol, i.second->getIdx(),
270  i.first->getIdx());
271  }
272  }
273  }
274 
// Rewrite the R labels on `core` in place so that they use the final
// numbering.
//
//  core             - molecule to relabel; dummy atoms may be added to it and
//                     the RLABEL / RLABEL_TYPE properties are cleared from
//                     the labelled atoms at the end
//  mappings         - in/out: original label -> final R label; entries are
//                     added for user labels (identity) and for index labels
//                     that have no mapping yet
//  used_labels      - in/out: tracks which R label numbers are taken
//  indexLabels      - labels to be numbered via used_labels.next()
//  extraAtomRLabels - per label, a vector whose *size* is the number of
//                     additional R labels to attach to that label's atom
//
// NOTE(review): listing lines 335 and 356 are missing from this extract
// (part of the dummy-atom condition and the head of an invariant check).
275  void relabelCore(RWMol &core, std::map<int, int> &mappings,
276  UsedLabels &used_labels, const std::set<int> &indexLabels,
277  const std::map<int, std::vector<int>> &extraAtomRLabels) {
278  // Now remap to proper rlabel ids
279  // if labels are positive, they come from User labels
280  // if they are negative, they come from indices and should be
281  // numbered *after* the user labels.
282  //
283  // Some indices are attached to multiple bonds,
284  // these rlabels should be incrementally added last
285  std::map<int, Atom *> atoms = getRlabels(core);
286  // a core only has one labelled index
287  // a secondary structure extraAtomRLabels contains the number
288  // of bonds between this atom and the side chain
289 
290  // a sidechain atom has a vector of the attachments back to the
291  // core that takes the place of numBondsToRlabel
292 
// NOTE(review): bondsToCore is not referenced in the visible lines
293  std::map<int, std::vector<int>> bondsToCore;
294  std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
295 
296  // Deal with user supplied labels
297  for (const auto &rlabels : atoms) {
298  int userLabel = rlabels.first;
// only negative labels are skipped here, so label 0 is handled as a user label
299  if (userLabel < 0) {
300  continue; // not a user specified label
301  }
302  Atom *atom = rlabels.second;
// user labels keep their own number
303  mappings[userLabel] = userLabel;
304  used_labels.add(userLabel);
305 
306  if (atom->getAtomicNum() == 0 &&
307  atom->getDegree() == 1) { // add to existing dummy/rlabel
308  setRlabel(atom, userLabel);
309  } else { // adds new rlabel
// the new dummy is only queued here; it joins the molecule in addAtoms() below
310  auto *newAt = new Atom(0);
311  setRlabel(newAt, userLabel);
312  atomsToAdd.emplace_back(atom, newAt);
313  }
314  }
315 
316  // Deal with non-user supplied labels
317  for (auto newLabel : indexLabels) {
318  auto atm = atoms.find(newLabel);
// this core does not carry the label
319  if (atm == atoms.end()) {
320  continue;
321  }
322 
323  Atom *atom = atm->second;
324 
// reuse a number already assigned to this label (mappings is shared by the
// caller across calls), otherwise allocate the next free one
325  int rlabel;
326  auto mapping = mappings.find(newLabel);
327  if (mapping == mappings.end()) {
328  rlabel = used_labels.next();
329  mappings[newLabel] = rlabel;
330  } else {
331  rlabel = mapping->second;
332  }
333 
334  if (atom->getAtomicNum() == 0 &&
336  *atom)) { // add to dummy
337  setRlabel(atom, rlabel);
338  } else {
339  auto *newAt = new Atom(0);
340  setRlabel(newAt, rlabel);
341  atomsToAdd.emplace_back(atom, newAt);
342  }
343  }
344 
345  // Deal with multiple bonds to the same label
346  for (const auto &extraAtomRLabel : extraAtomRLabels) {
347  auto atm = atoms.find(extraAtomRLabel.first);
348  if (atm == atoms.end()) {
349  continue; // label not used in the rgroup
350  }
351  Atom *atom = atm->second;
352 
// one freshly numbered dummy per element of the vector; these numbers are
// not recorded in `mappings`
353  for (size_t i = 0; i < extraAtomRLabel.second.size(); ++i) {
354  int rlabel = used_labels.next();
355  // Is this necessary?
357  atom->getAtomicNum() > 1,
358  "Multiple attachments to a dummy (or hydrogen) is weird.");
359  auto *newAt = new Atom(0);
360  setRlabel(newAt, rlabel);
361  atomsToAdd.emplace_back(atom, newAt);
362  }
363  }
364 
// attach all queued dummies, then strip the temporary labelling properties
// from the originally labelled atoms
365  addAtoms(core, atomsToAdd);
366  for (const auto &rlabels : atoms) {
367  auto atom = rlabels.second;
368  atom->clearProp(RLABEL);
369  atom->clearProp(RLABEL_TYPE);
370  }
371  core.updatePropertyCache(false); // this was github #1550
372  }
373 
// Apply the final R label numbering to one R group's combined molecule.
//
//  rgroup   - R group to relabel; rgroup.combinedMol must be set. The
//             molecule is edited in place and rgroup.labelled is set to true.
//  mappings - original label -> final R label; every label listed in an
//             atom's SIDECHAIN_RLABELS property must have an entry.
//
// A molecule already carrying the `done` property is not processed again.
//
// NOTE(review): listing lines 424, 444 and 446 are missing from this
// extract; the block around removeHs() is presumably guarded by a condition
// on listing line 424 - confirm against the real header.
374  void relabelRGroup(RGroupData &rgroup, const std::map<int, int> &mappings) {
375  PRECONDITION(rgroup.combinedMol.get(), "Unprocessed rgroup");
376 
377  RWMol &mol = *rgroup.combinedMol.get();
378 
// already relabelled: just make sure the flag reflects it
379  if (rgroup.combinedMol->hasProp(done)) {
380  rgroup.labelled = true;
381  return;
382  }
383 
384  mol.setProp(done, true);
385  std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
// original atomic number of each atom temporarily turned into a dummy below,
// keyed by the atom's RLABEL_CORE_INDEX value
386  std::map<int, int> rLabelCoreIndexToAtomicWt;
387 
388  for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
389  ++atIt) {
390  Atom *atom = *atIt;
391  if (atom->hasProp(SIDECHAIN_RLABELS)) {
392  atom->setIsotope(0);
393  const std::vector<int> &rlabels =
394  atom->getProp<std::vector<int>>(SIDECHAIN_RLABELS);
395  // switch on atom mappings or rlabels....
396 
397  for (int rlabel : rlabels) {
398  auto label = mappings.find(rlabel);
399  CHECK_INVARIANT(label != mappings.end(), "Unprocessed mapping");
400 
// dummy atom: label it directly; atom matched to the core: turn it into a
// dummy and label it; any other atom: queue a new labelled dummy neighbour
401  if (atom->getAtomicNum() == 0) {
402  setRlabel(atom, label->second);
403  } else if (atom->hasProp(RLABEL_CORE_INDEX)) {
404  atom->setAtomicNum(0);
405  setRlabel(atom, label->second);
406  } else {
407  auto *newAt = new Atom(0);
408  setRlabel(newAt, label->second);
409  atomsToAdd.emplace_back(atom, newAt);
410  }
411  }
412  }
413  if (atom->hasProp(RLABEL_CORE_INDEX)) {
414  // convert to dummy as we don't want to collapse hydrogens onto the core
415  // match
416  auto rLabelCoreIndex = atom->getProp<int>(RLABEL_CORE_INDEX);
// NOTE(review): if the branch above already set this atom's atomic number
// to 0, the value remembered here is 0 - confirm that is intended
417  rLabelCoreIndexToAtomicWt[rLabelCoreIndex] = atom->getAtomicNum();
418  atom->setAtomicNum(0);
419  }
420  }
421 
422  addAtoms(mol, atomsToAdd);
423 
// `blocker` lives until the closing brace of this block
425  RDLog::LogStateSetter blocker;
426  bool implicitOnly = false;
427  bool updateExplicitCount = false;
428  bool sanitize = false;
429  MolOps::removeHs(mol, implicitOnly, updateExplicitCount, sanitize);
430  }
431 
432  mol.updatePropertyCache(false); // this was github #1550
433  rgroup.labelled = true;
434 
435  // Restore any core matches that we have set to dummy
436  for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
437  ++atIt) {
438  Atom *atom = *atIt;
439  if (atom->hasProp(RLABEL_CORE_INDEX)) {
440  // don't need to set IsAromatic on atom - that seems to have been saved
441  atom->setAtomicNum(
442  rLabelCoreIndexToAtomicWt[atom->getProp<int>(RLABEL_CORE_INDEX)]);
443  atom->setNoImplicit(true);
445  }
447  }
448 
449 #ifdef VERBOSE
450  std::cerr << "Relabel Rgroup smiles " << MolToSmiles(mol) << std::endl;
451 #endif
452  }
453 
454  // relabel the core and sidechains using the specified user labels
455  // if matches exist for non labelled atoms, these are added as well
//
// Steps, in order:
//  1. take the current best permutation and collect the labels it uses
//     (positive -> userLabels, negative -> indexLabels unless
//     params.onlyMatchAtRGroups is set);
//  2. rebuild every core's labelledCore from its core and merge the core's
//     positive labels into userLabels;
//  3. clear finalRlabelMapping, reserve all user labels, then relabel each
//     labelled core and each R group of the best permutation;
//  4. check that the final mapping assigns distinct R labels.
//
// NOTE(review): listing line 527 (the head of the last invariant check) is
// missing from this extract.
456  void relabel() {
457  std::vector<RGroupMatch> best = GetCurrentBestPermutation();
458 
459  // get the labels used
// note: this local shadows the data member that is also named userLabels
460  std::set<int> userLabels;
461  std::set<int> indexLabels;
462 
463  // Go through all the RGroups and find out which labels were
464  // actually used.
465 
466  // some atoms will have multiple attachment points, i.e. cycles
467  // split these up into new rlabels if necessary
468  // These are detected at match time
469  // This vector will hold the extra (new) labels required
470  std::map<int, std::vector<int>> extraAtomRLabels;
471 
472  for (auto &it : best) {
473  for (auto &rgroup : it.rgroups) {
474  if (rgroup.first > 0) {
475  userLabels.insert(rgroup.first);
476  }
477  if (rgroup.first < 0 && !params.onlyMatchAtRGroups) {
478  indexLabels.insert(rgroup.first);
479  }
480 
481  std::map<int, int> rlabelsUsedInRGroup =
482  rgroup.second->getNumBondsToRlabels();
483  for (auto &numBondsUsed : rlabelsUsedInRGroup) {
484  // Make space for the extra labels
// NOTE(review): resize() overwrites the previous size rather than keeping
// the maximum, so the last R group visited with more than one bond to a
// label decides the count - confirm that is intended
485  if (numBondsUsed.second > 1) { // multiple rgroup bonds to same atom
486  extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
487  1);
488  }
489  }
490  }
491  }
492 
493  // find user labels that are not present in the decomposition
494  for (auto &core : cores) {
// start again from a fresh copy of the core
495  core.second.labelledCore.reset(new RWMol(*core.second.core));
496  addCoreUserLabels(*core.second.labelledCore, userLabels);
497  }
498 
499  // Assign final RGroup labels to the cores and propagate these to
500  // the scaffold
501  finalRlabelMapping.clear();
502 
503  UsedLabels used_labels;
504  // Add all the user labels now to prevent an index label being assigned to a
505  // user label when multiple cores are present (e.g. the user label is
506  // present in the second core, but not the first).
507  for (auto userLabel : userLabels) {
508  used_labels.add(userLabel);
509  }
// finalRlabelMapping and used_labels are shared across all cores
510  for (auto &core : cores) {
511  relabelCore(*core.second.labelledCore, finalRlabelMapping, used_labels,
512  indexLabels, extraAtomRLabels);
513  }
514 
515  for (auto &it : best) {
516  for (auto &rgroup : it.rgroups) {
517  relabelRGroup(*rgroup.second, finalRlabelMapping);
518  }
519  }
520 
// the mapping must be one-to-one: as many distinct values as keys
521  std::set<int> uniqueMappedValues;
522  std::transform(finalRlabelMapping.cbegin(), finalRlabelMapping.cend(),
523  std::inserter(uniqueMappedValues, uniqueMappedValues.end()),
524  [](const std::pair<int, int> &p) { return p.second; });
525  CHECK_INVARIANT(finalRlabelMapping.size() == uniqueMappedValues.size(),
526  "Error in uniqueness of final RLabel mapping");
528  uniqueMappedValues.size() == userLabels.size() + indexLabels.size(),
529  "Error in final RMapping size");
530  }
531 
// Score `permutation` with the method selected by params.scoreMethod.
//
//  permutation                  - per-molecule match indices to score
//  fingerprintVarianceScoreData - optional scratch/accumulator passed through
//                                 in the FingerprintVariance case; may be null
//
// Any path that leaves the switch (including an unrecognised score method)
// returns NAN.
//
// NOTE(review): listing lines 538 and 541 (the start of each case body,
// presumably the `return` of the respective scoring call) are missing from
// this extract.
532  double score(const std::vector<size_t> &permutation,
533  FingerprintVarianceScoreData *fingerprintVarianceScoreData =
534  nullptr) const {
// params.scoreMethod is converted to the RGroupScore enum before dispatch
535  RGroupScore scoreMethod = static_cast<RGroupScore>(params.scoreMethod);
536  switch (scoreMethod) {
537  case Match:
539  break;
540  case FingerprintVariance:
542  fingerprintVarianceScoreData);
543  break;
544  default:;
545  }
546  return NAN;
547  }
548 
550  bool finalize = false) {
551  if (matches.empty()) {
552  return RGroupDecompositionProcessResult(false, -1);
553  }
554  auto t0 = std::chrono::steady_clock::now();
555  std::unique_ptr<CartesianProduct> iterator;
557 
558  if (params.matchingStrategy == GA) {
559  RGroupGa ga(*this, params.timeout >= 0 ? &t0 : nullptr);
560  if (ga.numberPermutations() < 100 * ga.getPopsize()) {
562  } else {
563  if (params.gaNumberRuns > 1) {
564  auto results = ga.runBatch();
565  auto best = max_element(results.begin(), results.end(),
566  [](const GaResult &a, const GaResult &b) {
567  return a.rGroupScorer.getBestScore() <
568  b.rGroupScorer.getBestScore();
569  });
570  rGroupScorer = best->rGroupScorer;
571  } else {
572  auto result = ga.run();
573  rGroupScorer = result.rGroupScorer;
574  }
575  }
576  }
577  size_t offset = 0;
578  if (params.matchingStrategy != GA) {
579  // Exhaustive search, get the MxN matrix
580  // (M = matches.size(): number of molecules
581  // N = iterator.maxPermutations)
582  std::vector<size_t> permutations;
583 
584  if (pruneMatches && params.scoreMethod != FingerprintVariance) {
585  offset = previousMatchSize;
586  }
587  previousMatchSize = matches.size();
588  std::transform(
589  matches.begin() + offset, matches.end(),
590  std::back_inserter(permutations),
591  [](const std::vector<RGroupMatch> &m) { return m.size(); });
592  permutation = std::vector<size_t>(permutations.size(), 0);
593 
594  // run through all possible matches and score each
595  // set
596  size_t count = 0;
597 #ifdef DEBUG
598  std::cerr << "Processing" << std::endl;
599 #endif
600  std::unique_ptr<CartesianProduct> it(new CartesianProduct(permutations));
601  iterator = std::move(it);
602  // Iterates through the permutation idx, i.e.
603  // [m1_permutation_idx, m2_permutation_idx, m3_permutation_idx]
604 
605  while (iterator->next()) {
606  if (count > iterator->maxPermutations) {
607  throw ValueErrorException("next() did not finish");
608  }
609 #ifdef DEBUG
610  std::cerr << "**************************************************"
611  << std::endl;
612 #endif
613  double newscore = params.scoreMethod == FingerprintVariance
614  ? scoreFromPrunedData(iterator->permutation)
615  : score(iterator->permutation);
616 
617  if (fabs(newscore - rGroupScorer.getBestScore()) <
618  1e-6) { // heuristic to overcome floating point comparison issues
619  rGroupScorer.pushTieToStore(iterator->permutation);
620  } else if (newscore > rGroupScorer.getBestScore()) {
621 #ifdef DEBUG
622  std::cerr << " ===> current best:" << newscore << ">"
623  << rGroupScorer.getBestScore() << std::endl;
624 #endif
625  rGroupScorer.setBestPermutation(iterator->permutation, newscore);
627  rGroupScorer.pushTieToStore(iterator->permutation);
628  }
629  ++count;
630  }
631  }
632 
633  if (rGroupScorer.tieStoreSize() > 1) {
636  } else {
638  }
640  if (pruneMatches || finalize) {
641  prune();
642  }
643 
644  if (finalize) {
645  relabel();
646  }
647 
649  }
650 };
651 } // namespace RDKit
652 
653 #endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
The class for representing atoms.
Definition: Atom.h:68
void setNoImplicit(bool what)
sets our noImplicit flag, indicating whether or not we are allowed to have implicit Hs
Definition: Atom.h:214
void setAtomicNum(int newNum)
sets our atomic number
Definition: Atom.h:122
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:120
int getAtomMapNum() const
Definition: Atom.h:382
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
Definition: Atom.h:370
unsigned int getIsotope() const
returns our isotope number
Definition: Atom.h:234
unsigned int getDegree() const
@ SINGLE
Definition: Bond.h:58
bool getPropIfPresent(const std::string &key, T &res) const
Definition: RDProps.h:121
void clearProp(const std::string &key) const
clears the value of a property
Definition: RDProps.h:137
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
Definition: RDProps.h:107
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: RDProps.h:126
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
Definition: RDProps.h:77
vector< GaResult > runBatch()
GaResult run(int runNumber=1)
unsigned int numberPermutations() const
Definition: RGroupGa.h:127
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void breakTies(const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
Definition: RGroupScore.h:83
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
Definition: RGroupScore.h:99
double getBestScore() const
return the best score found so far
Definition: RGroupScore.h:101
unsigned int getNumConformers() const
Definition: ROMol.h:542
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:40
static std::string to_string(const Descriptor &desc)
Definition: Descriptor.h:54
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
RDKIT_RDGENERAL_EXPORT const std::string _MolFileRLabel
Std stuff.
Definition: Abbreviations.h:18
@ FingerprintVariance
Definition: RGroupDecomp.h:63
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_CORE_INDEX
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabels,atom mapping for the current molecule.
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string done
@ Exhaustive
Definition: RGroupDecomp.h:43
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string SIDECHAIN_RLABELS
@ MDLRGroup
Definition: RGroupDecomp.h:51
@ AtomMap
Definition: RGroupDecomp.h:49
@ Isotope
Definition: RGroupDecomp.h:50
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
Definition: RGroupDecomp.h:207
bool isAnyAtomWithMultipleNeighborsOrNotUserRLabel(const Atom &atom)
Definition: RGroupUtils.h:66
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_TYPE
const unsigned int EMPTY_CORE_LABEL
Definition: RGroupUtils.h:25
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr)
iterate through all possible permutations of the rgroups
Definition: RGroupScore.h:20
void addVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
void removeVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
RCore is the core common to a series of molecules.
Definition: RGroupCore.h:24
A single rgroup attached to a given core.
Definition: RGroupData.h:27
boost::shared_ptr< RWMol > combinedMol
Definition: RGroupData.h:28
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * >> &atomsToAdd)
std::vector< int > processedRlabels
int getRlabel(Atom *atom) const
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int >> &extraAtomRLabels)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
bool onlyMatchAtRGroups
only allow rgroup decomposition at the specified rgroups
Definition: RGroupDecomp.h:84
bool removeAllHydrogenRGroups
remove all user-defined rgroups that only have hydrogens
Definition: RGroupDecomp.h:86
double timeout
timeout in seconds. <=0 indicates no timeout
Definition: RGroupDecomp.h:95
bool removeHydrogensPostMatch
remove all hydrogens from the output molecules
Definition: RGroupDecomp.h:91
bool prepareCore(RWMol &, const RWMol *alignCore)