11 #ifndef RD_FILEPARSERUTILS_H
12 #define RD_FILEPARSERUTILS_H
17 #include <boost/lexical_cast.hpp>
18 #include <boost/algorithm/string.hpp>
19 #include <boost/format.hpp>
26 namespace FileParserUtils {
// NOTE(review): only part of this template helper is visible in this view; its
// signature and the body of the `if` below are not shown.
// Work on a copy of `input` with leading and trailing whitespace removed.
29 std::string trimmed = boost::trim_copy(input);
// Blank-only input gets special handling when acceptSpaces is set (what is
// produced in that case is not visible here).
30 if (acceptSpaces && trimmed ==
"") {
// Convert the trimmed text to T; boost::lexical_cast throws
// boost::bad_lexical_cast when the text cannot be converted.
33 return boost::lexical_cast<T>(trimmed);
// Tail of a declaration whose acceptSpaces parameter defaults to false (the
// function name and the other parameters are outside this view).
37 bool acceptSpaces =
false);
// Tail of a second declaration; here acceptSpaces defaults to true.
39 bool acceptSpaces =
true);
// Parameter list of a declaration whose name and return type are outside this
// view. It takes an input stream pointer, a line counter by reference, a
// molecule pointer, a reference to a conformer pointer, and three further
// reference parameters (chiralityPossible, nAtoms, nBonds).
// NOTE(review): the non-const references are presumably outputs / in-out
// values filled in by the parser - confirm against the definition.
47 std::istream *inStream,
unsigned int &line,
RWMol *mol,
Conformer *&conf,
48 bool &chiralityPossible,
unsigned int &nAtoms,
unsigned int &nBonds,
// Both flags below default to true.
49 bool strictParsing =
true,
bool expectMEND =
true);
// Parameter list of a second declaration (name and return type are outside
// this view). The parameters match the declaration above it in this file,
// except that there is no expectMEND flag.
// NOTE(review): the non-const references are presumably outputs / in-out
// values - confirm against the definition.
53 std::istream *inStream,
unsigned int &line,
RWMol *mol,
Conformer *&conf,
54 bool &chiralityPossible,
unsigned int &nAtoms,
unsigned int &nBonds,
// strictParsing defaults to true.
55 bool strictParsing =
true);
// NOTE(review): fragment of a function template. The start of its signature,
// the conditions guarding the warnings, and the statement that consumes the
// converted value are outside this view. Judging by the call sites later in
// this file it is presumably applyMolListPropToAtoms<T> - confirm.
66 const std::string &prefix,
// Marker text meaning "no value for this atom"; defaults to "n/a" and can be
// overridden by a leading "[marker]" token in the list itself (see below).
67 const std::string &missingValueMarker =
"n/a") {
// pn with its first prefix.size() characters dropped.
68 std::string atompn = pn.substr(prefix.size());
// Read the molecule property named pn as text ...
69 std::string strVect = mol.
getProp<std::string>(pn);
// ... and split it on spaces, tabs and newlines; token_compress_on makes a
// run of adjacent separators count as a single separator.
70 std::vector<std::string> tokens;
71 boost::split(tokens, strVect, boost::is_any_of(
" \t\n"),
72 boost::token_compress_on);
// Warning text for a list with too few entries (the size check and the start
// of the log statement are not visible here).
75 <<
"Property list " << pn <<
" too short, only " << tokens.size()
76 <<
" elements found. Ignoring it." << std::endl;
// Start from the caller-supplied marker; first_token is the index of the
// first token that holds a value.
79 std::string mv = missingValueMarker;
80 size_t first_token = 0;
// A list with exactly one token more than the molecule has atoms, whose first
// token is wrapped in square brackets, carries its own missing-value marker:
// the text between the brackets.
// NOTE(review): front()/back() are called on tokens[0] with no visible check
// that it is non-empty; a property value starting with a separator may yield
// an empty first token - confirm this cannot happen.
81 if (tokens.size() == mol.
getNumAtoms() + 1 && tokens[0].front() ==
'[' &&
82 tokens[0].back() ==
']') {
83 mv = std::string(tokens[0].begin() + 1, tokens[0].end() - 1);
// Tail of a warning about an empty marker (its condition is not visible).
88 <<
" is empty." << std::endl;
// Walk the value tokens; only tokens that differ from the marker are
// processed.
90 for (
size_t i = first_token; i < tokens.size(); ++i) {
91 if (tokens[i] != mv) {
// Index of the atom this token belongs to: its position relative to the
// first value token.
92 unsigned int atomid = i - first_token;
// Convert the token text to T; a failure throws boost::bad_lexical_cast.
94 T apv = boost::lexical_cast<T>(tokens[i]);
96 }
// A token that cannot be converted is caught here and reported with the
// message below (the start of the log statement is not visible).
catch (
const boost::bad_lexical_cast &) {
98 <<
"Value " << tokens[i] <<
" for property " << pn <<
" of atom "
99 << atomid <<
" can not be parsed. Ignoring it." << std::endl;
// NOTE(review): fragment of a function template; its name and most of its
// parameter list (including where `prefix` comes from) are outside this view.
106 template <
typename T>
// missingValueMarker is taken by value here and defaults to "n/a".
108 const std::string missingValueMarker =
"n/a") {
// Only forward to applyMolListPropToAtoms<T> when pn starts with prefix and
// has at least one more character after it.
110 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
111 applyMolListPropToAtoms<T>(mol, pn, prefix, missingValueMarker);
// NOTE(review): fragment of a function whose name is outside this view. pn is
// taken by value; missingValueMarker defaults to "n/a".
119 ROMol &mol,
const std::string pn,
120 const std::string &missingValueMarker =
"n/a") {
// Four checks of identical shape follow: when pn starts with prefix and has
// at least one more character after it, the list is applied with value type
// std::string, std::int64_t, double and bool respectively.
// NOTE(review): prefix is presumably set to a different value before each
// check - those lines are not visible here; confirm.
123 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
124 applyMolListPropToAtoms<std::string>(mol, pn, prefix, missingValueMarker);
127 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
128 applyMolListPropToAtoms<std::int64_t>(mol, pn, prefix,
132 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
133 applyMolListPropToAtoms<double>(mol, pn, prefix, missingValueMarker);
136 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
137 applyMolListPropToAtoms<bool>(mol, pn, prefix, missingValueMarker);
// Parameter list fragment of a function whose name and body are outside this
// view: the molecule is passed by non-const reference and missingValueMarker
// defaults to "n/a".
147 ROMol &mol,
const std::string &missingValueMarker =
"n/a") {
// NOTE(review): fragment of a function template. Its name, the first
// parameters and the declarations of propVal / res are outside this view;
// later fragments in this file call getAtomPropertyList<T> with matching
// arguments, so this is presumably that function - confirm.
152 template <
typename T>
// missingValueMarker is a by-value copy (it is reassigned below) and defaults
// to the empty string; lineSize defaults to 190.
154 std::string missingValueMarker =
"",
155 unsigned int lineSize = 190) {
// A non-empty marker is written first, wrapped in square brackets and
// followed by a space.
158 if (!missingValueMarker.empty()) {
159 propVal += boost::str(boost::format(
"[%s] ") % missingValueMarker);
// Marker text becomes "n/a".
// NOTE(review): presumably the else-branch for an empty marker - the branch
// structure is not visible here.
161 missingValueMarker =
"n/a";
// One entry per atom of the molecule.
163 for (
const auto &atom : mol.
atoms()) {
// The entry is the marker unless the atom has the property, in which case it
// is the property value (read as T) rendered as text.
164 std::string apVal = missingValueMarker;
165 if (atom->hasProp(atomPropName)) {
166 T tVal = atom->getProp<T>(atomPropName);
167 apVal = boost::lexical_cast<std::string>(tVal);
// Line wrapping: if the current line plus this entry plus one more character
// would reach lineSize, the current line is appended to res with a newline
// (the statements around this are not visible here).
171 if (propVal.length() + apVal.length() + 1 >= lineSize) {
174 res += propVal +
"\n";
// Append the entry followed by a single space.
177 propVal += apVal +
" ";
// Handles text still held in propVal (the body is not visible here).
179 if (!propVal.empty()) {
// NOTE(review): fragment of a function whose name and return type are outside
// this view. missingValueMarker defaults to the empty string and lineSize to
// 190.
187 ROMol &mol,
const std::string &atomPropName,
188 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
// Molecule-level property name: "atom.iprop." followed by the atom property
// name.
189 std::string molPropName =
"atom.iprop." + atomPropName;
// The list text is produced by getAtomPropertyList<boost::int64_t>; the call
// that receives it starts on a line that is not visible here.
191 getAtomPropertyList<boost::int64_t>(
192 mol, atomPropName, missingValueMarker, lineSize));
// NOTE(review): fragment of a function whose name and return type are outside
// this view. missingValueMarker defaults to the empty string and lineSize to
// 190.
195 ROMol &mol,
const std::string &atomPropName,
196 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
// Molecule-level property name: "atom.dprop." followed by the atom property
// name.
197 std::string molPropName =
"atom.dprop." + atomPropName;
// The list text is produced by getAtomPropertyList<double>; the surrounding
// call and the remaining arguments are not visible here.
199 getAtomPropertyList<double>(mol, atomPropName, missingValueMarker,
// NOTE(review): fragment of a function whose name and return type are outside
// this view. missingValueMarker defaults to the empty string and lineSize to
// 190.
203 ROMol &mol,
const std::string &atomPropName,
204 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
// Molecule-level property name: "atom.bprop." followed by the atom property
// name.
205 std::string molPropName =
"atom.bprop." + atomPropName;
// The list text is produced by getAtomPropertyList<bool>; the surrounding
// call and the remaining arguments are not visible here.
207 getAtomPropertyList<bool>(mol, atomPropName, missingValueMarker,
// NOTE(review): fragment of a function whose name and return type are outside
// this view. missingValueMarker defaults to the empty string and lineSize to
// 190.
211 ROMol &mol,
const std::string &atomPropName,
212 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
// Molecule-level property name: "atom.prop." followed by the atom property
// name.
213 std::string molPropName =
"atom.prop." + atomPropName;
// The list text is produced by getAtomPropertyList<std::string>; the call
// that receives it starts on a line that is not visible here.
215 getAtomPropertyList<std::string>(mol, atomPropName,
216 missingValueMarker, lineSize));