-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathbonds.go
365 lines (334 loc) · 12.7 KB
/
bonds.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
/*
* atomicdata.go, part of gochem.
*
*
* Copyright 2021 Raul Mera <rmera{at}chemDOThelsinkiDOTfi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this program. If not, see
* <http://www.gnu.org/licenses/>.
*
*
*
*/
package chem
import (
"fmt"
"sort"
v3 "github.com/rmera/gochem/v3"
)
// constants from DOI:10.1186/1758-2946-3-33
const (
tooclose = 0.63
bondtol = 0.045 //the reference actually says 0.45 A but I strongly suspect that is a typo.
)
// Bond represents a chemical, covalent bond.
type Bond struct {
Index int
At1 *Atom
At2 *Atom
Dist float64
Energy float64 //One might prefer to use energy insteaf of order.
//NOTE
//I'm not sure if I leave just the order and let the user "Abuse" that field for energy
//or anything else they want, or if I keep this extra field.
Order float64 //Order 0 means undetermined
}
// Cross returns the atom bonded to the origin atom
// bond in the receiver.
func (B *Bond) Cross(origin *Atom) *Atom {
if origin.index == B.At1.index {
return B.At2
}
if origin.index == B.At2.index {
return B.At1
}
panic("Trying to cross a bond: The origin atom given is not present in the bond!") //I think this got to be a programming error, so a panic is warranted.
}
// Remove removes the receiver bond from the the Bond slices in the corresponding atoms
func (b *Bond) Remove() error {
lenb1 := len(b.At1.Bonds)
lenb2 := len(b.At2.Bonds)
b.At1.Bonds = takefromslice(b.At1.Bonds, b.Index)
b.At2.Bonds = takefromslice(b.At2.Bonds, b.Index)
err := new(CError)
errs := 0
err.msg = fmt.Sprintf("Failed to remove bond Index:%d", b.Index)
if len(b.At1.Bonds) == lenb1 {
err.msg = err.msg + fmt.Sprintf("from atom. Index:%d", b.At1.index)
err.Decorate("RemoveBond")
errs++
}
if len(b.At2.Bonds) == lenb2 {
err := new(CError)
if errs > 0 {
err.msg = err.msg + " and"
}
err.msg = err.msg + fmt.Sprintf("from atom. Index:%d", b.At2.index)
err.Decorate("RemoveBond")
errs++
}
if errs > 0 {
return err
}
return nil
}
// returns a new *Bond slice with the element id removed
func takefromslice(bonds []*Bond, id int) []*Bond {
newb := make([]*Bond, len(bonds)-1)
for _, v := range bonds {
if v.Index != id {
newb = append(newb, v)
}
}
return newb
}
// BondedOptions contains options for the BondePaths function
type BondedOptions struct {
OnlyShortest bool //Only return the shortest path between the atoms
path []int //
}
/******
**** The following is commented out and excluded from the API, as I don't think it is needed.
**** In any case, adding this function, or any other, wouldn't break the API, so it's best
**** to exclude when in doubt. Of course this is in itself an API break, but the bond functionality
**** is still very new, so the break is very unlikely to affect anyone.
//SetAlreadyWalkedPath set the unexported field "path" in BondedOptions to p.
//the path field represents the already-walked path, so you almost never
//want to set it. The exception is definidng your own function that calls
//BondedPath recursively, as I do here. This method was added to allow
//for such use.
func (B *BondedOptions)SetAlreadyWalkedPath(p []int){
B.path=p
}
******/
// BondedPaths determines the paths between at and the atom with
// Index targetIndex. It returns a slice of slices of int, where each sub-slice contains all the atoms
// in between at and the target (including the index of at)
// If there is no valid path, it returns nil.
// In the options structure BondeOption, OnlyShortest set to true causes only the shortest of the found paths to
// be returned. All atoms in the molecule need to have the "index" field filled.
// If the targetIndex is the same as that of the current atom, and the path is not given, nil, or
// of len 0, the function will search for a cyclic path back to the initial atom.
// if onlyshortest is true, only the shortest path will be returned (the other elements of the slice will be nil)
// This can be useful if you want to save memory on a very intrincate molecule.
func BondedPaths(at *Atom, targetIndex int, options ...*BondedOptions) [][]int {
if len(options) == 0 {
options = []*BondedOptions{{OnlyShortest: false, path: nil}}
}
onlyshortest := options[0].OnlyShortest
path := [][]int{options[0].path}
//I am not completely sure about this function signature. It is a candidate for API change.
if len(path) > 0 && len(path[0]) > 1 && path[0][len(path[0])-2] == at.index {
return nil //We are back to the atom we just had visited, not a valid path. We have to check this before checking if we completed the "quest"
//or, by just going back via the same bond, it would seem like we are at the finishing line.
}
if len(path) == 0 {
path = append(path, []int{at.index})
} else if path[0] == nil {
path[0] = []int{at.index}
} else {
path[0] = append(path[0], at.index)
}
if at.index == targetIndex && len(path[0]) > 1 {
return [][]int{path[0]} //We arrived! Note that if the starting node is the same as the target, we will
//only settle for a "cyclic" path that goes through at least another atom (really, at least 2 more atoms).
// We will not immediately return success on the first node. This is enforced by the len(path[0]>1 condition.
}
//Here we check that we are not back to an atom we previously visited. This checks for loops, and has to be performed
//after we check if we got to the goal, since the goal could be the same starting atom (if we look for a cyclic path).
if len(path[0]) > 1 && isInInt(path[0][:len(path[0])-1], at.index) {
return nil //means we took the same bond back to the previous node, or got trapped in a loop. not a valid path.
}
if len(at.Bonds) <= 1 {
return nil //means that we hit an end of the road. There is only one bond in the atom (i.e. the one leading to the previous node)
}
rets := make([][]int, 0, len(at.Bonds))
for _, v := range at.Bonds {
path2 := make([]int, len(path[0]))
copy(path2, path[0])
rets = append(rets, BondedPaths(v.Cross(at), targetIndex, &BondedOptions{OnlyShortest: onlyshortest, path: path2})...) //scary stuff
}
rets2 := make([][]int, 0, len(at.Bonds))
for _, v := range rets {
if v != nil {
rets2 = append(rets2, v)
}
}
if len(rets2) == 0 {
return nil
}
sort.Slice(rets2, func(i, j int) bool { return len(rets2[i]) < len(rets2[j]) })
if onlyshortest {
return [][]int{rets2[0]}
}
return rets2
}
// If this works, we could replace BondedPaths by just a call to this function with f=func(*Bond)bool{return true}
// BondedPaths determines the paths between at and the atom with
// Index targetIndex. It returns a slice of slices of int, where each sub-slice contains all the atoms
// in between at and the target (including the index of at)
// If there is no valid path, it returns nil.
// In the options structure BondeOption, OnlyShortest set to true causes only the shortest of the found paths to
// be returned. All atoms in the molecule need to have the "index" field filled.
// If the targetIndex is the same as that of the current atom, and the path is not given, nil, or
// of len 0, the function will search for a cyclic path back to the initial atom.
// if onlyshortest is true, only the shortest path will be returned (the other elements of the slice will be nil)
// This can be useful if you want to save memory on a very intrincate molecule.
func BondedPathsFunc(at *Atom, targetIndex int, f func(*Bond) bool, options ...*BondedOptions) [][]int {
if len(options) == 0 {
options = []*BondedOptions{{OnlyShortest: false, path: nil}}
}
onlyshortest := options[0].OnlyShortest
path := [][]int{options[0].path}
//I am not completely sure about this function signature. It is a candidate for API change.
if len(path) > 0 && len(path[0]) > 1 && path[0][len(path[0])-2] == at.index {
return nil //We are back to the atom we just had visited, not a valid path. We have to check this before checking if we completed the "quest"
//or, by just going back via the same bond, it would seem like we are at the finishing line.
}
if len(path) == 0 {
path = append(path, []int{at.index})
} else if path[0] == nil {
path[0] = []int{at.index}
} else {
path[0] = append(path[0], at.index)
}
if at.index == targetIndex && len(path[0]) > 1 {
return [][]int{path[0]} //We arrived! Note that if the starting node is the same as the target, we will
//only settle for a "cyclic" path that goes through at least another atom (really, at least 2 more atoms).
// We will not immediately return success on the first node. This is enforced by the len(path[0]>1 condition.
}
//Here we check that we are not back to an atom we previously visited. This checks for loops, and has to be performed
//after we check if we got to the goal, since the goal could be the same starting atom (if we look for a cyclic path).
if len(path[0]) > 1 && isInInt(path[0][:len(path[0])-1], at.index) {
return nil //means we took the same bond back to the previous node, or got trapped in a loop. not a valid path.
}
if len(at.Bonds) <= 1 {
return nil //means that we hit an end of the road. There is only one bond in the atom (i.e. the one leading to the previous node)
}
rets := make([][]int, 0, len(at.Bonds))
for _, v := range at.Bonds {
if !f(v) {
continue
}
path2 := make([]int, len(path[0]))
copy(path2, path[0])
rets = append(rets, BondedPathsFunc(v.Cross(at), targetIndex, f, &BondedOptions{OnlyShortest: onlyshortest, path: path2})...) //scary stuff
}
rets2 := make([][]int, 0, len(at.Bonds))
for _, v := range rets {
if v != nil {
rets2 = append(rets2, v)
}
}
if len(rets2) == 0 {
return nil
}
sort.Slice(rets2, func(i, j int) bool { return len(rets2[i]) < len(rets2[j]) })
if onlyshortest {
return [][]int{rets2[0]}
}
return rets2
}
// Ring represents a molecular cycle.
type Ring struct {
Atoms []int
planarity float64
}
// Contains returns true or false depending on whether
// the atom with the given index is part of the ring
func (R *Ring) Contains(index int) bool {
return isInInt(R.Atoms, index)
}
func (R *Ring) IsIn(index int) bool {
return R.Contains(index)
}
// Size returns the number of atoms in the ring
func (R *Ring) Size() int {
return len(R.Atoms)
}
// Planarity returns the planarity percentage of the receiver ring
// coords is the set of coordinates for the _entire_ molecule of which
// the ring is part. Planarity does not check that coords indeed corresponds
// to the correct molecule, so, doing so is the user's responsibility.
func (R *Ring) Planarity(coord *v3.Matrix) float64 {
if R.planarity != 0 {
return R.planarity
}
c := v3.Zeros(len(R.Atoms))
c.SomeVecs(coord, R.Atoms)
_, plan, err := EasyShape(c, 0.01)
if err != nil {
R.planarity = -1
return -1
}
R.planarity = plan
return plan
}
// AddHs Adds to the ring the H atoms bonded to its members
// mol is the _entire_ molecule of which the receiver ring is part.
// It will panic if an atom of the ring is not
// found in mol, or is not bonded to anything
func (R *Ring) AddHs(mol Atomer) {
newind := make([]int, 0, 6)
for _, v := range R.Atoms {
at := mol.Atom(v) //this can panic if you give the wrong mol object
for _, w := range at.Bonds {
at2 := w.Cross(at)
if at2.Symbol == "H" {
newind = append(newind, at2.index)
}
}
}
R.Atoms = append(R.Atoms, newind...)
}
// InWhichRing returns the index of the first ring found to which the
// at atom belongs, or -1 if the atom is not part of any ring.
func InWhichRing(at *Atom, rings []*Ring) int {
if len(rings) == 0 {
return -1
}
for i, v := range rings {
if v.IsIn(at.index) {
return i
}
}
return -1
}
// Identifies and returns all rings in mol, by
// searching for cyclic paths. if at least 1 element is given for addHs
// and the first element of those give is true, the Hs bound to the atoms in the rings
// will also be added to the ring.
func FindRings(coords *v3.Matrix, mol Atomer, addHs ...bool) []*Ring {
L := mol.Len()
var rings []*Ring
minplanarity := 95.0
for i := 0; i < L; i++ {
at := mol.Atom(i)
if InWhichRing(at, rings) == -1 {
paths := BondedPaths(at, at.index, &BondedOptions{OnlyShortest: true})
if len(paths) == 0 || len(paths[0][1:]) > 6 {
continue
}
r := &Ring{Atoms: paths[0]}
p := r.Planarity(coords)
if p > minplanarity {
if len(addHs) != 0 && addHs[0] {
r.AddHs(mol)
}
rings = append(rings, r)
}
}
}
return rings
}