Skip to content

Commit

Permalink
fix: support xyz format (#669)
Browse files Browse the repository at this point in the history
* fix: support xyz format

* fix: support nopbc in command line
  • Loading branch information
njzjz committed Apr 1, 2020
1 parent a055f0f commit 4d5c168
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 53 deletions.
139 changes: 90 additions & 49 deletions reacnetgenerator/_detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class _Detect(SharedRNGData, metaclass=ABCMeta):
subclasses = {}

def __init__(self, rng):
SharedRNGData.__init__(self, rng, ['inputfilename', 'atomname', 'stepinterval', 'nproc', 'pbc'],
SharedRNGData.__init__(self, rng, ['inputfilename', 'atomname', 'stepinterval', 'nproc', 'pbc',
'cell'],
['N', 'atomtype', 'step', 'timestep', 'temp1it', 'moleculetempfilename'])

@classmethod
Expand Down Expand Up @@ -155,18 +156,66 @@ def _readstepfunc(self, item):
level[s0] = map(self._get_bo, s[4+s2:4+2*s2])
molecules = self._connectmolecule(bond, level)
return molecules, (step, timestep)

@staticmethod
def _get_idx(x):
return int(x) - 1

@staticmethod
def _get_bo(x):
return max(1, round(float(x)))


class _DetectCrd(_Detect):
def _getbondfromcrd(self, step_atoms, cell):
atomnumber = len(step_atoms)
if self.pbc:
# Apply period boundry conditions
step_atoms.set_pbc(True)
step_atoms.set_cell(cell)
# add ghost atoms
repeated_atoms = step_atoms.repeat(2)[atomnumber:]
tree = cKDTree(step_atoms.get_positions())
d = tree.query(repeated_atoms.get_positions(), k=1)[0]
nearest = d < 5
ghost_atoms = repeated_atoms[nearest]
realnumber = np.where(nearest)[0] % atomnumber
step_atoms += ghost_atoms
# Use openbabel to connect atoms
mol = openbabel.OBMol()
mol.BeginModify()
for idx, (num, position) in enumerate(zip(step_atoms.get_atomic_numbers(), step_atoms.positions)):
a = mol.NewAtom(idx)
a.SetAtomicNum(int(num))
a.SetVector(*position)
mol.ConnectTheDots()
mol.PerceiveBondOrders()
mol.EndModify()
bond = [[] for i in range(atomnumber)]
bondlevel = [[] for i in range(atomnumber)]
for b in openbabel.OBMolBondIter(mol):
s1 = b.GetBeginAtom().GetId()
s2 = b.GetEndAtom().GetId()
if s1 >= atomnumber and s2 >= atomnumber:
# duplicated
continue
elif s1 >= atomnumber:
s1 = realnumber[s1-atomnumber]
elif s2 >= atomnumber:
s2 = realnumber[s2-atomnumber]
level = b.GetBondOrder()
if level == 5:
# aromatic, 5 in openbabel but 12 in rdkit
level = 12
bond[s1].append(s2)
bond[s2].append(s1)
bondlevel[s1].append(level)
bondlevel[s2].append(level)
return bond, bondlevel


@_Detect.register_subclass("lammpsdumpfile")
class _DetectLAMMPSdump(_Detect):
class _DetectLAMMPSdump(_DetectCrd):
class LineType(Enum):
"""Line type in the LAMMPS dump files."""

Expand Down Expand Up @@ -247,48 +296,40 @@ def _readstepfunc(self, item):
molecules = self._connectmolecule(bond, level)
return molecules, timestep

def _getbondfromcrd(self, step_atoms, cell):
atomnumber = len(step_atoms)
if self.pbc:
# Apply period boundry conditions
step_atoms.set_pbc(True)
step_atoms.set_cell(cell)
# add ghost atoms
repeated_atoms = step_atoms.repeat(2)[atomnumber:]
tree = cKDTree(step_atoms.get_positions())
d = tree.query(repeated_atoms.get_positions(), k=1)[0]
nearest = d < 5
ghost_atoms = repeated_atoms[nearest]
realnumber = np.where(nearest)[0] % atomnumber
step_atoms += ghost_atoms
# Use openbabel to connect atoms
mol = openbabel.OBMol()
mol.BeginModify()
for idx, (num, position) in enumerate(zip(step_atoms.get_atomic_numbers(), step_atoms.positions)):
a = mol.NewAtom(idx)
a.SetAtomicNum(int(num))
a.SetVector(*position)
mol.ConnectTheDots()
mol.PerceiveBondOrders()
mol.EndModify()
bond = [[] for i in range(atomnumber)]
bondlevel = [[] for i in range(atomnumber)]
for b in openbabel.OBMolBondIter(mol):
s1 = b.GetBeginAtom().GetId()
s2 = b.GetEndAtom().GetId()
if s1 >= atomnumber and s2 >= atomnumber:
# duplicated
continue
elif s1 >= atomnumber:
s1 = realnumber[s1-atomnumber]
elif s2 >= atomnumber:
s2 = realnumber[s2-atomnumber]
level = b.GetBondOrder()
if level == 5:
# aromatic, 5 in openbabel but 12 in rdkit
level = 12
bond[s1].append(s2)
bond[s2].append(s1)
bondlevel[s1].append(level)
bondlevel[s2].append(level)
return bond, bondlevel

@_Detect.register_subclass("xyz")
class _Detectxyz(_DetectCrd):
"""xyz file. Two frames are connected. Cell information must be inputed by user."""

def _readNfunc(self, f):
atomname_dict = dict(zip(self.atomname.tolist(), range(self.atomname.size)))
for index, line in enumerate(f):
s = line.split()
if index == 0:
N = int(line.strip())
atomtype = np.zeros(N, dtype=int)
elif index > N+1:
break
elif index > 1:
atomtype[index-2] = atomname_dict[s[0]]
self.N = N
self.atomtype = atomtype
steplinenum = N + 2
return steplinenum

def _readstepfunc(self, item):
step, lines = item
timestep = step, step
step_atoms = []
boxsize = self.cell
if self.pbc and boxsize is None:
raise RuntimeError("No cell information is given.")
for index, line in enumerate(lines):
s = line.split()
if index > 1:
step_atoms.append((index-1, Atom(s[0] ,tuple((float(x) for x in s[1:4])))))
_, step_atoms = zip(*sorted(step_atoms, key=operator.itemgetter(0)))
step_atoms = Atoms(step_atoms)
bond, level = self._getbondfromcrd(step_atoms, boxsize)
molecules = self._connectmolecule(bond, level)
return molecules, timestep
17 changes: 14 additions & 3 deletions reacnetgenerator/commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ def _commandline():
action="store_true")
parser.add_argument(
'--dump', help='Process the LAMMPS dump file', action="store_true")
parser.add_argument(
'--type', '-t', help='Input file type', default='lammpsbondfile')
parser.add_argument(
'--nopbc', help='Disable PBC.', action="store_true")
parser.add_argument(
'--cell', '-c', nargs=3, type=float, help='Cell')
parser.add_argument(
'-n', '-np', '--nproc', help='Number of processes', type=int)
parser.add_argument(
Expand All @@ -39,7 +45,7 @@ def _commandline():
ReacNetGenerator(
inputfilename=args.inputfilename, atomname=args.atomname,
runHMM=not args.nohmm,
inputfiletype=('lammpsdumpfile' if args.dump else 'lammpsbondfile'),
inputfiletype=('lammpsdumpfile' if args.dump else args.type),
nproc=args.nproc, selectatoms=args.selectatoms,
stepinterval=args.stepinterval,
split=args.split,
Expand All @@ -48,6 +54,8 @@ def _commandline():
for url in args.urls] if args.urls else None,
a=np.array(args.matrixa).reshape((2, 2)),
b=np.array(args.matrixb).reshape((2, 2)),
pbc=not args.nopbc,
cell=args.cell,
).runanddraw()


Expand All @@ -56,8 +64,8 @@ def parm2cmd(pp):
pp['inputfilename'], '-a', *pp['atomname']]
if not pp.get('runHMM', True):
commands.append('--nohmm')
if pp['inputfiletype'] == 'lammpsdumpfile':
commands.append('--dump')
if pp['inputfiletype']:
commands.extend(('--type', pp['inputfiletype']))
if pp['atomname']:
commands.extend(('-s', pp['atomname'][0]))
if pp.get('urls', []):
Expand All @@ -69,6 +77,9 @@ def parm2cmd(pp):
if pp.get('b', []):
commands.extend(
('--matrixb', str(pp['b'][0][0]), str(pp['b'][0][1]), str(pp['b'][1][0]), str(pp['b'][1][1])))
if pp.get('cell', []):
commands.extend(
('--cell', str(pp['cell'][0]), str(pp['cell'][1]), str(pp['cell'][2])))
for ii in ['nproc', 'selectatoms', 'stepinterval', 'split', 'maxspecies']:
if pp.get(ii, None):
commands.extend(("--{}".format(ii), str(pp[ii])))
Expand Down
2 changes: 1 addition & 1 deletion reacnetgenerator/reacnetgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __init__(self, **kwargs):
"nolabel": False, "printfiltersignal": False, "showid": True, "runHMM": True, "SMILES": True,
"getoriginfile": False, "needprintspecies": True, "urls": []
}
none_key = ['selectatoms', 'species', 'pos', 'k', 'speciescenter']
none_key = ['selectatoms', 'species', 'pos', 'k', 'speciescenter', 'cell']
accept_keys = ['atomtype', 'step', 'hmmit', 'timestep', 'steplinenum', 'N',
'temp1it', 'originfilename', 'hmmfilename', 'moleculetempfilename', 'moleculetemp2filename',
'allmoleculeroute', 'splitmoleculeroute']
Expand Down

1 comment on commit 4d5c168

@vercel
Copy link

@vercel vercel bot commented on 4d5c168 Apr 1, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.