webpdbplugin.c 12.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
/***************************************************************************
 *cr
 *cr            (C) Copyright 1995-2016 The Board of Trustees of the
 *cr                        University of Illinois
 *cr                         All Rights Reserved
 *cr
 ***************************************************************************/

/***************************************************************************
 * RCS INFORMATION:
 *
 *      $RCSfile: webpdbplugin.c,v $
 *      $Author: johns $       $Locker:  $             $State: Exp $
 *      $Revision: 1.54 $       $Date: 2016/11/28 05:01:55 $
 *
 ***************************************************************************/

#include <tcl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "molfile_plugin.h"
#include "readpdb.h"
#include "periodic_table.h"

/*
 * Load pdb from the RCSB
 * Uses Tcl
 */

/*
 * Need my own read_pdb_record because the one in readpdb takes a FILE*.
 * This one will be better anyway since I don't recopy the string ;-)
 * Read the given pdb string.  On returning, pos will point to the start of
 * the next read. 
 */ 
static int my_read_pdb_record(const char *pdb, char **pos) {
  int recType = PDB_UNKNOWN;
  char *nlpos;  /* newline position */

  nlpos = strchr(pdb, '\n'); /* XXX segv occurs on x86_64 linux */
                             /* loading '1epw' or '1sft'        */ 

  if (!nlpos) {
    return PDB_EOF;
  } 

  /* set the next position to the first char after the newline */
  *pos = nlpos + 1;

  /* atom records are the most common */
  if (!strncmp(pdb, "ATOM ",  5) || !strncmp(pdb, "HETATM", 6)) {
    /* Note that by only comparing 5 chars for "ATOM " rather than 6,     */
    /* we allow PDB files containing > 99,999 atoms generated by AMBER    */
    /* to load which would otherwise fail.  Not needed for HETATM since   */
    /* those aren't going to show up in files produced for/by MD engines. */
    recType = PDB_ATOM;
  } else if (!strncmp(pdb, "REMARK", 6)) {
    recType = PDB_REMARK;
  } else if (!strncmp(pdb, "CRYST1", 6)) {
    recType = PDB_CRYST1;
  } else if (!strncmp(pdb, "HEADER", 6)) {
    recType = PDB_HEADER;
  } else if (!strncmp(pdb, "END", 3)) {  /* very permissive */
    /* XXX we treat any "ENDxxx" record as an end, to simplify testing */
    /*     since we don't remove trailing '\n' chars                   */

    /* the only two legal END records are "END   " and "ENDMDL" */
    recType = PDB_END;
  } 

  return recType;
}

 
typedef struct {
  char *pdbstr; 
  char *pos;
  int natoms;
  molfile_metadata_t *meta;
  int nconect;
  int nbonds, maxbnum;
  int *from, *to, *idxmap;
} pdbdata;


static void *pdb_read(char *pdbstr, int *natoms) {
  pdbdata *pdb;
  int indx, nconect;
  char *pos = pdbstr;
  char *next;

  if (!pdbstr) return NULL;

  pdb = (pdbdata *)malloc(sizeof(pdbdata));
  pdb->meta = (molfile_metadata_t *) malloc(sizeof(molfile_metadata_t));
  memset(pdb->meta, 0, sizeof(molfile_metadata_t));

  pdb->meta->remarklen = 0;
  pdb->meta->remarks = NULL;

  *natoms=0;
  nconect=0;
  do {
    indx = my_read_pdb_record(pos, &next);
    if (indx == PDB_ATOM) {
      *natoms += 1;
    } else if (indx == PDB_CONECT) {
      nconect++;
    } else if (indx == PDB_HEADER) {
      get_pdb_header(pos, pdb->meta->accession, pdb->meta->date, NULL);
      if (strlen(pdb->meta->accession) > 0)
        strcpy(pdb->meta->database, "PDB");
    } else if (indx == PDB_REMARK || indx == PDB_UNKNOWN) {
      int len = next - pos;
      int newlen = len + pdb->meta->remarklen;

      char *newstr=realloc(pdb->meta->remarks, newlen + 1);
      if (newstr != NULL) {
        pdb->meta->remarks = newstr;
        pdb->meta->remarks[pdb->meta->remarklen] = '\0';
        memcpy(pdb->meta->remarks + pdb->meta->remarklen, pos, len);
        pdb->meta->remarks[newlen] = '\0';
        pdb->meta->remarklen = newlen;
      }
    }

    pos = next;
  } while (indx != PDB_END && indx != PDB_EOF);

  pdb->pdbstr = pdbstr;
  pdb->pos =    pdbstr;

  pdb->natoms = *natoms;
  pdb->nconect = nconect;
  pdb->nbonds = 0;
  pdb->maxbnum = 0;
  pdb->from = NULL;
  pdb->to = NULL;
  pdb->idxmap = NULL;

#if defined(VMDUSECONECTRECORDS)
  /* allocate atom index translation table if we have 99,999 atoms or less */
  /* and we have conect records to process                                 */
  if (pdb->natoms < 100000 && pdb->nconect > 0) {
    pdb->idxmap = (int *) malloc(100000 * sizeof(int));
    memset(pdb->idxmap, 0, 100000 * sizeof(int));
  }
#endif

  return pdb;
}

static const char *rcsbmsg[] = {
  "  The PDB is supported by RCSB, the NSF, US PHS, NIH, NCRP, NIGMS, NLM,",
  "and US DoE, who are not liable for the data.  PDB files shall not be",
  "sold.  See ftp://ftp.rcsb.org/advisory.doc for full details."
};

static int show_msg = 1;

static void *open_file_read(const char *filename, const char *filetype,
    int *natoms) {

  Tcl_Interp *interp;
  char url[300];
  char cmd[300]; 
  char *pdbfile;
  const char *result;
  void *v;

  /*
   * Create and initialize the interpreter
   */
  interp = Tcl_CreateInterp();
  if (!interp) {
    fprintf(stderr, "Could not create new Tcl Interp\n");
    return NULL; 
  }
  if (Tcl_Init(interp) != TCL_OK) {
    fprintf(stderr, "Warning, could not create initialize Tcl Interp\n");
  }
  if (!Tcl_PkgRequire(interp, (char *)"http", (char *)"2.0", 0)) {
    fprintf(stderr, "Could not load http package\n");
    Tcl_DeleteInterp(interp);
    return NULL;
  }

  if (strlen(filename) != 4) {
    fprintf(stderr, "PDB code %s is invalid; PDB accession codes have four letters.\n", filename);
    Tcl_DeleteInterp(interp);
    return NULL;
  }

  if (show_msg) {
    int i;
    show_msg = 0;
    for (i=0; i<3; i++) printf("%s\n", rcsbmsg[i]);
  }

  /* Adapted to new PDB website layout, changed on 1/1/2006 */
  sprintf(url, "http://www.rcsb.org/pdb/downloadFile.do?fileFormat=pdb&compression=NO&structureId=%s",filename);
  sprintf(cmd, "set token [::http::geturl \"%s\"]", url);
  if (Tcl_Eval(interp, cmd) != TCL_OK) {
    fprintf(stderr, "Error loading PDB: %s\n", Tcl_GetStringResult(interp));
    Tcl_DeleteInterp(interp);
    return NULL;
  } 
  sprintf(cmd, "upvar #0 $token state");
  Tcl_Eval(interp, cmd); 
  
  result = Tcl_GetVar2(interp, (char *)"state", "body", TCL_GLOBAL_ONLY); 
  if (!result) {
    fprintf(stderr, "Error loading PDB: %s\n", Tcl_GetStringResult(interp));
    Tcl_DeleteInterp(interp);
    return NULL;
  } 
  pdbfile = strdup(result);
  Tcl_DeleteInterp(interp);

  /* XXX this code needs updating still */
  /* pdbfile will be free'd by close_pdb() */
  v = pdb_read(pdbfile, natoms); 
  return v;
}
   
static int read_pdb_structure(void *mydata, int *optflags, 
    molfile_atom_t *atoms) {

  pdbdata *pdb = (pdbdata *)mydata;
  char *pos = pdb->pdbstr;
  char *next;
  int i, rectype, atomserial, pteidx;
  char ridstr[8];
  char elementsymbol[3];
  molfile_atom_t *atom;
  int badptecount = 0;
  elementsymbol[2]=0;

  *optflags = MOLFILE_INSERTION | MOLFILE_OCCUPANCY | MOLFILE_BFACTOR | 
              MOLFILE_ALTLOC | MOLFILE_ATOMICNUMBER | MOLFILE_BONDSSPECIAL;

  i=0; /* Count atoms */
  do {
    rectype = my_read_pdb_record(pos, &next);
    switch (rectype) {
    case PDB_ATOM:
      atom = atoms+i;
      get_pdb_fields(pos, next-pos, &atomserial,
          atom->name, atom->resname, atom->chain, atom->segid, 
          ridstr, atom->insertion, atom->altloc, elementsymbol,
          NULL, NULL, NULL, &atom->occupancy, &atom->bfactor);

      if (pdb->idxmap != NULL && atomserial < 100000) {
        pdb->idxmap[atomserial] = i; /* record new serial number translation */
      }

      atom->resid = atoi(ridstr);

      /* determine atomic number from the element symbol */
      pteidx = get_pte_idx_from_string(elementsymbol);
      atom->atomicnumber = pteidx;
      if (pteidx != 0) {
        atom->mass = get_pte_mass(pteidx);
        atom->radius = get_pte_vdw_radius(pteidx);
      } else {
        badptecount++; /* unrecognized element */
      }

      strcpy(atom->type, atom->name);
      i++;
      break;

    case PDB_CONECT:
      /* only read CONECT records for structures where we know they can */
      /* be valid for all of the atoms in the structure                 */
      if (pdb->idxmap != NULL) {
        char cbuf[PDB_BUFFER_LENGTH];
        int len = next-pos;

        if (len > PDB_BUFFER_LENGTH) 
          len = PDB_BUFFER_LENGTH;
        strncpy(cbuf, pos, len);
        get_pdb_conect(cbuf, pdb->natoms, pdb->idxmap,
                       &pdb->maxbnum, &pdb->nbonds, &pdb->from, &pdb->to);
      }
      break;

    default:
      /* other record types are ignored in the structure callback */
      /* and are dealt with in the timestep callback or elsewhere */
      break;
    }
    pos = next;
  } while (rectype != PDB_END && rectype != PDB_EOF);

  /* if all atoms are recognized, set the mass and radius flags too,  */
  /* otherwise let VMD guess these for itself using it's own methods  */
  if (badptecount == 0) {
    *optflags |= MOLFILE_MASS | MOLFILE_RADIUS;
  }

  return MOLFILE_SUCCESS;
}


static int read_bonds(void *v, int *nbonds, int **fromptr, int **toptr, 
                      float **bondorder, int **bondtype, 
                      int *nbondtypes, char ***bondtypename) {
  pdbdata *pdb = (pdbdata *)v;
 
  *nbonds = 0;
  *fromptr = NULL;
  *toptr = NULL;
  *bondorder = NULL; /* PDB files don't have bond order information */
  *bondtype = NULL;
  *nbondtypes = 0;
  *bondtypename = NULL;

/* The newest plugin API allows us to return CONECT records as
 * additional bonds above and beyond what the distance search returns.
 * Without that feature, we otherwise have to check completeness and
 * ignore them if they don't look to be fully specified for this molecule */
#if !defined(MOLFILE_BONDSSPECIAL)
  if (pdb->natoms >= 100000) {
    printf("webpdbplugin) Warning: more than 99,999 atoms, ignored CONECT records\n");
    return MOLFILE_SUCCESS;
  } else if (((float) pdb->nconect / (float) pdb->natoms) <= 0.85) {
    printf("webpdbplugin) Warning: Probable incomplete bond structure specified,\n");
    printf("webpdbplugin)          ignoring CONECT records\n");
    return MOLFILE_SUCCESS;
  } else if (pdb->nconect == 0) {
    return MOLFILE_SUCCESS;
  }
#endif

  *nbonds = pdb->nbonds;
  *fromptr = pdb->from;
  *toptr = pdb->to;

  return MOLFILE_SUCCESS;
}


static int read_next_timestep(void *v, int natoms, molfile_timestep_t *ts) {
  pdbdata *pdb = (pdbdata *)v;
  char *pos = pdb->pos;
  char *next;
  float *x, *y, *z;
  float occup, bfac;
  int indx, i = 0;

  if (ts) {
    x = ts->coords;
    y = x+1;
    z = x+2;
  } else {
    x = y = z = 0;
  }
  do {
    indx = my_read_pdb_record(pos, &next);
    if((indx == PDB_END || indx == PDB_EOF) && (i < pdb->natoms)) {
      return MOLFILE_ERROR;
    } else if(indx == PDB_ATOM) {
      if(i++ >= pdb->natoms) {
        break;
      }
      /* just get the coordinates, and store them */
      if (ts) {
        get_pdb_coordinates(pos, x, y, z, &occup, &bfac);
        x += 3;
        y += 3;
        z += 3;
      }
    } else if (indx == PDB_CRYST1) {
      if (ts) {
        get_pdb_cryst1(pos, &ts->alpha, &ts->beta, &ts->gamma,
                               &ts->A, &ts->B, &ts->C);
      }
    }
    pos = next;
  } while(!(indx == PDB_END || indx == PDB_EOF));
  pdb->pos = pos;

  return MOLFILE_SUCCESS;
}

static void close_pdb_read(void *v) {
  pdbdata *pdb = (pdbdata *)v;
  if (!pdb) return;
  free(pdb->pdbstr);
  if (pdb->idxmap != NULL)
    free(pdb->idxmap);
  if (pdb->meta->remarks != NULL)
    free(pdb->meta->remarks);
  if (pdb->meta != NULL)
    free(pdb->meta);
  free(pdb);
}


static int read_molecule_metadata(void *v, molfile_metadata_t **metadata) {
  pdbdata *pdb = (pdbdata *)v;
  *metadata = pdb->meta;
  return MOLFILE_SUCCESS;
}

/* 
 * Registration stuff
 */

static molfile_plugin_t plugin;

VMDPLUGIN_API int VMDPLUGIN_init() {
  memset(&plugin, 0, sizeof(molfile_plugin_t));
  plugin.abiversion = vmdplugin_ABIVERSION;
  plugin.type = MOLFILE_PLUGIN_TYPE;
  plugin.name = "webpdb";
  plugin.prettyname = "Web PDB Download";
  plugin.author = "Justin Gullingsrud, John Stone";
  plugin.majorv = 1;
  plugin.minorv = 16;
  plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
  plugin.filename_extension = "";
  plugin.open_file_read = open_file_read;
  plugin.read_structure = read_pdb_structure;
  plugin.read_bonds = read_bonds;
  plugin.read_next_timestep = read_next_timestep;
  plugin.close_file_read = close_pdb_read;
  plugin.read_molecule_metadata = read_molecule_metadata;
  return VMDPLUGIN_SUCCESS;
}

VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
  (*cb)(v, (vmdplugin_t *)&plugin);
  return VMDPLUGIN_SUCCESS;
}

VMDPLUGIN_API int VMDPLUGIN_fini() {
  return VMDPLUGIN_SUCCESS;
}


#ifdef TEST_WEBPDB_PLUGIN

int main(int argc, char *argv[]) {
  char *file;
  if (argc < 2) {
    fprintf(stderr, "Usage: %s <pdbcode>\n", argv[0]);
    return -1;
  }
  file = (char *)open_file_read(argv[1], "webpdb",  NULL);
  printf("%s\n", file);
  free(file);
  return 0;
}

#endif