libStatGen Software  1
StringHash.cpp
1 /*
2  * Copyright (C) 2010-2012 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "StringHash.h"
19 #include "InputFile.h"
20 #include "Error.h"
21 
22 StringHash::StringHash(int startsize)
23  : StringHashBase()
24 {
25  count = 0;
26  size = startsize;
27  mask = startsize - 1;
28 
29  // In this implementation, the size of hash tables must be a power of two
30  if (startsize & mask)
31  error("StringHash: Hash table size must be a power of two.\n");
32 
33  strings = new String * [size];
34  objects = new void * [size];
35  keys = new unsigned int [size];
36 
37  for (unsigned int i = 0; i < size; i++)
38  {
39  strings[i] = NULL;
40  objects[i] = NULL;
41  }
42 };
43 
44 StringHash::~StringHash()
45 {
46  for (unsigned int i = 0; i < size; i++)
47  if (strings[i] != NULL)
48  delete strings[i];
49 
50  if(strings) delete [] strings;
51  if(objects) delete [] objects;
52  if(keys) delete [] keys;
53 }
54 
55 void StringHash::Clear()
56 {
57  for (unsigned int i = 0; i < size; i++)
58  if (strings[i] != NULL)
59  {
60  delete strings[i];
61  strings[i] = NULL;
62  }
63 
64  count = 0;
65 
66  if (size > 256)
67  SetSize(256);
68 }
69 
70 void StringHash::SetSize(int newsize)
71 {
72  int newmask = newsize - 1;
73 
74  String ** newstrings = new String * [newsize];
75  void ** newobjects = new void * [newsize];
76  unsigned int * newkeys = new unsigned int [newsize];
77 
78  for (int i = 0; i < newsize; i++)
79  {
80  newstrings[i] = NULL;
81  newobjects[i] = NULL;
82  }
83 
84  if (count)
85  for (unsigned int i = 0; i < size; i++)
86  if (strings[i] != NULL)
87  {
88  unsigned int key = keys[i];
89  unsigned int h = key & newmask;
90 
91  while (newstrings[h] != NULL &&
92  (newkeys[h] != key ||
93  (!stringsEqual(*(newstrings[h]), *(strings[i])))))
94  h = (h + 1) & newmask;
95 
96  newkeys[h] = key;
97  newstrings[h] = strings[i];
98  newobjects[h] = objects[i];
99  }
100 
101  if(strings) delete [] strings;
102  if(objects) delete [] objects;
103  if(keys) delete [] keys;
104 
105  strings = newstrings;
106  objects = newobjects;
107  keys = newkeys;
108  size = newsize;
109  mask = newmask;
110 }
111 
112 int StringHash::Add(const String & string, void * object)
113 {
114  unsigned int key = getKey(string);
115  unsigned int h = Iterate(key, string);
116 
117  if (strings[h] == NULL)
118  Insert(h, key, string);
119 
120  objects[h] = object;
121 
122  if (count * 2 > size)
123  {
124  Grow();
125  return Iterate(key, string);
126  }
127 
128  return h;
129 }
130 
131 int StringHash::Find(const String & string, void *(*create_object)())
132 {
133  unsigned int key = getKey(string);
134  unsigned int h = Iterate(key, string);
135 
136  if (strings[h] == NULL && create_object == NULL)
137  return -1;
138 
139  if (strings[h] == NULL && create_object != NULL)
140  {
141  Insert(h, key, string);
142  objects[h] = create_object();
143 
144  if (count * 2 > size)
145  {
146  Grow();
147  return Iterate(key, string);
148  }
149  }
150 
151  return h;
152 }
153 
154 int StringHash::Find(const String & string) const
155 {
156  unsigned int key = getKey(string);
157  unsigned int h = Iterate(key, string);
158 
159  if (strings[h] == NULL)
160  return -1;
161 
162  return h;
163 }
164 void * StringHash::CreateHash()
165 {
166  return (void *) new StringHash();
167 }
168 
169 void StringHash::Delete(unsigned int index)
170 {
171  if (index >= size || strings[index] == NULL)
172  return;
173 
174  delete strings[index];
175  strings[index] = NULL;
176  count--;
177 
178  if (count * 8 < size && size > 32)
179  Shrink();
180  else
181  {
182  // rehash the next strings until we find empty slot
183  index = (index + 1) & mask;
184 
185  while (strings[index] != NULL)
186  {
187  if ((keys[index] & mask) != index)
188  {
189  unsigned int h = Iterate(keys[index], *strings[index]);
190 
191  if (h != (unsigned int) index)
192  {
193  keys[h] = keys[index];
194  strings[h] = strings[index];
195  objects[h] = objects[index];
196 
197  strings[index] = NULL;
198  objects[index] = NULL;
199  }
200  }
201 
202  index = (index + 1) & mask;
203  }
204  }
205 }
206 
207 void StringHash::ReadLinesFromFile(const char * filename)
208 {
209  IFILE f = ifopen(filename, "rb");
210  if (f == NULL) return;
211  ReadLinesFromFile(f);
212  ifclose(f);
213 }
214 
215 void StringHash::ReadLinesFromFile(FILE * f)
216 {
217  String buffer;
218 
219  while (!feof(f))
220  {
221  buffer.ReadLine(f);
222  Add(buffer.Trim());
223  }
224 }
225 
226 void StringHash::ReadLinesFromFile(IFILE & f)
227 {
228  String buffer;
229 
230  while (!ifeof(f))
231  {
232  buffer.ReadLine(f);
233  Add(buffer.Trim());
234  }
235 }
236 
237 // StringIntHash implementation
238 
239 StringIntHash::StringIntHash(int startsize)
240  : StringHashBase()
241 {
242  count = 0;
243  size = startsize;
244  mask = startsize - 1;
245 
246  // In this implementation, the size of hash tables must be a power of two
247  if (startsize & mask)
248  error("StringIntHash: Hash table size must be a power of two.\n");
249 
250  strings = new String * [size];
251  integers = new int [size];
252  keys = new unsigned int [size];
253 
254  for (unsigned int i = 0; i < size; i++)
255  strings[i] = NULL;
256 };
257 
258 StringIntHash::~StringIntHash()
259 {
260  for (unsigned int i = 0; i < size; i++)
261  if (strings[i] != NULL)
262  delete strings[i];
263 
264  if(strings) delete [] strings;
265  if(integers) delete [] integers;
266  if(keys) delete [] keys;
267 }
268 
269 void StringIntHash::SetSize(int newsize)
270 {
271  int newmask = newsize - 1;
272 
273  String ** newstrings = new String * [newsize];
274  int * newintegers = new int [newsize];
275  unsigned int * newkeys = new unsigned int [newsize];
276 
277  for (int i = 0; i < newsize; i++)
278  newstrings[i] = NULL;
279 
280  for (unsigned int i = 0; i < size; i++)
281  if (strings[i] != NULL)
282  {
283  unsigned int key = keys[i];
284  unsigned int h = key & newmask;
285 
286  while (newstrings[h] != NULL &&
287  (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
288  h = (h + 1) & newmask;
289 
290  newkeys[h] = key;
291  newstrings[h] = strings[i];
292  newintegers[h] = integers[i];
293  }
294 
295  if(strings) delete [] strings;
296  if(integers) delete [] integers;
297  if(keys) delete [] keys;
298 
299  strings = newstrings;
300  integers = newintegers;
301  keys = newkeys;
302  size = newsize;
303  mask = newmask;
304 }
305 
306 void StringIntHash::Clear()
307 {
308  for (unsigned int i = 0; i < size; i++)
309  if (strings[i] != NULL)
310  {
311  delete strings[i];
312  strings[i] = NULL;
313  }
314 
315  count = 0;
316 
317  if (size > 256)
318  SetSize(256);
319 }
320 
321 int StringIntHash::Add(const String & string, int value)
322 {
323  unsigned int key = getKey(string);
324  unsigned int h = Iterate(key, string);
325 
326  if (strings[h] == NULL)
327  Insert(h, key, string);
328 
329  integers[h] = value;
330 
331  if (count * 2 > size)
332  {
333  Grow();
334  return Iterate(key, string);
335  }
336 
337  return h;
338 }
339 
340 int StringIntHash::Find(const String & string, int defaultValue)
341 {
342  unsigned int key = getKey(string);
343  unsigned int h = Iterate(key, string);
344 
345  if (strings[h] == NULL)
346  {
347  Insert(h, key, string);
348  integers[h] = defaultValue;
349 
350  if (count * 2 > size)
351  {
352  Grow();
353  return Iterate(key, string);
354  }
355  }
356 
357  return h;
358 }
359 
360 int StringIntHash::Find(const String & string) const
361 {
362  unsigned int key = getKey(string);
363  unsigned int h = Iterate(key, string);
364 
365  if (strings[h] == NULL)
366  return -1;
367 
368  return h;
369 }
370 
371 void StringIntHash::Delete(unsigned int index)
372 {
373  if (index >= size || strings[index] == NULL)
374  return;
375 
376  delete strings[index];
377  strings[index] = NULL;
378  count--;
379 
380  if (count * 8 < size && size > 32)
381  Shrink();
382  else
383  {
384  // rehash the next strings until we find empty slot
385  index = (index + 1) & mask;
386 
387  while (strings[index] != NULL)
388  {
389  if ((keys[index] & mask) != index)
390  {
391  unsigned int h = Iterate(keys[index], *strings[index]);
392 
393  if (h != (unsigned int) index)
394  {
395  keys[h] = keys[index];
396  strings[h] = strings[index];
397  integers[h] = integers[index];
398 
399  strings[index] = NULL;
400  }
401  }
402 
403  index = (index + 1) & mask;
404  }
405  }
406 }
407 
408 // StringDoubleHash implementation
409 
410 StringDoubleHash::StringDoubleHash(int startsize)
411  : StringHashBase()
412 {
413  count = 0;
414  size = startsize;
415  mask = startsize - 1;
416 
417  // In this implementation, the size of hash tables must be a power of two
418  if (startsize & mask)
419  error("StringDoubleHash: Hash table size must be a power of two.\n");
420 
421  strings = new String * [size];
422  doubles = new double [size];
423  keys = new unsigned int [size];
424 
425  for (unsigned int i = 0; i < size; i++)
426  strings[i] = NULL;
427 };
428 
429 StringDoubleHash::~StringDoubleHash()
430 {
431  for (unsigned int i = 0; i < size; i++)
432  if (strings[i] != NULL)
433  delete strings[i];
434 
435  if(strings) delete [] strings;
436  if(doubles) delete [] doubles;
437  if(keys) delete [] keys;
438 }
439 
440 void StringDoubleHash::SetSize(int newsize)
441 {
442  int newmask = newsize - 1;
443 
444  String ** newstrings = new String * [newsize];
445  double * newdoubles = new double [newsize];
446  unsigned int * newkeys = new unsigned int [newsize];
447 
448  for (int i = 0; i < newsize; i++)
449  newstrings[i] = NULL;
450 
451  for (unsigned int i = 0; i < size; i++)
452  if (strings[i] != NULL)
453  {
454  unsigned int key = keys[i];
455  unsigned int h = key & newmask;
456 
457  while (newstrings[h] != NULL &&
458  (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
459  h = (h + 1) & newmask;
460 
461  newkeys[h] = key;
462  newstrings[h] = strings[i];
463  newdoubles[h] = doubles[i];
464  }
465 
466  if(strings) delete [] strings;
467  if(doubles) delete [] doubles;
468  if(keys) delete [] keys;
469 
470  strings = newstrings;
471  doubles = newdoubles;
472  keys = newkeys;
473  size = newsize;
474  mask = newmask;
475 }
476 
477 int StringDoubleHash::Add(const String & string, double value)
478 {
479  unsigned int key = getKey(string);
480  unsigned int h = Iterate(key, string);
481 
482  if (strings[h] == NULL)
483  Insert(h, key, string);
484 
485  doubles[h] = value;
486 
487  if (count * 2 > size)
488  {
489  Grow();
490  return Iterate(key, string);
491  }
492 
493  return h;
494 }
495 
496 int StringDoubleHash::Find(const String & string, double defaultValue)
497 {
498  unsigned int key = getKey(string);
499  unsigned int h = Iterate(key, string);
500 
501  if (strings[h] == NULL)
502  {
503  Insert(h, key, string);
504  doubles[h] = defaultValue;
505 
506  if (count * 2 > size)
507  {
508  Grow();
509  return Iterate(key, string);
510  }
511  }
512 
513  return h;
514 }
515 
516 int StringDoubleHash::Find(const String & string) const
517 {
518  unsigned int key = getKey(string);
519  unsigned int h = Iterate(key, string);
520 
521  if (strings[h] == NULL)
522  return -1;
523 
524  return h;
525 }
526 
527 void StringDoubleHash::Delete(unsigned int index)
528 {
529  if (index >= size || strings[index] == NULL)
530  return;
531 
532  delete strings[index];
533  strings[index] = NULL;
534  count--;
535 
536  if (count * 8 < size && size > 32)
537  Shrink();
538  else
539  {
540  // rehash the next strings until we find empty slot
541  index = (index + 1) & mask;
542 
543  while (strings[index] != NULL)
544  {
545  if ((keys[index] & mask) != index)
546  {
547  unsigned int h = Iterate(keys[index], *strings[index]);
548 
549  if (h != (unsigned int) index)
550  {
551  keys[h] = keys[index];
552  strings[h] = strings[index];
553  doubles[h] = doubles[index];
554 
555  strings[index] = NULL;
556  }
557  }
558 
559  index = (index + 1) & mask;
560  }
561  }
562 }
563 
564 void StringHash::Print()
565 {
566  Print(stdout);
567 }
568 
569 void StringHash::Print(const char * filename)
570 {
571  FILE * output = fopen(filename, "wt");
572  if (output == NULL)
573  return;
574  Print(output);
575  fclose(output);
576 }
577 
578 void StringHash::Print(FILE * output)
579 {
580  for (unsigned int i = 0; i < size; i++)
581  if (SlotInUse(i))
582  strings[i]->WriteLine(output);
583 }
584 
585 String StringHash::StringList(char separator)
586 {
587  String list;
588 
589  for (unsigned int i = 0; i < size; i++)
590  if (SlotInUse(i))
591  list += *strings[i] + separator;
592 
593  list.SetLength(list.Length() - 1);
594 
595  return list;
596 }
597 
598 int StringIntHash::GetCount(const String & key) const
599 {
600  int index = Find(key);
601  return index == -1 ? 0 : integers[index];
602 }
603 
604 int StringIntHash::IncrementCount(const String & key)
605 {
606  int index = Find(key);
607 
608  if (index != -1)
609  return ++(integers[index]);
610 
611  SetInteger(key, 1);
612  return 1;
613 }
614 
615 int StringIntHash::IncrementCount(const String & key, int amount)
616 {
617  int index = Find(key);
618 
619  if (index != -1)
620  return (integers[index] += amount);
621 
622  SetInteger(key, amount);
623  return amount;
624 }
625 
626 int StringIntHash::DecrementCount(const String & key)
627 {
628  int index = Find(key);
629 
630  if (index != -1)
631  return --(integers[index]);
632 
633  SetInteger(key, -1);
634  return -1;
635 }
636 
637 void StringDoubleHash::Clear()
638 {
639  for (unsigned int i = 0; i < size; i++)
640  if (strings[i] != NULL)
641  {
642  delete strings[i];
643  strings[i] = NULL;
644  }
645 
646  count = 0;
647 
648  if (size > 256)
649  SetSize(256);
650 }
651 
652 StringHash & StringHash::operator = (const StringHash & rhs)
653 {
654  Clear();
655 
656  for (int i = 0; i < rhs.Capacity(); i++)
657  if (rhs.SlotInUse(i))
658  Add(*(rhs.strings[i]), rhs.objects[i]);
659 
660  return *this;
661 }
662 
663 StringIntHash & StringIntHash::operator = (const StringIntHash & rhs)
664 {
665  Clear();
666 
667  for (int i = 0; i < rhs.Capacity(); i++)
668  if (rhs.SlotInUse(i))
669  Add(*(rhs.strings[i]), rhs.integers[i]);
670 
671  return *this;
672 }
673 
674 bool StringIntHash::operator == (const StringIntHash & rhs) const
675 {
676  if (Capacity() != rhs.Capacity()) return false;
677  if (Entries() != rhs.Entries()) return false;
678  for (int i = 0; i < rhs.Capacity(); i++)
679  {
680  if(rhs.SlotInUse(i) != SlotInUse(i))
681  {
682  return(false);
683  }
684  if (rhs.SlotInUse(i))
685  {
686  if(*(strings[i]) != *(rhs.strings[i]))
687  {
688  return(false);
689  }
690  if(rhs.integers[i] != integers[i])
691  {
692  return(false);
693  }
694  }
695  }
696  return(true);
697 }
698 
699 StringDoubleHash & StringDoubleHash::operator = (const StringDoubleHash & rhs)
700 {
701  Clear();
702 
703  for (int i = 0; i < rhs.Capacity(); i++)
704  if (rhs.SlotInUse(i))
705  Add(*(rhs.strings[i]), rhs.doubles[i]);
706 
707  return *this;
708 }
709 
710 void StringHash::Swap(StringHash & s)
711 {
712  String ** tstrings = s.strings;
713  s.strings = strings;
714  strings = tstrings;
715 
716  void ** tobjects = s.objects;
717  s.objects = objects;
718  objects = tobjects;
719 
720  unsigned int * tkeys = s.keys;
721  s.keys = keys;
722  keys = tkeys;
723 
724  unsigned int temp = s.count;
725  s.count = count;
726  count = temp;
727 
728  temp = s.size;
729  s.size = size;
730  size = temp;
731 
732  temp = s.mask;
733  s.mask = mask;
734  mask = temp;
735 }
736 
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition: InputFile.h:654
Class for easily reading/writing files without having to worry about file type (uncompressed, gzip, bgzf) when reading.
Definition: InputFile.h:36
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout...
Definition: InputFile.h:562
int ifclose(IFILE &file)
Close the file.
Definition: InputFile.h:580