casacore
casa
Utilities
cregex.h
Go to the documentation of this file.
1
/*
2
cregex.h: Extended regular expression matching and search library
3
Copyright (C) 1993,1994,1995,1997,1999,2001
4
Associated Universities, Inc. Washington DC, USA.
5
6
This library is free software; you can redistribute it and/or modify it
7
under the terms of the GNU Library General Public License as published by
8
the Free Software Foundation; either version 2 of the License, or (at your
9
option) any later version.
10
11
This library is distributed in the hope that it will be useful, but WITHOUT
12
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
14
License for more details.
15
16
You should have received a copy of the GNU Library General Public License
17
along with this library; if not, write to the Free Software Foundation,
18
Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
19
20
Correspondence concerning AIPS++ should be addressed as follows:
21
Internet email: aips2-request@nrao.edu.
22
Postal address: AIPS++ Project Office
23
National Radio Astronomy Observatory
24
520 Edgemont Road
25
Charlottesville, VA 22903-2475 USA
26
27
*/
28
//# $Id$
29
30
#ifndef CASA_CREGEX_H
31
#define CASA_CREGEX_H
32
33
/* Definitions for data structures callers pass the regex library. */
34
35
#ifdef __cplusplus
36
extern
"C"
{
37
#endif
38
39
/* Define number of parens for which we record the beginnings and ends.
40
This affects how much space the `struct re_registers' type takes up. */
41
#ifndef RE_NREGS
42
#define RE_NREGS 32
43
#endif
44
45
#define BYTEWIDTH 8
46
47
48
#include <casacore/casa/aips.h>
49
50
namespace
casacore
{
//# NAMESPACE CASACORE - BEGIN
51
52
/* Maximum number of duplicates an interval can allow. */
53
/* Has been changed to copy from the limits.h file
54
#if defined(_AIX) || defined(__sgi)
55
# undef RE_DUP_MAX
56
#endif
57
#define RE_DUP_MAX ((1 << 15) - 1)
58
*/
59
60
/* This defines the various regexp syntaxes.
61
It can be set using the function a2_re_set_syntax. */
63
64
65
/* The following bits are used in the obscure_syntax variable to choose among
66
alternative regexp syntaxes. */
67
68
/* If this bit is set, plain parentheses serve as grouping, and backslash
69
parentheses are needed for literal searching.
70
If not set, backslash-parentheses are grouping, and plain parentheses
71
are for literal searching. */
72
#define RE_NO_BK_PARENS 1
73
74
/* If this bit is set, plain | serves as the `or'-operator, and \| is a
75
literal.
76
If not set, \| serves as the `or'-operator, and | is a literal. */
77
#define RE_NO_BK_VBAR (1 << 1)
78
79
/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
80
literals.
81
If set, \+, \? are operators and plain +, ? are literals. */
82
#define RE_BK_PLUS_QM (1 << 2)
83
84
/* If this bit is set, | binds tighter than ^ or $.
85
If not set, the contrary. */
86
#define RE_TIGHT_VBAR (1 << 3)
87
88
/* If this bit is set, then treat newline as an OR operator.
89
If not set, treat it as a normal character. */
90
#define RE_NEWLINE_OR (1 << 4)
91
92
/* If this bit is set, then special characters may act as normal
93
characters in some contexts. Specifically, this applies to:
94
^ -- only special at the beginning, or after ( or |;
95
$ -- only special at the end, or before ) or |;
96
*, +, ? -- only special when not after the beginning, (, or |.
97
If this bit is not set, special characters (such as *, ^, and $)
98
always have their special meaning regardless of the surrounding
99
context. */
100
#define RE_CONTEXT_INDEP_OPS (1 << 5)
101
102
/* If this bit is not set, then \ before anything inside [ and ] is taken as
103
a real \.
104
If set, then such a \ escapes the following character. This is a
105
special case for awk. */
106
#define RE_AWK_CLASS_HACK (1 << 6)
107
108
/* If this bit is set, then \{ and \} or { and } serve as interval operators.
109
If not set, then \{ and \} and { and } are treated as literals. */
110
#define RE_INTERVALS (1 << 7)
111
112
/* If this bit is not set, then \{ and \} serve as interval operators and
113
{ and } are literals.
114
If set, then { and } serve as interval operators and \{ and \} are
115
literals. */
116
#define RE_NO_BK_CURLY_BRACES (1 << 8)
117
118
/* If this bit is set, then character classes are supported; they are:
119
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
120
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
121
If not set, then character classes are not supported. */
122
#define RE_CHAR_CLASSES (1 << 9)
123
124
/* If this bit is set, then the dot re doesn't match a null byte.
125
If not set, it does. */
126
#define RE_DOT_NOT_NULL (1 << 10)
127
128
/* If this bit is set, then [^...] doesn't match a newline.
129
If not set, it does. */
130
#define RE_HAT_NOT_NEWLINE (1 << 11)
131
132
/* If this bit is not set, back references are recognized.
133
If set, they aren't. */
134
#define RE_NO_BK_REFS (1 << 12)
135
136
/* If this bit is set, back references must refer to a preceding
137
subexpression. If not set, a back reference to a nonexistent
138
subexpression is treated as literal characters. */
139
#define RE_NO_EMPTY_BK_REF (1 << 13)
140
141
/* If this bit is set, bracket expressions can't be empty.
142
If it is not set, they can be empty. */
143
#define RE_NO_EMPTY_BRACKETS (1 << 14)
144
145
/* If this bit is set, then *, +, ? and { cannot be first in an re or
146
immediately after a |, or a (. Furthermore, a | cannot be first or
147
last in an re, or immediately follow another | or a (. Also, a ^
148
cannot appear in a nonleading position and a $ cannot appear in a
149
nontrailing position (outside of bracket expressions, that is). */
150
#define RE_CONTEXTUAL_INVALID_OPS (1 << 15)
151
152
/* If this bit is set, then +, ? and | aren't recognized as operators.
153
If it's not, they are. */
154
#define RE_LIMITED_OPS (1 << 16)
155
156
/* If this bit is set, then an ending range point has to collate higher
157
or equal to the starting range point.
158
If it's not set, then when the ending range point collates lower
159
than the starting range point, the range is just considered empty. */
160
#define RE_NO_EMPTY_RANGES (1 << 17)
161
162
/* If this bit is set, then a hyphen (-) can't be an ending range point.
163
If it isn't, then it can. */
164
#define RE_NO_HYPHEN_RANGE_END (1 << 18)
165
166
167
/* Define combinations of bits for the standard possibilities.
   Each RE_SYNTAX_* value is the bitwise OR of the individual syntax
   flag bits defined earlier in this header.  Every continuation line
   must directly follow its backslash; nothing may sit between them. */
#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
                             | RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
                       | RE_CONTEXT_INDEP_OPS | RE_AWK_CLASS_HACK)
#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
                         | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \
                               | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \
                               | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \
                               | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \
                               | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)

#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \
                                  | RE_NO_BK_VBAR | RE_NO_BK_PARENS \
                                  | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \
                                  | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
                                  | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \
                                  | RE_NO_HYPHEN_RANGE_END)
188
189
190
/* This data structure is used to represent a compiled pattern. */
191
192
// <summary>
// This data structure is used to represent a compiled pattern.
// </summary>
// <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="" demos="">
// </reviewed>
// <synopsis>
// This data structure is used to represent a compiled pattern.
// It is used by the regular expression functions in cregex.cc.
// </synopsis>

struct re_pattern_buffer
{
  char *buffer;          /* Space holding the compiled pattern commands. */
  long allocated;        /* Size of space that `buffer' points to. */
  long used;             /* Length of portion of buffer actually occupied. */
  char *fastmap;         /* Pointer to fastmap, if any, or zero if none.
                            a2_re_search uses the fastmap, if there is one,
                            to skip over totally implausible characters. */
  char *translate;       /* Translate table to apply to all characters before
                            comparing, or zero for no translation.
                            The translation is applied to a pattern when it is
                            compiled and to data when it is matched. */
  char fastmap_accurate; /* Set to zero when a new pattern is stored,
                            set to one when the fastmap is updated from it. */
  char can_be_null;      /* Set to one by compiling fastmap
                            if this pattern might match the null string.
                            It does not necessarily match the null string
                            in that case, but if this is zero, it cannot.
                            2 as value means can match null string
                            but at end of range or before a character
                            listed in the fastmap. */
};
225
226
227
/* search.c (search_buffer) needs this one value. It is defined both in
228
regex.c and here. */
229
#define RE_EXACTN_VALUE 1
230
231
232
/* Structure to store register contents data in.
233
234
Pass the address of such a structure as an argument to a2_re_match, etc.,
235
if you want this information back.
236
237
For i from 1 to RE_NREGS - 1, start[i] records the starting index in
238
the string of where the ith subexpression matched, and end[i] records
239
one after the ending index. start[0] and end[0] are analogous, for
240
the entire pattern. */
241
242
// <summary>
243
// Data structure to store register contents data in.
244
// </summary>
245
// <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="" demos="">
246
// </reviewed>
247
// <synopsis>
248
// This data structure is used to store register contents data in.
249
// It is used by the regular expression functions in cregex.cc.
250
// </synopsis>
251
252
struct
re_registers
253
{
254
int
start
[
RE_NREGS
];
255
int
end
[
RE_NREGS
];
256
};
257
258
259
260
#if defined(__STDC__) || defined(__cplusplus)

/* Prototyped declarations of the regex entry points.
   The parameters are unnamed in this header (except `syntax'); the
   struct synopses above say these functions live in cregex.cc, which is
   where to look for the meaning of each argument. */
extern const char *a2_re_compile_pattern (char *, int,
                                          struct re_pattern_buffer *);
extern int a2_re_set_syntax (int syntax);
/* Is this really advertised? */
extern void a2_re_compile_fastmap (struct re_pattern_buffer *);
extern int a2_re_search (struct re_pattern_buffer *, char *, int, int, int,
                         struct re_registers *);
extern int a2_re_search_2 (struct re_pattern_buffer *, char *, int,
                           char *, int, int, int,
                           struct re_registers *, int);
extern int a2_re_match (struct re_pattern_buffer *, char *, int, int,
                        struct re_registers *);
extern int a2_re_match_2 (struct re_pattern_buffer *, char *, int,
                          char *, int, int, struct re_registers *, int);

/* 4.2 bsd compatibility. */
// extern const char *re_comp (char *);
// extern int re_exec (char *);

#else /* !(__STDC__ || __cplusplus) */

/* Unprototyped declarations for compilers without function prototypes.
   Kept in step with the prototyped branch above. */
extern const char *a2_re_compile_pattern ();
extern int a2_re_set_syntax ();
/* Is this really advertised? */
extern void a2_re_compile_fastmap ();
extern int a2_re_search (), a2_re_search_2 ();
extern int a2_re_match (), a2_re_match_2 ();

/* 4.2 bsd compatibility. */
/* NOTE(review): the prototyped branch has these two commented out;
   confirm whether they are still meant to be declared here. */
extern const char *re_comp ();
extern int re_exec ();

#endif /* __STDC__ || __cplusplus */
293
294
295
#ifdef SYNTAX_TABLE
296
#endif
298
299
#ifdef __cplusplus
300
}
302
#endif
303
304
305
}
//# NAMESPACE CASACORE - END
306
307
#endif
/* CASA_CREGEX_H */
casacore::re_registers::end
int end[RE_NREGS]
Definition:
cregex.h:255
casacore::a2_re_match
int a2_re_match()
casacore::re_pattern_buffer::allocated
long allocated
Definition:
cregex.h:205
casacore::re_comp
const char * re_comp()
casacore::re_pattern_buffer::used
long used
Definition:
cregex.h:206
casacore::obscure_syntax
int obscure_syntax
casacore::re_exec
int re_exec()
casacore::re_registers
Data structure to store register contents data in.
Definition:
cregex.h:252
casacore::a2_re_search
int a2_re_search()
casacore::a2_re_match_2
int a2_re_match_2()
casacore::re_pattern_buffer::buffer
char * buffer
Definition:
cregex.h:204
casacore::re_pattern_buffer::fastmap
char * fastmap
Definition:
cregex.h:207
RE_NREGS
#define RE_NREGS
Definition:
cregex.h:42
casacore::re_pattern_buffer::translate
char * translate
Definition:
cregex.h:210
casacore
this file contains all the compiler specific defines
Definition:
mainpage.dox:28
casacore::re_pattern_buffer::can_be_null
char can_be_null
Definition:
cregex.h:217
casacore::re_pattern_buffer
This data structure is used to represent a compiled pattern.
Definition:
cregex.h:202
casacore::a2_re_search_2
int a2_re_search_2()
casacore::re_pattern_buffer::fastmap_accurate
char fastmap_accurate
Definition:
cregex.h:214
casacore::re_registers::start
int start[RE_NREGS]
Definition:
cregex.h:254
casacore::a2_re_compile_fastmap
void a2_re_compile_fastmap()
casacore::a2_re_compile_pattern
const char * a2_re_compile_pattern()
Generated by
1.8.17