diff options
Diffstat (limited to 'src/src/pcre/pcre_tables.c')
-rw-r--r-- | src/src/pcre/pcre_tables.c | 320 |
1 files changed, 0 insertions, 320 deletions
diff --git a/src/src/pcre/pcre_tables.c b/src/src/pcre/pcre_tables.c deleted file mode 100644 index 530e44038..000000000 --- a/src/src/pcre/pcre_tables.c +++ /dev/null @@ -1,320 +0,0 @@ -/* $Cambridge: exim/src/src/pcre/pcre_tables.c,v 1.6 2007/11/12 13:02:20 nm4 Exp $ */ - -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2007 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains some fixed tables that are used by more than one of the -PCRE code modules. The tables are also #included by the pcretest program, which -uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name -clashes with the library. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - -/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that -the definition is next to the definition of the opcodes in pcre_internal.h. */ - -const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; - - - -/************************************************* -* Tables for UTF-8 support * -*************************************************/ - -/* These are the breakpoints for different numbers of bytes in a UTF-8 -character. */ - -#ifdef SUPPORT_UTF8 - -const int _pcre_utf8_table1[] = - { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; - -const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int); - -/* These are the indicator bits and the mask for the data bits to set in the -first byte of a character, indexed by the number of additional bytes. */ - -const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; -const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; - -/* Table of the number of extra bytes, indexed by the first byte masked with -0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ - -const uschar _pcre_utf8_table4[] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; - -/* The pcre_utt[] table below translates Unicode property names into type and -code values. It is searched by binary chop, so must be in collating sequence of -name. Originally, the table contained pointers to the name strings in the first -field of each entry. However, that leads to a large number of relocations when -a shared library is dynamically loaded. A significant reduction is made by -putting all the names into a single, large string and then using offsets in the -table itself. Maintenance is more error-prone, but frequent changes to this -data is unlikely. */ - -const char _pcre_utt_names[] = - "Any\0" - "Arabic\0" - "Armenian\0" - "Balinese\0" - "Bengali\0" - "Bopomofo\0" - "Braille\0" - "Buginese\0" - "Buhid\0" - "C\0" - "Canadian_Aboriginal\0" - "Cc\0" - "Cf\0" - "Cherokee\0" - "Cn\0" - "Co\0" - "Common\0" - "Coptic\0" - "Cs\0" - "Cuneiform\0" - "Cypriot\0" - "Cyrillic\0" - "Deseret\0" - "Devanagari\0" - "Ethiopic\0" - "Georgian\0" - "Glagolitic\0" - "Gothic\0" - "Greek\0" - "Gujarati\0" - "Gurmukhi\0" - "Han\0" - "Hangul\0" - "Hanunoo\0" - "Hebrew\0" - "Hiragana\0" - "Inherited\0" - "Kannada\0" - "Katakana\0" - "Kharoshthi\0" - "Khmer\0" - "L\0" - "L&\0" - "Lao\0" - "Latin\0" - "Limbu\0" - "Linear_B\0" - "Ll\0" - "Lm\0" - "Lo\0" - "Lt\0" - "Lu\0" - "M\0" - "Malayalam\0" - "Mc\0" - "Me\0" - "Mn\0" - "Mongolian\0" - "Myanmar\0" - "N\0" - "Nd\0" - "New_Tai_Lue\0" - "Nko\0" - "Nl\0" - "No\0" - "Ogham\0" - "Old_Italic\0" - "Old_Persian\0" - "Oriya\0" - "Osmanya\0" - "P\0" - "Pc\0" - "Pd\0" - "Pe\0" - "Pf\0" - "Phags_Pa\0" - "Phoenician\0" - "Pi\0" - "Po\0" - "Ps\0" - "Runic\0" - "S\0" - "Sc\0" - "Shavian\0" - "Sinhala\0" - "Sk\0" - "Sm\0" - "So\0" - "Syloti_Nagri\0" - "Syriac\0" - "Tagalog\0" - "Tagbanwa\0" - "Tai_Le\0" - "Tamil\0" - "Telugu\0" - "Thaana\0" - "Thai\0" - "Tibetan\0" - "Tifinagh\0" - "Ugaritic\0" - "Yi\0" - "Z\0" - "Zl\0" - "Zp\0" - "Zs\0"; - -const ucp_type_table _pcre_utt[] = { - { 0, PT_ANY, 0 }, - { 4, PT_SC, ucp_Arabic }, - { 11, PT_SC, ucp_Armenian }, - { 20, PT_SC, ucp_Balinese }, - { 29, PT_SC, ucp_Bengali }, - { 37, PT_SC, ucp_Bopomofo }, - { 46, PT_SC, ucp_Braille }, - { 54, PT_SC, ucp_Buginese }, - { 63, PT_SC, ucp_Buhid }, - { 69, PT_GC, ucp_C }, - { 71, PT_SC, ucp_Canadian_Aboriginal }, - { 91, PT_PC, ucp_Cc }, - { 94, PT_PC, ucp_Cf }, - { 97, PT_SC, ucp_Cherokee }, - { 106, PT_PC, ucp_Cn }, - { 109, PT_PC, ucp_Co }, - { 112, PT_SC, ucp_Common }, - { 119, PT_SC, ucp_Coptic }, - { 126, PT_PC, ucp_Cs }, - { 129, PT_SC, ucp_Cuneiform }, - { 139, PT_SC, ucp_Cypriot }, - { 147, PT_SC, ucp_Cyrillic }, - { 156, PT_SC, ucp_Deseret }, - { 164, PT_SC, ucp_Devanagari }, - { 175, PT_SC, ucp_Ethiopic }, - { 184, PT_SC, ucp_Georgian }, - { 193, PT_SC, ucp_Glagolitic }, - { 204, PT_SC, ucp_Gothic }, - { 211, PT_SC, ucp_Greek }, - { 217, PT_SC, ucp_Gujarati }, - { 226, PT_SC, ucp_Gurmukhi }, - { 235, PT_SC, ucp_Han }, - { 239, PT_SC, ucp_Hangul }, - { 246, PT_SC, ucp_Hanunoo }, - { 254, PT_SC, ucp_Hebrew }, - { 261, PT_SC, ucp_Hiragana }, - { 270, PT_SC, ucp_Inherited }, - { 280, PT_SC, ucp_Kannada }, - { 288, PT_SC, ucp_Katakana }, - { 297, PT_SC, ucp_Kharoshthi }, - { 308, PT_SC, ucp_Khmer }, - { 314, PT_GC, ucp_L }, - { 316, PT_LAMP, 0 }, - { 319, PT_SC, ucp_Lao }, - { 323, PT_SC, ucp_Latin }, - { 329, PT_SC, ucp_Limbu }, - { 335, PT_SC, ucp_Linear_B }, - { 344, PT_PC, ucp_Ll }, - { 347, PT_PC, ucp_Lm }, - { 350, PT_PC, ucp_Lo }, - { 353, PT_PC, ucp_Lt }, - { 356, PT_PC, ucp_Lu }, - { 359, PT_GC, ucp_M }, - { 361, PT_SC, ucp_Malayalam }, - { 371, PT_PC, ucp_Mc }, - { 374, PT_PC, ucp_Me }, - { 377, PT_PC, ucp_Mn }, - { 380, PT_SC, ucp_Mongolian }, - { 390, PT_SC, ucp_Myanmar }, - { 398, PT_GC, ucp_N }, - { 400, PT_PC, ucp_Nd }, - { 403, PT_SC, ucp_New_Tai_Lue }, - { 415, PT_SC, ucp_Nko }, - { 419, PT_PC, ucp_Nl }, - { 422, PT_PC, ucp_No }, - { 425, PT_SC, ucp_Ogham }, - { 431, PT_SC, ucp_Old_Italic }, - { 442, PT_SC, ucp_Old_Persian }, - { 454, PT_SC, ucp_Oriya }, - { 460, PT_SC, ucp_Osmanya }, - { 468, PT_GC, ucp_P }, - { 470, PT_PC, ucp_Pc }, - { 473, PT_PC, ucp_Pd }, - { 476, PT_PC, ucp_Pe }, - { 479, PT_PC, ucp_Pf }, - { 482, PT_SC, ucp_Phags_Pa }, - { 491, PT_SC, ucp_Phoenician }, - { 502, PT_PC, ucp_Pi }, - { 505, PT_PC, ucp_Po }, - { 508, PT_PC, ucp_Ps }, - { 511, PT_SC, ucp_Runic }, - { 517, PT_GC, ucp_S }, - { 519, PT_PC, ucp_Sc }, - { 522, PT_SC, ucp_Shavian }, - { 530, PT_SC, ucp_Sinhala }, - { 538, PT_PC, ucp_Sk }, - { 541, PT_PC, ucp_Sm }, - { 544, PT_PC, ucp_So }, - { 547, PT_SC, ucp_Syloti_Nagri }, - { 560, PT_SC, ucp_Syriac }, - { 567, PT_SC, ucp_Tagalog }, - { 575, PT_SC, ucp_Tagbanwa }, - { 584, PT_SC, ucp_Tai_Le }, - { 591, PT_SC, ucp_Tamil }, - { 597, PT_SC, ucp_Telugu }, - { 604, PT_SC, ucp_Thaana }, - { 611, PT_SC, ucp_Thai }, - { 616, PT_SC, ucp_Tibetan }, - { 624, PT_SC, ucp_Tifinagh }, - { 633, PT_SC, ucp_Ugaritic }, - { 642, PT_SC, ucp_Yi }, - { 645, PT_GC, ucp_Z }, - { 647, PT_PC, ucp_Zl }, - { 650, PT_PC, ucp_Zp }, - { 653, PT_PC, ucp_Zs } -}; - -const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); - -#endif /* SUPPORT_UTF8 */ - -/* End of pcre_tables.c */ |