123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111 |
- #include "lptypes.h"
- #include "lpcset.h"
- /*
- ** Add to 'c' the index of the (only) bit set in byte 'b'
- */
- static int onlybit (int c, int b) {
- if ((b & 0xF0) != 0) { c += 4; b >>= 4; }
- if ((b & 0x0C) != 0) { c += 2; b >>= 2; }
- if ((b & 0x02) != 0) { c += 1; }
- return c;
- }
- /*
- ** Check whether a charset is empty (returns IFail), singleton (IChar),
- ** full (IAny), or none of those (ISet). When singleton, 'info.offset'
- ** returns which character it is. When generic set, 'info' returns
- ** information about its range.
- */
- Opcode charsettype (const byte *cs, charsetinfo *info) {
- int low0, low1, high0, high1;
- for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++)
- /* find lowest byte with a 1-bit */;
- if (low1 == CHARSETSIZE)
- return IFail; /* no characters in set */
- for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--)
- /* find highest byte with a 1-bit; low1 is a sentinel */;
- if (low1 == high1) { /* only one byte with 1-bits? */
- int b = cs[low1];
- if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */
- info->offset = onlybit(low1 * BITSPERCHAR, b); /* get that bit */
- return IChar; /* single character */
- }
- }
- for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++)
- /* find lowest byte with a 0-bit */;
- if (low0 == CHARSETSIZE)
- return IAny; /* set has all bits set */
- for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
- /* find highest byte with a 0-bit; low0 is a sentinel */;
- if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
- info->offset = low1;
- info->size = high1 - low1 + 1;
- info->deflt = 0; /* all discharged bits were 0 */
- }
- else {
- info->offset = low0;
- info->size = high0 - low0 + 1;
- info->deflt = 0xFF; /* all discharged bits were 1 */
- }
- info->cs = cs + info->offset;
- return ISet;
- }
- /*
- ** Get a byte from a compact charset. If index is inside the charset
- ** range, get the byte from the supporting charset (correcting it
- ** by the offset). Otherwise, return the default for the set.
- */
- byte getbytefromcharset (const charsetinfo *info, int index) {
- if (index < info->size)
- return info->cs[index];
- else return info->deflt;
- }
- /*
- ** If 'tree' is a 'char' pattern (TSet, TChar, TAny, TFalse), convert it
- ** into a charset and return 1; else return 0.
- */
- int tocharset (TTree *tree, Charset *cs) {
- switch (tree->tag) {
- case TChar: { /* only one char */
- assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
- clearset(cs->cs); /* erase all chars */
- setchar(cs->cs, tree->u.n); /* add that one */
- return 1;
- }
- case TAny: {
- fillset(cs->cs, 0xFF); /* add all characters to the set */
- return 1;
- }
- case TFalse: {
- clearset(cs->cs); /* empty set */
- return 1;
- }
- case TSet: { /* fill set */
- int i;
- fillset(cs->cs, tree->u.set.deflt);
- for (i = 0; i < tree->u.set.size; i++)
- cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i];
- return 1;
- }
- default: return 0;
- }
- }
- void tree2cset (TTree *tree, charsetinfo *info) {
- assert(tree->tag == TSet);
- info->offset = tree->u.set.offset;
- info->size = tree->u.set.size;
- info->deflt = tree->u.set.deflt;
- info->cs = treebuffer(tree);
- }
|