-/* see copyright notice in squirrel.h */\r
-#include <squirrel.h>\r
-#include <string.h>\r
-#include <ctype.h>\r
-#include <setjmp.h>\r
-#include "sqstdstring.h"\r
-\r
-#ifdef _DEBUG\r
-#include <stdio.h>\r
-\r
-static const SQChar *g_nnames[] =\r
-{\r
- _SC("NONE"),_SC("OP_GREEDY"), _SC("OP_OR"),\r
- _SC("OP_EXPR"),_SC("OP_NOCAPEXPR"),_SC("OP_DOT"), _SC("OP_CLASS"),\r
- _SC("OP_CCLASS"),_SC("OP_NCLASS"),_SC("OP_RANGE"),_SC("OP_CHAR"),\r
- _SC("OP_EOL"),_SC("OP_BOL"),_SC("OP_WB")\r
-};\r
-\r
-#endif\r
-\r
-#define OP_GREEDY MAX_CHAR+1 // * + ? {n}\r
-#define OP_OR MAX_CHAR+2\r
-#define OP_EXPR MAX_CHAR+3 //parentesis ()\r
-#define OP_NOCAPEXPR MAX_CHAR+4 //parentesis (?:)\r
-#define OP_DOT MAX_CHAR+5\r
-#define OP_CLASS MAX_CHAR+6\r
-#define OP_CCLASS MAX_CHAR+7\r
-#define OP_NCLASS MAX_CHAR+8 //negates class the [^\r
-#define OP_RANGE MAX_CHAR+9\r
-#define OP_CHAR MAX_CHAR+10\r
-#define OP_EOL MAX_CHAR+11\r
-#define OP_BOL MAX_CHAR+12\r
-#define OP_WB MAX_CHAR+13\r
-\r
-#define SQREX_SYMBOL_ANY_CHAR '.'\r
-#define SQREX_SYMBOL_GREEDY_ONE_OR_MORE '+'\r
-#define SQREX_SYMBOL_GREEDY_ZERO_OR_MORE '*'\r
-#define SQREX_SYMBOL_GREEDY_ZERO_OR_ONE '?'\r
-#define SQREX_SYMBOL_BRANCH '|'\r
-#define SQREX_SYMBOL_END_OF_STRING '$'\r
-#define SQREX_SYMBOL_BEGINNING_OF_STRING '^'\r
-#define SQREX_SYMBOL_ESCAPE_CHAR '\\'\r
-\r
-\r
-typedef int SQRexNodeType;\r
-\r
-typedef struct tagSQRexNode{\r
- SQRexNodeType type;\r
- long left;\r
- long right;\r
- int next;\r
-}SQRexNode;\r
-\r
-struct SQRex{\r
- const SQChar *_eol;\r
- const SQChar *_bol;\r
- const SQChar *_p;\r
- int _first;\r
- int _op;\r
- SQRexNode *_nodes;\r
- int _nallocated;\r
- int _nsize;\r
- int _nsubexpr;\r
- SQRexMatch *_matches;\r
- int _currsubexp;\r
- void *_jmpbuf;\r
- const SQChar **_error;\r
-};\r
-\r
-static int sqstd_rex_list(SQRex *exp);\r
-\r
-static int sqstd_rex_newnode(SQRex *exp, SQRexNodeType type)\r
-{\r
- SQRexNode n;\r
- n.type = type;\r
- n.next = n.right = n.left = -1;\r
- if(type == OP_EXPR)\r
- n.right = exp->_nsubexpr++;\r
- if(exp->_nallocated < (exp->_nsize + 1)) {\r
- int oldsize = exp->_nallocated;\r
- exp->_nallocated *= 2;\r
- exp->_nodes = (SQRexNode *)sq_realloc(exp->_nodes, oldsize * sizeof(SQRexNode) ,exp->_nallocated * sizeof(SQRexNode));\r
- }\r
- exp->_nodes[exp->_nsize++] = n;\r
- return (int)exp->_nsize - 1;\r
-}\r
-\r
-static void sqstd_rex_error(SQRex *exp,const SQChar *error)\r
-{\r
- if(exp->_error) *exp->_error = error;\r
- longjmp(*((jmp_buf*)exp->_jmpbuf),-1);\r
-}\r
-\r
-static void sqstd_rex_expect(SQRex *exp, int n){\r
- if((*exp->_p) != n) \r
- sqstd_rex_error(exp, _SC("expected paren"));\r
- exp->_p++;\r
-}\r
-\r
-static SQBool sqstd_rex_ischar(SQChar c)\r
-{\r
- switch(c) {\r
- case SQREX_SYMBOL_BRANCH:case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE:\r
- case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE:case SQREX_SYMBOL_GREEDY_ONE_OR_MORE:\r
- case SQREX_SYMBOL_BEGINNING_OF_STRING:case SQREX_SYMBOL_END_OF_STRING:\r
- case SQREX_SYMBOL_ANY_CHAR:case SQREX_SYMBOL_ESCAPE_CHAR:case '(':case ')':case '[':case '{': case '}':\r
- return SQFalse;\r
- }\r
- return SQTrue;\r
-}\r
-\r
-static SQChar sqstd_rex_escapechar(SQRex *exp)\r
-{\r
- if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR){\r
- exp->_p++;\r
- switch(*exp->_p) {\r
- case 'v': exp->_p++; return '\v';\r
- case 'n': exp->_p++; return '\n';\r
- case 't': exp->_p++; return '\t';\r
- case 'r': exp->_p++; return '\r';\r
- case 'f': exp->_p++; return '\f';\r
- default: return (*exp->_p++);\r
- }\r
- } else if(!sqstd_rex_ischar(*exp->_p)) sqstd_rex_error(exp,_SC("letter expected"));\r
- return (*exp->_p++);\r
-}\r
-\r
-static int sqstd_rex_charclass(SQRex *exp,int classid)\r
-{\r
- int n = sqstd_rex_newnode(exp,OP_CCLASS);\r
- exp->_nodes[n].left = classid;\r
- return n;\r
-}\r
-\r
-static int sqstd_rex_charnode(SQRex *exp,SQBool isclass)\r
-{\r
- if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR) {\r
- exp->_p++;\r
- switch(*exp->_p) {\r
- case 'n': exp->_p++; return sqstd_rex_newnode(exp,'\n');\r
- case 't': exp->_p++; return sqstd_rex_newnode(exp,'\t');\r
- case 'r': exp->_p++; return sqstd_rex_newnode(exp,'\r');\r
- case 'f': exp->_p++; return sqstd_rex_newnode(exp,'\f');\r
- case 'v': exp->_p++; return sqstd_rex_newnode(exp,'\v');\r
- case 'a': case 'A': case 'w': case 'W': case 's': case 'S': \r
- case 'd': case 'D': case 'x': case 'X': case 'c': case 'C': \r
- case 'p': case 'P': case 'l': case 'u': \r
- {\r
- SQChar t = *exp->_p;\r
- exp->_p++; \r
- return sqstd_rex_charclass(exp,t);\r
- }\r
- case 'b': \r
- case 'B':\r
- if(!isclass) {\r
- int node = sqstd_rex_newnode(exp,OP_WB);\r
- exp->_nodes[node].left = *exp->_p;\r
- exp->_p++; \r
- return node;\r
- } //else default\r
- default: return sqstd_rex_newnode(exp,(*exp->_p++));\r
- }\r
- }\r
- else if(!sqstd_rex_ischar(*exp->_p)) {\r
- \r
- sqstd_rex_error(exp,_SC("letter expected"));\r
- }\r
- return sqstd_rex_newnode(exp,*exp->_p++);\r
-}\r
-static int sqstd_rex_class(SQRex *exp)\r
-{\r
- int ret = -1;\r
- int first = -1,chain;\r
- if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING){\r
- ret = sqstd_rex_newnode(exp,OP_NCLASS);\r
- exp->_p++;\r
- }else ret = sqstd_rex_newnode(exp,OP_CLASS);\r
- \r
- if(*exp->_p == ']' || *exp->_p == '-'){\r
- first = *exp->_p;\r
- exp->_p++;\r
- }\r
- chain = ret;\r
- while(*exp->_p != ']' && exp->_p != exp->_eol) {\r
- if(*exp->_p == '-' && first != -1){ \r
- int r;\r
- if(*exp->_p++ == ']') sqstd_rex_error(exp,_SC("unfinished range"));\r
- r = sqstd_rex_newnode(exp,OP_RANGE);\r
- if(first>*exp->_p) sqstd_rex_error(exp,_SC("invalid range"));\r
- if(exp->_nodes[first].type == OP_CCLASS) sqstd_rex_error(exp,_SC("cannot use character classes in ranges"));\r
- exp->_nodes[r].left = exp->_nodes[first].type;\r
- exp->_nodes[r].right = sqstd_rex_escapechar(exp);\r
- exp->_nodes[chain].next = r;\r
- chain = r;\r
- first = -1;\r
- }\r
- else{\r
- if(first!=-1){\r
- int c = first;\r
- exp->_nodes[chain].next = c;\r
- chain = c;\r
- first = sqstd_rex_charnode(exp,SQTrue);\r
- }\r
- else{\r
- first = sqstd_rex_charnode(exp,SQTrue);\r
- }\r
- }\r
- }\r
- if(first!=-1){\r
- int c = first;\r
- exp->_nodes[chain].next = c;\r
- chain = c;\r
- first = -1;\r
- }\r
- /* hack? */\r
- exp->_nodes[ret].left = exp->_nodes[ret].next;\r
- exp->_nodes[ret].next = -1;\r
- return ret;\r
-}\r
-\r
-static int sqstd_rex_parsenumber(SQRex *exp)\r
-{\r
- int ret = *exp->_p-'0';\r
- int positions = 10;\r
- exp->_p++;\r
- while(isdigit(*exp->_p)) {\r
- ret = ret*10+(*exp->_p++-'0');\r
- if(positions==1000000000) sqstd_rex_error(exp,_SC("overflow in numeric constant"));\r
- positions *= 10;\r
- };\r
- return ret;\r
-}\r
-\r
-static int sqstd_rex_element(SQRex *exp)\r
-{\r
- int ret;\r
- switch(*exp->_p)\r
- {\r
- case '(': {\r
- int expr;\r
- exp->_p++;\r
- \r
- \r
- if(*exp->_p =='?') {\r
- exp->_p++;\r
- sqstd_rex_expect(exp,':');\r
- expr = sqstd_rex_newnode(exp,OP_NOCAPEXPR);\r
- }\r
- else\r
- expr = sqstd_rex_newnode(exp,OP_EXPR);\r
- exp->_nodes[expr].left = sqstd_rex_list(exp);\r
- ret = expr;\r
- sqstd_rex_expect(exp,')');\r
- }\r
- break;\r
- case '[':\r
- exp->_p++;\r
- ret = sqstd_rex_class(exp);\r
- sqstd_rex_expect(exp,']');\r
- break;\r
- case SQREX_SYMBOL_END_OF_STRING: exp->_p++; ret = sqstd_rex_newnode(exp,OP_EOL);break;\r
- case SQREX_SYMBOL_ANY_CHAR: exp->_p++; ret = sqstd_rex_newnode(exp,OP_DOT);break;\r
- default:\r
- ret = sqstd_rex_charnode(exp,SQFalse);\r
- break;\r
- }\r
- /* scope block */\r
- {\r
- int op;\r
- unsigned short p0 = 0, p1 = 0;\r
- switch(*exp->_p){\r
- case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; goto __end;\r
- case SQREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; goto __end;\r
- case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; goto __end;\r
- case '{':{\r
- exp->_p++;\r
- if(!isdigit(*exp->_p)) sqstd_rex_error(exp,_SC("number expected"));\r
- p0 = sqstd_rex_parsenumber(exp);\r
- switch(*exp->_p) {\r
- case '}':\r
- p1 = p0; exp->_p++;\r
- goto __end;\r
- case ',':\r
- exp->_p++;\r
- p1 = 0xFFFF;\r
- if(isdigit(*exp->_p)){\r
- p1 = sqstd_rex_parsenumber(exp);\r
- }\r
- sqstd_rex_expect(exp,'}');\r
- goto __end;\r
- default:\r
- sqstd_rex_error(exp,_SC(", or } expected"));\r
- }\r
- }\r
- __end: {\r
- int nnode = sqstd_rex_newnode(exp,OP_GREEDY);\r
- op = OP_GREEDY;\r
- exp->_nodes[nnode].left = ret;\r
- exp->_nodes[nnode].right = ((p0)<<16)|p1;\r
- ret = nnode;\r
- }\r
- }\r
- }\r
- if(*exp->_p != SQREX_SYMBOL_BRANCH && *exp->_p != ')' && *exp->_p != SQREX_SYMBOL_GREEDY_ZERO_OR_MORE && *exp->_p != SQREX_SYMBOL_GREEDY_ONE_OR_MORE && *exp->_p != '\0')\r
- exp->_nodes[ret].next = sqstd_rex_element(exp);\r
- return ret;\r
-}\r
-\r
-static int sqstd_rex_list(SQRex *exp)\r
-{\r
- int ret=-1,e;\r
- if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING) {\r
- exp->_p++;\r
- ret = sqstd_rex_newnode(exp,OP_BOL);\r
- }\r
- e = sqstd_rex_element(exp);\r
- if(ret != -1) {\r
- exp->_nodes[ret].next = e;\r
- }\r
- else ret = e;\r
-\r
- if(*exp->_p == SQREX_SYMBOL_BRANCH) {\r
- int temp;\r
- exp->_p++;\r
- temp = sqstd_rex_newnode(exp,OP_OR);\r
- exp->_nodes[temp].left = ret;\r
- exp->_nodes[temp].right = sqstd_rex_list(exp);\r
- ret = temp;\r
- }\r
- return ret;\r
-}\r
-\r
-static SQBool sqstd_rex_matchcclass(int cclass,SQChar c)\r
-{\r
- switch(cclass) {\r
- case 'a': return isalpha(c)?SQTrue:SQFalse;\r
- case 'A': return !isalpha(c)?SQTrue:SQFalse;\r
- case 'w': return (isalnum(c) || c == '_')?SQTrue:SQFalse;\r
- case 'W': return (!isalnum(c) && c != '_')?SQTrue:SQFalse;\r
- case 's': return isspace(c)?SQTrue:SQFalse;\r
- case 'S': return !isspace(c)?SQTrue:SQFalse;\r
- case 'd': return isdigit(c)?SQTrue:SQFalse;\r
- case 'D': return !isdigit(c)?SQTrue:SQFalse;\r
- case 'x': return isxdigit(c)?SQTrue:SQFalse;\r
- case 'X': return !isxdigit(c)?SQTrue:SQFalse;\r
- case 'c': return iscntrl(c)?SQTrue:SQFalse;\r
- case 'C': return !iscntrl(c)?SQTrue:SQFalse;\r
- case 'p': return ispunct(c)?SQTrue:SQFalse;\r
- case 'P': return !ispunct(c)?SQTrue:SQFalse;\r
- case 'l': return islower(c)?SQTrue:SQFalse;\r
- case 'u': return isupper(c)?SQTrue:SQFalse;\r
- }\r
- return SQFalse; /*cannot happen*/\r
-}\r
-\r
-static SQBool sqstd_rex_matchclass(SQRex* exp,SQRexNode *node,SQChar c)\r
-{\r
- do {\r
- switch(node->type) {\r
- case OP_RANGE:\r
- if(c >= node->left && c <= node->right) return SQTrue;\r
- break;\r
- case OP_CCLASS:\r
- if(sqstd_rex_matchcclass(node->left,c)) return SQTrue;\r
- break;\r
- default:\r
- if(c == node->type)return SQTrue;\r
- }\r
- } while((node->next != -1) && (node = &exp->_nodes[node->next]));\r
- return SQFalse;\r
-}\r
-\r
-static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar *str)\r
-{\r
- SQRexNodeType type = node->type;\r
- switch(type) {\r
- case OP_GREEDY: {\r
- int p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0;\r
- const SQChar *s=str, *good = str;\r
- while((nmaches == 0xFFFF || nmaches < p1) \r
- && (s = sqstd_rex_matchnode(exp,&exp->_nodes[node->left],s))) {\r
- good=s;\r
- nmaches++;\r
- if(s >= exp->_eol)\r
- break;\r
- }\r
- if(p0 == p1 && p0 == nmaches) return good;\r
- else if(nmaches >= p0 && p1 == 0xFFFF) return good;\r
- else if(nmaches >= p0 && nmaches <= p1) return good;\r
- return NULL;\r
- }\r
- case OP_OR: {\r
- const SQChar *asd = str;\r
- SQRexNode *temp=&exp->_nodes[node->left];\r
- while(asd = sqstd_rex_matchnode(exp,temp,asd)) {\r
- if(temp->next != -1)\r
- temp = &exp->_nodes[temp->next];\r
- else\r
- return asd;\r
- }\r
- asd = str;\r
- temp = &exp->_nodes[node->right];\r
- while(asd = sqstd_rex_matchnode(exp,temp,asd)) {\r
- if(temp->next != -1)\r
- temp = &exp->_nodes[temp->next];\r
- else\r
- return asd;\r
- }\r
- return NULL;\r
- break;\r
- }\r
- case OP_EXPR:\r
- case OP_NOCAPEXPR:{\r
- SQRexNode *n = &exp->_nodes[node->left];\r
- const SQChar *cur = str;\r
- int capture = -1;\r
- if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {\r
- capture = exp->_currsubexp;\r
- exp->_matches[capture].begin = cur;\r
- exp->_currsubexp++;\r
- }\r
-\r
- do {\r
- if(!(cur = sqstd_rex_matchnode(exp,n,cur))) {\r
- if(capture != -1){\r
- exp->_matches[capture].begin = 0;\r
- exp->_matches[capture].len = 0;\r
- }\r
- return NULL;\r
- }\r
- } while((n->next != -1) && (n = &exp->_nodes[n->next]));\r
-\r
- if(capture != -1) \r
- exp->_matches[capture].len = cur - exp->_matches[capture].begin;\r
- return cur;\r
- } \r
- case OP_WB:\r
- if(str == exp->_bol && !isspace(*str)\r
- || (str == exp->_eol && !isspace(*(str-1)))\r
- || (!isspace(*str) && isspace(*(str+1)))\r
- || (isspace(*str) && !isspace(*(str+1))) ) {\r
- return (node->left == 'b')?str:NULL;\r
- }\r
- return (node->left == 'b')?NULL:str;\r
- case OP_BOL:\r
- if(str == exp->_bol) return str;\r
- return NULL;\r
- case OP_EOL:\r
- if(str == exp->_eol) return str;\r
- return NULL;\r
- case OP_DOT:\r
- *str++;\r
- return str;\r
- case OP_NCLASS:\r
- case OP_CLASS:\r
- if(sqstd_rex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?SQTrue:SQFalse):(type == OP_NCLASS?SQTrue:SQFalse)) {\r
- *str++;\r
- return str;\r
- }\r
- return NULL;\r
- case OP_CCLASS:\r
- if(sqstd_rex_matchcclass(node->left,*str)) {\r
- *str++;\r
- return str;\r
- }\r
- return NULL;\r
- default: /* char */\r
- if(*str != node->type) return NULL;\r
- *str++;\r
- return str;\r
- }\r
- return NULL;\r
-}\r
-\r
-/* public api */\r
-SQRex *sqstd_rex_compile(const SQChar *pattern,const SQChar **error)\r
-{\r
- SQRex *exp = (SQRex *)sq_malloc(sizeof(SQRex));\r
- exp->_p = pattern;\r
- exp->_nallocated = (int)scstrlen(pattern) * sizeof(SQChar);\r
- exp->_nodes = (SQRexNode *)sq_malloc(exp->_nallocated * sizeof(SQRexNode));\r
- exp->_nsize = 0;\r
- exp->_matches = 0;\r
- exp->_nsubexpr = 0;\r
- exp->_first = sqstd_rex_newnode(exp,OP_EXPR);\r
- exp->_error = error;\r
- exp->_jmpbuf = sq_malloc(sizeof(jmp_buf));\r
- if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {\r
- exp->_nodes[exp->_first].left=sqstd_rex_list(exp);\r
- if(*exp->_p!='\0')\r
- sqstd_rex_error(exp,_SC("unexpected character"));\r
-#ifdef _DEBUG\r
- {\r
- int nsize,i;\r
- SQRexNode *t;\r
- nsize = exp->_nsize;\r
- t = &exp->_nodes[0];\r
- scprintf(_SC("\n"));\r
- for(i = 0;i < nsize; i++) {\r
- if(exp->_nodes[i].type>MAX_CHAR)\r
- scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]);\r
- else\r
- scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type);\r
- scprintf(_SC("left %02d right %02d next %02d\n"),exp->_nodes[i].left,exp->_nodes[i].right,exp->_nodes[i].next);\r
- }\r
- scprintf(_SC("\n"));\r
- }\r
-#endif\r
- exp->_matches = (SQRexMatch *) sq_malloc(exp->_nsubexpr * sizeof(SQRexMatch));\r
- memset(exp->_matches,0,exp->_nsubexpr * sizeof(SQRexMatch));\r
- }\r
- else{\r
- sqstd_rex_free(exp);\r
- return NULL;\r
- }\r
- return exp;\r
-}\r
-\r
-void sqstd_rex_free(SQRex *exp)\r
-{\r
- if(exp) {\r
- if(exp->_nodes) sq_free(exp->_nodes,exp->_nallocated * sizeof(SQRexNode));\r
- if(exp->_jmpbuf) sq_free(exp->_jmpbuf,sizeof(jmp_buf));\r
- if(exp->_matches) sq_free(exp->_matches,exp->_nsubexpr * sizeof(SQRexMatch));\r
- sq_free(exp,sizeof(SQRex));\r
- }\r
-}\r
-\r
-SQBool sqstd_rex_match(SQRex* exp,const SQChar* text)\r
-{\r
- const SQChar* res = NULL;\r
- exp->_bol = text;\r
- exp->_eol = text + scstrlen(text);\r
- exp->_currsubexp = 0;\r
- res = sqstd_rex_matchnode(exp,exp->_nodes,text);\r
- if(res == NULL || res != exp->_eol)\r
- return SQFalse;\r
- return SQTrue;\r
-}\r
-\r
-SQBool sqstd_rex_searchrange(SQRex* exp,const SQChar* text_begin,const SQChar* text_end,const SQChar** out_begin, const SQChar** out_end)\r
-{\r
- const SQChar *cur = NULL;\r
- int node = exp->_first;\r
- if(text_begin >= text_end) return SQFalse;\r
- exp->_bol = text_begin;\r
- exp->_eol = text_end;\r
- do {\r
- cur = text_begin;\r
- while(node != -1) {\r
- exp->_currsubexp = 0;\r
- cur = sqstd_rex_matchnode(exp,&exp->_nodes[node],cur);\r
- if(!cur)\r
- break;\r
- node = exp->_nodes[node].next;\r
- }\r
- *text_begin++;\r
- } while(cur == NULL && text_begin != text_end);\r
-\r
- if(cur == NULL)\r
- return SQFalse;\r
-\r
- --text_begin;\r
-\r
- if(out_begin) *out_begin = text_begin;\r
- if(out_end) *out_end = cur;\r
- return SQTrue;\r
-}\r
-\r
-SQBool sqstd_rex_search(SQRex* exp,const SQChar* text, const SQChar** out_begin, const SQChar** out_end)\r
-{\r
- return sqstd_rex_searchrange(exp,text,text + scstrlen(text),out_begin,out_end);\r
-}\r
-\r
-int sqstd_rex_getsubexpcount(SQRex* exp)\r
-{\r
- return exp->_nsubexpr;\r
-}\r
-\r
-SQBool sqstd_rex_getsubexp(SQRex* exp, int n, SQRexMatch *subexp)\r
-{\r
- if( n<0 || n >= exp->_nsubexpr) return SQFalse;\r
- *subexp = exp->_matches[n];\r
- return SQTrue;\r
-}\r
-\r
+/* see copyright notice in squirrel.h */
+#include <squirrel.h>
+#include <string.h>
+#include <ctype.h>
+#include <setjmp.h>
+#include "sqstdstring.h"
+
+#ifdef _DEBUG
+#include <stdio.h>
+
+/* Debug-only table of printable opcode names, indexed by (node type - MAX_CHAR). */
+static const SQChar *g_nnames[] =
+{
+    _SC("NONE"),          /* MAX_CHAR+0  */
+    _SC("OP_GREEDY"),     /* MAX_CHAR+1  */
+    _SC("OP_OR"),         /* MAX_CHAR+2  */
+    _SC("OP_EXPR"),       /* MAX_CHAR+3  */
+    _SC("OP_NOCAPEXPR"),  /* MAX_CHAR+4  */
+    _SC("OP_DOT"),        /* MAX_CHAR+5  */
+    _SC("OP_CLASS"),      /* MAX_CHAR+6  */
+    _SC("OP_CCLASS"),     /* MAX_CHAR+7  */
+    _SC("OP_NCLASS"),     /* MAX_CHAR+8  */
+    _SC("OP_RANGE"),      /* MAX_CHAR+9  */
+    _SC("OP_CHAR"),       /* MAX_CHAR+10 */
+    _SC("OP_EOL"),        /* MAX_CHAR+11 */
+    _SC("OP_BOL"),        /* MAX_CHAR+12 */
+    _SC("OP_WB")          /* MAX_CHAR+13 */
+};
+
+#endif
+
+/*
+ * Node opcodes. A node whose type is not one of these holds a literal
+ * character code, so every opcode is numbered above MAX_CHAR.
+ * Each expansion is parenthesized so that it behaves as a single operand
+ * wherever it is used (e.g. `2 * OP_OR` or `-OP_OR`).
+ */
+#define OP_GREEDY (MAX_CHAR+1) /* * + ? {n} */
+#define OP_OR (MAX_CHAR+2)
+#define OP_EXPR (MAX_CHAR+3) /* parenthesis () */
+#define OP_NOCAPEXPR (MAX_CHAR+4) /* parenthesis (?:) */
+#define OP_DOT (MAX_CHAR+5)
+#define OP_CLASS (MAX_CHAR+6)
+#define OP_CCLASS (MAX_CHAR+7)
+#define OP_NCLASS (MAX_CHAR+8) /* negated class: [^ */
+#define OP_RANGE (MAX_CHAR+9)
+#define OP_CHAR (MAX_CHAR+10)
+#define OP_EOL (MAX_CHAR+11)
+#define OP_BOL (MAX_CHAR+12)
+#define OP_WB (MAX_CHAR+13)
+
+#define SQREX_SYMBOL_ANY_CHAR '.' /* compiled to an OP_DOT node (any one character) */
+#define SQREX_SYMBOL_GREEDY_ONE_OR_MORE '+' /* repetition with min 1, no upper bound */
+#define SQREX_SYMBOL_GREEDY_ZERO_OR_MORE '*' /* repetition with min 0, no upper bound */
+#define SQREX_SYMBOL_GREEDY_ZERO_OR_ONE '?' /* repetition with min 0, max 1; also introduces (?: after '(' */
+#define SQREX_SYMBOL_BRANCH '|' /* alternation, compiled to an OP_OR node */
+#define SQREX_SYMBOL_END_OF_STRING '$' /* compiled to an OP_EOL node */
+#define SQREX_SYMBOL_BEGINNING_OF_STRING '^' /* OP_BOL at the start of a list; negates a class when it follows '[' */
+#define SQREX_SYMBOL_ESCAPE_CHAR '\\' /* introduces escapes such as \n, \d, \b */
+
+
+typedef int SQRexNodeType; /* either a literal character code or one of the OP_* opcodes */
+
+typedef struct tagSQRexNode{ /* one node of a compiled pattern; nodes refer to each other by index into SQRex::_nodes */
+ SQRexNodeType type;
+ long left; /* operand, -1 when unused: child list for OP_EXPR/OP_NOCAPEXPR/OP_OR/OP_GREEDY, first member for OP_CLASS/OP_NCLASS, class letter for OP_CCLASS, low bound for OP_RANGE, 'b' or 'B' for OP_WB */
+ long right; /* second operand, -1 when unused: capture index for OP_EXPR, second alternative for OP_OR, (min<<16)|max for OP_GREEDY, high bound for OP_RANGE */
+ int next; /* index of the node that follows this one in its sequence, -1 if none */
+}SQRexNode;
+
+struct SQRex{ /* a compiled pattern plus the scratch state used while compiling and matching */
+ const SQChar *_eol; /* end of the text being matched; set by sqstd_rex_match / sqstd_rex_searchrange */
+ const SQChar *_bol; /* start of the text being matched; set by the same functions */
+ const SQChar *_p; /* parse cursor into the pattern while compiling */
+ int _first; /* index of the root OP_EXPR node */
+ int _op; /* NOTE(review): not referenced by any code visible in this file */
+ SQRexNode *_nodes; /* node array; may be moved by sqstd_rex_newnode when it grows */
+ int _nallocated; /* capacity of _nodes, in nodes */
+ int _nsize; /* number of nodes in use */
+ int _nsubexpr; /* number of OP_EXPR nodes created (the root included) */
+ SQRexMatch *_matches; /* _nsubexpr capture slots, allocated at the end of a successful compile */
+ int _currsubexp; /* index of the next capture slot to fill during a match */
+ void *_jmpbuf; /* heap-allocated jmp_buf that sqstd_rex_error jumps to */
+ const SQChar **_error; /* where the error message pointer is stored; may be NULL */
+};
+
+static int sqstd_rex_list(SQRex *exp);
+
+/*
+ * Appends a node of the given type to exp->_nodes and returns its index.
+ * The node starts with left, right and next set to -1; an OP_EXPR node gets
+ * the next capture index in `right` and bumps exp->_nsubexpr.
+ *
+ * The array may be reallocated here, so callers must not keep SQRexNode
+ * pointers (or half-evaluated `exp->_nodes[i]` lvalues) across this call.
+ */
+static int sqstd_rex_newnode(SQRex *exp, SQRexNodeType type)
+{
+    SQRexNode n;
+    n.type = type;
+    n.next = n.right = n.left = -1;
+    if(type == OP_EXPR)
+        n.right = exp->_nsubexpr++;
+    if(exp->_nallocated < (exp->_nsize + 1)) {
+        int oldsize = exp->_nallocated;
+        /* Doubling a capacity of 0 (empty pattern) would stay at 0 and the
+           store below would land outside the buffer, so start from 1. */
+        exp->_nallocated = (oldsize > 0) ? oldsize * 2 : 1;
+        exp->_nodes = (SQRexNode *)sq_realloc(exp->_nodes, oldsize * sizeof(SQRexNode), exp->_nallocated * sizeof(SQRexNode));
+    }
+    exp->_nodes[exp->_nsize++] = n;
+    return exp->_nsize - 1;
+}
+
+/*
+ * Reports a compile error: stores the message through exp->_error when the
+ * caller supplied one, then jumps back to the setjmp point saved in
+ * exp->_jmpbuf. This function does not return.
+ */
+static void sqstd_rex_error(SQRex *exp,const SQChar *error)
+{
+    if(exp->_error) {
+        *exp->_error = error;
+    }
+    {
+        jmp_buf *recovery = (jmp_buf *)exp->_jmpbuf;
+        longjmp(*recovery, -1);
+    }
+}
+
+/*
+ * Consumes the pattern character `n` at the cursor, or raises a compile
+ * error if a different character is found there.
+ */
+static void sqstd_rex_expect(SQRex *exp, int n){
+    if((*exp->_p) == n) {
+        exp->_p++;
+        return;
+    }
+    /* does not return: sqstd_rex_error longjmps out of the parser */
+    sqstd_rex_error(exp, _SC("expected paren"));
+}
+
+/*
+ * Returns SQFalse when `c` is one of the pattern metacharacters
+ * (| * ? + ^ $ . \ ( ) [ { }) and SQTrue for every other character.
+ */
+static SQBool sqstd_rex_ischar(SQChar c)
+{
+    static const SQChar reserved[] = {
+        SQREX_SYMBOL_BRANCH,
+        SQREX_SYMBOL_GREEDY_ZERO_OR_MORE,
+        SQREX_SYMBOL_GREEDY_ZERO_OR_ONE,
+        SQREX_SYMBOL_GREEDY_ONE_OR_MORE,
+        SQREX_SYMBOL_BEGINNING_OF_STRING,
+        SQREX_SYMBOL_END_OF_STRING,
+        SQREX_SYMBOL_ANY_CHAR,
+        SQREX_SYMBOL_ESCAPE_CHAR,
+        '(', ')', '[', '{', '}'
+    };
+    unsigned int i;
+    for(i = 0; i < sizeof(reserved) / sizeof(reserved[0]); i++) {
+        if(c == reserved[i])
+            return SQFalse;
+    }
+    return SQTrue;
+}
+
+/*
+ * Reads one literal character at the cursor (used for the upper bound of a
+ * class range) and advances past it. \v \n \t \r \f are translated to the
+ * corresponding control characters; any other escaped character stands for
+ * itself. An unescaped metacharacter, or the end of the pattern, raises
+ * "letter expected".
+ */
+static SQChar sqstd_rex_escapechar(SQRex *exp)
+{
+    if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR){
+        exp->_p++;
+        /* A backslash that ends the pattern escapes nothing; returning the
+           terminator would move the cursor past the end of the pattern. */
+        if(*exp->_p == '\0')
+            sqstd_rex_error(exp,_SC("letter expected"));
+        switch(*exp->_p) {
+        case 'v': exp->_p++; return '\v';
+        case 'n': exp->_p++; return '\n';
+        case 't': exp->_p++; return '\t';
+        case 'r': exp->_p++; return '\r';
+        case 'f': exp->_p++; return '\f';
+        default: return (*exp->_p++);
+        }
+    }
+    /* Same reasoning for an unescaped end of pattern (e.g. "[a-"). */
+    if(*exp->_p == '\0' || !sqstd_rex_ischar(*exp->_p))
+        sqstd_rex_error(exp,_SC("letter expected"));
+    return (*exp->_p++);
+}
+
+/*
+ * Creates an OP_CCLASS node whose `left` field holds the class letter
+ * (`classid`) and returns the index of the new node.
+ */
+static int sqstd_rex_charclass(SQRex *exp,int classid)
+{
+    const int idx = sqstd_rex_newnode(exp,OP_CCLASS);
+    /* the pointer is taken after the allocation, so it cannot be stale */
+    SQRexNode *created = &exp->_nodes[idx];
+    created->left = classid;
+    return idx;
+}
+
+/*
+ * Parses one character-like item at the cursor and returns the index of the
+ * node created for it:
+ *   - \n \t \r \f \v      -> literal node holding that control character
+ *   - \a \w \s \d ... \u  -> OP_CCLASS node holding the class letter
+ *   - \b \B               -> OP_WB node, unless `isclass` is true, in which
+ *                            case the letter is taken literally
+ *   - any other escape    -> literal node holding the escaped character
+ *   - a plain character   -> literal node
+ * An unescaped metacharacter, or the end of the pattern, raises
+ * "letter expected".
+ */
+static int sqstd_rex_charnode(SQRex *exp,SQBool isclass)
+{
+    if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR) {
+        exp->_p++;
+        /* A trailing backslash has nothing to escape; building a node from
+           the terminator would move the cursor past the end of the pattern. */
+        if(*exp->_p == '\0')
+            sqstd_rex_error(exp,_SC("letter expected"));
+        switch(*exp->_p) {
+        case 'n': exp->_p++; return sqstd_rex_newnode(exp,'\n');
+        case 't': exp->_p++; return sqstd_rex_newnode(exp,'\t');
+        case 'r': exp->_p++; return sqstd_rex_newnode(exp,'\r');
+        case 'f': exp->_p++; return sqstd_rex_newnode(exp,'\f');
+        case 'v': exp->_p++; return sqstd_rex_newnode(exp,'\v');
+        case 'a': case 'A': case 'w': case 'W': case 's': case 'S':
+        case 'd': case 'D': case 'x': case 'X': case 'c': case 'C':
+        case 'p': case 'P': case 'l': case 'u':
+            {
+                SQChar t = *exp->_p;
+                exp->_p++;
+                return sqstd_rex_charclass(exp,t);
+            }
+        case 'b':
+        case 'B':
+            if(!isclass) {
+                int node = sqstd_rex_newnode(exp,OP_WB);
+                exp->_nodes[node].left = *exp->_p;
+                exp->_p++;
+                return node;
+            }
+            /* fall through: inside a class, \b and \B are plain letters */
+        default: return sqstd_rex_newnode(exp,(*exp->_p++));
+        }
+    }
+    else if(*exp->_p == '\0' || !sqstd_rex_ischar(*exp->_p)) {
+        /* end of pattern (e.g. "a|" or an empty pattern) or a bare metacharacter */
+        sqstd_rex_error(exp,_SC("letter expected"));
+    }
+    return sqstd_rex_newnode(exp,*exp->_p++);
+}
+/*
+ * Parses the inside of a bracket class; the cursor is just after '[' on entry
+ * and is left on the closing ']' (or on the pattern terminator if the class
+ * is unterminated, which the caller then rejects).
+ *
+ * Returns the index of an OP_CLASS node, or OP_NCLASS when the class starts
+ * with '^'. The members (literal nodes, OP_CCLASS nodes and OP_RANGE nodes)
+ * are chained through `next`, and the head of that chain is stored in the
+ * class node's `left`.
+ *
+ * `first` is the index of the most recently parsed member that has not been
+ * linked yet; it is held back so that a following '-' can turn it into the
+ * lower bound of a range.
+ */
+static int sqstd_rex_class(SQRex *exp)
+{
+    int ret = -1;
+    int first = -1,chain;
+    if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING){
+        ret = sqstd_rex_newnode(exp,OP_NCLASS);
+        exp->_p++;
+    }else ret = sqstd_rex_newnode(exp,OP_CLASS);
+
+    if(*exp->_p == ']' || *exp->_p == '-'){
+        /* A leading ']' or '-' is a literal member. It has to become a node:
+           `first` is used as a node index below, not as a character. */
+        first = sqstd_rex_newnode(exp,*exp->_p);
+        exp->_p++;
+    }
+    chain = ret;
+    /* Stop at the terminator as well as at ']' so that an unterminated class
+       cannot run past the end of the pattern. */
+    while(*exp->_p != ']' && *exp->_p != '\0') {
+        if(*exp->_p == '-' && first != -1){
+            int r;
+            exp->_p++;
+            if(*exp->_p == ']' || *exp->_p == '\0') sqstd_rex_error(exp,_SC("unfinished range"));
+            if(exp->_nodes[first].type == OP_CCLASS) sqstd_rex_error(exp,_SC("cannot use character classes in ranges"));
+            r = sqstd_rex_newnode(exp,OP_RANGE);
+            exp->_nodes[r].left = exp->_nodes[first].type;
+            exp->_nodes[r].right = sqstd_rex_escapechar(exp);
+            /* compare the actual bounds, after escape translation */
+            if(exp->_nodes[r].left > exp->_nodes[r].right) sqstd_rex_error(exp,_SC("invalid range"));
+            exp->_nodes[chain].next = r;
+            chain = r;
+            first = -1;
+        }
+        else{
+            if(first!=-1){
+                /* the held-back member is a plain member after all: link it */
+                exp->_nodes[chain].next = first;
+                chain = first;
+            }
+            first = sqstd_rex_charnode(exp,SQTrue);
+        }
+    }
+    if(first!=-1){
+        exp->_nodes[chain].next = first;
+    }
+    /* The chain was built through the class node's own `next`; move it to
+       `left` so that `next` is free to link the class to what follows it. */
+    exp->_nodes[ret].left = exp->_nodes[ret].next;
+    exp->_nodes[ret].next = -1;
+    return ret;
+}
+
+/*
+ * Parses the run of decimal digits at the cursor (the caller has checked
+ * that the first character is a digit) and returns its value, leaving the
+ * cursor on the first non-digit. At most nine digits are accepted; a tenth
+ * raises "overflow in numeric constant".
+ */
+static int sqstd_rex_parsenumber(SQRex *exp)
+{
+    int ret = *exp->_p-'0';
+    int positions = 10;
+    exp->_p++;
+    /* Explicit range test instead of isdigit(): same set of characters, but
+       well defined for any SQChar value, including negative ones. */
+    while(*exp->_p >= '0' && *exp->_p <= '9') {
+        /* Check before accumulating: nine digits always fit in an int, and
+           the tenth must be rejected before it can overflow `ret`. */
+        if(positions==1000000000) sqstd_rex_error(exp,_SC("overflow in numeric constant"));
+        ret = ret*10+(*exp->_p-'0');
+        exp->_p++;
+        positions *= 10;
+    }
+    return ret;
+}
+
+/*
+ * Parses one element at the cursor: a group "( )" or "(?: )", a bracket
+ * class, '$', '.', or a single character item. It is optionally followed by
+ * a repetition operator (* + ? {n} {n,} {n,m}), in which case the element is
+ * wrapped in an OP_GREEDY node whose `right` packs (min << 16) | max, with
+ * max == 0xFFFF meaning "no upper bound".
+ * Unless the cursor is then on '|', ')', '*', '+' or the end of the pattern,
+ * the following elements are parsed recursively and linked through `next`.
+ * Returns the index of the element's node (the OP_GREEDY node if wrapped).
+ *
+ * Results of calls that can create nodes are stored in a local before being
+ * written into exp->_nodes[...]: node creation may reallocate the array, and
+ * C does not guarantee that the left-hand side of an assignment is evaluated
+ * after the call on the right-hand side.
+ */
+static int sqstd_rex_element(SQRex *exp)
+{
+    int ret;
+    switch(*exp->_p)
+    {
+    case '(': {
+        int expr, inner;
+        exp->_p++;
+        if(*exp->_p == '?') {
+            exp->_p++;
+            sqstd_rex_expect(exp,':');
+            expr = sqstd_rex_newnode(exp,OP_NOCAPEXPR);
+        }
+        else
+            expr = sqstd_rex_newnode(exp,OP_EXPR);
+        inner = sqstd_rex_list(exp);
+        exp->_nodes[expr].left = inner;
+        ret = expr;
+        sqstd_rex_expect(exp,')');
+        }
+        break;
+    case '[':
+        exp->_p++;
+        ret = sqstd_rex_class(exp);
+        sqstd_rex_expect(exp,']');
+        break;
+    case SQREX_SYMBOL_END_OF_STRING: exp->_p++; ret = sqstd_rex_newnode(exp,OP_EOL);break;
+    case SQREX_SYMBOL_ANY_CHAR: exp->_p++; ret = sqstd_rex_newnode(exp,OP_DOT);break;
+    default:
+        ret = sqstd_rex_charnode(exp,SQFalse);
+        break;
+    }
+    /* optional repetition operator */
+    {
+        SQBool repeated = SQFalse;
+        unsigned short p0 = 0, p1 = 0;
+        switch(*exp->_p){
+        case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; repeated = SQTrue; break;
+        case SQREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; repeated = SQTrue; break;
+        case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; repeated = SQTrue; break;
+        case '{': {
+            int count;
+            exp->_p++;
+            if(!(*exp->_p >= '0' && *exp->_p <= '9')) sqstd_rex_error(exp,_SC("number expected"));
+            count = sqstd_rex_parsenumber(exp);
+            /* the bounds are stored in 16 bits each; reject instead of truncating */
+            if(count > 0xFFFF) sqstd_rex_error(exp,_SC("overflow in numeric constant"));
+            p0 = (unsigned short)count;
+            switch(*exp->_p) {
+            case '}':
+                p1 = p0; exp->_p++;
+                break;
+            case ',':
+                exp->_p++;
+                p1 = 0xFFFF;
+                if(*exp->_p >= '0' && *exp->_p <= '9'){
+                    count = sqstd_rex_parsenumber(exp);
+                    if(count > 0xFFFF) sqstd_rex_error(exp,_SC("overflow in numeric constant"));
+                    p1 = (unsigned short)count;
+                }
+                sqstd_rex_expect(exp,'}');
+                break;
+            default:
+                sqstd_rex_error(exp,_SC(", or } expected"));
+            }
+            repeated = SQTrue;
+            }
+            break;
+        default:
+            break;
+        }
+        if(repeated) {
+            int nnode = sqstd_rex_newnode(exp,OP_GREEDY);
+            exp->_nodes[nnode].left = ret;
+            /* shift in an unsigned type: p0 << 16 can exceed INT_MAX */
+            exp->_nodes[nnode].right = (long)(((unsigned long)p0 << 16) | p1);
+            ret = nnode;
+        }
+    }
+    if(*exp->_p != SQREX_SYMBOL_BRANCH && *exp->_p != ')' && *exp->_p != SQREX_SYMBOL_GREEDY_ZERO_OR_MORE && *exp->_p != SQREX_SYMBOL_GREEDY_ONE_OR_MORE && *exp->_p != '\0') {
+        int following = sqstd_rex_element(exp);
+        exp->_nodes[ret].next = following;
+    }
+    return ret;
+}
+
+/*
+ * Parses a list: an optional leading '^' (OP_BOL), a chain of elements and,
+ * if a '|' follows, the alternative list after it. With an alternative the
+ * result is an OP_OR node whose `left` is the first branch and whose `right`
+ * is the second; otherwise it is the first node of the chain.
+ * Returns the index of that node.
+ */
+static int sqstd_rex_list(SQRex *exp)
+{
+    int ret=-1,e;
+    if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING) {
+        exp->_p++;
+        ret = sqstd_rex_newnode(exp,OP_BOL);
+    }
+    e = sqstd_rex_element(exp);
+    if(ret != -1) {
+        exp->_nodes[ret].next = e;
+    }
+    else ret = e;
+
+    if(*exp->_p == SQREX_SYMBOL_BRANCH) {
+        int temp, alternative;
+        exp->_p++;
+        temp = sqstd_rex_newnode(exp,OP_OR);
+        exp->_nodes[temp].left = ret;
+        /* Parse first, store second: the recursive call can reallocate
+           exp->_nodes, and the evaluation order of `a[i].right = f()` is
+           unspecified, so the store could otherwise hit the old buffer. */
+        alternative = sqstd_rex_list(exp);
+        exp->_nodes[temp].right = alternative;
+        ret = temp;
+    }
+    return ret;
+}
+
+/*
+ * Tests character `c` against the escape class named by the letter `cclass`
+ * (a/A alpha, w/W alnum or '_', s/S space, d/D digit, x/X hex digit,
+ * c/C control, p/P punctuation, l lower, u upper; the upper-case letters
+ * of the first seven pairs are the negations). Unknown letters give SQFalse.
+ */
+static SQBool sqstd_rex_matchcclass(int cclass,SQChar c)
+{
+    int hit;
+    switch(cclass) {
+    case 'a': hit = isalpha(c); break;
+    case 'A': hit = !isalpha(c); break;
+    case 'w': hit = (isalnum(c) || c == '_'); break;
+    case 'W': hit = (!isalnum(c) && c != '_'); break;
+    case 's': hit = isspace(c); break;
+    case 'S': hit = !isspace(c); break;
+    case 'd': hit = isdigit(c); break;
+    case 'D': hit = !isdigit(c); break;
+    case 'x': hit = isxdigit(c); break;
+    case 'X': hit = !isxdigit(c); break;
+    case 'c': hit = iscntrl(c); break;
+    case 'C': hit = !iscntrl(c); break;
+    case 'p': hit = ispunct(c); break;
+    case 'P': hit = !ispunct(c); break;
+    case 'l': hit = islower(c); break;
+    case 'u': hit = isupper(c); break;
+    default:
+        return SQFalse; /* the parser only emits the letters above */
+    }
+    return hit ? SQTrue : SQFalse;
+}
+
+/*
+ * Walks the member chain of a bracket class starting at `node` and returns
+ * SQTrue as soon as `c` matches a member: inside an OP_RANGE, accepted by an
+ * OP_CCLASS, or equal to a literal member. Returns SQFalse when the chain
+ * ends without a match.
+ */
+static SQBool sqstd_rex_matchclass(SQRex* exp,SQRexNode *node,SQChar c)
+{
+    for(;;) {
+        if(node->type == OP_RANGE) {
+            if(c >= node->left && c <= node->right) return SQTrue;
+        }
+        else if(node->type == OP_CCLASS) {
+            if(sqstd_rex_matchcclass(node->left,c)) return SQTrue;
+        }
+        else if(c == node->type) {
+            return SQTrue;
+        }
+
+        if(node->next == -1)
+            break;
+        node = &exp->_nodes[node->next];
+    }
+    return SQFalse;
+}
+
+/*
+ * Matches the single node `node` against the text at `str`.
+ * Returns the position just after the matched text (equal to `str` for
+ * zero-width nodes such as anchors), or NULL when the node does not match.
+ * Only container nodes (groups, alternations, repetitions) follow `next`
+ * links, and only for their own children; the caller walks `node->next`.
+ *
+ * Nodes that consume a character fail at exp->_eol instead of reading the
+ * character stored there, so a match never extends past the end of the text
+ * (which matters for sqstd_rex_searchrange, whose end need not be a NUL).
+ */
+static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar *str)
+{
+    SQRexNodeType type = node->type;
+    switch(type) {
+    case OP_GREEDY: {
+        /* `right` packs (min << 16) | max; max == 0xFFFF means unbounded */
+        int p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0;
+        const SQChar *s=str, *good = str;
+        while((nmaches == 0xFFFF || nmaches < p1)
+            && (s = sqstd_rex_matchnode(exp,&exp->_nodes[node->left],s)) != NULL) {
+            good=s; /* last position reached by a successful repetition */
+            nmaches++;
+            if(s >= exp->_eol)
+                break;
+        }
+        if(p0 == p1 && p0 == nmaches) return good;
+        else if(nmaches >= p0 && p1 == 0xFFFF) return good;
+        else if(nmaches >= p0 && nmaches <= p1) return good;
+        return NULL;
+    }
+    case OP_OR: {
+        /* try the whole left chain; if any node fails, retry from `str`
+           with the right chain */
+        const SQChar *asd = str;
+        SQRexNode *temp=&exp->_nodes[node->left];
+        while((asd = sqstd_rex_matchnode(exp,temp,asd)) != NULL) {
+            if(temp->next != -1)
+                temp = &exp->_nodes[temp->next];
+            else
+                return asd;
+        }
+        asd = str;
+        temp = &exp->_nodes[node->right];
+        while((asd = sqstd_rex_matchnode(exp,temp,asd)) != NULL) {
+            if(temp->next != -1)
+                temp = &exp->_nodes[temp->next];
+            else
+                return asd;
+        }
+        return NULL;
+    }
+    case OP_EXPR:
+    case OP_NOCAPEXPR:{
+        SQRexNode *n = &exp->_nodes[node->left];
+        const SQChar *cur = str;
+        int capture = -1;
+        /* a capturing group records its span only when it is the next
+           capture slot expected */
+        if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {
+            capture = exp->_currsubexp;
+            exp->_matches[capture].begin = cur;
+            exp->_currsubexp++;
+        }
+
+        do {
+            if(!(cur = sqstd_rex_matchnode(exp,n,cur))) {
+                if(capture != -1){
+                    exp->_matches[capture].begin = 0;
+                    exp->_matches[capture].len = 0;
+                }
+                return NULL;
+            }
+        } while((n->next != -1) && (n = &exp->_nodes[n->next]));
+
+        if(capture != -1)
+            exp->_matches[capture].len = cur - exp->_matches[capture].begin;
+        return cur;
+    }
+    case OP_WB:
+        /* the look-ahead tests are skipped at the end of the text, where
+           str+1 would lie beyond it */
+        if((str == exp->_bol && !isspace(*str))
+         || (str == exp->_eol && !isspace(*(str-1)))
+         || (str != exp->_eol && !isspace(*str) && isspace(*(str+1)))
+         || (str != exp->_eol && isspace(*str) && !isspace(*(str+1))) ) {
+            return (node->left == 'b')?str:NULL;
+        }
+        return (node->left == 'b')?NULL:str;
+    case OP_BOL:
+        if(str == exp->_bol) return str;
+        return NULL;
+    case OP_EOL:
+        if(str == exp->_eol) return str;
+        return NULL;
+    case OP_DOT:
+        if(str == exp->_eol) return NULL;
+        return str + 1;
+    case OP_NCLASS:
+    case OP_CLASS:
+        if(str == exp->_eol) return NULL;
+        if(sqstd_rex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?SQTrue:SQFalse):(type == OP_NCLASS?SQTrue:SQFalse)) {
+            return str + 1;
+        }
+        return NULL;
+    case OP_CCLASS:
+        if(str == exp->_eol) return NULL;
+        if(sqstd_rex_matchcclass(node->left,*str)) {
+            return str + 1;
+        }
+        return NULL;
+    default: /* literal character: the node type is the character code */
+        if(str == exp->_eol) return NULL;
+        if(*str != node->type) return NULL;
+        return str + 1;
+    }
+    return NULL;
+}
+
+/* public api */
+/*
+ * Compiles `pattern` into a newly allocated SQRex.
+ * Returns the compiled expression, to be released with sqstd_rex_free, or
+ * NULL on a syntax error; in that case, if `error` is not NULL, *error is set
+ * to a static message describing the problem.
+ * Parse errors are raised by sqstd_rex_error, which longjmps back to the
+ * setjmp below.
+ */
+SQRex *sqstd_rex_compile(const SQChar *pattern,const SQChar **error)
+{
+    SQRex *exp = (SQRex *)sq_malloc(sizeof(SQRex));
+    /* fields only meaningful while matching; give them defined values */
+    exp->_eol = NULL;
+    exp->_bol = NULL;
+    exp->_op = 0;
+    exp->_currsubexp = 0;
+    exp->_p = pattern;
+    exp->_nallocated = (int)(scstrlen(pattern) * sizeof(SQChar));
+    /* an empty pattern must still leave room for the root node */
+    if(exp->_nallocated < 1)
+        exp->_nallocated = 1;
+    exp->_nodes = (SQRexNode *)sq_malloc(exp->_nallocated * sizeof(SQRexNode));
+    exp->_nsize = 0;
+    exp->_matches = 0;
+    exp->_nsubexpr = 0;
+    exp->_first = sqstd_rex_newnode(exp,OP_EXPR);
+    exp->_error = error;
+    exp->_jmpbuf = sq_malloc(sizeof(jmp_buf));
+    if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {
+        /* Parse first, store second: parsing can reallocate exp->_nodes and
+           the evaluation order of `a[i].left = f()` is unspecified. */
+        int root = sqstd_rex_list(exp);
+        exp->_nodes[exp->_first].left = root;
+        if(*exp->_p!='\0')
+            sqstd_rex_error(exp,_SC("unexpected character"));
+#ifdef _DEBUG
+        {
+            int nsize,i;
+            nsize = exp->_nsize;
+            scprintf(_SC("\n"));
+            for(i = 0;i < nsize; i++) {
+                if(exp->_nodes[i].type>MAX_CHAR)
+                    scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]);
+                else
+                    scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type);
+                /* left and right are long; cast to match the %d conversions */
+                scprintf(_SC("left %02d right %02d next %02d\n"),(int)exp->_nodes[i].left,(int)exp->_nodes[i].right,exp->_nodes[i].next);
+            }
+            scprintf(_SC("\n"));
+        }
+#endif
+        exp->_matches = (SQRexMatch *) sq_malloc(exp->_nsubexpr * sizeof(SQRexMatch));
+        memset(exp->_matches,0,exp->_nsubexpr * sizeof(SQRexMatch));
+    }
+    else{
+        /* reached through longjmp from sqstd_rex_error */
+        sqstd_rex_free(exp);
+        return NULL;
+    }
+    return exp;
+}
+
+/*
+ * Releases a compiled expression together with its node array, jump buffer
+ * and capture array. Passing NULL is allowed and does nothing.
+ */
+void sqstd_rex_free(SQRex *exp)
+{
+    if(!exp)
+        return;
+
+    if(exp->_nodes) {
+        sq_free(exp->_nodes,exp->_nallocated * sizeof(SQRexNode));
+    }
+    if(exp->_jmpbuf) {
+        sq_free(exp->_jmpbuf,sizeof(jmp_buf));
+    }
+    if(exp->_matches) {
+        sq_free(exp->_matches,exp->_nsubexpr * sizeof(SQRexMatch));
+    }
+    sq_free(exp,sizeof(SQRex));
+}
+
+/*
+ * Returns SQTrue when the expression matches the whole of the NUL-terminated
+ * string `text`, i.e. matching from the first node ends exactly at the end
+ * of the string; SQFalse otherwise.
+ */
+SQBool sqstd_rex_match(SQRex* exp,const SQChar* text)
+{
+    const SQChar *stop;
+
+    exp->_bol = text;
+    exp->_eol = text + scstrlen(text);
+    exp->_currsubexp = 0;
+
+    stop = sqstd_rex_matchnode(exp,exp->_nodes,text);
+    return (stop != NULL && stop == exp->_eol) ? SQTrue : SQFalse;
+}
+
+/*
+ * Searches [text_begin, text_end) for the first position at which the
+ * expression matches, trying each start position in turn.
+ * On success returns SQTrue and stores the start and end of the match in
+ * *out_begin and *out_end (either pointer may be NULL). Returns SQFalse when
+ * the range is empty or no position matches.
+ */
+SQBool sqstd_rex_searchrange(SQRex* exp,const SQChar* text_begin,const SQChar* text_end,const SQChar** out_begin, const SQChar** out_end)
+{
+    const SQChar *start;
+    const SQChar *stop = NULL;
+    /* initialised once: it only moves off the root node after a successful
+       match, which also ends the search */
+    int node = exp->_first;
+
+    if(text_begin >= text_end) return SQFalse;
+    exp->_bol = text_begin;
+    exp->_eol = text_end;
+
+    for(start = text_begin; start != text_end; start++) {
+        stop = start;
+        while(node != -1) {
+            exp->_currsubexp = 0;
+            stop = sqstd_rex_matchnode(exp,&exp->_nodes[node],stop);
+            if(stop == NULL)
+                break;
+            node = exp->_nodes[node].next;
+        }
+        if(stop != NULL)
+            break; /* matched at `start` */
+    }
+
+    if(stop == NULL)
+        return SQFalse;
+
+    if(out_begin) *out_begin = start;
+    if(out_end) *out_end = stop;
+    return SQTrue;
+}
+
+/*
+ * Searches the NUL-terminated string `text` for the first match; a
+ * convenience wrapper around sqstd_rex_searchrange over the whole string.
+ */
+SQBool sqstd_rex_search(SQRex* exp,const SQChar* text, const SQChar** out_begin, const SQChar** out_end)
+{
+    const SQChar *text_end = text + scstrlen(text);
+    return sqstd_rex_searchrange(exp,text,text_end,out_begin,out_end);
+}
+
+/*
+ * Returns the number of capture slots of the expression: one per capturing
+ * group, plus one for the root group that wraps the whole pattern.
+ */
+int sqstd_rex_getsubexpcount(SQRex* exp)
+{
+    const int count = exp->_nsubexpr;
+    return count;
+}
+
+/*
+ * Copies capture slot `n` into *subexp and returns SQTrue, or returns
+ * SQFalse (leaving *subexp untouched) when `n` is out of range.
+ */
+SQBool sqstd_rex_getsubexp(SQRex* exp, int n, SQRexMatch *subexp)
+{
+    if(n >= 0 && n < exp->_nsubexpr) {
+        *subexp = exp->_matches[n];
+        return SQTrue;
+    }
+    return SQFalse;
+}
+