lpcap.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613
  1. #include "lua.h"
  2. #include "lauxlib.h"
  3. #include "lpcap.h"
  4. #include "lpprint.h"
  5. #include "lptypes.h"
  6. #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
  7. #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
  8. #define skipclose(cs,head) \
  9. if (isopencap(head)) { assert(isclosecap(cs->cap)); cs->cap++; }
  10. /*
  11. ** Return the size of capture 'cap'. If it is an open capture, 'close'
  12. ** must be its corresponding close.
  13. */
  14. static Index_t capsize (Capture *cap, Capture *close) {
  15. if (isopencap(cap)) {
  16. assert(isclosecap(close));
  17. return close->index - cap->index;
  18. }
  19. else
  20. return cap->siz - 1;
  21. }
  22. static Index_t closesize (CapState *cs, Capture *head) {
  23. return capsize(head, cs->cap);
  24. }
  25. /*
  26. ** Put at the cache for Lua values the value indexed by 'v' in ktable
  27. ** of the running pattern (if it is not there yet); returns its index.
  28. */
  29. static int updatecache (CapState *cs, int v) {
  30. int idx = cs->ptop + 1; /* stack index of cache for Lua values */
  31. if (v != cs->valuecached) { /* not there? */
  32. getfromktable(cs, v); /* get value from 'ktable' */
  33. lua_replace(cs->L, idx); /* put it at reserved stack position */
  34. cs->valuecached = v; /* keep track of what is there */
  35. }
  36. return idx;
  37. }
  38. static int pushcapture (CapState *cs);
  39. /*
  40. ** Goes back in a list of captures looking for an open capture
  41. ** corresponding to a close one.
  42. */
  43. static Capture *findopen (Capture *cap) {
  44. int n = 0; /* number of closes waiting an open */
  45. for (;;) {
  46. cap--;
  47. if (isclosecap(cap)) n++; /* one more open to skip */
  48. else if (isopencap(cap))
  49. if (n-- == 0) return cap;
  50. }
  51. }
  52. /*
  53. ** Go to the next capture at the same level.
  54. */
  55. static void nextcap (CapState *cs) {
  56. Capture *cap = cs->cap;
  57. if (isopencap(cap)) { /* must look for a close? */
  58. int n = 0; /* number of opens waiting a close */
  59. for (;;) { /* look for corresponding close */
  60. cap++;
  61. if (isopencap(cap)) n++;
  62. else if (isclosecap(cap))
  63. if (n-- == 0) break;
  64. }
  65. cs->cap = cap + 1; /* + 1 to skip last close */
  66. }
  67. else {
  68. Capture *next;
  69. for (next = cap + 1; capinside(cap, next); next++)
  70. ; /* skip captures inside current one */
  71. cs->cap = next;
  72. }
  73. }
  74. /*
  75. ** Push on the Lua stack all values generated by nested captures inside
  76. ** the current capture. Returns number of values pushed. 'addextra'
  77. ** makes it push the entire match after all captured values. The
  78. ** entire match is pushed also if there are no other nested values,
  79. ** so the function never returns zero.
  80. */
  81. static int pushnestedvalues (CapState *cs, int addextra) {
  82. Capture *head = cs->cap++; /* original capture */
  83. int n = 0; /* number of pushed subvalues */
  84. /* repeat for all nested patterns */
  85. while (capinside(head, cs->cap))
  86. n += pushcapture(cs);
  87. if (addextra || n == 0) { /* need extra? */
  88. lua_pushlstring(cs->L, cs->s + head->index, closesize(cs, head));
  89. n++;
  90. }
  91. skipclose(cs, head);
  92. return n;
  93. }
  94. /*
  95. ** Push only the first value generated by nested captures
  96. */
  97. static void pushonenestedvalue (CapState *cs) {
  98. int n = pushnestedvalues(cs, 0);
  99. if (n > 1)
  100. lua_pop(cs->L, n - 1); /* pop extra values */
  101. }
  102. /*
  103. ** Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
  104. ** not nested inside a full capture that does not contain 'ref'. (We
  105. ** only need to care for full captures because the search at 'findback'
  106. ** skips open-end blocks; so, if 'grp' is nested in a non-full capture,
  107. ** 'ref' is also inside it.) To check this, we search backward for the
  108. ** inner full capture enclosing 'grp'. A full capture cannot contain
  109. ** non-full captures, so a close capture means we cannot be inside a
  110. ** full capture anymore.
  111. */
  112. static int capvisible (CapState *cs, Capture *grp, Capture *ref) {
  113. Capture *cap = grp;
  114. int i = MAXLOP; /* maximum distance for an 'open' */
  115. while (i-- > 0 && cap-- > cs->ocap) {
  116. if (isclosecap(cap))
  117. return 1; /* can stop the search */
  118. else if (grp->index - cap->index >= UCHAR_MAX)
  119. return 1; /* can stop the search */
  120. else if (capinside(cap, grp)) /* is 'grp' inside cap? */
  121. return capinside(cap, ref); /* ok iff cap also contains 'ref' */
  122. }
  123. return 1; /* 'grp' is not inside any capture */
  124. }
  125. /*
  126. ** Try to find a named group capture with the name given at the top of
  127. ** the stack; goes backward from 'ref'.
  128. */
  129. static Capture *findback (CapState *cs, Capture *ref) {
  130. lua_State *L = cs->L;
  131. Capture *cap = ref;
  132. while (cap-- > cs->ocap) { /* repeat until end of list */
  133. if (isclosecap(cap))
  134. cap = findopen(cap); /* skip nested captures */
  135. else if (capinside(cap, ref))
  136. continue; /* enclosing captures are not visible to 'ref' */
  137. if (captype(cap) == Cgroup && capvisible(cs, cap, ref)) {
  138. getfromktable(cs, cap->idx); /* get group name */
  139. if (lp_equal(L, -2, -1)) { /* right group? */
  140. lua_pop(L, 2); /* remove reference name and group name */
  141. return cap;
  142. }
  143. else lua_pop(L, 1); /* remove group name */
  144. }
  145. }
  146. luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
  147. return NULL; /* to avoid warnings */
  148. }
  149. /*
  150. ** Back-reference capture. Return number of values pushed.
  151. */
  152. static int backrefcap (CapState *cs) {
  153. int n;
  154. Capture *curr = cs->cap;
  155. pushluaval(cs); /* reference name */
  156. cs->cap = findback(cs, curr); /* find corresponding group */
  157. n = pushnestedvalues(cs, 0); /* push group's values */
  158. cs->cap = curr + 1;
  159. return n;
  160. }
  161. /*
  162. ** Table capture: creates a new table and populates it with nested
  163. ** captures.
  164. */
  165. static int tablecap (CapState *cs) {
  166. lua_State *L = cs->L;
  167. Capture *head = cs->cap++;
  168. int n = 0;
  169. lua_newtable(L);
  170. while (capinside(head, cs->cap)) {
  171. if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
  172. pushluaval(cs); /* push group name */
  173. pushonenestedvalue(cs);
  174. lua_settable(L, -3);
  175. }
  176. else { /* not a named group */
  177. int i;
  178. int k = pushcapture(cs);
  179. for (i = k; i > 0; i--) /* store all values into table */
  180. lua_rawseti(L, -(i + 1), n + i);
  181. n += k;
  182. }
  183. }
  184. skipclose(cs, head);
  185. return 1; /* number of values pushed (only the table) */
  186. }
  187. /*
  188. ** Table-query capture
  189. */
  190. static int querycap (CapState *cs) {
  191. int idx = cs->cap->idx;
  192. pushonenestedvalue(cs); /* get nested capture */
  193. lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
  194. if (!lua_isnil(cs->L, -1))
  195. return 1;
  196. else { /* no value */
  197. lua_pop(cs->L, 1); /* remove nil */
  198. return 0;
  199. }
  200. }
  201. /*
  202. ** Fold capture
  203. */
  204. static int foldcap (CapState *cs) {
  205. int n;
  206. lua_State *L = cs->L;
  207. Capture *head = cs->cap++;
  208. int idx = head->idx;
  209. if (isclosecap(cs->cap) || /* no nested captures (large subject)? */
  210. (n = pushcapture(cs)) == 0) /* nested captures with no values? */
  211. return luaL_error(L, "no initial value for fold capture");
  212. if (n > 1)
  213. lua_pop(L, n - 1); /* leave only one result for accumulator */
  214. while (capinside(head, cs->cap)) {
  215. lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
  216. lua_insert(L, -2); /* put it before accumulator */
  217. n = pushcapture(cs); /* get next capture's values */
  218. lua_call(L, n + 1, 1); /* call folding function */
  219. }
  220. skipclose(cs, head);
  221. return 1; /* only accumulator left on the stack */
  222. }
  223. /*
  224. ** Function capture
  225. */
  226. static int functioncap (CapState *cs) {
  227. int n;
  228. int top = lua_gettop(cs->L);
  229. pushluaval(cs); /* push function */
  230. n = pushnestedvalues(cs, 0); /* push nested captures */
  231. lua_call(cs->L, n, LUA_MULTRET); /* call function */
  232. return lua_gettop(cs->L) - top; /* return function's results */
  233. }
  234. /*
  235. ** Accumulator capture
  236. */
  237. static int accumulatorcap (CapState *cs) {
  238. lua_State *L = cs->L;
  239. int n;
  240. if (lua_gettop(L) < cs->firstcap)
  241. luaL_error(L, "no previous value for accumulator capture");
  242. pushluaval(cs); /* push function */
  243. lua_insert(L, -2); /* previous value becomes first argument */
  244. n = pushnestedvalues(cs, 0); /* push nested captures */
  245. lua_call(L, n + 1, 1); /* call function */
  246. return 0; /* did not add any extra value */
  247. }
  248. /*
  249. ** Select capture
  250. */
  251. static int numcap (CapState *cs) {
  252. int idx = cs->cap->idx; /* value to select */
  253. if (idx == 0) { /* no values? */
  254. nextcap(cs); /* skip entire capture */
  255. return 0; /* no value produced */
  256. }
  257. else {
  258. int n = pushnestedvalues(cs, 0);
  259. if (n < idx) /* invalid index? */
  260. return luaL_error(cs->L, "no capture '%d'", idx);
  261. else {
  262. lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
  263. lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
  264. lua_pop(cs->L, n - 1); /* remove other captures */
  265. return 1;
  266. }
  267. }
  268. }
  269. /*
  270. ** Return the stack index of the first runtime capture in the given
  271. ** list of captures (or zero if no runtime captures)
  272. */
  273. int finddyncap (Capture *cap, Capture *last) {
  274. for (; cap < last; cap++) {
  275. if (cap->kind == Cruntime)
  276. return cap->idx; /* stack position of first capture */
  277. }
  278. return 0; /* no dynamic captures in this segment */
  279. }
  280. /*
  281. ** Calls a runtime capture. Returns number of captures "removed" by the
  282. ** call, that is, those inside the group capture. Captures to be added
  283. ** are on the Lua stack.
  284. */
  285. int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
  286. int n, id;
  287. lua_State *L = cs->L;
  288. int otop = lua_gettop(L);
  289. Capture *open = findopen(close); /* get open group capture */
  290. assert(captype(open) == Cgroup);
  291. id = finddyncap(open, close); /* get first dynamic capture argument */
  292. close->kind = Cclose; /* closes the group */
  293. close->index = s - cs->s;
  294. cs->cap = open; cs->valuecached = 0; /* prepare capture state */
  295. luaL_checkstack(L, 4, "too many runtime captures");
  296. pushluaval(cs); /* push function to be called */
  297. lua_pushvalue(L, SUBJIDX); /* push original subject */
  298. lua_pushinteger(L, s - cs->s + 1); /* push current position */
  299. n = pushnestedvalues(cs, 0); /* push nested captures */
  300. lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
  301. if (id > 0) { /* are there old dynamic captures to be removed? */
  302. int i;
  303. for (i = id; i <= otop; i++)
  304. lua_remove(L, id); /* remove old dynamic captures */
  305. *rem = otop - id + 1; /* total number of dynamic captures removed */
  306. }
  307. else
  308. *rem = 0; /* no dynamic captures removed */
  309. return close - open - 1; /* number of captures to be removed */
  310. }
  311. /*
  312. ** Auxiliary structure for substitution and string captures: keep
  313. ** information about nested captures for future use, avoiding to push
  314. ** string results into Lua
  315. */
  316. typedef struct StrAux {
  317. int isstring; /* whether capture is a string */
  318. union {
  319. Capture *cp; /* if not a string, respective capture */
  320. struct { /* if it is a string... */
  321. Index_t idx; /* starts here */
  322. Index_t siz; /* with this size */
  323. } s;
  324. } u;
  325. } StrAux;
  326. #define MAXSTRCAPS 10
  327. /*
  328. ** Collect values from current capture into array 'cps'. Current
  329. ** capture must be Cstring (first call) or Csimple (recursive calls).
  330. ** (In first call, fills %0 with whole match for Cstring.)
  331. ** Returns number of elements in the array that were filled.
  332. */
  333. static int getstrcaps (CapState *cs, StrAux *cps, int n) {
  334. int k = n++;
  335. Capture *head = cs->cap++;
  336. cps[k].isstring = 1; /* get string value */
  337. cps[k].u.s.idx = head->index; /* starts here */
  338. while (capinside(head, cs->cap)) {
  339. if (n >= MAXSTRCAPS) /* too many captures? */
  340. nextcap(cs); /* skip extra captures (will not need them) */
  341. else if (captype(cs->cap) == Csimple) /* string? */
  342. n = getstrcaps(cs, cps, n); /* put info. into array */
  343. else {
  344. cps[n].isstring = 0; /* not a string */
  345. cps[n].u.cp = cs->cap; /* keep original capture */
  346. nextcap(cs);
  347. n++;
  348. }
  349. }
  350. cps[k].u.s.siz = closesize(cs, head);
  351. skipclose(cs, head);
  352. return n;
  353. }
  354. /*
  355. ** add next capture value (which should be a string) to buffer 'b'
  356. */
  357. static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
  358. /*
  359. ** String capture: add result to buffer 'b' (instead of pushing
  360. ** it into the stack)
  361. */
  362. static void stringcap (luaL_Buffer *b, CapState *cs) {
  363. StrAux cps[MAXSTRCAPS];
  364. int n;
  365. size_t len, i;
  366. const char *fmt; /* format string */
  367. fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
  368. n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
  369. for (i = 0; i < len; i++) { /* traverse format string */
  370. if (fmt[i] != '%') /* not an escape? */
  371. luaL_addchar(b, fmt[i]); /* add it to buffer */
  372. else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
  373. luaL_addchar(b, fmt[i]); /* add to buffer */
  374. else {
  375. int l = fmt[i] - '0'; /* capture index */
  376. if (l > n)
  377. luaL_error(cs->L, "invalid capture index (%d)", l);
  378. else if (cps[l].isstring)
  379. luaL_addlstring(b, cs->s + cps[l].u.s.idx, cps[l].u.s.siz);
  380. else {
  381. Capture *curr = cs->cap;
  382. cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
  383. if (!addonestring(b, cs, "capture"))
  384. luaL_error(cs->L, "no values in capture index %d", l);
  385. cs->cap = curr; /* continue from where it stopped */
  386. }
  387. }
  388. }
  389. }
  390. /*
  391. ** Substitution capture: add result to buffer 'b'
  392. */
  393. static void substcap (luaL_Buffer *b, CapState *cs) {
  394. const char *curr = cs->s + cs->cap->index;
  395. Capture *head = cs->cap++;
  396. while (capinside(head, cs->cap)) {
  397. Capture *cap = cs->cap;
  398. const char *caps = cs->s + cap->index;
  399. luaL_addlstring(b, curr, caps - curr); /* add text up to capture */
  400. if (addonestring(b, cs, "replacement"))
  401. curr = caps + capsize(cap, cs->cap - 1); /* continue after match */
  402. else /* no capture value */
  403. curr = caps; /* keep original text in final result */
  404. }
  405. /* add last piece of text */
  406. luaL_addlstring(b, curr, cs->s + head->index + closesize(cs, head) - curr);
  407. skipclose(cs, head);
  408. }
  409. /*
  410. ** Evaluates a capture and adds its first value to buffer 'b'; returns
  411. ** whether there was a value
  412. */
  413. static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
  414. switch (captype(cs->cap)) {
  415. case Cstring:
  416. stringcap(b, cs); /* add capture directly to buffer */
  417. return 1;
  418. case Csubst:
  419. substcap(b, cs); /* add capture directly to buffer */
  420. return 1;
  421. case Cacc: /* accumulator capture? */
  422. return luaL_error(cs->L, "invalid context for an accumulator capture");
  423. default: {
  424. lua_State *L = cs->L;
  425. int n = pushcapture(cs);
  426. if (n > 0) {
  427. if (n > 1) lua_pop(L, n - 1); /* only one result */
  428. if (!lua_isstring(L, -1))
  429. return luaL_error(L, "invalid %s value (a %s)",
  430. what, luaL_typename(L, -1));
  431. luaL_addvalue(b);
  432. }
  433. return n;
  434. }
  435. }
  436. }
  437. #if !defined(MAXRECLEVEL)
  438. #define MAXRECLEVEL 200
  439. #endif
  440. /*
  441. ** Push all values of the current capture into the stack; returns
  442. ** number of values pushed
  443. */
  444. static int pushcapture (CapState *cs) {
  445. lua_State *L = cs->L;
  446. int res;
  447. luaL_checkstack(L, 4, "too many captures");
  448. if (cs->reclevel++ > MAXRECLEVEL)
  449. return luaL_error(L, "subcapture nesting too deep");
  450. switch (captype(cs->cap)) {
  451. case Cposition: {
  452. lua_pushinteger(L, cs->cap->index + 1);
  453. cs->cap++;
  454. res = 1;
  455. break;
  456. }
  457. case Cconst: {
  458. pushluaval(cs);
  459. cs->cap++;
  460. res = 1;
  461. break;
  462. }
  463. case Carg: {
  464. int arg = (cs->cap++)->idx;
  465. if (arg + FIXEDARGS > cs->ptop)
  466. return luaL_error(L, "reference to absent extra argument #%d", arg);
  467. lua_pushvalue(L, arg + FIXEDARGS);
  468. res = 1;
  469. break;
  470. }
  471. case Csimple: {
  472. int k = pushnestedvalues(cs, 1);
  473. lua_insert(L, -k); /* make whole match be first result */
  474. res = k;
  475. break;
  476. }
  477. case Cruntime: {
  478. lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
  479. res = 1;
  480. break;
  481. }
  482. case Cstring: {
  483. luaL_Buffer b;
  484. luaL_buffinit(L, &b);
  485. stringcap(&b, cs);
  486. luaL_pushresult(&b);
  487. res = 1;
  488. break;
  489. }
  490. case Csubst: {
  491. luaL_Buffer b;
  492. luaL_buffinit(L, &b);
  493. substcap(&b, cs);
  494. luaL_pushresult(&b);
  495. res = 1;
  496. break;
  497. }
  498. case Cgroup: {
  499. if (cs->cap->idx == 0) /* anonymous group? */
  500. res = pushnestedvalues(cs, 0); /* add all nested values */
  501. else { /* named group: add no values */
  502. nextcap(cs); /* skip capture */
  503. res = 0;
  504. }
  505. break;
  506. }
  507. case Cbackref: res = backrefcap(cs); break;
  508. case Ctable: res = tablecap(cs); break;
  509. case Cfunction: res = functioncap(cs); break;
  510. case Cacc: res = accumulatorcap(cs); break;
  511. case Cnum: res = numcap(cs); break;
  512. case Cquery: res = querycap(cs); break;
  513. case Cfold: res = foldcap(cs); break;
  514. default: assert(0); res = 0;
  515. }
  516. cs->reclevel--;
  517. return res;
  518. }
  519. /*
  520. ** Prepare a CapState structure and traverse the entire list of
  521. ** captures in the stack pushing its results. 's' is the subject
  522. ** string, 'r' is the final position of the match, and 'ptop'
  523. ** the index in the stack where some useful values were pushed.
  524. ** Returns the number of results pushed. (If the list produces no
  525. ** results, push the final position of the match.)
  526. */
  527. int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
  528. Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
  529. int n = 0;
  530. /* printcaplist(capture); */
  531. if (!isclosecap(capture)) { /* is there any capture? */
  532. CapState cs;
  533. cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
  534. cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
  535. cs.firstcap = lua_gettop(L) + 1; /* where first value (if any) will go */
  536. do { /* collect their values */
  537. n += pushcapture(&cs);
  538. } while (!isclosecap(cs.cap));
  539. assert(lua_gettop(L) - cs.firstcap == n - 1);
  540. }
  541. if (n == 0) { /* no capture values? */
  542. lua_pushinteger(L, r - s + 1); /* return only end position */
  543. n = 1;
  544. }
  545. return n;
  546. }