test.lua 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668
  1. #!/usr/bin/env lua
  2. -- require"strict" -- just to be pedantic
  3. local m = require"lpeg"
  -- scratch variables, for general use by the tests below
  local a, b, c, d, e, f, g, p, t

  -- compatibility with Lua 5.2
  local unpack = rawget(table, "unpack") or unpack
  local loadstring = rawget(_G, "loadstring") or load

  -- any single character
  local any = m.P(1)
  -- zero or more spaces, tabs or newlines
  local space = m.S(" \t\n")^0
  -- Recursively asserts that 'x' and 'y' are equal.
  -- Non-table values are compared with '=='. Tables are compared key by
  -- key in both directions, so a key present on only one side is detected.
  -- When 'p' is true, every compared pair is printed before the check.
  local function checkeq (x, y, p)
    if p then print(x, y) end
    if type(x) ~= "table" then
      assert(x == y)
    else
      -- fail with a readable assertion (instead of an indexing error)
      -- when a table is compared against a non-table value
      assert(type(y) == "table", "checkeq: table expected on right-hand side")
      for k, v in pairs(x) do checkeq(v, y[k], p) end
      for k, v in pairs(y) do checkeq(v, x[k], p) end
    end
  end
  -- metatable of an LPeg pattern (used later to call metamethods directly)
  local mt = getmetatable(m.P(1))

  -- string holding every byte value from 0 to 255, in increasing order
  local allchar = {}
  for code = 0, 255 do
    allchar[#allchar + 1] = code
  end
  allchar = string.char(unpack(allchar))
  assert(#allchar == 256)
  -- Returns a string with every byte of 'allchar' that charset 'c'
  -- accepts; any other byte is replaced by the empty string.
  local function cs2str (c)
    local keepOrDrop = c + m.P(1) / ""
    local filter = m.Cs(keepOrDrop^0)
    return m.match(filter, allchar)
  end
  -- Asserts that charsets 'c1' and 'c2' accept exactly the same bytes.
  local function eqcharset (c1, c2)
    local first, second = cs2str(c1), cs2str(c2)
    assert(first == second)
  end
  print("General tests for LPeg library")

  assert(type(m.version) == "string")
  print(m.version)

  -- only values built by LPeg are reported as "pattern"
  for _, value in ipairs{ "alo", io.input } do
    assert(m.type(value) ~= "pattern")
  end
  assert(m.type(m.P"alo") == "pattern")
  -- tests for some basic optimizations
  do
    -- each entry: pattern, subject, expected result of 'm.match'
    local cases = {
      { m.P(false) + "a",    "a",    2   },
      { m.P(true) + "a",     "a",    1   },
      { "a" + m.P(false),    "b",    nil },
      { "a" + m.P(true),     "b",    1   },
      { m.P(false) * "a",    "a",    nil },
      { m.P(true) * "a",     "a",    2   },
      { "a" * m.P(false),    "a",    nil },
      { "a" * m.P(true),     "a",    2   },
      { #m.P(false) * "a",   "a",    nil },
      { #m.P(true) * "a",    "a",    2   },
      { "a" * #m.P(false),   "a",    nil },
      { "a" * #m.P(true),    "a",    2   },
      { m.P(1)^0,            "abcd", 5   },
      { m.S("")^0,           "abcd", 1   },
    }
    for _, case in ipairs(cases) do
      assert(m.match(case[1], case[2]) == case[3])
    end
  end
  -- tests for locale
  do
    -- when given a table, 'locale' returns that same table
    assert(m.locale(m) == m)
    local target = {}
    assert(m.locale(target, m) == target)
    -- every entry of a fresh locale table agrees with the one stored in 'm'
    for name, class in pairs(m.locale()) do
      assert(type(name) == "string")
      eqcharset(class, m[name])
    end
  end
  -- numbers used directly as patterns
  do
    local four = "aaaa"
    for _, n in ipairs{ 3, 4 } do
      assert(m.match(n, four))
    end
    assert(not m.match(5, four))
    assert(m.match(-3, "aa"))
    for _, subject in ipairs{ "aaa", four } do
      assert(not m.match(-3, subject))
    end
    assert(not m.match(-4, four))
    assert(m.P(-5):match(four))
  end

  -- strings and booleans used directly as patterns
  do
    local subject = "alo"
    assert(m.match("a", subject) == 2)
    assert(m.match("al", subject) == 3)
    assert(not m.match("alu", subject))
    assert(m.match(true, "") == 1)
  end
  local digit = m.S("0123456789")
  local upper = m.S("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
  local lower = m.S("abcdefghijklmnopqrstuvwxyz")
  local letter = m.S("") + upper + lower
  local alpha = letter + digit + m.R()

  -- pairs of charset expressions that must accept the same characters
  for _, pair in ipairs{
    { m.S"", m.P(false) },
    { upper, m.R("AZ") },
    { lower, m.R("az") },
    { upper + lower, m.R("AZ", "az") },
    { upper + lower, m.R("AZ", "cz", "aa", "bb", "90") },
    { digit, m.S"01234567" + "8" + "9" },
    { upper, letter - lower },
    { m.S(""), m.R() },
  } do
    eqcharset(pair[1], pair[2])
  end

  assert(cs2str(m.S("")) == "")

  for _, pair in ipairs{
    { m.S"\0", "\0" },
    { m.S"\1\0\2", m.R"\0\2" },
    { m.S"\1\0\2", m.R"\1\2" + "\0" },
    { m.S"\1\0\2" - "\0", m.R"\1\2" },
    { m.S("\0\255"), m.P"\0" + "\255" },   -- charset extremes
  } do
    eqcharset(pair[1], pair[2])
  end

  -- a word: one or more alphanumerics followed by any non-alphanumerics
  local word = alpha^1 * (1 - alpha)^0
  do
    local subject = "alo alo"
    assert((word^0 * -1):match(subject))
    assert(m.match(word^1 * -1, subject))
    assert(m.match(word^2 * -1, subject))
    assert(not m.match(word^3 * -1, subject))
    assert(not m.match(word^-1 * -1, subject))
    assert(m.match(word^-2 * -1, subject))
    assert(m.match(word^-3 * -1, subject))
  end

  -- end of subject
  local eos = m.P(-1)

  assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
  assert(not m.match(digit^0 * letter * eos, "1257a1"))
  -- grammar for balanced parentheses: an open parenthesis, then any mix
  -- of non-parenthesis characters and nested balanced groups, then a
  -- close parenthesis
  b = {
    "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
  }

  assert(m.match(b, "(al())()"))
  assert(not m.match(b * eos, "(al())()"))
  assert(m.match(b * eos, "((al())()(é))"))
  assert(not m.match(b, "(al()()"))

  -- pattern difference
  do
    local notExactlyFor = letter^1 - ("for" * eos)
    assert(not m.match(letter^1 - "for", "foreach"))
    assert(m.match(notExactlyFor, "foreach"))
    assert(not m.match(notExactlyFor, "for"))
  end
  -- Builds a one-rule grammar that looks for 'p' anywhere in the subject:
  -- it tries 'p' at the current position and, if that fails, skips one
  -- character and tries the rule again.
  function basiclookfor (p)
    local search = p + (1 * m.V(1))
    return m.P{ search }
  end
  -- Same search as 'basiclookfor', but captures the text matched by 'p'.
  function caplookfor (p)
    local captured = p:C()
    return basiclookfor(captured)
  end
  -- first run of letters found anywhere in the subject
  assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")

  -- every run of letters, one capture per word
  a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
  checkeq(a, {"two", "words", "one", "more"})

  -- The subject needs its double blanks: the first "(" never closes, so
  -- the search stops at the second "(" (position 6), consumes it, and
  -- the position capture yields 7. With single blanks the result is 5.
  assert(m.match(basiclookfor((#m.P(b) * 1) * m.Cp()), "  (  (a)") == 7)
  do
    -- captures the matched text plus a tag: "d" for digits, "l" for letters
    local tagged = m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l")

    a = {m.match(tagged, "123")}
    checkeq(a, {"123", "d"})

    -- bug in LPeg 0.12 (nil value does not create a 'ktable')
    assert(m.match(m.Cc(nil), "") == nil)

    a = {m.match(tagged, "abcd")}
    checkeq(a, {"abcd", "l"})
  end

  -- constant and position captures
  do
    local subject = 'aaa'

    a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), subject)}
    checkeq(a, {10,20,30,2})

    a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), subject)}
    checkeq(a, {1,10,20,30,2})

    a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), subject)
    checkeq(a, {1,10,20,30,2})

    a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), subject)
    checkeq(a, {1,7,8,10,20,30,2})

    -- constant captures with no values produce nothing
    a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', subject)}
    checkeq(a, {1,2,3,4})
  end

  a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
  checkeq(a, {1, 5})

  -- nested captures inside a recursive rule
  t = {m.match({ m.C(m.C(1) * m.V(1) + -1) }, "abc")}
  checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
  -- bug in 0.12 ('hascapture' did not check for captures inside a rule)
  do
    local grammar = m.P{
      'S';
      S1 = m.C('abc') + 3,
      S = #m.V('S1')    -- rule has capture, but '#' must ignore it
    }
    assert(grammar:match'abc' == 1)
  end

  -- bug: loop in 'hascaptures'
  do
    local recursive = m.P{ m.P'x' * m.V(1) + m.P'y' }
    local negated = m.C(-recursive)
    assert(negated:match("xxx") == "")
  end
  -- test for small capture boundary
  for len = 250, 260 do
    local subject = string.rep('a', len)
    assert(#m.match(m.C(len), subject) == len)
    assert(#m.match(m.C(m.C(len)), subject) == len)
  end

  -- tests for any*n and any*-n
  for n = 1, 550, 13 do
    local short = string.rep('x', n - 1)   -- n - 1 characters
    local full = short .. 'a'              -- n characters
    assert(not m.P(n):match(short))
    assert(m.P(n):match(full) == n + 1)
    assert(n < 4 or m.match(m.P(n) + "xxx", short) == 4)
    assert(m.C(n):match(full) == full)
    assert(m.C(m.C(n)):match(full) == full)
    assert(m.P(-n):match(short) == 1)
    assert(not m.P(-n):match(full))
    assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, full) == 20)
    local third = math.floor(n/3)
    assert(m.match(third * m.Cp() * third * third, full) == third + 1)
  end
  -- true values
  for _, subject in ipairs{ "x", "" } do
    assert(m.P(0):match(subject) == 1)
  end
  assert(m.C(0):match("x") == "")

  do
    -- the first alternative needs ten characters; otherwise fall back
    -- to the literal "xuxu"
    local function choice ()
      return m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu"
    end
    assert(m.match(choice(), "xuxu") == 1)
    assert(m.match(choice(), "xuxuxuxuxu") == 0)
  end

  assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")

  p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
  -- test for alternation optimization
  do
    -- each entry: pattern, subject, expected end position
    local cases = {
      { m.P"a"^1 + "ab" + m.P"x"^0, "ab", 2 },
      { (m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab", 3 },
      { m.P"ab" + "cd" + "" + "cy" + "ak", "98", 1 },
      { m.P"ab" + "cd" + "ax" + "cy", "ax", 3 },
      { "a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax", 3 },
      { (m.P"ab" + "cd" + "ax" + "cy")^0, "ax", 3 },
      { m.P(1) * "x" + m.S"" * "xu" + "ay", "ay", 3 },
      { m.P"abc" + "cde" + "aka", "aka", 4 },
      { m.S"abc" * "x" + "cde" + "aka", "ax", 3 },
      { m.S"abc" * "x" + "cde" + "aka", "aka", 4 },
      { m.S"abc" * "x" + "cde" + "aka", "cde", 4 },
      { m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka", 4 },
      { "ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax", 3 },
      { "ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka", 4 },
      { "ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde", 4 },
      { "ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka", 4 },
      { "ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax", 3 },
      { m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka", 4 },
      { m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka", 4 },
      { m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde", 4 },
      { m.P"eb" + "cd" + m.P"e"^0 + "x", "ee", 3 },
      { m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd", 3 },
      { m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex", 4 },
      { m.P"ab" + "cd" + m.P"e"^0 + "x", "cd", 3 },
      { m.P"ab" + "cd" + m.P"e"^0 + "x", "x", 1 },
      { m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee", 1 },
      { m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd", 3 },
      { m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex", 4 },
      { m.P"ab" + "cd" + m.P"e"^1 + "x", "cd", 3 },
      { m.P"ab" + "cd" + m.P"e"^1 + "x", "x", 2 },
      { m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee", 1 },
    }
    for _, case in ipairs(cases) do
      assert(m.match(case[1], case[2]) == case[3])
    end
  end

  assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))

  assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)

  -- bug in 0.12 (rc1)
  assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)

  do
    local bytes = "\0\128\255\127"
    assert(m.match(m.S"\0\128\255\127"^0, string.rep(bytes, 10)) == 4*10 + 1)
  end
  -- optimizations with optional parts
  for _, case in ipairs{
    { ("ab" * -m.P"c")^-1, "abc", 1 },
    { ("ab" * #m.P"c")^-1, "abd", 1 },
    { ("ab" * m.B"c")^-1, "ab", 1 },
    { ("ab" * m.P"cd"^0)^-1, "abcdcdc", 7 },
    { m.P"ab"^-1 - "c", "abcd", 3 },
  } do
    assert(m.match(case[1], case[2]) == case[3])
  end

  p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
  assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)

  -- bug in 0.12.2
  -- p = { ('ab' ('c' 'ef'?)*)? }
  p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1)
  s = "abcefccefc"
  assert(s == p:match(s))

  pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
  do
    -- the same digit substitution written as a choice of string
    -- replacements and as a single table replacement
    local byChoice = m.match(
      m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi)
    local byTable = m.match(
      m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi)
    assert(byChoice == byTable)
  end

  print("+")
  -- tests for capture optimizations
  assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
  t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
  checkeq(t, {3, 6})

  -- tests for numbered captures
  p = m.C(1)
  do
    local subject = "abcdefgh"
    -- each entry: capture number selected with '/', expected value
    for _, case in ipairs{ {3, "a"}, {1, "abcdef"}, {4, "bc"}, {0, 7} } do
      local selected = m.C(m.C(p * m.C(2)) * m.C(3)) / case[1]
      assert(m.match(selected, subject) == case[2])
    end
    a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, subject)
  end
  assert(a == "a" and b == "efg" and c == "h")
  -- test for table captures
  do
    local subject = "alo"

    -- no inner captures: the table is empty
    t = m.match(m.Ct(letter^1), subject)
    checkeq(t, {})

    t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", subject)
    assert(n == "t" and table.concat(t) == "alo")

    t = m.match(m.Ct(m.C(m.C(letter)^1)), subject)
    assert(table.concat(t, ";") == "alo;a;l;o")

    t = m.match(m.Ct(m.C(m.C(letter)^1)), subject)
    assert(table.concat(t, ";") == "alo;a;l;o")

    t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), subject)
    assert(table.concat(t[1], ";") == "1;2;2;3;3;4")

    t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), subject)
    checkeq(t, {"alo", "a", "o"})
  end
  -- tests for groups
  for _, group in ipairs{
    m.Cg(1),                                  -- no capture
    m.Cg(m.P(true)/function () end * 1),      -- no value
    m.Cg(m.Cg(m.Cg(m.C(1)))),
  } do
    p = group
    assert(p:match('x') == 'x')
  end

  p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
  t = {p:match('abc')}
  checkeq(t, {'a', 'b', 'c', 1, 2})

  -- named groups become fields of the enclosing table capture
  p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
  t = p:match('')
  checkeq(t, {hi = 10, ho = 20})
  t = p:match('abc')
  checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})

  -- non-string group names
  p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io))
  t = p:match('abcdefghij')
  assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c')
  -- test for error messages

  -- Calls 'f' with the extra arguments in protected mode and asserts that
  -- it fails with an error message containing the text 'msg'.
  local function checkerr (msg, f, ...)
    local ok, err = pcall(f, ...)
    assert(not ok)
    -- search for 'msg' at any position of the error message
    local finder = { m.P(msg) + 1 * m.V(1) }
    assert(m.match(finder, err))
  end
  -- errors detected when a grammar is matched
  checkerr("rule '1' may be left recursive",
           m.match, { m.V(1) * 'a' }, "a")
  checkerr("rule '1' used outside a grammar",
           m.match, m.V(1), "")
  checkerr("rule 'hiii' used outside a grammar",
           m.match, m.V('hiii'), "")
  checkerr("rule 'hiii' undefined in given grammar",
           m.match, { m.V('hiii') }, "")
  checkerr("undefined in given grammar",
           m.match, { m.V{} }, "")

  -- errors detected when a grammar is built
  checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
  checkerr("grammar has no initial rule", m.P, { [print] = {} })

  -- grammar with a long call chain before left recursion
  p = {
    'a',
    a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
    b = m.V'c',
    c = m.V'd',
    d = m.V'e',
    e = m.V'f',
    f = m.V'g',
    g = m.P''
  }
  checkerr("rule 'a' may be left recursive", m.match, p, "a")

  -- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
  -- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
  -- that is optimized to ICommit L1
  p = m.P{ (m.P{ m.P'abc' } + 'ayz') * m.V'y'; y = m.P'x' }
  assert(p:match('abcx') == 5)
  assert(p:match('ayzx') == 5)
  assert(not p:match('abc'))
  do
    print("testing large dynamic Cc")
    local lim = 2^16 - 1
    local counter = 0

    -- builds a sequence of 'n' constant captures, each holding the next
    -- value of 'counter', by splitting 'n' in two halves recursively
    local function seq (n)
      if n ~= 1 then
        local half = math.floor(n / 2)
        return seq(half) * seq(n - half)
      end
      counter = counter + 1
      return m.Cc(counter)
    end

    p = m.Ct(seq(lim))
    t = p:match('')
    assert(t[lim] == lim)
    checkerr("too many", function () p = p / print end)
    checkerr("too many", seq, lim + 1)
  end

  do
    -- nesting of captures too deep
    local nested = m.C(1)
    for _ = 1, 300 do
      nested = m.Ct(nested)
    end
    checkerr("too deep", nested.match, nested, "x")
  end
  -- tests for non-pattern as arguments to pattern functions

  p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
  assert(m.match(p, "aaabaac") == 7)

  -- the same sequence associated to the left and to the right
  for _, mixed in ipairs{
    m.P'abc' * 2 * -5 * true * 'de',   -- mix of numbers and strings and booleans
    'abc' * (2 * (-5 * (true * m.P'de'))),
  } do
    p = mixed
    assert(p:match("abc01de") == 8)
    assert(p:match("abc01de3456") == nil)
  end

  p = { m.V(2), m.P"abc" } *
      (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
  assert(p:match("abcaaaxx") == 7)
  assert(p:match("abcxx") == 6)

  -- a large table capture
  do
    local count = 10000
    t = m.match(m.Ct(m.C('a')^0), string.rep("a", count))
    assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
  end

  print("+")
  -- bug in 0.10 (rechecking a grammar, after tail-call optimization)
  m.P{ m.P{ (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }

  -- lexical elements of a small arithmetic-expression grammar; each
  -- token also consumes the blanks that follow it
  local V = m.V
  local Space = m.S(" \n\t")^0
  local Number = m.C(m.R("09")^1) * Space
  local FactorOp = m.C(m.S("+-")) * Space
  local TermOp = m.C(m.S("*/")) * Space
  local Open = "(" * Space
  local Close = ")" * Space
  -- Combines two values with an additive operator: returns 'v1 + v2'
  -- when 'op' is "+", and 'v1 - v2' for any other operator.
  -- 'd' must be nil: the function asserts it gets no fourth value.
  local function f_factor (v1, op, v2, d)
    assert(d == nil)
    if op ~= "+" then
      return v1 - v2
    end
    return v1 + v2
  end
  -- Combines two values with a multiplicative operator: returns
  -- 'v1 * v2' when 'op' is "*", and 'v1 / v2' for any other operator.
  -- 'd' must be nil: the function asserts it gets no fourth value.
  local function f_term (v1, op, v2, d)
    assert(d == nil)
    if op ~= "*" then
      return v1 / v2
    end
    return v1 * v2
  end
  -- arithmetic expressions evaluated while matching: every
  -- "operator operand" pair is folded into the running value
  G = m.P{
    "Exp",
    Exp = V"Factor" * (FactorOp * V"Factor" % f_factor)^0;
    Factor = V"Term" * (TermOp * V"Term" % f_term)^0;
    Term = Number / tonumber + Open * V"Exp" * Close;
  }

  -- allow leading blanks and require the whole subject to be consumed
  G = Space * G * -1

  -- the grammar must agree with Lua's own evaluation of each expression
  for _, expr in ipairs{ " 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8" } do
    local expected = loadstring("return "..expr)()
    assert(m.match(G, expr) == expected)
  end
  -- test for grammars (errors deep in calling non-terminals)
  g = m.P{
    m.V(2) + "a",
    "a" * m.V(3) * "x",
    "b" * m.V(3) + "c"
  }
  assert(m.match(g, "abbbcx") == 7)
  assert(m.match(g, "abbbbx") == 2)

  -- tests for \0
  do
    local subject = "\0\1\0a"
    assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
    assert(m.match(m.S("\0\1ab")^1, subject) == 5)
    assert(m.match(m.P(1)^3, subject) == 5)
    assert(not m.match(-4, subject))
    assert(m.match("\0\1\0a", subject) == 5)
    assert(m.match("\0\0\0", "\0\0\0") == 4)
    assert(not m.match("\0\0\0", "\0\0"))
  end
  -- tests for predicates
  do
    local subject = "alo"
    assert(not m.match(-m.P("a") * 2, subject))
    assert(m.match(- -m.P("a") * 2, subject) == 3)
    assert(m.match(#m.P("a") * 2, subject) == 3)
    assert(m.match(##m.P("a") * 2, subject) == 3)
    assert(not m.match(##m.P("c") * 2, subject))
  end

  -- four equivalent ways to keep each "a" and replace anything else by "."
  for _, keepA in ipairs{
    ##m.P("a") * 1,
    #((#m.P"a")/"") * 1,
    - -m.P("a") * 1,
    -((-m.P"a")/"") * 1,
  } do
    assert(m.match(m.Cs((keepA + m.P(1)/".")^0), "aloal") == "a..a.")
  end

  -- fixed length
  do
    -- 'and' predicate using fixed length
    local pat = m.C(#("a" * (m.P("bd") + "cd")) * 2)
    assert(pat:match("acd") == "ac")

    pat = #m.P{ "a" * m.V(2), m.P"b" } * 2
    assert(pat:match("abc") == 3)

    pat = #(m.P"abc" * m.B"c")
    assert(pat:match("abc") == 1 and not pat:match("ab"))

    pat = m.P{ "a" * m.V(2), m.P"b"^1 }
    checkerr("pattern may not have fixed length", m.B, pat)

    pat = "abc" * (m.P"b"^1 + m.P"a"^0)
    checkerr("pattern may not have fixed length", m.B, pat)
  end

  p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
  assert(p:match('a') == 2)
  assert(p:match('') == 1)
  assert(p:match('b') == 1)

  p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
  assert(p:match('a') == 20)
  assert(p:match('') == 10)
  assert(p:match('b') == 10)
  -- look-behind predicate
  for _, behind in ipairs{ 'a', 1 } do
    assert(not m.match(m.B(behind), 'a'))
    assert(m.match(1 * m.B(behind), 'a') == 2)
  end
  assert(m.match(-m.B(1), 'a') == 1)

  do
    local subject = string.rep('a', 250)
    assert(m.match(m.B(250), subject) == nil)
    assert(m.match(250 * m.B(250), subject) == 251)
  end

  -- look-behind with an open call
  checkerr("pattern may not have fixed length", m.B, m.V'S1')
  checkerr("too long to look behind", m.B, 260)
  -- word boundary: a letter ahead with no letter behind (start of a
  -- word), or no letter ahead with a letter behind (end of a word)
  B = #letter * -m.B(letter) + -letter * m.B(letter)

  -- collects the position of every word boundary in the subject
  x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })

  -- The two blanks before the last word matter: they put "c" at
  -- position 9, so its boundaries are 9 and 10. With a single blank
  -- they would be 8 and 9 and the expected list would not match.
  checkeq(m.match(x, 'ar cal  c'), {1,3,4,7,9,10})
  checkeq(m.match(x, ' ar cal '), {2,4,5,8})
  checkeq(m.match(x, ' '), {})
  checkeq(m.match(x, 'aloalo'), {1,7})

  assert(m.match(B, "a") == 1)
  assert(m.match(1 * B, "a") == 2)
  assert(not m.B(1 - letter):match(""))
  assert((-m.B(letter)):match("") == 1)

  assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
  assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
  assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
  -- look-behind with grammars
  for _, case in ipairs{
    { 'a' * m.B{'x', x = m.P(3)}, 'aaa', nil },
    { 'aa' * m.B{'x', x = m.P('aaa')}, 'aaaa', nil },
    { 'aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa', 4 },
  } do
    assert(m.match(case[1], case[2]) == case[3])
  end

  -- bug in 0.9
  do
    local aBeforeB = 'a' * #m.P'b'
    assert(m.match(aBeforeB, "ab") == 2)
    assert(not m.match(aBeforeB, "a"))
  end

  assert(not m.match(#m.S'567', ""))
  assert(m.match(#m.S'567' * 1, "6") == 2)
  -- tests for Tail Calls

  p = m.P{ 'a' * m.V(1) + '' }
  assert(p:match(string.rep('a', 1000)) == 1001)

  -- create a grammar for a simple DFA for even number of 0s and 1s
  --
  --  ->1 <---0---> 2
  --    ^           ^
  --    |           |
  --    1           1
  --    |           |
  --    V           V
  --    3 <---0---> 4
  --
  -- this grammar should keep no backtracking information
  p = m.P{
    '0' * m.V(2) + '1' * m.V(3) + -1,
    '0' * m.V(1) + '1' * m.V(4),
    '0' * m.V(4) + '1' * m.V(1),
    '0' * m.V(3) + '1' * m.V(2),
  }

  for _, unit in ipairs{ "00", "01", "011" } do
    assert(p:match(string.rep(unit, 10000)))
  end
  assert(not p:match(string.rep("011", 10000) .. "1"))
  assert(not p:match(string.rep("011", 10001)))

  -- this grammar does need backtracking info.
  local lim = 10000
  p = m.P{ '0' * m.V(1) + '0' }
  do
    local zeros = string.rep("0", lim)
    checkerr("stack overflow", m.match, p, zeros)
    m.setmaxstack(2*lim)
    checkerr("stack overflow", m.match, p, zeros)
    m.setmaxstack(2*lim + 4)
    assert(m.match(p, zeros) == lim + 1)
  end

  -- this repetition should not need stack space (only the call does)
  p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
  m.setmaxstack(200)
  do
    local subject = string.rep('a', 180) .. 'c' .. string.rep('b', 180)
    assert(p:match(subject) == 362)
  end

  m.setmaxstack(100)   -- restore low limit
  -- tests for optional start position
  do
    -- each entry: pattern, subject, initial position, whether it must match
    local cases = {
      { "a",   "abc", 1,  true  },
      { "b",   "abc", 2,  true  },
      { "c",   "abc", 3,  true  },
      { 1,     "abc", 4,  false },
      { "a",   "abc", -3, true  },
      { "b",   "abc", -2, true  },
      { "c",   "abc", -1, true  },
      { "abc", "abc", -4, true  },   -- truncate to position 1
      { "",    "abc", 10, true  },   -- empty string is everywhere!
      { "",    "",    10, true  },
      { 1,     "",    1,  false },
      { 1,     "",    -1, false },
      { 1,     "",    0,  false },
    }
    for _, case in ipairs(cases) do
      local result = m.match(case[1], case[2], case[3])
      if case[4] then
        assert(result)
      else
        assert(not result)
      end
    end
  end

  print("+")
  -- tests for argument captures
  for _, bad in ipairs{ 0, -1, 2^18 } do
    checkerr("invalid argument", m.Carg, bad)
  end
  checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)

  assert(m.match(m.Carg(1), 'a', 1, print) == print)

  x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
  checkeq(x, {10, 20})

  do
    -- inner match-time capture: gets the value of group "a" and adds one
    local function increment (s, i, x)
      assert(s == "a" and i == 1)
      return i, x + 1
    end

    -- outer match-time capture: gets the incremented value and the
    -- second extra argument, and nothing else
    local function combine (s, i, a, b, c)
      assert(s == "a" and i == 1 and c == nil)
      return i, 2*a + 3*b
    end

    local inner = m.Cg(m.Carg(3), "a") * m.Cmt(m.Cb("a"), increment) * m.Carg(2)
    local whole = m.Cmt(inner, combine) * "a"
    assert(m.match(whole, "a", 1, false, 100, 1000) == 2*1001 + 3*100)
  end
  -- tests for Lua functions

  -- a function that records each position where it is called and fails
  t = {}
  s = ""
  p = m.P(function (s1, i)
    assert(s == s1)
    t[#t + 1] = i
    return nil
  end) * false
  s = "hi, this is a test"
  assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
  assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
  assert(not m.match(p, s))

  -- plain functions combined directly through the pattern metamethods
  do
    local function stay (s, i) return i end
    local function fail (s, i) return nil end
    p = mt.__add(stay, fail)
    assert(m.match(p, "alo"))
    p = mt.__mul(stay, fail)
    assert(not m.match(p, "alo"))
  end

  -- a function that records each position and succeeds without consuming
  t = {}
  p = function (s1, i)
    assert(s == s1)
    t[#t + 1] = i
    return i
  end
  s = "hi, this is a test"
  assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
  assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)

  -- a function that succeeds only while inside the subject
  t = {}
  p = m.P(function (s1, i)
    assert(s == s1)
    t[#t + 1] = i
    return i <= s1:len() and i
  end) * 1
  s = "hi, this is a test"
  assert(m.match(p^0, s) == string.len(s) + 1)
  assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)

  -- a function that runs another match
  p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
  assert(m.match(p, "aaaa") == 5)
  assert(m.match(p, "abaa") == 2)
  assert(not m.match(p, "baaa"))

  -- positions returned by a function must lie between the current
  -- position and the end of the subject plus one
  checkerr("invalid position", m.match, function () return 2^20 end, s)
  checkerr("invalid position", m.match, function () return 0 end, s)
  checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
  checkerr("invalid position", m.match,
           m.P(1)^0 * function (_, i) return i - 1 end, s)
  assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
  checkerr("invalid position", m.match,
           m.P(1)^0 * function (_, i) return i + 1 end, s)
  assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
  checkerr("invalid position", m.match,
           m.P(function (s, i) return s:len() + 2 end) * -1, s)
  assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
  assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
         string.len(s) + 1)

  for pos = 1, string.len(s) + 1 do
    assert(m.match(function (_, _) return pos end, s) == pos)
  end

  do
    local atEven = m.P(function (s, i) return i%2 == 0 and i end)
    local atOdd = m.P(function (s, i)
      return i%2 ~= 0 and i + 2 <= s:len() and i
    end)
    p = (atEven * 1 + atOdd * 3)^0 * -1
  end
  assert(p:match(string.rep('a', 14000)))
  582. -- tests for Function Replacements
  583. f = function (a, ...) if a ~= "x" then return {a, ...} end end
  584. t = m.match(m.C(1)^0/f, "abc")
  585. checkeq(t, {"a", "b", "c"})
  586. t = m.match(m.C(1)^0/f/f, "abc")
  587. checkeq(t, {{"a", "b", "c"}})
  588. t = m.match(m.P(1)^0/f/f, "abc") -- no capture
  589. checkeq(t, {{"abc"}})
  590. t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
  591. checkeq(t, {{"abc"}, 4})
  592. t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
  593. checkeq(t, {{"a", "b", "c"}, 4})
  594. t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
  595. checkeq(t, {4})
  596. t = m.match(m.C(m.C(1)^0)/f, "abc")
  597. checkeq(t, {"abc", "a", "b", "c"})
  -- Replacement function that prepends the constant 1 to whatever
  -- values it receives.
  g = function (...)
    local head = 1
    return head, ...
  end
  599. t = {m.match(m.C(1)^0/g/g, "abc")}
  600. checkeq(t, {1, 1, "a", "b", "c"})
  601. t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
  602. t1 = {1,1,nil,nil,4,nil,3,nil,nil}
  603. for i=1,10 do assert(t[i] == t1[i]) end
  604. -- bug in 0.12.2: ktable with only nil could be eliminated when joining
  605. -- with a pattern without ktable
  606. assert((m.P"aaa" * m.Cc(nil)):match"aaa" == nil)
  607. t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
  608. checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
  609. t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
  610. checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
  611. -- tests for Query Replacements
  612. assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
  613. assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
  614. assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
  615. t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
  616. checkeq(t, {40})
  617. assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
  618. assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
  619. assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
  620. assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
  621. assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
  622. assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
  623. assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
  624. assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
  625. assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
  626. assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
  627. assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
  628. assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
  629. assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
  630. assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
  631. assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
  632. assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
  633. assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
  634. assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
  635. assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
  636. assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
  637. "411 - abc ")
  638. assert(m.match(m.P(1)/"%0", "abc") == "a")
  639. checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
  640. checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
  641. p = m.C(1)
  642. p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
  643. assert(p:match("1234567890") == "9 - 1")
  644. assert(m.match(m.Cc(print), "") == print)
  645. -- too many captures (just ignore extra ones)
  646. p = m.C(1)^0 / "%2-%9-%0-%9"
  647. assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
  648. s = string.rep("12345678901234567890", 20)
  649. assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
  650. -- string captures with non-string subcaptures
  651. p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
  652. assert(p:match'x' == 'alo - x - alo')
  653. checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
  654. -- long strings for string capture
  655. l = 10000
  656. s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
  657. p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
  658. assert(p:match(s) == string.rep('c', l) ..
  659. string.rep('b', l) ..
  660. string.rep('a', l))
  661. print"+"
  662. -- accumulator capture
  -- Accumulator step: returns its first argument plus one. Any extra
  -- arguments (e.g. the captured value handed over by a fold) are ignored.
  function f (x)
    local successor = x + 1
    return successor
  end
  664. assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
  665. assert(m.match(m.Cc(0) * (m.C(1) % f)^0, "alo alo") == 7)
  666. t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
  667. checkeq(t, {1})
  668. p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
  669. rawset)
  670. t = p:match("a=b;c=du;xux=yuy;")
  671. checkeq(t, {a="b", c="du", xux="yuy"})
  672. -- errors in fold capture
  673. -- no initial capture
  674. checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
  675. -- no initial capture (very long match forces fold to be a pair open-close)
  676. checkerr("no initial value", m.match, m.Cf(m.P(500), print),
  677. string.rep('a', 600))
  678. -- errors in accumulator capture
  679. -- no initial capture
  680. checkerr("no previous value", m.match, m.P(5) % print, 'aaaaaa')
  681. -- no initial capture (very long match forces fold to be a pair open-close)
  682. checkerr("no previous value", m.match, m.P(500) % print,
  683. string.rep('a', 600))
  684. -- tests for loop checker
  -- Checks that repeating 'p' with '^0' is rejected with an error
  -- mentioning "may accept empty string".
  -- 'p' may be anything accepted by m.P (pattern, string, table, ...).
  local function isnullable (p)
    local pattern = m.P(p)
    local function star (q)
      return q^0
    end
    checkerr("may accept empty string", star, pattern)
  end
  688. isnullable(m.P("x")^-4)
  689. assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
  690. assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
  691. isnullable("")
  692. isnullable(m.P("x")^0)
  693. isnullable(m.P("x")^-1)
  694. isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
  695. isnullable(-m.P("ab"))
  696. isnullable(- -m.P("ab"))
  697. isnullable(# #(m.P("ab") + "xy"))
  698. isnullable(- #m.P("ab")^0)
  699. isnullable(# -m.P("ab")^1)
  700. isnullable(#m.V(3))
  701. isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
  702. isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
  703. assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
  704. == 3)
  705. assert(m.match(m.P""^-3, "a") == 1)
  -- Matches the pattern built by 'basiclookfor(p)' against subject 's'
  -- and returns whatever that match returns.
  local function find (p, s)
    local searcher = basiclookfor(p)
    return m.match(searcher, s)
  end
  -- Asserts that building a pattern from grammar 'g' fails.
  --   g        : value handed to m.P (normally a grammar table)
  --   expected : optional pattern/string that must be found (via 'find')
  --              in the error message raised by m.P
  -- Both assertions carry a diagnostic message, so a failure tells which
  -- expectation was violated and what m.P actually reported.
  local function badgrammar (g, expected)
    local stat, msg = pcall(m.P, g)
    assert(not stat, "grammar was accepted, but an error was expected")
    if expected then
      assert(find(expected, msg),
             "error message '" .. tostring(msg) ..
             "' does not contain '" .. tostring(expected) .. "'")
    end
  end
  714. badgrammar({[1] = m.V(1)}, "rule '1'")
  715. badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal
  716. badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal
  717. badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal
  718. badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
  719. badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
  720. badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive
  721. badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive
  722. badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive
  723. badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop
  724. badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop
  725. badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop
  726. badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop
  727. badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop
  728. badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive
  729. badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")
  730. assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
  731. assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
  732. -- good x bad grammars
  733. m.P{ ('a' * m.V(1))^-1 }
  734. m.P{ -('a' * m.V(1)) }
  735. m.P{ ('abc' * m.V(1))^-1 }
  736. m.P{ -('abc' * m.V(1)) }
  737. badgrammar{ #m.P('abc') * m.V(1) }
  738. badgrammar{ -('a' + m.V(1)) }
  739. m.P{ #('a' * m.V(1)) }
  740. badgrammar{ #('a' + m.V(1)) }
  741. m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
  742. badgrammar{ m.B{ m.P'abc' } * m.V(1) }
  743. badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
  744. -- simple tests for maximum sizes:
  745. local p = m.P"a"
  746. for i=1,14 do p = p * p end
  747. p = {}
  748. for i=1,100 do p[i] = m.P"a" end
  749. p = m.P(p)
  750. -- strange values for rule labels
  751. p = m.P{ "print",
  752. print = m.V(print),
  753. [print] = m.V(_G),
  754. [_G] = m.P"a",
  755. }
  756. assert(p:match("a"))
  757. -- initial rule
  758. g = {}
  759. for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
  760. g.i11 = m.P""
  761. for i = 1, 10 do
  762. g[1] = "i"..i
  763. local p = m.P(g)
  764. assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
  765. end
  766. print "testing back references"
  767. checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
  768. checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
  769. p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
  770. t = p:match("ab")
  771. checkeq(t, {"a", "b"})
  772. do
  773. -- some basic cases
  774. assert(m.match(m.Cg(m.Cc(3), "a") * m.Cb("a"), "a") == 3)
  775. assert(m.match(m.Cg(m.C(1), 133) * m.Cb(133), "X") == "X")
  776. -- first reference to 'x' should not see the group enclosing it
  777. local p = m.Cg(m.Cb('x'), 'x') * m.Cb('x')
  778. checkerr("back reference 'x' not found", m.match, p, '')
  779. local p = m.Cg(m.Cb('x') * m.C(1), 'x') * m.Cb('x')
  780. checkerr("back reference 'x' not found", m.match, p, 'abc')
  781. -- reference to 'x' should not see the group enclosed in another capture
  782. local s = string.rep("a", 30)
  783. local p = (m.C(1)^-4 * m.Cg(m.C(1), 'x')) / {} * m.Cb('x')
  784. checkerr("back reference 'x' not found", m.match, p, s)
  785. local p = (m.C(1)^-20 * m.Cg(m.C(1), 'x')) / {} * m.Cb('x')
  786. checkerr("back reference 'x' not found", m.match, p, s)
  787. -- second reference 'k' should refer to 10 and first ref. 'k'
  788. p = m.Cg(m.Cc(20), 'k') * m.Cg(m.Cc(10) * m.Cb('k') * m.C(1), 'k')
  789. * (m.Cb('k') / function (a,b,c) return a*10 + b + tonumber(c) end)
  790. -- 10 * 10 (Cc) + 20 (Cb) + 7 (C) == 127
  791. assert(p:match("756") == 127)
  792. end
  793. p = m.P(true)
  794. for i = 1, 10 do p = p * m.Cg(1, i) end
  795. for i = 1, 10 do
  796. local p = p * m.Cb(i)
  797. assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i))
  798. end
  799. t = {}
  -- Logs 'p' at the end of the global list 't' and returns 'p' with an
  -- "x" appended.
  function foo (p)
    local slot = #t + 1
    t[slot] = p
    return p .. "x"
  end
  801. p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
  802. m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
  803. m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
  804. m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
  805. x = {p:match'ab'}
  806. checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
  807. checkeq(t, {'ab',
  808. 'ab', 'abx',
  809. 'ab', 'abx', 'abxx',
  810. 'ab', 'abx', 'abxx', 'abxxx'})
  811. -- tests for match-time captures
  812. p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
  813. + 'acd'
  814. assert(p:match('abc') == 3)
  815. assert(p:match('acd') == 4)
  -- Match-time function that always succeeds and passes through, as its
  -- results, every extra argument it received after subject and position.
  local function id (s, i, ...)
    local ok = true
    return ok, ...
  end
  819. do -- run-time capture in an end predicate (should discard its value)
  820. local x = 0
  821. function foo (s, i)
  822. x = x + 1
  823. return true, x
  824. end
  825. local p = #(m.Cmt("", foo) * "xx") * m.Cmt("", foo)
  826. assert(p:match("xx") == 2)
  827. end
  828. assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
  829. m.R'09'^1 / string.char +
  830. m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
  831. p = m.P{'S',
  832. S = m.V'atom' * space
  833. + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
  834. atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
  835. }
  836. x = p:match"(a g () ((b) c) (d (e)))"
  837. checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
  838. x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
  839. assert(#x == 500)
  -- Match-time function: succeeds only when 'x' is 'a', returning the
  -- position 'i' plus the values 1, 3, 7. Any other 'x' (including nil)
  -- yields nil followed by 2, 4, 6, 8.
  local function id (s, i, x)
    if x ~= 'a' then
      return nil, 2, 4, 6, 8
    end
    return i, 1, 3, 7
  end
  845. p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0
  846. assert(table.concat{p:match('abababab')} == string.rep('137', 4))
  -- Match-time function: matches the value 'x' (used as a pattern)
  -- against the subject, starting 'x:len()' positions before 'i', and
  -- returns the result of that match.
  local function ref (s, i, x)
    local start = i - x:len()
    return m.match(x, s, start)
  end
  850. assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
  851. assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
  852. assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
  -- Match-time function: the first result is 'i' when 'i' equals the
  -- numeric value of 'x' (false otherwise); the second result is always
  -- the string 'xuxu'.
  ref = function (s, i, x)
    local position = (i == tonumber(x)) and i
    return position, 'xuxu'
  end
  854. assert(m.Cmt(1, ref):match'2')
  855. assert(not m.Cmt(1, ref):match'1')
  856. assert(m.Cmt(m.P(1)^0, ref):match'03')
  -- Match-time function: when the two captures 'a' and 'b' are equal,
  -- returns position 'i' and 'a' in upper case; otherwise returns nothing.
  function ref (s, i, a, b)
    if a ~= b then
      return
    end
    return i, a:upper()
  end
  860. p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
  861. p = (any - p)^0 * p * any^0 * -1
  862. assert(p:match'abbbc-bc ddaa' == 'BC')
  863. do -- match-time captures cannot be optimized away
  864. local touch = 0
  865. f = m.P(function () touch = touch + 1; return true end)
  866. local function check(n) n = n or 1; assert(touch == n); touch = 0 end
  867. assert(m.match(f * false + 'b', 'a') == nil); check()
  868. assert(m.match(f * false + 'b', '') == nil); check()
  869. assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
  870. assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
  871. assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
  872. assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
  873. assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
  874. assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
  875. assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
  876. check()
  877. assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
  878. assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
  879. assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
  880. assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
  881. assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
  882. end
  883. c = '[' * m.Cg(m.P'='^0, "init") * '[' *
  884. { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
  885. return s1 == s2 end)
  886. + 1 * m.V(1) } / 0
  887. assert(c:match'[==[]]====]]]]==]===[]' == 18)
  888. assert(c:match'[[]=]====]=]]]==]===[]' == 14)
  889. assert(not c:match'[[]=]====]=]=]==]===[]')
  890. -- old bug: optimization of concat with fail removed match-time capture
  891. p = m.Cmt(0, function (s) p = s end) * m.P(false)
  892. assert(not p:match('alo'))
  893. assert(p == 'alo')
  894. -- ensure that failed match-time captures are not kept on Lua stack
  895. do
  -- 't' is its own metatable and declares __mode = "kv", so it is a weak
  -- table: entries whose key is otherwise unreachable can be collected.
  896. local t = {__mode = "kv"}; setmetatable(t,t)
  -- number of times 'foo' has been called
  897. local c = 0
  898. local function foo (s,i)
  899. collectgarbage();
  -- after the collection the only key left in 't' must be "__mode";
  -- i.e., the table created by the previous call is already gone
  900. assert(next(t) == "__mode" and next(t, "__mode") == nil)
  -- fresh table, registered in 't' and returned as the capture value
  901. local x = {}
  902. t[x] = true
  903. c = c + 1
  904. return i, x
  905. end
  -- grammar: run 'foo' and then fail (m.P(false)); otherwise consume one
  -- character and recurse; otherwise match the empty string
  906. local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
  907. p:match(string.rep('1', 10))
  -- 10 characters give 11 positions, so 'foo' must have run 11 times
  908. assert(c == 11)
  909. end
  -- Builds a match-time capture over the literal "a" whose function
  -- always succeeds and produces 'n' capture values: n-1, n-2, ..., 0.
  local function manyCmt (n)
    local function produce ()
      local values = {}
      local count = 0
      while count < n do
        count = count + 1
        values[count] = n - count
      end
      return true, unpack(values)
    end
    return m.Cmt("a", produce)
  end
  917. -- bug in 1.0: failed match-time that used previous match-time results
  918. do
  -- 'x' receives the number of values seen by 'aux' (it is not checked below)
  919. local x
  -- match-time function that records how many arguments it got and then
  -- makes the enclosing match-time capture fail
  920. local function aux (...) x = #{...}; return false end
  -- first alternative: an inner match-time capture (20 values) wrapped in
  -- an outer one that fails; second alternative: a plain manyCmt(10)
  921. local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")}
  -- the results must be exactly the 10 values 9..0 of the second alternative
  922. assert(#res == 10 and res[1] == 9 and res[10] == 0)
  923. end
  924. -- bug in 1.0: problems with match-times returning too many captures
  925. if _VERSION >= "Lua 5.2" then
  926. local lim = 2^11 - 10
  927. local res = {m.match(manyCmt(lim), "a")}
  928. assert(#res == lim and res[1] == lim - 1 and res[lim] == 0)
  929. checkerr("too many", m.match, manyCmt(2^15), "a")
  930. end
  931. p = (m.P(function () return true, "a" end) * 'a'
  932. + m.P(function (s, i) return i, "aa", 20 end) * 'b'
  933. + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
  934. t = {p:match('abacc')}
  935. checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
  -- Builds a grammar with 'lim' rules forming one long call chain
  -- (1 -> lim -> lim-1 -> ... -> 2), checks that it still works, and
  -- then checks that adding one more rule is rejected.
  936. do print"testing large grammars"
  937. local lim = 1000 -- number of rules
  938. local t = {}
  939. for i = 3, lim do
  940. t[i] = m.V(i - 1) -- each rule calls previous one
  941. end
  942. t[1] = m.V(lim) -- start on last rule
  943. t[2] = m.C("alo") -- final rule
  944. local P = m.P(t) -- build grammar
  -- the capture made by rule 2 must come back through the whole chain
  945. assert(P:match("alo") == "alo")
  946. t[#t + 1] = m.P("x") -- one more rule...
  -- ...and building the grammar is now expected to fail with "too many rules"
  947. checkerr("too many rules", m.P, t)
  948. end
  949. print "testing UTF-8 ranges"
  950. do -- a few typical UTF-8 ranges
  951. local p = m.utfR(0x410, 0x44f)^1 / "cyr: %0"
  952. + m.utfR(0x4e00, 0x9fff)^1 / "cjk: %0"
  953. + m.utfR(0x1F600, 0x1F64F)^1 / "emot: %0"
  954. + m.utfR(0, 0x7f)^1 / "ascii: %0"
  955. + m.utfR(0, 0x10ffff) / "other: %0"
  956. p = m.Ct(p^0) * -m.P(1)
  957. local cyr = "ждюя"
  958. local emot = "\240\159\152\128\240\159\153\128" -- 😀🙀
  959. local cjk = "专举乸"
  960. local ascii = "alo"
  961. local last = "\244\143\191\191" -- U+10FFFF
  962. local s = cyr .. "—" .. emot .. "—" .. cjk .. "—" .. ascii .. last
  963. t = (p:match(s))
  964. assert(t[1] == "cyr: " .. cyr and t[2] == "other: —" and
  965. t[3] == "emot: " .. emot and t[4] == "other: —" and
  966. t[5] == "cjk: " .. cjk and t[6] == "other: —" and
  967. t[7] == "ascii: " .. ascii and t[8] == "other: " .. last and
  968. t[9] == nil)
  969. -- failing UTF-8 matches and borders
  970. assert(not m.match(m.utfR(10, 0x2000), "\9"))
  971. assert(not m.match(m.utfR(10, 0x2000), "\226\128\129"))
  972. assert(m.match(m.utfR(10, 0x2000), "\10") == 2)
  973. assert(m.match(m.utfR(10, 0x2000), "\226\128\128") == 4)
  974. end
  975. do -- valid and invalid code points
  976. local p = m.utfR(0, 0x10ffff)^0
  977. assert(p:match("汉字\128") == #"汉字" + 1)
  978. assert(p:match("\244\159\191") == 1)
  979. assert(p:match("\244\159\191\191") == 1)
  980. assert(p:match("\255") == 1)
  981. -- basic errors
  982. checkerr("empty range", m.utfR, 1, 0)
  983. checkerr("invalid code point", m.utfR, 1, 0x10ffff + 1)
  984. end
  985. do -- look-behind (m.B) with fixed-width UTF-8 ranges
  986. -- match a byte after a CJK point
  987. local p = m.B(m.utfR(0x4e00, 0x9fff)) * m.C(1)
  988. p = m.P{ p + m.P(1) * m.V(1) } -- search for 'p'
  989. assert(p:match("ab д 专X x") == "X")
  990. -- match a byte after a hebrew point
  991. local p = m.B(m.utfR(0x5d0, 0x5ea)) * m.C(1)
  -- first skip as many bytes as the encoded Hebrew letter occupies
  -- (#"ש" is the byte length of the literal), then apply the look-behind
  992. p = m.P(#"ש") * p
  993. assert(p:match("שX") == "X")
  -- a range covering all code points is rejected by m.B with a
  -- "fixed length" error (presumably because encodings differ in size)
  994. checkerr("fixed length", m.B, m.utfR(0, 0x10ffff))
  995. end
  996. -------------------------------------------------------------------
  997. -- Tests for 're' module
  998. -------------------------------------------------------------------
  999. print"testing 're' module"
  1000. local re = require "re"
  1001. local match, compile = re.match, re.compile
  1002. assert(match("a", ".") == 2)
  1003. assert(match("a", "''") == 1)
  1004. assert(match("", " ! . ") == 1)
  1005. assert(not match("a", " ! . "))
  1006. assert(match("abcde", " ( . . ) * ") == 5)
  1007. assert(match("abbcde", " [a-c] +") == 5)
  1008. assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
  1009. assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
  1010. assert(match("abbc--", " [a-c] + +") == 5)
  1011. assert(match("abbc--", " [ac-] +") == 2)
  1012. assert(match("abbc--", " [-acb] + ") == 7)
  1013. assert(not match("abbcde", " [b-z] + "))
  1014. assert(match("abb\"de", '"abb"["]"de"') == 7)
  1015. assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
  1016. assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
  1017. assert(re.match("aaand", "[a]^2") == 3)
  1018. local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
  1019. checkeq(t, {4, 5, 7})
  1020. local t = {match("abceefe", "((&&'e' {})? .)*")}
  1021. checkeq(t, {4, 5, 7})
  1022. local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
  1023. checkeq(t, {4, 5, 7})
  1024. local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
  1025. checkeq(t, {4, 5, 7})
  1026. assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
  1027. assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
  1028. assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
  1029. assert(match("abc", "a <- (. a)?") == 4)
  1030. b = "balanced <- '(' ([^()] / balanced)* ')'"
  1031. assert(match("(abc)", b))
  1032. assert(match("(a(b)((c) (d)))", b))
  1033. assert(not match("(a(b ((c) (d)))", b))
  1034. b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
  1035. assert(b == m.P(b))
  1036. assert(b:match"((((a))(b)))")
  1037. local g = [[
  1038. S <- "0" B / "1" A / "" -- balanced strings
  1039. A <- "0" S / "1" A A -- one more 0
  1040. B <- "1" S / "0" B B -- one more 1
  1041. ]]
  1042. assert(match("00011011", g) == 9)
  1043. local g = [[
  1044. S <- ("0" B / "1" A)*
  1045. A <- "0" / "1" A A
  1046. B <- "1" / "0" B B
  1047. ]]
  1048. assert(match("00011011", g) == 9)
  1049. assert(match("000110110", g) == 9)
  1050. assert(match("011110110", g) == 3)
  1051. assert(match("000110010", g) == 1)
  1052. s = "aaaaaaaaaaaaaaaaaaaaaaaa"
  1053. assert(match(s, "'a'^3") == 4)
  1054. assert(match(s, "'a'^0") == 1)
  1055. assert(match(s, "'a'^+3") == s:len() + 1)
  1056. assert(not match(s, "'a'^+30"))
  1057. assert(match(s, "'a'^-30") == s:len() + 1)
  1058. assert(match(s, "'a'^-5") == 6)
  1059. for i = 1, s:len() do
  1060. assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
  1061. assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
  1062. assert(match(s, string.format("'a'^%d", i)) == i + 1)
  1063. end
  1064. assert(match("01234567890123456789", "[0-9]^3+") == 19)
  1065. assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
  1066. t = match("0123456789", "{| {.}* |}")
  1067. checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
  1068. assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
  1069. assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
  1070. assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
  1071. assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
  1072. assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
  1073. assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
  1074. eqcharset(compile"[]]", "]")
  1075. eqcharset(compile"[][]", m.S"[]")
  1076. eqcharset(compile"[]-]", m.S"-]")
  1077. eqcharset(compile"[-]", m.S"-")
  1078. eqcharset(compile"[az-]", m.S"a-z")
  1079. eqcharset(compile"[-az]", m.S"a-z")
  1080. eqcharset(compile"[a-z]", m.R"az")
  1081. eqcharset(compile"[]['\"]", m.S[[]['"]])
  1082. eqcharset(compile"[^]]", any - "]")
  1083. eqcharset(compile"[^][]", any - m.S"[]")
  1084. eqcharset(compile"[^]-]", any - m.S"-]")
  1085. eqcharset(compile"[^]-]", any - m.S"-]")
  1086. eqcharset(compile"[^-]", any - m.S"-")
  1087. eqcharset(compile"[^az-]", any - m.S"a-z")
  1088. eqcharset(compile"[^-az]", any - m.S"a-z")
  1089. eqcharset(compile"[^a-z]", any - m.R"az")
  1090. eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
  1091. -- tests for comments in 're'
  1092. e = compile[[
  1093. A <- _B -- \t \n %nl .<> <- -> --
  1094. _B <- 'x' --]]
  1095. assert(e:match'xy' == 2)
  1096. -- tests for 're' with pre-definitions
  1097. defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
  1098. e = compile("%letters (%letters / %digits)*", defs)
  1099. assert(e:match"x123" == 5)
  1100. e = compile("%_", defs)
  1101. assert(e:match"__" == 3)
  1102. e = compile([[
  1103. S <- A+
  1104. A <- %letters+ B
  1105. B <- %digits+
  1106. ]], defs)
  1107. e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
  1108. assert(e:match("2.34") == math.sin(2.34))
  1109. e = compile("'pi' -> math", _G)
  1110. assert(e:match("pi") == math.pi)
  1111. e = compile("[ ]* 'version' -> _VERSION", _G)
  1112. assert(e:match(" version") == _VERSION)
  -- Match-time style helper: ignores its first two arguments and tells
  -- whether the third and fourth are equal.
  -- NOTE(review): no use of 'eq' is visible in this part of the file.
  function eq (_, _, a, b)
    local same = (a == b)
    return same
  end
  1114. c = re.compile([[
  1115. longstring <- '[' {:init: '='* :} '[' close
  1116. close <- ']' =init ']' / . close
  1117. ]])
  1118. assert(c:match'[==[]]===]]]]==]===[]' == 17)
  1119. assert(c:match'[[]=]====]=]]]==]===[]' == 14)
  1120. assert(not c:match'[[]=]====]=]=]==]===[]')
  1121. c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
  1122. assert(c:match'[==[]]===]]]]==]')
  1123. assert(c:match'[[]=]====]=][]==]===[]]')
  1124. assert(not c:match'[[]=]====]=]=]==]===[]')
  1125. assert(re.find("hi alalo", "{:x:..:} =x") == 4)
  1126. assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
  1127. assert(not re.find("hi alalo", "{:x:..:} =x", 5))
  1128. assert(re.find("hi alalo", "{'al'}", 5) == 6)
  1129. assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
  1130. assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
  1131. -- re.find discards any captures
  1132. local a,b,c = re.find("alo", "{.}{'o'}")
  1133. assert(a == 2 and b == 3 and c == nil)
  -- Returns the substring of 's' located by re.find(s, p), or nothing
  -- when re.find does not find a match.
  local function match (s, p)
    local first, last = re.find(s, p)
    if not first then
      return
    end
    return s:sub(first, last)
  end
  1138. assert(match("alo alo", '[a-z]+') == "alo")
  1139. assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
  1140. assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
  1141. assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
  1142. assert(re.gsub("alo alo", "%w+", ".") == ". .")
  1143. assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
  1144. "hI, hOw ArE yOU")
  1145. s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
  1146. c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
  1147. assert(re.gsub(s, c, "%2") == 'hi and =]')
  1148. assert(re.gsub(s, c, "%0") == s)
  1149. assert(re.gsub('[=[hi]=]', c, "%2") == '=')
  1150. assert(re.find("", "!.") == 1)
  1151. assert(re.find("alo", "!.") == 4)
  -- Match-time function: stores 'tag' in field 'tag' of table 't' and
  -- succeeds at position 'i', producing 't' as the capture value.
  function addtag (s, i, t, tag)
    t.tag = tag
    return i, t
  end
  1153. c = re.compile([[
  1154. doc <- block !.
  1155. block <- (start {| (block / { [^<]+ })* |} end?) => addtag
  1156. start <- '<' {:tag: [a-z]+ :} '>'
  1157. end <- '</' { =tag } '>'
  1158. ]], {addtag = addtag})
  1159. x = c:match[[
  1160. <x>hi<b>hello</b>but<b>totheend</x>]]
  1161. checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
  1162. {'totheend'}})
  1163. -- test for folding captures
  1164. c = re.compile([[
  1165. S <- (number (%s+ number)*) ~> add
  1166. number <- %d+ -> tonumber
  1167. ]], {tonumber = tonumber, add = function (a,b) return a + b end})
  1168. assert(c:match("3 401 50") == 3 + 401 + 50)
  1169. -- test for accumulator captures
  1170. c = re.compile([[
  1171. S <- number (%s+ number >> add)*
  1172. number <- %d+ -> tonumber
  1173. ]], {tonumber = tonumber, add = function (a,b) return a + b end})
  1174. assert(c:match("3 401 50") == 3 + 401 + 50)
  1175. -- tests for look-ahead captures
  1176. x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
  1177. checkeq(x, {"", "alo", ""})
  1178. assert(re.match("aloalo",
  1179. "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
  1180. == "AallooAalloo")
  1181. -- bug in 0.9 (and older versions), due to captures in look-aheads
  1182. x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
  1183. assert(x:match"alo alo" == "+ +")
  1184. -- valid capture in look-ahead (used inside the look-ahead itself)
  1185. x = re.compile[[
  1186. S <- &({:two: .. :} . =two) {[a-z]+} / . S
  1187. ]]
  1188. assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
  1189. p = re.compile[[
  1190. block <- {| {:ident:space*:} line
  1191. ((=ident !space line) / &(=ident space) block)* |}
  1192. line <- {[^%nl]*} %nl
  1193. space <- '_' -- should be ' ', but '_' is simpler for editors
  1194. ]]
  1195. t= p:match[[
  1196. 1
  1197. __1.1
  1198. __1.2
  1199. ____1.2.1
  1200. ____
  1201. 2
  1202. __2.1
  1203. ]]
  1204. checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
  1205. "2", {"2.1", ident = "__"}, ident = ""})
  1206. -- nested grammars
  1207. p = re.compile[[
  1208. s <- a b !.
  1209. b <- ( x <- ('b' x)? )
  1210. a <- ( x <- 'a' x? )
  1211. ]]
  1212. assert(p:match'aaabbb')
  1213. assert(p:match'aaa')
  1214. assert(not p:match'bbb')
  1215. assert(not p:match'aaabbba')
  1216. -- testing groups
  1217. t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
  1218. checkeq(t, {"a", "bc", "b", "c", "c", ""})
  1219. t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
  1220. checkeq(t, {a="1", b="2", c="4"})
  1221. t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
  1222. checkeq(t, {a="1", b="2", c="4"})
  1223. t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
  1224. checkeq(t, {"1", b="2", "4", "5"})
  1225. t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
  1226. checkeq(t, {"1", "23", "4", "5"})
  1227. t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
  1228. checkeq(t, {"1", "23", "4", "5"})
  1229. -- testing pre-defined names
  1230. assert(os.setlocale("C") == "C")
  -- Checks that the 're' pattern 'p1' selects the same characters as the
  -- Lua character class '[p2]'.
  --   p1 : pattern source compiled with re.compile
  --   p2 : contents of a Lua class; every character of 'allchar' NOT in
  --        the class is deleted with string.gsub
  -- Raises an error describing the mismatch when the two strings differ.
  -- NOTE(review): 'cs2str' and 'allchar' are defined earlier in the file;
  -- presumably cs2str yields the string of characters a charset pattern
  -- matches and allchar holds every character -- confirm there.
  function eqlpeggsub (p1, p2)
    local s1 = cs2str(re.compile(p1))
    -- keep only the first result of gsub (the second is the count)
    local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
    if s1 ~= s2 then
      -- level 2 blames the calling test line instead of this helper
      error(string.format(
              "re class %q differs from Lua class [%s]: %d vs %d characters",
              tostring(p1), tostring(p2), #tostring(s1), #s2), 2)
    end
  end
  1237. eqlpeggsub("%w", "%w")
  1238. eqlpeggsub("%a", "%a")
  1239. eqlpeggsub("%l", "%l")
  1240. eqlpeggsub("%u", "%u")
  1241. eqlpeggsub("%p", "%p")
  1242. eqlpeggsub("%d", "%d")
  1243. eqlpeggsub("%x", "%x")
  1244. eqlpeggsub("%s", "%s")
  1245. eqlpeggsub("%c", "%c")
  1246. eqlpeggsub("%W", "%W")
  1247. eqlpeggsub("%A", "%A")
  1248. eqlpeggsub("%L", "%L")
  1249. eqlpeggsub("%U", "%U")
  1250. eqlpeggsub("%P", "%P")
  1251. eqlpeggsub("%D", "%D")
  1252. eqlpeggsub("%X", "%X")
  1253. eqlpeggsub("%S", "%S")
  1254. eqlpeggsub("%C", "%C")
  1255. eqlpeggsub("[%w]", "%w")
  1256. eqlpeggsub("[_%w]", "_%w")
  1257. eqlpeggsub("[^%w]", "%W")
  1258. eqlpeggsub("[%W%S]", "%W%S")
  1259. re.updatelocale()
  1260. -- testing nested substitutions x string captures
  1261. p = re.compile[[
  1262. text <- {~ item* ~}
  1263. item <- macro / [^()] / '(' item* ')'
  1264. arg <- ' '* {~ (!',' item)* ~}
  1265. args <- '(' arg (',' arg)* ')'
  1266. macro <- ('apply' args) -> '%1(%2)'
  1267. / ('add' args) -> '%1 + %2'
  1268. / ('mul' args) -> '%1 * %2'
  1269. ]]
  1270. assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
  1271. rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
  1272. assert(rev:match"0123456789" == "9876543210")
  1273. -- testing error messages in re
  -- Checks (via 'checkerr') that compiling the 're' pattern source 'p'
  -- fails with an error message matching 'err'.
  local function errmsg (p, err)
    local compiler = re.compile
    checkerr(err, compiler, p)
  end
  1277. errmsg('aaaa', "rule 'aaaa'")
  1278. errmsg('a', 'outside')
  1279. errmsg('b <- a', 'undefined')
  1280. errmsg("x <- 'a' x <- 'b'", 'already defined')
  1281. errmsg("'a' -", "near '-'")
  1282. print"OK"