re.lua 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. --
  2. -- Copyright 2007-2023, Lua.org & PUC-Rio (see 'lpeg.html' for license)
  3. -- written by Roberto Ierusalimschy
  4. --
  5. -- imported functions and modules
  6. local tonumber, type, print, error = tonumber, type, print, error
  7. local setmetatable = setmetatable
  8. local m = require"lpeg"
  9. -- 'm' will be used to parse expressions, and 'mm' will be used to
  10. -- create expressions; that is, 're' runs on 'm', creating patterns
  11. -- on 'mm'
  12. local mm = m
  13. -- patterns' metatable
  14. local mt = getmetatable(mm.P(0))
  15. local version = _VERSION
  16. -- No more global accesses after this point
  17. _ENV = nil -- does no harm in Lua 5.1
  -- pattern that matches any single character
  local any = m.P(1)

  -- Pre-defined names; 'updatelocale' adds the character classes
  local Predef = { nl = m.P("\n") }

  -- memoization tables for 'match', 'find', and 'gsub';
  -- they are (re)created by 'updatelocale'
  local mem, fmem, gmem
  -- Rebuilds the predefined character classes from the current locale
  -- and discards every memoized pattern (cached patterns may embed the
  -- old classes).
  local function updatelocale ()
    -- fills 'Predef' with the long class names (alpha, digit, ...)
    mm.locale(Predef)

    -- { short name, complement name, long name }
    local shortcuts = {
      {"a", "A", "alpha"},
      {"c", "C", "cntrl"},
      {"d", "D", "digit"},
      {"g", "G", "graph"},
      {"l", "L", "lower"},
      {"p", "P", "punct"},
      {"s", "S", "space"},
      {"u", "U", "upper"},
      {"w", "W", "alnum"},
      {"x", "X", "xdigit"},
    }
    for idx = 1, #shortcuts do
      local entry = shortcuts[idx]
      local class = Predef[entry[3]]
      Predef[entry[1]] = class          -- e.g. %a
      Predef[entry[2]] = any - class    -- e.g. %A: any char not in the class
    end

    -- restart memoization; values are weak so unused patterns can be
    -- collected
    local weakvalues = {__mode = "v"}
    mem = setmetatable({}, weakvalues)
    fmem = setmetatable({}, weakvalues)
    gmem = setmetatable({}, weakvalues)
  end

  updatelocale()
  -- Debugging aid: a pattern that prints the current position and the
  -- part of the subject before it, then returns that same position.
  local I = m.P(function (subject, pos)
    print(pos, subject:sub(1, pos - 1))
    return pos
  end)
  -- Raises a "pattern error" quoting the pattern text 's' from position
  -- 'i'; the quote is cut to 21 characters plus "..." when more text
  -- follows.  The error is reported at level 2.
  local function patt_error (s, i)
    local near
    if #s < i + 20 then
      near = s:sub(i)
    else
      near = s:sub(i, i + 20) .. "..."
    end
    error(("pattern error near '%s'"):format(near), 2)
  end
  -- Returns 'p' concatenated with itself 'n' times (the empty pattern
  -- for n == 0), using binary exponentiation so only O(log n) products
  -- are built.
  --   p: pattern to repeat
  --   n: repetition count (presumably a non-negative integer; it comes
  --      from 'tonumber' applied to a run of digits)
  local function mult (p, n)
    local np = mm.P(true)
    while n >= 1 do
      -- lowest remaining bit set: this power of 'p' is part of the result
      if n % 2 >= 1 then np = np * p end
      n = n / 2
      -- square only while a higher bit is still pending; squaring after
      -- the last bit would build a large pattern that is never used
      if n >= 1 then p = p * p end
    end
    return np
  end
  -- Checks whether the string 'c' occurs in subject 's' at position 'i'.
  -- Returns the position just past the occurrence on success; returns
  -- nil when the text differs or when 'c' is not a string.
  local function equalcap (s, i, c)
    if type(c) == "string" then
      local stop = i + #c
      if s:sub(i, stop - 1) == c then return stop end
    end
    return nil
  end
  -- optional run of spaces and "--" comments (a comment runs up to the
  -- end of its line)
  local S = (Predef.space + "--" * (any - Predef.nl)^0)^0

  -- identifier: a letter or underscore followed by letters, digits,
  -- or underscores
  local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0

  local arrow = S * "<-"

  -- what may come right after a sequence: a closing token, the start
  -- of a new rule, or the end of the pattern
  local seq_follow = m.P("/") + ")" + "}" + ":}" + "~}" + "|}"
                   + (name * arrow) + -1

  -- from here on, 'name' also captures the identifier it matches
  name = m.C(name)

  -- a defined name only has meaning in a given environment
  -- (the environment is the first extra argument given to 'match')
  local Def = name * m.Carg(1)
  -- Looks up 'id' in the table of definitions 'defs' and returns the
  -- value found; raises "undefined name" when 'defs' is absent or has
  -- no (true) entry for 'id'.
  local function getdef (id, defs)
    local value = defs and defs[id]
    if value then return value end
    error("undefined name: " .. id)
  end
  -- Builds a pattern that matches a name and produces a group with two
  -- values: the definition of that name and 'f' (both are folded into
  -- the pattern being built in 'Suffix').
  local function defwithfunc (f)
    local definition = Def / getdef
    return m.Cg(definition * m.Cc(f))
  end
  -- a run of digits (plus trailing spaces), captured as a number
  local num = (m.C(m.R("09")^1) * S) / tonumber

  -- a literal in single or double quotes; the capture is its contents
  -- (no escape sequences)
  local String = ("'" * m.C((any - "'")^0) * "'")
               + ('"' * m.C((any - '"')^0) * '"')
  -- "%name": resolves 'name' first in the user definitions (when
  -- given), then in the predefined names; raises an error when the
  -- name is in neither.
  local defined = "%" * Def / function (id, Defs)
    local cat = Defs and Defs[id]
    if not cat then cat = Predef[id] end
    if not cat then error ("name '" .. id .. "' undefined") end
    return cat
  end
  -- "x-y": the substitution capture drops the "-", so 'mm.R' receives
  -- the two-character string "xy"
  local Range = m.Cs(any * (m.P("-") / "") * (any - "]")) / mm.R

  -- one element of a class, turned into a pattern
  local item = (defined + Range + m.C(any)) / m.P

  -- "[...]": the items are combined with '+'; a leading "^" complements
  -- the resulting set.  The first item is accepted even if it is "]".
  local Class =
    ( "["
      * m.C(m.P("^")^-1)                        -- optional complement symbol
      * (item * ((item % mt.__add) - "]")^0)
    ) / function (complement, set)
          if complement == "^" then return any - set end
          return set
        end
    * "]"
  -- Stores rule 'exp' under name 'k' in the grammar table 't' and
  -- returns 't'; raises an error when 'k' is already present.
  local function adddef (t, k, exp)
    if t[k] then
      error("'"..k.."' already defined as a rule")
    end
    t[k] = exp
    return t
  end
  -- Starts a grammar table from its first rule: the rule name goes in
  -- position 1 (the initial rule) and the rule itself under its name.
  local function firstdef (n, r)
    local grammar = {n}
    return adddef(grammar, n, r)
  end
  -- Builds a reference to the rule named 'n'.  'b' tells whether the
  -- name appears inside a grammar; outside one, a rule name is an error.
  local function NT (n, b)
    if b then return mm.V(n) end
    error("rule '"..n.."' used outside a grammar")
  end
  -- Grammar of 're' pattern descriptions.  Matching a description
  -- against 'exp' produces, as its capture, the pattern it describes.
  local exp
  do
    local V, Cc = m.V, m.Cc

    -- folding step for suffixes: 'f' is the operation selected by the
    -- suffix, 'arg' its argument, and 'patt' the pattern built so far
    local function applysuffix (patt, arg, f)
      return f(patt, arg)
    end

    exp = m.P{ "Exp",

      -- a complete grammar, or an ordered choice ("/") of sequences
      Exp = S * ( V("Grammar")
                + V("Seq") * ("/" * S * V("Seq") % mt.__add)^0 ),

      -- product of the prefixed items, starting from the empty pattern;
      -- anything that cannot follow a sequence is a pattern error
      Seq = (Cc(m.P("")) * (V("Prefix") % mt.__mul)^0)
            * (#seq_follow + patt_error),

      -- "&p" applies '#' to the pattern; "!p" applies unary minus
      Prefix = "&" * S * V("Prefix") / mt.__len
             + "!" * S * V("Prefix") / mt.__unm
             + V("Suffix"),

      -- a primary followed by any number of suffixes; each suffix
      -- yields an argument and a function, folded by 'applysuffix'
      Suffix = V("Primary") * S *
        ( ( m.P("+") * Cc(1, mt.__pow)
          + m.P("*") * Cc(0, mt.__pow)
          + m.P("?") * Cc(-1, mt.__pow)
          + "^" * ( m.Cg(num * Cc(mult))            -- exact count
                  + m.Cg(m.C(m.S("+-") * m.R("09")^1) * Cc(mt.__pow))
                  )
          + "->" * S * ( m.Cg((String + num) * Cc(mt.__div))
                       + m.P("{}") * Cc(nil, m.Ct)  -- table capture
                       + defwithfunc(mt.__div)
                       )
          + "=>" * S * defwithfunc(mm.Cmt)
          + ">>" * S * defwithfunc(mt.__mod)
          + "~>" * S * defwithfunc(mm.Cf)
          ) % applysuffix * S
        )^0,

      Primary = "(" * V("Exp") * ")"
              + String / mm.P
              + Class
              + defined
              -- "{:name: p :}" (named) or "{: p :}" (anonymous) group
              + "{:" * (name * ":" + Cc(nil)) * V("Exp") * ":}" /
                  function (groupname, patt)
                    return mm.Cg(patt, groupname)
                  end
              -- "=name": matches the text of a previous group capture
              + "=" * name / function (groupname)
                               return mm.Cmt(mm.Cb(groupname), equalcap)
                             end
              + m.P("{}") / mm.Cp
              + "{~" * V("Exp") * "~}" / mm.Cs
              + "{|" * V("Exp") * "|}" / mm.Ct
              + "{" * V("Exp") * "}" / mm.C
              + m.P(".") * Cc(any)
              -- rule reference; group "G" tells 'NT' whether we are
              -- inside a grammar
              + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT,

      Definition = name * arrow * V("Exp"),

      -- a list of definitions collected into a table and then
      -- compiled; group "G" is set to true while inside it
      Grammar = m.Cg(Cc(true), "G") *
                ((V("Definition") / firstdef) * (V("Definition") % adddef)^0)
                / mm.P,
    }
  end
  -- Top-level parser: skips leading spaces, marks group "G" as false
  -- (not inside a grammar), parses one expression, and requires the
  -- end of the subject; leftover text raises a pattern error.
  local pattern = ((S * m.Cg(m.Cc(false), "G") * exp) / mm.P)
                  * (-any + patt_error)
  -- Compiles the pattern description 'p' into an LPeg pattern.
  --   p:    a description string, or an already compiled pattern
  --         (returned unchanged)
  --   defs: optional table used to resolve names defined by the caller
  -- Raises "incorrect pattern" when the description cannot be parsed.
  local function compile (p, defs)
    if mm.type(p) ~= "pattern" then
      local compiled = pattern:match(p, 1, defs)
      if not compiled then error("incorrect pattern", 3) end
      return compiled
    end
    return p   -- already compiled
  end
  -- Matches subject 's' against pattern 'p', starting at position 'i'
  -- (default 1), and returns whatever the compiled pattern's 'match'
  -- returns.  Compiled patterns are memoized in 'mem', keyed by 'p'.
  local function match (s, p, i)
    local compiled = mem[p]
    if not compiled then
      compiled = compile(p)
      mem[p] = compiled
    end
    local start = i or 1
    return compiled:match(s, start)
  end
  -- Searches subject 's' for pattern 'p', starting at position 'i'
  -- (default 1).  Returns the first and last positions of the
  -- occurrence found, or the failed match result when there is none.
  -- The search patterns are memoized in 'fmem', keyed by 'p'.
  local function find (s, p, i)
    local searcher = fmem[p]
    if not searcher then
      -- '/ 0' discards the captures of 'p'
      local body = compile(p) / 0
      -- try 'p' here, bracketed by position captures; otherwise skip
      -- one character and try again
      searcher = mm.P{ mm.Cp() * body * mm.Cp() + 1 * mm.V(1) }
      fmem[p] = searcher
    end
    local first, past = searcher:match(s, i or 1)
    if not first then return first end
    return first, past - 1
  end
  -- Returns 's' with every occurrence of pattern 'p' replaced
  -- according to 'rep' (applied with the '/' capture operator).
  -- Substitution patterns are memoized per pattern and per replacement.
  local function gsub (s, p, rep)
    -- keeping the per-pattern table in a local ensures gmem[p] is not
    -- collected while here
    local byrep = gmem[p] or {}
    gmem[p] = byrep
    local subst = byrep[rep]
    if not subst then
      local compiled = compile(p)
      -- replace each occurrence; copy any other character unchanged
      subst = mm.Cs((compiled / rep + 1)^0)
      byrep[rep] = subst
    end
    return subst:match(s)
  end
  -- exported names
  local re = {}
  re.compile = compile
  re.match = match
  re.find = find
  re.gsub = gsub
  re.updatelocale = updatelocale

  -- in Lua 5.1 the module is also registered as the global 're'
  if version == "Lua 5.1" then
    _G.re = re
  end

  return re