re.lua 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. -- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $
  2. -- imported functions and modules
  3. local tonumber, type, print, error = tonumber, type, print, error
  4. local setmetatable = setmetatable
  5. local m = require"lpeg"
  6. -- 'm' will be used to parse expressions, and 'mm' will be used to
  7. -- create expressions; that is, 're' runs on 'm', creating patterns
  8. -- on 'mm'
  9. local mm = m
  10. -- pattern's metatable
  11. local mt = getmetatable(mm.P(0))
  12. -- No more global accesses after this point
  13. local version = _VERSION
  14. if version == "Lua 5.2" then _ENV = nil end
  15. local any = m.P(1)
  16. -- Pre-defined names
  17. local Predef = { nl = m.P"\n" }
  18. local mem
  19. local fmem
  20. local gmem
  21. local function updatelocale ()
  22. mm.locale(Predef)
  23. Predef.a = Predef.alpha
  24. Predef.c = Predef.cntrl
  25. Predef.d = Predef.digit
  26. Predef.g = Predef.graph
  27. Predef.l = Predef.lower
  28. Predef.p = Predef.punct
  29. Predef.s = Predef.space
  30. Predef.u = Predef.upper
  31. Predef.w = Predef.alnum
  32. Predef.x = Predef.xdigit
  33. Predef.A = any - Predef.a
  34. Predef.C = any - Predef.c
  35. Predef.D = any - Predef.d
  36. Predef.G = any - Predef.g
  37. Predef.L = any - Predef.l
  38. Predef.P = any - Predef.p
  39. Predef.S = any - Predef.s
  40. Predef.U = any - Predef.u
  41. Predef.W = any - Predef.w
  42. Predef.X = any - Predef.x
  43. mem = {} -- restart memoization
  44. fmem = {}
  45. gmem = {}
  46. local mt = {__mode = "v"}
  47. setmetatable(mem, mt)
  48. setmetatable(fmem, mt)
  49. setmetatable(gmem, mt)
  50. end
  51. updatelocale()
  52. local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
  53. local function getdef (id, defs)
  54. local c = defs and defs[id]
  55. if not c then error("undefined name: " .. id) end
  56. return c
  57. end
  58. local function patt_error (s, i)
  59. local msg = (#s < i + 20) and s:sub(i)
  60. or s:sub(i,i+20) .. "..."
  61. msg = ("pattern error near '%s'"):format(msg)
  62. error(msg, 2)
  63. end
  64. local function mult (p, n)
  65. local np = mm.P(true)
  66. while n >= 1 do
  67. if n%2 >= 1 then np = np * p end
  68. p = p * p
  69. n = n/2
  70. end
  71. return np
  72. end
  73. local function equalcap (s, i, c)
  74. if type(c) ~= "string" then return nil end
  75. local e = #c + i
  76. if s:sub(i, e - 1) == c then return e else return nil end
  77. end
  78. local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
  79. local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
  80. local arrow = S * "<-"
  81. local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
  82. name = m.C(name)
  83. -- a defined name only have meaning in a given environment
  84. local Def = name * m.Carg(1)
  85. local num = m.C(m.R"09"^1) * S / tonumber
  86. local String = "'" * m.C((any - "'")^0) * "'" +
  87. '"' * m.C((any - '"')^0) * '"'
  88. local defined = "%" * Def / function (c,Defs)
  89. local cat = Defs and Defs[c] or Predef[c]
  90. if not cat then error ("name '" .. c .. "' undefined") end
  91. return cat
  92. end
  93. local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
  94. local item = defined + Range + m.C(any)
  95. local Class =
  96. "["
  97. * (m.C(m.P"^"^-1)) -- optional complement symbol
  98. * m.Cf(item * (item - "]")^0, mt.__add) /
  99. function (c, p) return c == "^" and any - p or p end
  100. * "]"
  101. local function adddef (t, k, exp)
  102. if t[k] then
  103. error("'"..k.."' already defined as a rule")
  104. else
  105. t[k] = exp
  106. end
  107. return t
  108. end
  109. local function firstdef (n, r) return adddef({n}, n, r) end
  110. local function NT (n, b)
  111. if not b then
  112. error("rule '"..n.."' used outside a grammar")
  113. else return mm.V(n)
  114. end
  115. end
-- Grammar that parses the textual pattern syntax. It runs on 'm', and its
-- captures build the resulting pattern with the 'mm' constructors and the
-- pattern metamethods in 'mt'.
local exp = m.P{ "Exp",
  -- an expression is a whole grammar or a '/'-separated list of
  -- sequences, folded together with mt.__add
  Exp = S * ( m.V"Grammar"
            + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
  -- a sequence folds its (possibly zero) prefixed items with mt.__mul,
  -- starting from the empty pattern; whatever comes next must be in
  -- 'seq_follow', otherwise a pattern error is raised
  Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
        * (#seq_follow + patt_error);
  -- '&' applies mt.__len and '!' applies mt.__unm to the prefixed item
  Prefix = "&" * S * m.V"Prefix" / mt.__len
         + "!" * S * m.V"Prefix" / mt.__unm
         + m.V"Suffix";
  -- each suffix produces an (argument, function) pair; the fold applies
  -- the function to the pattern built so far and that argument
  Suffix = m.Cf(m.V"Primary" * S *
          ( ( m.P"+" * m.Cc(1, mt.__pow)
            + m.P"*" * m.Cc(0, mt.__pow)
            + m.P"?" * m.Cc(-1, mt.__pow)
            -- '^n' goes through 'mult'; a signed '^+n' / '^-n' goes to mt.__pow
            + "^" * ( m.Cg(num * m.Cc(mult))
                    + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
                    )
            -- '->' with a string, a number, '{}' (m.Ct) or a defined name
            + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
                         + m.P"{}" * m.Cc(nil, m.Ct)
                         + m.Cg(Def / getdef * m.Cc(mt.__div))
                         )
            -- '=>' with a defined name, applied through m.Cmt
            + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt))
            ) * S
          )^0, function (a,b,f) return f(a,b) end );
  Primary = "(" * m.V"Exp" * ")"
          + String / mm.P
          + Class
          + defined
          -- '{:name: exp :}' (the name is optional) becomes mm.Cg(exp, name)
          + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
                   function (n, p) return mm.Cg(p, n) end
          -- '=name': match-time comparison against the back capture 'name'
          + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
          + m.P"{}" / mm.Cp
          + "{~" * m.V"Exp" * "~}" / mm.Cs
          + "{|" * m.V"Exp" * "|}" / mm.Ct
          + "{" * m.V"Exp" * "}" / mm.C
          + m.P"." * m.Cc(any)
          -- a rule reference: a name not followed by '<-', or '<name>';
          -- the "G" group value tells NT whether we are inside a grammar
          + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;
  Definition = name * arrow * m.V"Exp";
  -- inside a grammar the "G" group is true; the definitions are collected
  -- into one table (firstdef, then adddef) which is handed to mm.P
  Grammar = m.Cg(m.Cc(true), "G") *
            m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
              adddef) / mm.P
}

-- Complete pattern parser: at top level the "G" group is false, so rule
-- references outside a grammar are rejected by NT; anything left over
-- after the expression raises a pattern error.
local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
  157. local function compile (p, defs)
  158. if mm.type(p) == "pattern" then return p end -- already compiled
  159. local cp = pattern:match(p, 1, defs)
  160. if not cp then error("incorrect pattern", 3) end
  161. return cp
  162. end
  163. local function match (s, p, i)
  164. local cp = mem[p]
  165. if not cp then
  166. cp = compile(p)
  167. mem[p] = cp
  168. end
  169. return cp:match(s, i or 1)
  170. end
  171. local function find (s, p, i)
  172. local cp = fmem[p]
  173. if not cp then
  174. cp = compile(p) / 0
  175. cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
  176. fmem[p] = cp
  177. end
  178. local i, e = cp:match(s, i or 1)
  179. if i then return i, e - 1
  180. else return i
  181. end
  182. end
  183. local function gsub (s, p, rep)
  184. local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
  185. gmem[p] = g
  186. local cp = g[rep]
  187. if not cp then
  188. cp = compile(p)
  189. cp = mm.Cs((cp / rep + 1)^0)
  190. g[rep] = cp
  191. end
  192. return cp:match(s)
  193. end
  194. -- exported names
  195. local re = {
  196. compile = compile,
  197. match = match,
  198. find = find,
  199. gsub = gsub,
  200. updatelocale = updatelocale,
  201. }
  202. if version == "Lua 5.1" then
  203. --I need this to work with strict.lua, sorry for breaking compatibility.
  204. --_G.re = re
  205. end
  206. return re