I've been working on porting Lua pattern matching to JavaScript regular expressions, but I'm not that well versed in regex.
It works most of the time, but I'm hoping someone else is interested in looking at it and fixing it up a bit:
Code: Select all
// Maps a Lua character-class letter (the x in %x) to an equivalent
// JavaScript character class, written as regex source text.
// Only the ASCII range is covered.
var m_c = {
  // All 32 ASCII punctuation characters. "-" and "\" are escaped explicitly
  // so the class does not depend on an accidental ",-." range and so that a
  // backslash is actually matched.
  "p": "[!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~]",
  "a": "[A-Za-z]",
  "l": "[a-z]",
  "u": "[A-Z]",
  "s": "[\\f\\v\\n\\r\\t ]",
  // Control characters: 0x00-0x1F plus DEL (0x7F).
  "c": "[\\x00-\\x1f\\x7f]",
  "d": "[0-9]",
  "w": "[0-9a-zA-Z]",
  "x": "[0-9a-fA-F]"
}
// Looks up the JavaScript character class registered in m_c for a Lua
// class letter. Yields undefined when the letter has no entry.
function matchclass(s) {
  var jsClass = m_c[s]
  return jsClass
}
// Reports, for every capture in a Lua pattern, whether it is a position
// capture "()".
//
// lregex: the Lua pattern string.
// Returns an array with one boolean per capture, in order of the opening
// parenthesis: true for "()", false for any other capture.
//
// The pattern is scanned character by character so that parentheses which
// are not captures are skipped: %-escaped ones, the two delimiters of
// %bxy, and anything inside a [...] set. Nested captures are each counted.
function LRegExEmptyCaps(lregex) {
  var arrs = []
  var len = lregex.length
  var i = 0
  while (i < len) {
    var ch = lregex.charAt(i)
    if (ch === "%") {
      // "%x" hides one character, "%bxy" hides the two delimiters as well.
      i += lregex.charAt(i + 1) === "b" ? 4 : 2
    } else if (ch === "[") {
      i++
      if (lregex.charAt(i) === "^") i++
      // The first character of a set is always part of it (so "[]]" is a
      // set containing "]"), hence do/while rather than while.
      do {
        i += lregex.charAt(i) === "%" ? 2 : 1
      } while (i < len && lregex.charAt(i) !== "]")
      i++
    } else {
      if (ch === "(") arrs.push(lregex.charAt(i + 1) === ")")
      i++
    }
  }
  return arrs
}
// Escapes one character so it is a literal outside a JS character class.
function LRegExEscape(ch) {
  return /[\\^$.*+?()[\]{}|\/]/.test(ch) ? "\\" + ch : ch
}

// Escapes one character so it is a literal inside a JS character class.
function LRegExSetEscape(ch) {
  return /[\\\]\[^-]/.test(ch) ? "\\" + ch : ch
}

// Resolves a Lua class letter to JS class source via matchclass(). An
// upper-case letter with no entry of its own becomes the complement of the
// lower-case class (%A is "not %a"). Returns undefined for non-class letters.
function LRegExClass(esc) {
  var cls = matchclass(esc)
  if (cls === undefined && esc !== esc.toLowerCase()) {
    var base = matchclass(esc.toLowerCase())
    if (base !== undefined) cls = "[^" + base.slice(1)
  }
  return cls
}

// Builds regex source approximating Lua's %bxy (balanced delimiters).
// JS regexes cannot count, so nesting is unrolled to a fixed depth; deeper
// nesting than maxDepth will not match from the outermost delimiter.
function LRegExBalanced(open, close) {
  var maxDepth = 8
  var o = LRegExEscape(open)
  var c = LRegExEscape(close)
  if (open === close) {
    // With identical delimiters Lua simply stops at the next occurrence.
    return o + "[^" + LRegExSetEscape(open) + "]*" + c
  }
  var other = "[^" + LRegExSetEscape(open) + LRegExSetEscape(close) + "]"
  var inner = other + "*"
  for (var depth = 0; depth < maxDepth; depth++) {
    inner = "(?:" + other + "|" + o + inner + c + ")*"
  }
  return o + inner + c
}

// Translates the Lua set that starts at lregex[start] (a "[") into JS regex
// source. Returns { source, next } where next is the index just past the
// closing "]". Throws SyntaxError when the set is not closed.
function LRegExSet(lregex, start) {
  var len = lregex.length
  var i = start + 1
  var negated = false
  if (lregex.charAt(i) === "^") {
    negated = true
    i++
  }
  var body = ""     // members that can live inside one JS class
  var extras = []   // complemented classes, which JS cannot nest in a class
  var first = true  // a "]" in first position is a literal member
  while (true) {
    if (i >= len) throw new SyntaxError("malformed pattern (missing ']')")
    var ch = lregex.charAt(i)
    if (ch === "]" && !first) break
    first = false
    if (ch === "%") {
      var esc = lregex.charAt(i + 1)
      var cls = LRegExClass(esc)
      if (cls === undefined) body += LRegExSetEscape(esc)
      else if (cls.charAt(1) === "^") extras.push(cls)
      else body += cls.slice(1, -1)
      i += 2
    } else if (lregex.charAt(i + 1) === "-" && i + 2 < len && lregex.charAt(i + 2) !== "]") {
      // "a-z" is a range unless the "-" is the last member of the set.
      body += LRegExSetEscape(ch) + "-" + LRegExSetEscape(lregex.charAt(i + 2))
      i += 3
    } else {
      body += LRegExSetEscape(ch)
      i++
    }
  }
  var source
  if (extras.length === 0) {
    source = "[" + (negated ? "^" : "") + body + "]"
  } else if (negated) {
    // Not in any complemented class and not in the plain members.
    source = "(?:(?!" + extras.join("|") + ")" + (body === "" ? "[\\s\\S]" : "[^" + body + "]") + ")"
  } else {
    source = "(?:" + (body === "" ? "" : "[" + body + "]|") + extras.join("|") + ")"
  }
  return { source: source, next: i + 1 }
}

// Converts a Lua pattern into a JavaScript RegExp.
//
// lregex: the Lua pattern string.
// opt:    RegExp flags; defaults to "gm".
// Returns a new RegExp. Throws SyntaxError for an unclosed "[" set or a
// "%b" without its two delimiter characters.
//
// The pattern is tokenized the way Lua reads it: one item (literal, "."
// any-character, %class, [set]) optionally followed by ONE quantifier
// (* + - ?), where "-" is the lazy form of "*". Consequences:
//   - a quantifier character in item position (e.g. after ")") is a literal;
//   - "^" anchors only as the first character, "$" only as the last;
//   - characters special to JS but not to Lua ({ } | \ /) are escaped;
//   - "." matches any character, including newline;
//   - %1..%9 are back-references; %bxy is approximated (see LRegExBalanced).
// Known gaps: %f (frontier) is not supported and is treated as a literal
// "f"; with the "m" flag "^" and "$" also match at line boundaries.
function LRegEx(lregex, opt) {
  var len = lregex.length
  var out = ""
  var i = 0
  if (lregex.charAt(0) === "^") {
    out = "^"
    i = 1
  }
  while (i < len) {
    var ch = lregex.charAt(i)
    var item
    if (ch === "(" || ch === ")") {
      // Capture delimiters map one-to-one and never take a quantifier.
      out += ch
      i++
      continue
    }
    if (ch === "$" && i === len - 1) {
      out += "$"
      i++
      continue
    }
    if (ch === "%" && i + 1 < len) {
      var esc = lregex.charAt(i + 1)
      if (esc === "b") {
        if (i + 3 >= len) throw new SyntaxError("malformed pattern (missing arguments to '%b')")
        out += LRegExBalanced(lregex.charAt(i + 2), lregex.charAt(i + 3))
        i += 4
        continue
      }
      if (/[0-9]/.test(esc)) {
        // Wrapped so a following literal digit cannot extend the reference.
        out += "(?:\\" + esc + ")"
        i += 2
        continue
      }
      var cls = LRegExClass(esc)
      item = cls !== undefined ? cls : LRegExEscape(esc)
      i += 2
    } else if (ch === "[") {
      var set = LRegExSet(lregex, i)
      item = set.source
      i = set.next
    } else if (ch === ".") {
      item = "[\\s\\S]"
      i++
    } else {
      // Includes a trailing lone "%", which stays a literal.
      item = LRegExEscape(ch)
      i++
    }
    var quant = lregex.charAt(i)
    if (quant === "-") {
      item += "*?"
      i++
    } else if (quant === "*" || quant === "+" || quant === "?") {
      item += quant
      i++
    }
    out += item
  }
  return new RegExp(out, opt || "gm")
}
// Port of Lua's string.find for the first match of a pattern.
//
// str:     subject string.
// pattern: Lua pattern, translated with LRegEx.
// start:   optional 1-based position to start searching from; a negative
//          value counts from the end of the string, as in Lua.
// Returns [first, last, capture1, ...] with 1-based inclusive indices, or an
// empty array when there is no match. A position capture "()" yields the
// 1-based position in the original string instead of a substring.
function string_find(str, pattern, start) {
  // Rewrites every empty group "()" of a regex source into a lookahead that
  // captures the rest of the subject, so the position can be derived from
  // the length of what is left. Group numbering is unchanged. Assumes the
  // source has no named groups (every "(?" is treated as non-capturing).
  function markPositions(source) {
    var out = ""
    var isPosition = []
    var group = 0
    var inClass = false
    for (var i = 0; i < source.length; i++) {
      var ch = source.charAt(i)
      if (ch === "\\") {
        out += ch + source.charAt(i + 1)
        i++
      } else if (inClass) {
        inClass = ch !== "]"
        out += ch
      } else if (ch === "[") {
        inClass = true
        out += ch
      } else if (ch === "(" && source.charAt(i + 1) !== "?") {
        group++
        if (source.charAt(i + 1) === ")") {
          isPosition[group] = true
          out += "(?=([\\s\\S]*))"
          i++
        } else {
          out += ch
        }
      } else {
        out += ch
      }
    }
    return { source: out, isPosition: isPosition }
  }

  var offset = 0
  if (start !== undefined && start !== null) {
    // Lua rules: negative counts from the end, values below 1 clamp to 1,
    // and a start beyond the end of the string can never match.
    offset = start < 0 ? Math.max(str.length + start, 0) : Math.max(start - 1, 0)
    if (offset > str.length) return []
    str = str.substring(offset)
  }
  var marked = markPositions(LRegEx(pattern, "m").source)
  var match = new RegExp(marked.source, "m").exec(str)
  if (match === null) return []
  var res = [offset + match.index + 1, offset + match.index + match[0].length]
  // Only the captures of this first match are reported.
  for (var g = 1; g < match.length; g++) {
    if (marked.isPosition[g]) {
      res.push(offset + str.length - match[g].length + 1)
    } else {
      res.push(match[g])
    }
  }
  return res
}
// Port of Lua's string.match, built on string_find.
//
// str, pattern, start: passed through to string_find unchanged.
// Returns an array of the captures of the first match. When the pattern has
// no captures the whole match is returned as the single element, as Lua
// does. Returns an empty array when there is no match.
function string_match(str, pattern, start) {
  var found = string_find(str, pattern, start)
  if (found.length === 0) return []
  if (found.length === 2) {
    // No captures: found holds only the 1-based inclusive match bounds.
    return [str.substring(found[0] - 1, found[1])]
  }
  return found.slice(2)
}
// Port of Lua's string.gmatch that collects every match up front.
//
// str:     subject string.
// pattern: Lua pattern, translated with LRegEx.
// Returns an array with one entry per match. Each entry is an array of that
// match's captures, or [wholeMatch] when the pattern has no captures. A
// position capture "()" yields the 1-based position in str.
function string_gmatch(str, pattern) {
  // Rewrites every empty group "()" of a regex source into a lookahead that
  // captures the rest of the subject, so the position can be derived from
  // the length of what is left. Group numbering is unchanged. Assumes the
  // source has no named groups (every "(?" is treated as non-capturing).
  function markPositions(source) {
    var out = ""
    var isPosition = []
    var group = 0
    var inClass = false
    for (var i = 0; i < source.length; i++) {
      var ch = source.charAt(i)
      if (ch === "\\") {
        out += ch + source.charAt(i + 1)
        i++
      } else if (inClass) {
        inClass = ch !== "]"
        out += ch
      } else if (ch === "[") {
        inClass = true
        out += ch
      } else if (ch === "(" && source.charAt(i + 1) !== "?") {
        group++
        if (source.charAt(i + 1) === ")") {
          isPosition[group] = true
          out += "(?=([\\s\\S]*))"
          i++
        } else {
          out += ch
        }
      } else {
        out += ch
      }
    }
    return { source: out, isPosition: isPosition }
  }

  var marked = markPositions(LRegEx(pattern).source)
  var scanner = new RegExp(marked.source, "gm")
  var matches = []
  var found
  while ((found = scanner.exec(str)) !== null) {
    var curmatches = []
    if (found.length === 1) {
      curmatches.push(found[0])
    } else {
      for (var g = 1; g < found.length; g++) {
        if (marked.isPosition[g]) {
          curmatches.push(str.length - found[g].length + 1)
        } else {
          curmatches.push(found[g])
        }
      }
    }
    matches.push(curmatches)
    // An empty match does not advance lastIndex; step past it to avoid
    // looping forever on the same position.
    if (found[0].length === 0) scanner.lastIndex++
  }
  // we'd return a pairs(matches) for the user to iterate through
  return matches
}
// Placeholder for a port of Lua's string.gsub. It is not implemented yet:
// it takes no arguments and returns undefined.
function string_gsub() {
// todo: this is the hardest to implement
}
// Demo calls. For comparison, the Lua form is: string.find("Hello Lua user", "Lua")
console.log(string_find("Hello Lua user", "Lua", 4)) // expected: 7, 9
console.log(string_find("Hello Lua user", "Banana")) // expected: [] (Lua itself returns nil when nothing matches)
console.log(string_find("Hello Lua user", "%w+")) // expected: 1, 5
console.log(string_find("Hello Lua user", "(Lua) (user)()")) // expected: 7, 14, 'Lua', 'user', 15 (the trailing () captures the position after the match)
console.log(string_match("Hello Lua user", "(Lua) (user)")) // expected: 'Lua', 'user'
console.log(string_gmatch("Hello Lua user test", "(.-) ()")) // expected: ['Hello', 7], ['Lua', 11], ['user', 16] ("test" is not followed by a space, so it never matches)