match.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. // Package match provides a simple pattern matcher with unicode support.
  2. package yu_match
  3. import (
  4. yu_fast "gogs.qqck.cn/s/tools/fast"
  5. "unicode/utf8"
  6. )
  7. // Match returns true if str matches pattern. This is a very
  8. // simple wildcard match where '*' matches on any number characters
  9. // and '?' matches on any one character.
  10. //
  11. // pattern:
  12. //
  13. // { term }
  14. //
  15. // term:
  16. //
  17. // '*' matches any sequence of non-Separator characters
  18. // '?' matches any single non-Separator character
  19. // c matches character c (c != '*', '?', '\\')
  20. // '\\' c matches character c
  21. func Match(str, pattern string) bool {
  22. if pattern == "*" {
  23. return true
  24. }
  25. return match(str, pattern, 0, nil, -1) == rMatch
  26. }
  27. // MatchLimit is the same as Match but will limit the complexity of the match
  28. // operation. This is to avoid long running matches, specifically to avoid ReDos
  29. // attacks from arbritary inputs.
  30. //
  31. // How it works:
  32. // The underlying match routine is recursive and may call itself when it
  33. // encounters a sandwiched wildcard pattern, such as: `user:*:name`.
  34. // Everytime it calls itself a counter is incremented.
  35. // The operation is stopped when counter > maxcomp*len(str).
  36. func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
  37. if pattern == "*" {
  38. return true, false
  39. }
  40. counter := 0
  41. r := match(str, pattern, len(str), &counter, maxcomp)
  42. if r == rStop {
  43. return false, true
  44. }
  45. return r == rMatch, false
  46. }
  47. type result int
  48. const (
  49. rNoMatch result = iota
  50. rMatch
  51. rStop
  52. )
  53. func match(str, pat string, slen int, counter *int, maxcomp int) result {
  54. // check complexity limit
  55. if maxcomp > -1 {
  56. if *counter > slen*maxcomp {
  57. return rStop
  58. }
  59. *counter++
  60. }
  61. for len(pat) > 0 {
  62. var wild bool
  63. pc, ps := rune(pat[0]), 1
  64. if pc > 0x7f {
  65. pc, ps = utf8.DecodeRuneInString(pat)
  66. }
  67. var sc rune
  68. var ss int
  69. if len(str) > 0 {
  70. sc, ss = rune(str[0]), 1
  71. if sc > 0x7f {
  72. sc, ss = utf8.DecodeRuneInString(str)
  73. }
  74. }
  75. switch pc {
  76. case '?':
  77. if ss == 0 {
  78. return rNoMatch
  79. }
  80. case '*':
  81. // Ignore repeating stars.
  82. for len(pat) > 1 && pat[1] == '*' {
  83. pat = pat[1:]
  84. }
  85. // If this star is the last character then it must be a match.
  86. if len(pat) == 1 {
  87. return rMatch
  88. }
  89. // Match and trim any non-wildcard suffix characters.
  90. var ok bool
  91. str, pat, ok = matchTrimSuffix(str, pat)
  92. if !ok {
  93. return rNoMatch
  94. }
  95. // Check for single star again.
  96. if len(pat) == 1 {
  97. return rMatch
  98. }
  99. // Perform recursive wildcard search.
  100. r := match(str, pat[1:], slen, counter, maxcomp)
  101. if r != rNoMatch {
  102. return r
  103. }
  104. if len(str) == 0 {
  105. return rNoMatch
  106. }
  107. wild = true
  108. default:
  109. if ss == 0 {
  110. return rNoMatch
  111. }
  112. if pc == '\\' {
  113. pat = pat[ps:]
  114. pc, ps = utf8.DecodeRuneInString(pat)
  115. if ps == 0 {
  116. return rNoMatch
  117. }
  118. }
  119. if sc != pc {
  120. return rNoMatch
  121. }
  122. }
  123. str = str[ss:]
  124. if !wild {
  125. pat = pat[ps:]
  126. }
  127. }
  128. if len(str) == 0 {
  129. return rMatch
  130. }
  131. return rNoMatch
  132. }
  133. // matchTrimSuffix matches and trims any non-wildcard suffix characters.
  134. // Returns the trimed string and pattern.
  135. //
  136. // This is called because the pattern contains extra data after the wildcard
  137. // star. Here we compare any suffix characters in the pattern to the suffix of
  138. // the target string. Basically a reverse match that stops when a wildcard
  139. // character is reached. This is a little trickier than a forward match because
  140. // we need to evaluate an escaped character in reverse.
  141. //
  142. // Any matched characters will be trimmed from both the target
  143. // string and the pattern.
  144. func matchTrimSuffix(str, pat string) (string, string, bool) {
  145. // It's expected that the pattern has at least two bytes and the first byte
  146. // is a wildcard star '*'
  147. match := true
  148. for len(str) > 0 && len(pat) > 1 {
  149. pc, ps := utf8.DecodeLastRuneInString(pat)
  150. var esc bool
  151. for i := 0; ; i++ {
  152. if pat[len(pat)-ps-i-1] != '\\' {
  153. if i&1 == 1 {
  154. esc = true
  155. ps++
  156. }
  157. break
  158. }
  159. }
  160. if pc == '*' && !esc {
  161. match = true
  162. break
  163. }
  164. sc, ss := utf8.DecodeLastRuneInString(str)
  165. if !((pc == '?' && !esc) || pc == sc) {
  166. match = false
  167. break
  168. }
  169. str = str[:len(str)-ss]
  170. pat = pat[:len(pat)-ps]
  171. }
  172. return str, pat, match
  173. }
  174. var maxRuneBytes = [...]byte{244, 143, 191, 191}
  175. // Allowable parses the pattern and determines the minimum and maximum allowable
  176. // values that the pattern can represent.
  177. // When the max cannot be determined, 'true' will be returned
  178. // for infinite.
  179. func Allowable(pattern string) (min, max string) {
  180. if pattern == "" || pattern[0] == '*' {
  181. return "", ""
  182. }
  183. minb := make([]byte, 0, len(pattern))
  184. maxb := make([]byte, 0, len(pattern))
  185. var wild bool
  186. for i := 0; i < len(pattern); i++ {
  187. if pattern[i] == '*' {
  188. wild = true
  189. break
  190. }
  191. if pattern[i] == '?' {
  192. minb = append(minb, 0)
  193. maxb = append(maxb, maxRuneBytes[:]...)
  194. } else {
  195. minb = append(minb, pattern[i])
  196. maxb = append(maxb, pattern[i])
  197. }
  198. }
  199. if wild {
  200. r, n := utf8.DecodeLastRune(maxb)
  201. if r != utf8.RuneError {
  202. if r < utf8.MaxRune {
  203. r++
  204. if r > 0x7f {
  205. b := make([]byte, 4)
  206. nn := utf8.EncodeRune(b, r)
  207. maxb = append(maxb[:len(maxb)-n], b[:nn]...)
  208. } else {
  209. maxb = append(maxb[:len(maxb)-n], byte(r))
  210. }
  211. }
  212. }
  213. }
  214. return yu_fast.B2S(minb), yu_fast.B2S(maxb)
  215. }
  216. // IsPattern returns true if the string is a pattern.
  217. func IsPattern(str string) bool {
  218. for i := 0; i < len(str); i++ {
  219. if str[i] == '*' || str[i] == '?' {
  220. return true
  221. }
  222. }
  223. return false
  224. }