/**
* Minimal regex translation for user-supplied grep patterns.
*
* The init wizard's Mastra server sends regex sources written for
* ripgrep (Rust regex syntax). JS `RegExp` covers almost everything
* rg's default mode supports, with one real gap: **inline flag groups**
* like `(?i)foo`. JS requires flags at RegExp construction time; it
* can't flip them mid-pattern.
*
* This module bridges that gap by recognizing a leading `(?[imsU]+)`
* or `(?[imsU]+:…)` and translating it to JS flags. Everything else
* is passed to `new RegExp` unchanged — if it's not valid JS regex,
* `ValidationError` is thrown with the engine's error message.
*
* ### Scope
*
* - Leading-only. `foo(?i)bar` (mid-pattern flag) stays as-is, which
* will typically fail to compile under JS and raise ValidationError.
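* - Flag mapping: `i` → `i`, `m` → `m`, `s` → `s`, `U` → `s` (this
* module treats `U` as multiline-dotall and models it with JS's `/s`
* flag). NOTE(review): in Rust regex syntax the inline `U` flag swaps
* greedy and lazy quantifiers rather than enabling dotall — confirm
* that this mapping is what the pattern producer expects.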
* - The scoped form `(?i:foo)bar` is translated as
* `{ cleaned: "foobar", flags: "i" }` — we widen the flag to the
* whole pattern because JS can't scope flags to a group. This is a
* documented limitation.
*/
import { ValidationError } from "../errors.js";
/**
 * Matches a leading inline-flag group at position 0 of a regex source.
 * Group 1 captures the flag letters (one or more of `i`, `m`, `s`, `U`).
 * Group 2 captures the terminator: `:` for the scoped `(?i:...)` form,
 * `)` for the bare `(?i)` form.
 *
 * Flag-negation groups (e.g. rg's `(?-i)`) are not recognized: the `-`
 * is outside `[imsU]`, so such sources do not match here and are left
 * untranslated for the RegExp constructor to accept or reject.
 */
const INLINE_FLAG_RE = /^\(\?([imsU]+)(:|\))/;
/**
 * Allow-list of JS flag letters, in alphabetical order. `translateFlags`
 * filters its output against this string. Note that `u` is listed here
 * but `translateFlags` only ever produces `i`, `m` and `s`.
 */
const VALID_JS_FLAGS = "imsu";
/**
 * Split a leading rg-style inline-flag group off the front of `source`.
 *
 * Three outcomes:
 *
 * - No leading flag group: `source` is returned untouched with empty
 *   flags; the rest of the pattern is never inspected.
 * - Bare form `(?i)rest`: the group is dropped and `rest` is returned
 *   together with the translated flags.
 * - Scoped form `(?i:body)rest`: the group is unwrapped by
 *   `unwrapScopedGroup` (which locates the matching `)` while tracking
 *   nesting, escapes and character classes) and the flags are applied
 *   to the whole result. If no matching `)` exists, `source` is
 *   returned untouched with empty flags so that downstream compilation
 *   can report the malformed pattern.
 *
 * @param source - Regex source as written for ripgrep.
 * @returns `cleaned` (the pattern without the flag group) and `flags`
 *   (JS flag letters). Callers merge `flags` with their own options
 *   before handing both to `new RegExp`.
 */
export function extractInlineFlags(source: string): {
  cleaned: string;
  flags: string;
} {
  const hit = INLINE_FLAG_RE.exec(source);
  if (hit === null) {
    return { cleaned: source, flags: "" };
  }

  const prefixLength = hit[0].length;
  const flags = translateFlags(hit[1] as string);
  const isScoped = (hit[2] as ":" | ")") === ":";

  if (!isScoped) {
    // Bare `(?i)` prefix: everything after it is the pattern.
    return { cleaned: source.slice(prefixLength), flags };
  }

  // Scoped `(?i:...)` prefix: the body starts right after the colon.
  const unwrapped = unwrapScopedGroup(source, prefixLength);
  return unwrapped === null
    ? { cleaned: source, flags: "" }
    : { cleaned: unwrapped, flags };
}
/** Char codes of the regex metacharacters the scanner below inspects. */
const CHAR_BACKSLASH = "\\".charCodeAt(0);
const CHAR_OPEN_PAREN = "(".charCodeAt(0);
const CHAR_CLOSE_PAREN = ")".charCodeAt(0);
const CHAR_OPEN_BRACKET = "[".charCodeAt(0);
const CHAR_CLOSE_BRACKET = "]".charCodeAt(0);
const CHAR_PIPE = "|".charCodeAt(0);
/** Characters that begin a quantifier when they directly follow a group. */
const QUANTIFIER_STARTS = new Set(["*", "+", "?", "{"]);
/**
 * Unwrap a scoped flag group: `(?i:foo)bar` → `foobar`.
 *
 * `openIndex` points one past the `:` (the first character of the group
 * body). The scanner walks forward looking for the `)` that closes the
 * group, tracking three pieces of state:
 *
 * - escape: a backslash consumes the following character as well;
 * - character class: between `[` and `]`, parentheses and `|` are
 *   ignored;
 * - paren depth: nested groups must close before the outer one does.
 *
 * Simply deleting the group delimiters would change the pattern's
 * meaning in two situations, so in those the body is re-wrapped in a
 * non-capturing group instead:
 *
 * - the body has an alternation at its own nesting level
 *   (`(?i:foo|bar)baz` → `(?:foo|bar)baz`, not `foo|barbaz`);
 * - the group is directly followed by a quantifier
 *   (`(?i:ab)+` → `(?:ab)+`, not `ab+`).
 *
 * @param source - Full regex source, including the `(?flags:` prefix.
 * @param openIndex - Index of the first character of the group body.
 * @returns The source with the scoped group unwrapped, or `null` when
 *   the end of `source` is reached without finding the matching `)`
 *   (this includes a trailing lone backslash and an unterminated
 *   character class).
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: regex tokenizer is inherently branchy
function unwrapScopedGroup(source: string, openIndex: number): string | null {
  let depth = 1;
  let i = openIndex;
  let inClass = false;
  let hasTopLevelAlternation = false;
  while (i < source.length) {
    const ch = source.charCodeAt(i);
    // Backslash escapes the next char regardless of context.
    if (ch === CHAR_BACKSLASH) {
      i += 2;
      continue;
    }
    if (inClass) {
      if (ch === CHAR_CLOSE_BRACKET) {
        inClass = false;
      }
      i += 1;
      continue;
    }
    if (ch === CHAR_OPEN_BRACKET) {
      inClass = true;
    } else if (ch === CHAR_OPEN_PAREN) {
      depth += 1;
    } else if (ch === CHAR_PIPE) {
      // Only an alternation directly inside the scoped group (not inside
      // a nested group) leaks out when the delimiters are removed.
      if (depth === 1) {
        hasTopLevelAlternation = true;
      }
    } else if (ch === CHAR_CLOSE_PAREN) {
      depth -= 1;
      if (depth === 0) {
        const body = source.slice(openIndex, i);
        const tail = source.slice(i + 1);
        const needsGroup =
          hasTopLevelAlternation || QUANTIFIER_STARTS.has(tail.charAt(0));
        return needsGroup ? `(?:${body})${tail}` : body + tail;
      }
    }
    i += 1;
  }
  return null;
}
/**
 * Translate rg-style inline flag letters to JS RegExp flag letters.
 *
 * Mapping: `i`, `m` and `s` pass through unchanged; `U` becomes `s`.
 * Any other letter is dropped silently (the guard regex already
 * restricts the input to `[imsU]+`, so this is a safety net only).
 * Duplicates collapse to a single letter.
 *
 * NOTE(review): in Rust regex syntax the inline `U` flag swaps greedy
 * and lazy quantifiers; it is the `s` flag that makes `.` match `\n`.
 * Confirm that mapping `U` to `s` is intended.
 *
 * @param raw - Flag letters captured from the inline group.
 * @returns The translated letters, de-duplicated and sorted.
 */
function translateFlags(raw: string): string {
  const seen = new Set<string>();
  for (const letter of raw) {
    if (letter === "U") {
      // Modeled with JS /s — see the review note in the doc comment.
      seen.add("s");
    } else if (letter === "i" || letter === "m" || letter === "s") {
      seen.add(letter);
    }
  }
  // Deterministic, RegExp-accepted order.
  return [...seen]
    .filter((f) => VALID_JS_FLAGS.includes(f))
    .sort()
    .join("");
}
/**
 * Options for `compilePattern`. Every field is optional; omitting both
 * leaves the flag set exactly as derived from the pattern's inline flags.
 */
export type CompilePatternOptions = {
/**
 * Case sensitivity of the match. Only an explicit `false` has an
 * effect: it forces the `i` flag regardless of inline flags.
 * Default: true (case-sensitive, matching `rg`'s default), in which
 * case `i` is set only if the pattern's own inline flags request it.
 */
caseSensitive?: boolean;
/** When truthy, forces the `m` flag. Default: false. */
multiline?: boolean;
};
/**
 * Turn a user-supplied grep pattern into a JS `RegExp`.
 *
 * A `RegExp` argument is handed back as-is and `opts` is ignored for
 * it; callers wanting different flags on an existing regex have to
 * rebuild it themselves.
 *
 * A string argument is first run through `extractInlineFlags`. The
 * final flag set is the union of the inline flags, `i` when
 * `opts.caseSensitive` is explicitly `false`, and `m` when
 * `opts.multiline` is truthy. The `g` flag is never added here, so the
 * result carries no `lastIndex` state between line-by-line tests.
 *
 * @param pattern - Regex source in rg syntax, or a ready-made RegExp.
 * @param opts - Optional flag overrides.
 * @returns The compiled regular expression.
 * @throws ValidationError when the RegExp constructor rejects the
 *   pattern; the engine's message is embedded in the error text.
 */
export function compilePattern(
  pattern: string | RegExp,
  opts: CompilePatternOptions = {}
): RegExp {
  if (pattern instanceof RegExp) {
    return pattern;
  }

  const extracted = extractInlineFlags(pattern);

  // Collect flag letters in a set so forced flags never duplicate
  // letters that the inline group already supplied.
  const letters = new Set<string>(extracted.flags);
  if (opts.caseSensitive === false) {
    letters.add("i");
  }
  if (opts.multiline) {
    letters.add("m");
  }
  const flagString = Array.from(letters).sort().join("");

  let compiled: RegExp;
  try {
    compiled = new RegExp(extracted.cleaned, flagString);
  } catch (error) {
    throw new ValidationError(
      `Invalid grep pattern: ${(error as Error).message}`,
      "pattern"
    );
  }
  return compiled;
}
/**
 * Guarantee that a regex carries the `g` flag.
 *
 * Whole-buffer iteration (`content.matchAll(regex)`, or `regex.exec`
 * with manual `lastIndex` handling) requires `/g`, while
 * `compilePattern` never sets it, so callers pass the compiled regex
 * through this helper before iterating over a buffer.
 *
 * @param regex - The regex to check.
 * @returns `regex` itself when it is already global; otherwise a new
 *   RegExp with the same source and flags plus `g`. The input is never
 *   mutated.
 */
export function ensureGlobalFlag(regex: RegExp): RegExp {
  return regex.global ? regex : new RegExp(regex.source, `${regex.flags}g`);
}
/**
 * Guarantee that a regex carries the `m` (multiline) flag, so that `^`
 * and `$` match at every line boundary inside a multi-line buffer.
 *
 * Background: when content is split on `\n` and each line is tested on
 * its own, `^` matches the start of every line simply because every
 * line is a separate string. When the whole buffer is scanned at once,
 * a pattern such as `^foo` needs `m` to behave the same way; without
 * it `^` anchors only to the start of the buffer.
 *
 * @param regex - The regex to check.
 * @returns `regex` itself when it is already multiline; otherwise a
 *   new RegExp with the same source and flags plus `m`.
 */
export function ensureMultilineFlag(regex: RegExp): RegExp {
  return regex.multiline
    ? regex
    : new RegExp(regex.source, `${regex.flags}m`);
}
/**
 * Guarantee both the `g` and the `m` flag in a single step.
 *
 * Equivalent to applying `ensureGlobalFlag` and `ensureMultilineFlag`
 * in sequence, but builds at most one new RegExp instead of a
 * throwaway intermediate.
 *
 * @param regex - The regex to check.
 * @returns `regex` itself when it already has both flags; otherwise a
 *   new RegExp with the same source and the missing flag(s) appended.
 */
export function ensureGlobalMultilineFlags(regex: RegExp): RegExp {
  // Build the suffix of flags that still have to be added.
  const missing = `${regex.global ? "" : "g"}${regex.multiline ? "" : "m"}`;
  return missing === ""
    ? regex
    : new RegExp(regex.source, regex.flags + missing);
}
|