All files / src/lib/scan ignore.ts

89.85% Statements 62/69
82% Branches 41/50
100% Functions 6/6
89.85% Lines 62/69

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283                                                                                                                                                                                                430x         430x 430x 430x 430x         430x 430x           430x 395x   430x 345x 345x 285x           430x                         8x       8x 1x   7x 7x 7x 6x     6x 6x 6x 6x       6x         1x                                             31515x     1x       31514x 2x     31512x 31515x     31515x 31494x                 18x 18x 31515x             18x 18x 21x 21x 21x     18x 18x 18x 18x 2x 16x 4x       21x   18x               6x     6x     6x     6x 6x                   630x 630x 36x 35x     594x            
/**
 * IgnoreStack — per-directory `.gitignore` aggregation for the scanner.
 *
 * ### Why a stack of instances
 *
 * The `ignore` npm package implements one `.gitignore`-file's semantics
 * (last-matching rule wins inside that file, negations with `!`, etc.).
 * It does NOT know about nested `.gitignore` files.
 *
 * Real git treats nested `.gitignore` files cumulatively: parent rules
 * apply inside every subtree, and child `.gitignore` files can add new
 * rules that apply only in their subtree (including negations that
 * un-ignore something a parent had ignored). See `gitignore(5)`.
 *
 * To match that semantics we keep a `Map<relDir, Ignore>` of per-dir
 * `.gitignore` contents. `isIgnored(relPath)` iterates the ancestor
 * dirs root-first, asks each `Ignore` whether it ignores the file with
 * the path re-anchored to that dir. Because we consult parents first
 * and children last, a child file's `!negation` patterns naturally
 * override parent matches (the child's answer is the last one we see).
 *
 * ### Fast path for root-only stacks
 *
 * Most of the time, `nestedGitignore: true` callers don't actually
 * encounter any nested `.gitignore` files — the root `.gitignore` is
 * the only one loaded. `isIgnored` detects this via a fast-path check
 * (`#nestedByRelDir.size === 0`) and forwards directly to the root
 * `Ignore` instance, skipping all the path-splitting + ancestor-walking
 * machinery. This keeps per-query cost near the underlying library's
 * floor (~0.25µs) when the expensive nested path isn't needed.
 *
 * ### Built-in skip list
 *
 * `alwaysSkipDirs` is a list of directory basenames (e.g., `node_modules`,
 * `.git`) that must be skipped even when no `.gitignore` mentions them.
 * These are seeded as patterns in the root `Ignore` instance.
 *
 * ### `.git/info/exclude`
 *
 * When `includeGitInfoExclude: true`, the root instance also reads
 * `${cwd}/.git/info/exclude` if it exists. Matches ripgrep's behavior.
 */
 
import { readFile } from "node:fs/promises";
import path from "node:path";
import ignore, { type Ignore } from "ignore";
import { handleFileError } from "../dsn/fs-utils.js";
import type { IgnoreMatcher } from "./types.js";
 
/**
 * Options accepted by `IgnoreStack.create`.
 *
 * Only `cwd` and `alwaysSkipDirs` are required; the two boolean flags
 * control which on-disk ignore files are read into the root matcher.
 */
export type IgnoreStackOptions = {
  /**
   * Walker `cwd`. Absolute path. The root `.gitignore` and
   * `.git/info/exclude` are looked up relative to this directory.
   */
  cwd: string;
  /**
   * Directory basenames that must always be skipped. Seeded into the
   * root instance as bare gitignore patterns (basename-anywhere match).
   * Applied regardless of `respectGitignore`; an empty list adds nothing.
   */
  alwaysSkipDirs: readonly string[];
  /**
   * When false, `.gitignore` / `.git/info/exclude` files are NOT read.
   * Only `alwaysSkipDirs` patterns apply. Default: true (an omitted
   * value is treated as true).
   */
  respectGitignore?: boolean;
  /**
   * When true (and `respectGitignore` is also true), the root instance
   * is seeded with the contents of `${cwd}/.git/info/exclude` in
   * addition to the root `.gitignore`. Omitted or false: the exclude
   * file is not read.
   */
  includeGitInfoExclude?: boolean;
};
 
/**
 * Per-directory `.gitignore` aggregator.
 *
 * Holds one root-level `Ignore` matcher plus a map of matchers for
 * nested `.gitignore` files, keyed by POSIX-relative directory path.
 *
 * Construct with `await IgnoreStack.create(opts)` — initialization is
 * async because it reads the root `.gitignore` (and optionally
 * `.git/info/exclude`) up front.
 */
export class IgnoreStack implements IgnoreMatcher {
  /** Walker root — absolute. Used to translate absolute dir paths to relative keys. */
  readonly #cwd: string;
  /**
   * Root-level matcher. Holds `alwaysSkipDirs` patterns + root
   * `.gitignore` + `.git/info/exclude`. Always present.
   */
  readonly #rootIg: Ignore;
  /**
   * Nested `.gitignore` instances keyed by POSIX-relative dir path
   * (e.g., `"packages/foo"`, `"src/deep"`). Empty when no nested
   * gitignores are loaded — that's the fast-path case.
   *
   * Keys never include a leading or trailing `/`. The root is NOT in
   * this map — it lives in `#rootIg`.
   */
  readonly #nestedByRelDir = new Map<string, Ignore>();
  /** When false, `loadFromDir` is a no-op. */
  readonly #respectGitignore: boolean;

  private constructor(cwd: string, respectGitignore: boolean, rootIg: Ignore) {
    this.#cwd = cwd;
    this.#respectGitignore = respectGitignore;
    this.#rootIg = rootIg;
  }

  /**
   * Build an IgnoreStack and load its root-level patterns.
   *
   * @param opts - See `IgnoreStackOptions`. `respectGitignore` defaults
   *   to true when omitted.
   * @returns A stack whose root matcher is fully seeded; no nested
   *   `.gitignore` files are loaded yet.
   */
  static async create(opts: IgnoreStackOptions): Promise<IgnoreStack> {
    const respectGitignore = opts.respectGitignore ?? true;
    const root = ignore();
    // Seed always-skip directory names as basename-matching patterns.
    // The `ignore` package treats a bare name as basename-anywhere —
    // perfect for skipping any `node_modules` subtree in the walk.
    // These apply even when `respectGitignore: false` because they're
    // the walker's policy, not part of the user's gitignore.
    if (opts.alwaysSkipDirs.length > 0) {
      root.add([...opts.alwaysSkipDirs]);
    }
    if (respectGitignore) {
      await appendGitignoreFile(root, path.join(opts.cwd, ".gitignore"));
      if (opts.includeGitInfoExclude) {
        await appendGitignoreFile(
          root,
          path.join(opts.cwd, ".git", "info", "exclude")
        );
      }
    }
    return new IgnoreStack(opts.cwd, respectGitignore, root);
  }

  /**
   * Read `${absDir}/.gitignore` into a new `Ignore` instance scoped to
   * that dir. No-op when `respectGitignore: false` was set on
   * construction, when `absDir` is the walker root or lies outside it,
   * or when the file is empty / whitespace-only.
   *
   * Callers (the walker) invoke this on directory descent when
   * `nestedGitignore: true`. A later successful load for the same dir
   * replaces the earlier instance; a load that finds no usable content
   * leaves any previously registered instance untouched.
   *
   * @param absDir - Absolute path of the directory being entered.
   */
  async loadFromDir(absDir: string): Promise<void> {
    if (!this.#respectGitignore) {
      return;
    }
    // Resolve the map key before touching the disk:
    //   null → outside cwd, refuse to register it;
    //   ""   → cwd itself, already populated by `create()`.
    const relDir = this.#relDirFor(absDir);
    if (relDir === null || relDir === "") {
      return;
    }
    const gitignorePath = path.join(absDir, ".gitignore");
    try {
      const content = await readFile(gitignorePath, "utf-8");
      if (content.trim().length === 0) {
        return;
      }
      const ig = ignore();
      ig.add(content);
      this.#nestedByRelDir.set(relDir, ig);
    } catch (error) {
      // ENOENT is the expected case — most directories don't have a
      // `.gitignore`. Anything else (permission, I/O) goes through
      // `handleFileError` so genuine bugs still surface to Sentry.
      handleFileError(error, {
        operation: "scan.ignore.loadFromDir",
        path: gitignorePath,
      });
    }
  }

  /**
   * Fast-path-aware `isIgnored`.
   *
   * When no nested gitignores are loaded (the common case), query
   * `#rootIg` directly — bypasses all ancestor-walking and path
   * splicing, bringing per-query cost to the underlying `ignore`
   * package's floor.
   *
   * Otherwise: walk the ancestor prefix chain root→leaf, applying each
   * loaded `Ignore` in turn. Inside each instance, `ignore`'s
   * last-match-wins semantics handle intra-file negations; across
   * instances, a child `!foo` pattern flips an earlier `ignored=true`
   * back to `false` because we see later results last.
   *
   * @param relPath - Path relative to the walker root, `/`-separated.
   * @param isDirectory - When true, a trailing `/` is appended to the
   *   query so directory-only patterns can match.
   * @returns Whether the path is ignored. Always false for `""` / `"."`.
   * @throws Error when `relPath` is absolute.
   */
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: two-tier fast+slow path with negation handling is inherently branchy
  isIgnored(relPath: string, isDirectory: boolean): boolean {
    if (path.isAbsolute(relPath)) {
      // Programming error — a misuse that would silently produce wrong
      // results inside `ignore`. Throwing here flags it immediately.
      throw new Error(
        `IgnoreStack.isIgnored requires a relative path, got: ${relPath}`
      );
    }
    if (relPath === "" || relPath === ".") {
      return false;
    }

    const trailingSlash = isDirectory ? "/" : "";
    const query = `${relPath}${trailingSlash}`;

    // --- Tier 1: root-only fast path (most common) ---
    if (this.#nestedByRelDir.size === 0) {
      return this.#rootIg.ignores(query);
    }

    // --- Tier 2: walk loaded ancestor prefixes root→leaf ---
    //
    // Start with the root's verdict. Then, for each prefix of `relPath`
    // that is a loaded nested dir, re-query with the path reanchored
    // to that dir. A match flips `ignored`; an `unignored` (negation)
    // result flips it back to false.
    const rootResult = this.#rootIg.test(query);
    let ignored = rootResult.ignored && !rootResult.unignored;

    // Walk `a/b/c/file.ts` → prefixes "a", "a/b", "a/b/c".
    // We skip the final segment (the file itself — a file can't own
    // a `.gitignore`).
    let prefixEnd = relPath.indexOf("/");
    while (prefixEnd !== -1) {
      const prefix = relPath.slice(0, prefixEnd);
      const ig = this.#nestedByRelDir.get(prefix);
      if (ig !== undefined) {
        // Rebase the query under this dir: path inside the nested scope
        // is the suffix past `prefix/`.
        const suffix = `${relPath.slice(prefixEnd + 1)}${trailingSlash}`;
        // Empty only when `relPath` itself ends in `/`; there is then
        // nothing left to match inside this scope.
        if (suffix.length > 0) {
          const result = ig.test(suffix);
          if (result.unignored) {
            ignored = false;
          } else if (result.ignored) {
            ignored = true;
          }
        }
      }
      prefixEnd = relPath.indexOf("/", prefixEnd + 1);
    }
    return ignored;
  }

  /**
   * Convert an absolute directory path to its POSIX-relative form
   * under `cwd`.
   *
   * Uses `path.relative`, which compares the two paths lexically after
   * normalizing them, so a trailing separator on either side (or a
   * `cwd` that is the filesystem root) does not break the containment
   * check.
   *
   * @returns `""` when `absDir` is `cwd` itself, the `/`-separated
   *   relative path when it is a descendant, and null otherwise.
   */
  #relDirFor(absDir: string): string | null {
    const rel = path.relative(this.#cwd, absDir);
    if (rel === "") {
      return "";
    }
    // A leading `..` segment means we climbed out of cwd; an absolute
    // result means the two paths share no common root at all.
    if (
      rel === ".." ||
      rel.startsWith(`..${path.sep}`) ||
      path.isAbsolute(rel)
    ) {
      return null;
    }
    return path.sep === path.posix.sep ? rel : rel.split(path.sep).join("/");
  }
}
 
/**
 * Feed the text of a gitignore-style file into an existing matcher.
 *
 * The file is read as UTF-8. A zero-length file adds nothing. Any
 * error raised while reading or adding is handed to `handleFileError`
 * along with the operation name and the offending path.
 * NOTE(review): `handleFileError` is defined in `../dsn/fs-utils.js`
 * and presumably tolerates ENOENT (a missing file is the common
 * case) — confirm there.
 *
 * @param ig - Matcher that receives the file's patterns.
 * @param absPath - Absolute path of the file to read.
 */
async function appendGitignoreFile(ig: Ignore, absPath: string): Promise<void> {
  try {
    const text = await readFile(absPath, { encoding: "utf-8" });
    if (text.length === 0) {
      // Nothing to add for an empty file.
      return;
    }
    ig.add(text);
  } catch (cause) {
    const context = {
      operation: "scan.ignore.appendGitignoreFile",
      path: absPath,
    };
    handleFileError(cause, context);
  }
}