diff -Nru ripgrep-0.6.0/appveyor.yml ripgrep-0.10.0.3/appveyor.yml --- ripgrep-0.6.0/appveyor.yml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/appveyor.yml 2018-09-10 21:10:55.000000000 +0000 @@ -1,16 +1,39 @@ +cache: + - c:\cargo\registry + - c:\cargo\git + +init: + - mkdir c:\cargo + - mkdir c:\rustup + - SET PATH=c:\cargo\bin;%PATH% + +clone_folder: c:\projects\ripgrep + environment: + CARGO_HOME: "c:\\cargo" + RUSTUP_HOME: "c:\\rustup" + CARGO_TARGET_DIR: "c:\\projects\\ripgrep\\target" global: PROJECT_NAME: ripgrep RUST_BACKTRACE: full matrix: - - TARGET: i686-pc-windows-gnu - CHANNEL: stable - - TARGET: i686-pc-windows-msvc - CHANNEL: stable - TARGET: x86_64-pc-windows-gnu CHANNEL: stable + BITS: 64 + MSYS2: 1 - TARGET: x86_64-pc-windows-msvc CHANNEL: stable + BITS: 64 + - TARGET: i686-pc-windows-gnu + CHANNEL: stable + BITS: 32 + MSYS2: 1 + - TARGET: i686-pc-windows-msvc + CHANNEL: stable + BITS: 32 + +matrix: + fast_finish: true # Install Rust and Cargo # (Based on from https://github.com/rust-lang/libc/blob/master/appveyor.yml) @@ -18,32 +41,25 @@ - curl -sSf -o rustup-init.exe https://win.rustup.rs/ - rustup-init.exe -y --default-host %TARGET% - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - - if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin + - if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH% - rustc -V - cargo -V -# ??? +# Hack to work around a harmless warning in Appveyor builds? 
build: false # Equivalent to Travis' `script` phase -# TODO modify this phase as you see fit test_script: - - cargo test --verbose - - cargo test --verbose --manifest-path grep/Cargo.toml - - cargo test --verbose --manifest-path globset/Cargo.toml - - cargo test --verbose --manifest-path ignore/Cargo.toml - - cargo test --verbose --manifest-path wincolor/Cargo.toml - - cargo test --verbose --manifest-path termcolor/Cargo.toml + - cargo test --verbose --all --features pcre2 before_deploy: # Generate artifacts for release - # TODO(burntsushi): How can we enable SSSE3 on Windows? - - cargo build --release + - cargo build --release --features pcre2 - mkdir staging - copy target\release\rg.exe staging - ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging - cd staging - # release zipfile will look like 'rust-everywhere-v1.2.3-x86_64-pc-windows-msvc' + # release zipfile will look like 'ripgrep-1.2.3-x86_64-pc-windows-msvc' - 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip * - appveyor PushArtifact ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip @@ -56,9 +72,6 @@ provider: GitHub # deploy when a new tag is pushed and only on the stable channel on: - # channel to use to produce the release artifacts - # NOTE make sure you only release *once* per target - # TODO you may want to pick a different channel CHANNEL: stable appveyor_repo_tag: true @@ -66,7 +79,3 @@ only: - /\d+\.\d+\.\d+/ - master - # - appveyor - # - /\d+\.\d+\.\d+/ - # except: - # - master diff -Nru ripgrep-0.6.0/benchsuite/runs/2018-01-08-archlinux-cheetah/raw.csv ripgrep-0.10.0.3/benchsuite/runs/2018-01-08-archlinux-cheetah/raw.csv --- ripgrep-0.6.0/benchsuite/runs/2018-01-08-archlinux-cheetah/raw.csv 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/benchsuite/runs/2018-01-08-archlinux-cheetah/raw.csv 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,806 @@ +benchmark,warmup_iter,iter,name,command,duration,lines,env +linux_alternates,1,5,rg (ignore),rg -n 
ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.10186767578125,68, +linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.10199356079101562,68, +linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09750819206237793,68, +linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09634733200073242,68, +linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.10117292404174805,68, +linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.49642109870910645,68, +linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.48993706703186035,68, +linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4837028980255127,68, +linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4773833751678467,68, +linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.558436393737793,68, +linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2605454921722412,68,LC_ALL=C +linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.26748204231262207,68,LC_ALL=C +linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.26719212532043457,68,LC_ALL=C +linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2719383239746094,68,LC_ALL=C +linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.26963257789611816,68,LC_ALL=C +linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.08797001838684082,68, +linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09073781967163086,68, 
+linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.0914468765258789,68, +linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09071612358093262,68, +linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.0914316177368164,68, +linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1372535228729248,68, +linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.13880419731140137,68, +linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.13315439224243164,68, +linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1367807388305664,68, +linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.13135552406311035,68, +linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12781810760498047,160, +linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.11988544464111328,160, +linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1205439567565918,160, +linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12867259979248047,160, +linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1215970516204834,160, +linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5444357395172119,160, +linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5511739253997803,160, +linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5382294654846191,160, +linux_alternates_casei,1,5,ag (ignore),ag -i 
ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5499558448791504,160, +linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.6376545429229736,160, +linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.9767155647277832,160,LC_ALL=C +linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.920574426651001,160,LC_ALL=C +linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.9352290630340576,160,LC_ALL=C +linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.8866012096405029,160,LC_ALL=C +linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.9189445972442627,160,LC_ALL=C +linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09351730346679688,160, +linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09393739700317383,160, +linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09986448287963867,160, +linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09596824645996094,160, +linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09604883193969727,160, +linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.23943114280700684,160, +linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2587015628814697,160, +linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2543606758117676,160, +linux_alternates_casei,1,5,ucg (whitelist),ucg -i 
ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2490406036376953,160, +linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.24046540260314941,160, +linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08253765106201172,16, +linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08176755905151367,16, +linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08141684532165527,16, +linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08108830451965332,16, +linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08082938194274902,16, +linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.6870582103729248,16, +linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.807842493057251,16, +linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.8129942417144775,16, +linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.7582321166992188,16, +linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.6869800090789795,16, +linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.6534101963043213,16, +linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.6020612716674805,16, +linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.6712157726287842,16, +linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.6267571449279785,16, +linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.505136251449585,16, +linux_literal,1,5,pt (ignore),pt PM_RESUME,0.21415948867797852,16, +linux_literal,1,5,pt (ignore),pt PM_RESUME,0.19318318367004395,16, +linux_literal,1,5,pt (ignore),pt PM_RESUME,0.21352124214172363,16, +linux_literal,1,5,pt (ignore),pt PM_RESUME,0.18979454040527344,16, +linux_literal,1,5,pt (ignore),pt PM_RESUME,0.16629600524902344,16, +linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.46967077255249023,16, +linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.46343088150024414,16, +linux_literal,1,5,sift (ignore),sift 
--binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.4723978042602539,16, +linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.4741063117980957,16, +linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.4613051414489746,16, +linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.20196986198425293,16,LC_ALL=C +linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.18932533264160156,16,LC_ALL=C +linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.19396305084228516,16,LC_ALL=C +linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.1952073574066162,16,LC_ALL=C +linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.20149731636047363,16,LC_ALL=C +linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.08270478248596191,16, +linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.08414745330810547,16, +linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.08627724647521973,16, +linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.08978700637817383,16, +linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.0836489200592041,16, +linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.15774202346801758,16, +linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.16005396842956543,16, +linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.15743708610534668,16, +linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.16156601905822754,16, +linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.1557624340057373,16, +linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.1028127670288086,374, +linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.10258054733276367,374, +linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.10902261734008789,374, 
+linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.10802555084228516,374, +linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.10153412818908691,374, +linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.7902817726135254,374, +linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.7985179424285889,374, +linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.8208649158477783,374, +linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.7937076091766357,374, +linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.7936429977416992,374, +linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.5215470790863037,374, +linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.46518707275390625,374, +linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.4467353820800781,374, +linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.4595184326171875,374, +linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.4531285762786865,374, +linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.187762022018433,374, +linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.178058385848999,374, +linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.096448421478271,374, +linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.190524339675903,374, +linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.231573343276978,374, +linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.4668574333190918,374, +linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.46050214767456055,374, +linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.46228861808776855,374, +linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git 
PM_RESUME,0.44957947731018066,374, +linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.4612581729888916,374, +linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.1932981014251709,370,LC_ALL=C +linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.20561552047729492,370,LC_ALL=C +linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.19516706466674805,370,LC_ALL=C +linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.20196247100830078,370,LC_ALL=C +linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.19236421585083008,370,LC_ALL=C +linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.09555959701538086,370, +linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.09589338302612305,370, +linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.09479856491088867,370, +linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.09741568565368652,370, +linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.10127615928649902,370, +linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.15514039993286133,370, +linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.15668940544128418,370, +linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.15429425239562988,370, +linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.15332818031311035,370, +linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.14861536026000977,370, +linux_literal_default,1,5,rg,rg PM_RESUME,0.08931398391723633,16, +linux_literal_default,1,5,rg,rg PM_RESUME,0.08717465400695801,16, +linux_literal_default,1,5,rg,rg PM_RESUME,0.0879361629486084,16, +linux_literal_default,1,5,rg,rg PM_RESUME,0.08688950538635254,16, +linux_literal_default,1,5,rg,rg PM_RESUME,0.09138607978820801,16, +linux_literal_default,1,5,ag,ag 
PM_RESUME,0.5342838764190674,16, +linux_literal_default,1,5,ag,ag PM_RESUME,0.47187042236328125,16, +linux_literal_default,1,5,ag,ag PM_RESUME,0.4456596374511719,16, +linux_literal_default,1,5,ag,ag PM_RESUME,0.4507424831390381,16, +linux_literal_default,1,5,ag,ag PM_RESUME,0.44472575187683105,16, +linux_literal_default,1,5,ucg,ucg PM_RESUME,0.15556907653808594,16, +linux_literal_default,1,5,ucg,ucg PM_RESUME,0.1533644199371338,16, +linux_literal_default,1,5,ucg,ucg PM_RESUME,0.15392351150512695,16, +linux_literal_default,1,5,ucg,ucg PM_RESUME,0.1535196304321289,16, +linux_literal_default,1,5,ucg,ucg PM_RESUME,0.15589547157287598,16, +linux_literal_default,1,5,pt,pt PM_RESUME,0.2261514663696289,16, +linux_literal_default,1,5,pt,pt PM_RESUME,0.2731902599334717,16, +linux_literal_default,1,5,pt,pt PM_RESUME,0.2563004493713379,16, +linux_literal_default,1,5,pt,pt PM_RESUME,0.2575085163116455,16, +linux_literal_default,1,5,pt,pt PM_RESUME,0.1724245548248291,16, +linux_literal_default,1,5,sift,sift PM_RESUME,0.13233542442321777,16, +linux_literal_default,1,5,sift,sift PM_RESUME,0.1256580352783203,16, +linux_literal_default,1,5,sift,sift PM_RESUME,0.12435102462768555,16, +linux_literal_default,1,5,sift,sift PM_RESUME,0.1259307861328125,16, +linux_literal_default,1,5,sift,sift PM_RESUME,0.12412142753601074,16, +linux_literal_default,1,5,git grep,git grep PM_RESUME,0.1742086410522461,16,LC_ALL=en_US.UTF-8 +linux_literal_default,1,5,git grep,git grep PM_RESUME,0.16890597343444824,16,LC_ALL=en_US.UTF-8 +linux_literal_default,1,5,git grep,git grep PM_RESUME,0.16680669784545898,16,LC_ALL=en_US.UTF-8 +linux_literal_default,1,5,git grep,git grep PM_RESUME,0.16899871826171875,16,LC_ALL=en_US.UTF-8 +linux_literal_default,1,5,git grep,git grep PM_RESUME,0.19794917106628418,16,LC_ALL=en_US.UTF-8 +linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.33940672874450684,490, +linux_no_literal,1,5,rg (ignore),rg -n 
\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.3274960517883301,490, +linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.32681775093078613,490, +linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.32865071296691895,490, +linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.3240926265716553,490, +linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.17426586151123047,490, +linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.17265701293945312,490, +linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.1703634262084961,490, +linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.17192435264587402,490, +linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.1704559326171875,490, +linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.8443403244018555,766, +linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.6956703662872314,766, +linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.6938261985778809,766, +linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.695967435836792,766, +linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.6945271492004395,766, +linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.645716428756714,490, +linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.441533088684082,490, +linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.472522735595703,490, +linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.42497444152832,490, +linux_no_literal,1,5,pt (ignore) (ASCII),pt -e 
\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.407486200332642,490, +linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.091489553451538,490, +linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.049214124679565,490, +linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.879419803619385,490, +linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.07261848449707,490, +linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.918747901916504,490, +linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.334321975708008,490,LC_ALL=en_US.UTF-8 +linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.993232727050781,490,LC_ALL=en_US.UTF-8 +linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.622304916381836,490,LC_ALL=en_US.UTF-8 +linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.35973048210144,490,LC_ALL=en_US.UTF-8 +linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.39980435371399,490,LC_ALL=en_US.UTF-8 +linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.0318400859832764,490,LC_ALL=C +linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.8587837219238281,490,LC_ALL=C +linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n 
\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.873384714126587,490,LC_ALL=C +linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.8111364841461182,490,LC_ALL=C +linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.8385357856750488,490,LC_ALL=C +linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28792643547058105,458, +linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28545212745666504,458, +linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28576135635375977,458, +linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.29883813858032227,458, +linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28493285179138184,458, +linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.15974783897399902,458, +linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.15943312644958496,458, +linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.160233736038208,458, +linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.16201996803283691,458, +linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.16033530235290527,458, +linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.4639148712158203,416, +linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.46042823791503906,416, +linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case 
\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.45925426483154297,416, +linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.477064847946167,416, +linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.507554292678833,416, +linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.08520364761352539,1652, +linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.08203816413879395,1652, +linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.08355021476745605,1652, +linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.0865166187286377,1652, +linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.08125448226928711,1652, +linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.4846627712249756,1652, +linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.48070311546325684,1652, +linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.4813041687011719,1652, +linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.4755582809448242,1652, +linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.4926290512084961,1652, +linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.124520540237427,1652, +linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.151537656784058,1652, +linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.157994270324707,1652, +linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.102291822433472,1652, +linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.103861093521118,1652, +linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,4.182392835617065,1652, +linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,4.190829277038574,1652, +linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* 
--exclude-files *.pdf -n --git [A-Z]+_RESUME,3.9770240783691406,1652, +linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,3.9978606700897217,1652, +linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,4.146454572677612,1652, +linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5080702304840088,1652,LC_ALL=C +linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5281260013580322,1652,LC_ALL=C +linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5350546836853027,1652,LC_ALL=C +linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5474245548248291,1652,LC_ALL=C +linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5256762504577637,1652,LC_ALL=C +linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.07924222946166992,1630, +linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.0767812728881836,1630, +linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.07874488830566406,1630, +linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.0804905891418457,1630, +linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.07479119300842285,1630, +linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13643193244934082,1630, +linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13543128967285156,1630, +linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13312768936157227,1630, +linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13562273979187012,1630, +linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13236212730407715,1630, 
+linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.17355775833129883,23, +linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.1676032543182373,23, +linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.1727275848388672,23, +linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.17095375061035156,23, +linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.17271947860717773,23, +linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.14364218711853,23, +linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.137334108352661,23, +linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.083475351333618,23, +linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.095231056213379,23, +linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.151906490325928,23, +linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.8376963138580322,23, +linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.8271427154541016,23, +linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.8310961723327637,23, +linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.826141595840454,23, +linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.805818796157837,23, +linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.16843819618225098,103, +linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.1704998016357422,103, +linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.17055058479309082,103, +linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.17064881324768066,103, +linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.1699228286743164,103, +linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.164355993270874,23, +linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.099931478500366,23, +linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.155095338821411,23, +linux_unicode_greek_casei,1,5,pt,pt -i -e 
\p{Greek},14.109308004379272,23, +linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.072362422943115,23, +linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.003945589065551758,, +linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.004189729690551758,, +linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.0034589767456054688,, +linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.003614187240600586,, +linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.003975629806518555,, +linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.09798526763916016,186, +linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.09575009346008301,186, +linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.10181760787963867,186, +linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.09650158882141113,186, +linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.09717488288879395,186, +linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09417867660522461,174, +linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09903812408447266,174, +linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09407877922058105,174, +linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09681963920593262,174, +linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09762454032897949,174, +linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.5779609680175781,174, +linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.635645866394043,174, +linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.6109263896942139,174, +linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.6260912418365479,174, +linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.6823546886444092,174, 
+linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.178487062454224,174, +linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.190000057220459,174, +linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.16363000869751,174, +linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.160430431365967,174, +linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.2189621925354,174, +linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.17629337310791,174, +linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.051238059997559,174, +linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.323853015899658,174, +linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.085661172866821,174, +linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.036486625671387,174, +linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,4.620476961135864,186,LC_ALL=en_US.UTF-8 +linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,4.536192417144775,186,LC_ALL=en_US.UTF-8 +linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,4.510494232177734,186,LC_ALL=en_US.UTF-8 +linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,6.001620769500732,186,LC_ALL=en_US.UTF-8 +linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,4.602652311325073,186,LC_ALL=en_US.UTF-8 +linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.3785994052886963,174,LC_ALL=C +linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.4163663387298584,174,LC_ALL=C +linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.402677297592163,174,LC_ALL=C +linux_unicode_word,1,5,git grep (ignore) 
(ASCII),git grep -E -I -n \wAh,1.3327512741088867,174,LC_ALL=C +linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.3501760959625244,174,LC_ALL=C +linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.07958698272705078,180, +linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.0798649787902832,180, +linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.08086204528808594,180, +linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.0814356803894043,180, +linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.08273720741271973,180, +linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.08280825614929199,168, +linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.08074021339416504,168, +linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.0821676254272461,168, +linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.07926368713378906,168, +linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.08405280113220215,168, +linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.1545090675354004,168, +linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.1517190933227539,168, +linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.15704965591430664,168, +linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.15523767471313477,168, +linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.1582942008972168,168, +linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.09102368354797363,6, +linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.08986210823059082,6, +linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.08989477157592773,6, +linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.0895695686340332,6, +linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.09547114372253418,6, +linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.4948008060455322,6, 
+linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.45710110664367676,6, +linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.44803452491760254,6, +linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.44779396057128906,6, +linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.4563112258911133,6, +linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.233235597610474,6, +linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.277648687362671,6, +linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.218127727508545,6, +linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.171622037887573,6, +linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.214240312576294,6, +linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.1536731719970703,6, +linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.2415099143981934,6, +linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.2526626586914062,6, +linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.2590816020965576,6, +linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.222473621368408,6, +linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.16982412338256836,6,LC_ALL=C +linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.16739583015441895,6,LC_ALL=C +linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.16866540908813477,6,LC_ALL=C +linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.18207120895385742,6,LC_ALL=C +linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.17716264724731445,6,LC_ALL=C +linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.07490420341491699,6, +linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.07714152336120605,6, +linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall 
PM_RESUME,0.07552146911621094,6, +linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.07651710510253906,6, +linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.0757131576538086,6, +linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.1530015468597412,6, +linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.15152239799499512,6, +linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.1571195125579834,6, +linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.15993595123291016,6, +linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.15633797645568848,6, +subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.33371877670288086,848, +subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3207988739013672,848, +subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3301675319671631,848, +subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.29731154441833496,848, +subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2711911201477051,848, +subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.186570405960083,848, +subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor 
Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.1659939289093018,848, +subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.187847137451172,848, +subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.3522064685821533,848, +subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.316105842590332,848, +subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1400718688964844,848, +subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1492774486541748,848, +subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1337254047393799,848, +subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1037378311157227,848, +subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1312851905822754,848, +subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.8294000625610352,848,LC_ALL=C +subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.808884620666504,848,LC_ALL=C +subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.8134734630584717,848,LC_ALL=C +subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.8405649662017822,848,LC_ALL=C +subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.8500289916992188,848,LC_ALL=C +subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.21175312995910645,848, +subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2118232250213623,848, +subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.21287035942077637,848, +subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.21167230606079102,848, +subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.28102636337280273,848, +subtitles_en_alternate,1,5,grep,grep -E -a Sherlock 
Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5029187202453613,848,LC_ALL=C +subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.49977445602417,848,LC_ALL=C +subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.508340835571289,848,LC_ALL=C +subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5002548694610596,848,LC_ALL=C +subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.629526138305664,848,LC_ALL=C +subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.730497360229492,862, +subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.781018018722534,862, +subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.7858059406280518,862, +subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.7127914428710938,862, +subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.717308759689331,862, +subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.428208351135254,862, +subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.389420509338379,862, +subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.403301954269409,862, +subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.4691550731658936,862, +subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.4245004653930664,862, +subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.978189706802368,862,LC_ALL=C +subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.974303722381592,862,LC_ALL=C +subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.982886552810669,862,LC_ALL=C +subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.90018630027771,862,LC_ALL=C +subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.0078439712524414,862,LC_ALL=C +subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9129142761230469,862, +subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9066660404205322,862, +subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.946380615234375,862, +subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9672930240631104,862, +subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.028451919555664,862, +subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.9427030086517334,862,LC_ALL=en_US.UTF-8 +subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.938739061355591,862,LC_ALL=en_US.UTF-8 +subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.921248435974121,862,LC_ALL=en_US.UTF-8 +subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.9194068908691406,862,LC_ALL=en_US.UTF-8 +subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.917184829711914,862,LC_ALL=en_US.UTF-8 +subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.12293672561645508,629, +subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1259000301361084,629, +subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.12285709381103516,629, +subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.12280964851379395,629, +subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1547396183013916,629, +subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.22011375427246094,629, +subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.23095202445983887,629, +subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2577846050262451,629, +subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2563819885253906,629, +subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.24869346618652344,629, +subtitles_en_literal,1,5,pt,pt -N 
Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.415337324142456,629, +subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4208543300628662,629, +subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.416351079940796,629, +subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4270708560943604,629, +subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4243996143341064,629, +subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2245020866394043,629, +subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2382345199584961,629, +subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.23533034324645996,629, +subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2577829360961914,629, +subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2599349021911621,629, +subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4733700752258301,629,LC_ALL=C +subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4598572254180908,629,LC_ALL=C +subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5303301811218262,629,LC_ALL=C +subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4775106906890869,629,LC_ALL=C +subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4881136417388916,629,LC_ALL=C +subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.20051789283752441,629, +subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17326998710632324,629, +subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.20733428001403809,629, +subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.189713716506958,629, +subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17817258834838867,629, +subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5327835083007812,629, +subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5411181449890137,629, +subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.600783109664917,629, +subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5838911533355713,629, +subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.6051928997039795,629, +subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4090385437011719,629, +subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3816399574279785,629, +subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.38033008575439453,629, +subtitles_en_literal,1,5,ucg 
(lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3731727600097656,629, +subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.38796329498291016,629, +subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4102630615234375,629, +subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4137451648712158,629, +subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4649333953857422,629, +subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.430387258529663,629, +subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.541991949081421,629, +subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.6231405735015869,629, +subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5986526012420654,629, +subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5821917057037354,629, +subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.6045489311218262,629, +subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5986905097961426,629, +subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8278565406799316,629,LC_ALL=C +subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.777052640914917,629,LC_ALL=C +subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7619414329528809,629,LC_ALL=C +subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8248744010925293,629,LC_ALL=C +subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.824932336807251,629,LC_ALL=C +subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2718961238861084,642, +subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.27082157135009766,642, +subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.27086758613586426,642, +subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.274705171585083,642, +subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3337059020996094,642, +subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9112112522125244,642,LC_ALL=en_US.UTF-8 +subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.907888650894165,642,LC_ALL=en_US.UTF-8 +subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.912668228149414,642,LC_ALL=en_US.UTF-8 +subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9082865715026855,642,LC_ALL=en_US.UTF-8 +subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9177796840667725,642,LC_ALL=en_US.UTF-8 +subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.6020669937133789,642,LC_ALL=C +subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.568228006362915,642,LC_ALL=C +subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5648214817047119,642,LC_ALL=C +subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5568234920501709,642,LC_ALL=C +subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5588953495025635,642,LC_ALL=C +subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3486766815185547,642, +subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.34010815620422363,642, +subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.33849263191223145,642, +subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3917088508605957,642, +subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.39266490936279297,642, +subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5564041137695312,642, +subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5533506870269775,642, 
+subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.6205368041992188,642, +subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5530028343200684,642, +subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.6189889907836914,642, +subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3834850788116455,642, +subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.41916346549987793,642, +subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3895289897918701,642, +subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4278140068054199,642, +subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4013493061065674,642, +subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17953085899353027,629, +subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17679834365844727,629, +subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17448186874389648,629, +subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.21117281913757324,629, +subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1848156452178955,629, +subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5236153602600098,629, +subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.52512526512146,629, +subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5218794345855713,629, +subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5384306907653809,629, +subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5150353908538818,629, +subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3757903575897217,629, +subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3744041919708252,629, +subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.37261366844177246,629, +subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.40795230865478516,629, +subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3868849277496338,629, +subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8265349864959717,629,LC_ALL=C +subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8123743534088135,629,LC_ALL=C +subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock 
Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7669925689697266,629,LC_ALL=C +subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.766636848449707,629,LC_ALL=C +subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7665839195251465,629,LC_ALL=C +subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1879115104675293,629, +subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18082356452941895,629, +subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18497347831726074,629, +subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1769394874572754,629, +subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1917715072631836,629, +subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8192996978759766,629,LC_ALL=en_US.UTF-8 +subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8193323612213135,629,LC_ALL=en_US.UTF-8 +subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7837738990783691,629,LC_ALL=en_US.UTF-8 +subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7639024257659912,629,LC_ALL=en_US.UTF-8 +subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7634689807891846,629,LC_ALL=en_US.UTF-8 +subtitles_en_no_literal,1,5,rg,rg -n 
\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.7922985553741455,13, +subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.7885758876800537,13, +subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.802325963973999,13, +subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.792595386505127,13, +subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.7909605503082275,13, +subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5903098583221436,13, +subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5982813835144043,13, +subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5926671028137207,13, +subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5976767539978027,13, +subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.593153953552246,13, +subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.614634275436401,48, +subtitles_en_no_literal,1,5,ag (ASCII),ag -s 
\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.574857473373413,48, +subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.54079270362854,48, +subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.600660800933838,48, +subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.531627178192139,48, +subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.361133337020874,13, +subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.456786870956421,13, +subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.403071403503418,13, +subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.398236274719238,13, +subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.348573923110962,13, +subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.5057969093322754,13,LC_ALL=C +subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.4157862663269043,13,LC_ALL=C +subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.471182346343994,13,LC_ALL=C +subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.4590909481048584,13,LC_ALL=C +subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.3759689331054688,13,LC_ALL=C +subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18518710136413574,317, +subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18791556358337402,317, +subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18598675727844238,317, +subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18552684783935547,317, +subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.19262075424194336,317, +subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1321008205413818,317,LC_ALL=en_US.UTF-8 +subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0709969997406006,317,LC_ALL=en_US.UTF-8 +subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1117346286773682,317,LC_ALL=en_US.UTF-8 +subtitles_en_surrounding_words,1,5,grep,grep -E -an 
\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0880234241485596,317,LC_ALL=en_US.UTF-8 +subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0745558738708496,317,LC_ALL=en_US.UTF-8 +subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1827528476715088,317, +subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18874144554138184,317, +subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17983436584472656,317, +subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18831133842468262,317, +subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17810606956481934,317, +subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.5957207679748535,323, +subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.627211570739746,323, +subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.554431200027466,323, +subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.492656469345093,323, +subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.443558216094971,323, +subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.522758722305298,317, +subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.502918004989624,317, +subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.6503307819366455,317, +subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.58940052986145,317, +subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.569624423980713,317, +subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0672054290771484,317,LC_ALL=C +subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0729331970214844,317,LC_ALL=C +subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.052501916885376,317,LC_ALL=C +subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0711696147918701,317,LC_ALL=C +subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0863316059112549,317,LC_ALL=C +subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0312588214874268,691, +subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.063939094543457,691, 
+subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0000121593475342,691, +subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9842438697814941,691, +subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.95733642578125,691, +subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.7781903743743896,691, +subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.861164093017578,691, +subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.8268885612487793,691, +subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.8621268272399902,691, +subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.8216166496276855,691, +subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0069098472595215,691, +subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.025178909301758,691, +subtitles_ru_alternate,1,5,ucg 
(lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0631070137023926,691, +subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0902633666992188,691, +subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0272655487060547,691, +subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.510146617889404,691,LC_ALL=C +subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.541701793670654,691,LC_ALL=C +subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.506088733673096,691,LC_ALL=C +subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.51838755607605,691,LC_ALL=C +subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.486810684204102,691,LC_ALL=C +subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9679937362670898,691, +subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9942011833190918,691, +subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9233448505401611,691, +subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9294781684875488,691, +subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.8729774951934814,691, +subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.100147485733032,691,LC_ALL=C +subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.075790166854858,691,LC_ALL=C +subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.069685220718384,691,LC_ALL=C +subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.0526063442230225,691,LC_ALL=C +subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.129194498062134,691,LC_ALL=C +subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.7894201278686523,691, +subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.7878782749176025,691, +subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.796328544616699,691, +subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.8249149322509766,691, +subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.7949724197387695,691, +subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.075739622116089,691, +subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.013590097427368,691, +subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.012375593185425,691, +subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.023118495941162,691, +subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0641982555389404,691, +subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.467320442199707,691,LC_ALL=C +subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен 
Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.486851692199707,691,LC_ALL=C +subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.479818344116211,691,LC_ALL=C +subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.516186475753784,691,LC_ALL=C +subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.471773862838745,691,LC_ALL=C +subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.026185274124146,735, +subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.168465614318848,735, +subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.039950370788574,735, +subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.089850425720215,735, +subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.112446546554565,735, +subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.822641849517822,735,LC_ALL=en_US.UTF-8 
+subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.808355331420898,735,LC_ALL=en_US.UTF-8 +subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.80171275138855,735,LC_ALL=en_US.UTF-8 +subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.794351577758789,735,LC_ALL=en_US.UTF-8 +subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.844403266906738,735,LC_ALL=en_US.UTF-8 +subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.20681476593017578,583, +subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.190568208694458,583, +subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.18462657928466797,583, +subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.1873643398284912,583, +subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.20382428169250488,583, +subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.3085510730743408,583, +subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.318758487701416,583, +subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.3177149295806885,583, +subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.31236958503723145,583, +subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.31880998611450195,583, +subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.152938365936279,583, +subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.124867677688599,583, +subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.132290363311768,583, +subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.158328056335449,583, +subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.1022467613220215,583, +subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.807113409042358,583, +subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.8178558349609375,583, +subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.925220012664795,583, +subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.861236333847046,583, +subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.763278484344482,583, +subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.704503059387207,583,LC_ALL=C +subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6887199878692627,583,LC_ALL=C +subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7092702388763428,583,LC_ALL=C +subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6964359283447266,583,LC_ALL=C 
+subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6928379535675049,583,LC_ALL=C +subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2646975517272949,583, +subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.26806163787841797,583, +subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2700214385986328,583, +subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2669072151184082,583, +subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2656106948852539,583, +subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.9972407817840576,583, +subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.906053066253662,583, +subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.864766836166382,583, +subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7820546627044678,583, +subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7599871158599854,583, +subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.411653995513916,583, +subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.394604206085205,583, +subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.362853765487671,583, +subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.4795477390289307,583, +subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.4428844451904297,583, +subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.122563123703003,583, +subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.17008900642395,583, +subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.1965367794036865,583, +subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.152370929718018,583, +subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.106513738632202,583, +subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.408761978149414,583, +subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.423579454421997,583, +subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.2807464599609375,583, +subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.3771467208862305,583, +subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.378506422042847,583, +subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.121800422668457,583,LC_ALL=C +subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.1189923286437988,583,LC_ALL=C +subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0678138732910156,583,LC_ALL=C +subtitles_ru_literal,1,5,grep 
(lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0668041706085205,583,LC_ALL=C +subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0713574886322021,583,LC_ALL=C +subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9427816867828369,604, +subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0397350788116455,604, +subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9732518196105957,604, +subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9387776851654053,604, +subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9536802768707275,604, +subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.338641405105591,604,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.280565023422241,604,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.241750240325928,604,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.316105604171753,604,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.307560205459595,604,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7379302978515625,583,LC_ALL=C +subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7226619720458984,583,LC_ALL=C +subtitles_ru_literal_casei,1,5,grep 
(ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.683293342590332,583,LC_ALL=C +subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.714146614074707,583,LC_ALL=C +subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7654330730438232,583,LC_ALL=C +subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0237820148468018,604, +subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0194151401519775,604, +subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0364336967468262,604, +subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.035005807876587,604, +subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0438766479492188,604, +subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.619025468826294,, +subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.647244930267334,, +subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6785612106323242,, +subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6503715515136719,, +subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6314499378204346,, +subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.8302316665649414,583, 
+subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7719593048095703,583, +subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7697594165802002,583, +subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7312629222869873,583, +subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.767866849899292,583, +subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.19411826133728027,, +subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.18651676177978516,, +subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.19614577293395996,, +subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.18459081649780273,, +subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.1797487735748291,, +subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6507105827331543,, +subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6480035781860352,, +subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7138750553131104,, +subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6521759033203125,, +subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс 
/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6728894710540771,, +subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.3646819591522217,583, +subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.3836848735809326,583, +subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.419490337371826,583, +subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.363335609436035,583, +subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.488351345062256,583, +subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.171506643295288,583,LC_ALL=C +subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.1602776050567627,583,LC_ALL=C +subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.084787368774414,583,LC_ALL=C +subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0714166164398193,583,LC_ALL=C +subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.083632469177246,583,LC_ALL=C +subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2769143581390381,579, +subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2694058418273926,579, +subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.26763367652893066,579, +subtitles_ru_literal_word,1,5,rg,rg -nw 
Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2671318054199219,579, +subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2922348976135254,579, +subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.083528757095337,579,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0857081413269043,579,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.07025146484375,579,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.071930170059204,579,LC_ALL=en_US.UTF-8 +subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0709245204925537,579,LC_ALL=en_US.UTF-8 +subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.1552906036376953,41, +subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.164951801300049,41, +subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.175389289855957,41, +subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.1861774921417236,41, +subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.153625011444092,41, +subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7353317737579346,, 
+subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7592883110046387,, +subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7242491245269775,, +subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.747089385986328,, +subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.732586145401001,, +subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0796375274658203,, +subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9670393466949463,, +subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9413447380065918,, +subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.916764497756958,, +subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9110031127929688,, +subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0622072219848633,, +subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0975682735443115,, +subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case 
\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0741493701934814,, +subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0423810482025146,, +subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.000764846801758,, +subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.6251120567321777,,LC_ALL=C +subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.644089698791504,,LC_ALL=C +subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.6416165828704834,,LC_ALL=C +subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.6321892738342285,,LC_ALL=C +subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.6264762878417969,,LC_ALL=C +subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.29879307746887207,278, +subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.3226010799407959,278, +subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.32187771797180176,278, +subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2825047969818115,278, 
+subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.283217191696167,278, +subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.3977878093719482,278,LC_ALL=en_US.UTF-8 +subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.4288139343261719,278,LC_ALL=en_US.UTF-8 +subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.4054889678955078,278,LC_ALL=en_US.UTF-8 +subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.4003441333770752,278,LC_ALL=en_US.UTF-8 +subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.5269148349761963,278,LC_ALL=en_US.UTF-8 +subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.8912529945373535,, +subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9221522808074951,, +subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9416618347167969,, +subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.893650770187378,, +subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.8895554542541504,, +subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0110745429992676,, +subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9790067672729492,, 
+subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0426392555236816,, +subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.121723175048828,, +subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.1247596740722656,, +subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.3579976558685303,,LC_ALL=C +subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.382859468460083,,LC_ALL=C +subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.393401861190796,,LC_ALL=C +subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.474374532699585,,LC_ALL=C +subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.3835601806640625,,LC_ALL=C diff -Nru ripgrep-0.6.0/benchsuite/runs/2018-01-08-archlinux-cheetah/README ripgrep-0.10.0.3/benchsuite/runs/2018-01-08-archlinux-cheetah/README --- ripgrep-0.6.0/benchsuite/runs/2018-01-08-archlinux-cheetah/README 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/benchsuite/runs/2018-01-08-archlinux-cheetah/README 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,59 @@ +This directory contains updated benchmarks as of 2018-01-08. They were captured +via the benchsuite script at `benchsuite/benchsuite` from the root of this +repository. 
The command that was run: + + $ ./benchsuite \ + --dir /tmp/benchsuite \ + --raw runs/2018-01-08-archlinux-cheetah/raw.csv \ + --warmup-iter 1 \ + --bench-iter 5 + +These results are most directly comparable to the +`2016-09-22-archlinux-cheetah` run in the parent directory. + +The versions of each tool are as follows: + + $ grep -V + grep (GNU grep) 3.1 + + $ ag -V + ag version 2.1.0 + Features: + +jit +lzma +zlib + + $ sift -V + sift 0.8.0 (linux/amd64) + built from commit 2ca94717 (which seems to be 0.9.0) + + $ pt --version + pt version 2.1.4 + + $ ucg -V + UniversalCodeGrep 0.3.3 + [...] + Build info + + Repo version: 0.3.3-251-g9b5a3e3 + + Compiler info: + Name ($(CXX)): "g++ -std=gnu++1z" + Version string: "g++ (GCC) 7.2.1 20171224" + + ISA extensions in use: + sse4.2: yes + popcnt: yes + + libpcre info: + Not linked against libpcre. + + libpcre2-8 info: + Version: 10.30 2017-08-14 + JIT support built in?: yes + JIT target architecture: x86 64bit (little endian + unaligned) + Newline style: LF + +The version of ripgrep was compiled from source on commit 85d463c0, with the +simd-accel and avx-accel features enabled: + + $ export RUSTFLAGS="-C target-cpu=native" + $ cargo build --release --features 'simd-accel avx-accel' diff -Nru ripgrep-0.6.0/benchsuite/runs/2018-01-08-archlinux-cheetah/summary ripgrep-0.10.0.3/benchsuite/runs/2018-01-08-archlinux-cheetah/summary --- ripgrep-0.6.0/benchsuite/runs/2018-01-08-archlinux-cheetah/summary 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/benchsuite/runs/2018-01-08-archlinux-cheetah/summary 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,235 @@ +linux_alternates (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT) +------------------------------------------------------------------------- +rg (ignore) 0.100 +/- 0.003 (lines: 68) +ag (ignore) 0.501 +/- 0.033 (lines: 68) +git grep (ignore) 0.267 +/- 0.004 (lines: 68) +rg (whitelist)* 0.090 +/- 0.001 (lines: 68)* +ucg (whitelist) 0.135 +/- 0.003 (lines: 68) + 
+linux_alternates_casei (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT) +------------------------------------------------------------------------------- +rg (ignore) 0.124 +/- 0.004 (lines: 160) +ag (ignore) 0.564 +/- 0.041 (lines: 160) +git grep (ignore) 0.928 +/- 0.033 (lines: 160) +rg (whitelist)* 0.096 +/- 0.003 (lines: 160)* +ucg (whitelist) 0.248 +/- 0.008 (lines: 160) + +linux_literal (pattern: PM_RESUME) +---------------------------------- +rg (ignore)* 0.082 +/- 0.001 (lines: 16)* +rg (ignore) (mmap) 0.751 +/- 0.062 (lines: 16) +ag (ignore) (mmap) 0.612 +/- 0.065 (lines: 16) +pt (ignore) 0.195 +/- 0.020 (lines: 16) +sift (ignore) 0.468 +/- 0.006 (lines: 16) +git grep (ignore) 0.196 +/- 0.005 (lines: 16) +rg (whitelist) 0.085 +/- 0.003 (lines: 16) +ucg (whitelist) 0.159 +/- 0.002 (lines: 16) + +linux_literal_casei (pattern: PM_RESUME) +---------------------------------------- +rg (ignore) 0.105 +/- 0.003 (lines: 374) +rg (ignore) (mmap) 0.799 +/- 0.012 (lines: 374) +ag (ignore) (mmap) 0.469 +/- 0.030 (lines: 374) +pt (ignore) 14.177 +/- 0.049 (lines: 374) +sift (ignore) 0.460 +/- 0.006 (lines: 374) +git grep (ignore) 0.198 +/- 0.006 (lines: 370) +rg (whitelist)* 0.097 +/- 0.003 (lines: 370)* +ucg (whitelist) 0.154 +/- 0.003 (lines: 370) + +linux_literal_default (pattern: PM_RESUME) +------------------------------------------ +rg* 0.089 +/- 0.002 (lines: 16)* +ag 0.469 +/- 0.038 (lines: 16) +ucg 0.154 +/- 0.001 (lines: 16) +pt 0.237 +/- 0.040 (lines: 16) +sift 0.126 +/- 0.003 (lines: 16) +git grep 0.175 +/- 0.013 (lines: 16) + +linux_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}) +----------------------------------------------------------------- +rg (ignore) 0.329 +/- 0.006 (lines: 490) +rg (ignore) (ASCII) 0.172 +/- 0.002 (lines: 490) +ag (ignore) (ASCII) 0.725 +/- 0.067 (lines: 766) +pt (ignore) (ASCII) 12.478 +/- 0.097 (lines: 490) +sift (ignore) (ASCII) 9.002 +/- 0.096 (lines: 490) +git grep (ignore) 8.542 +/- 0.277 (lines: 490) 
+git grep (ignore) (ASCII) 1.883 +/- 0.087 (lines: 490) +rg (whitelist) 0.289 +/- 0.006 (lines: 458) +rg (whitelist) (ASCII)* 0.160 +/- 0.001 (lines: 458)* +ucg (whitelist) (ASCII) 0.474 +/- 0.020 (lines: 416) + +linux_re_literal_suffix (pattern: [A-Z]+_RESUME) +------------------------------------------------ +rg (ignore) 0.084 +/- 0.002 (lines: 1652) +ag (ignore) 0.483 +/- 0.006 (lines: 1652) +pt (ignore) 14.128 +/- 0.026 (lines: 1652) +sift (ignore) 4.099 +/- 0.103 (lines: 1652) +git grep (ignore) 0.529 +/- 0.014 (lines: 1652) +rg (whitelist)* 0.078 +/- 0.002 (lines: 1630)* +ucg (whitelist) 0.135 +/- 0.002 (lines: 1630) + +linux_unicode_greek (pattern: \p{Greek}) +---------------------------------------- +rg* 0.172 +/- 0.002 (lines: 23)* +pt 14.122 +/- 0.031 (lines: 23) +sift 2.826 +/- 0.012 (lines: 23) + +linux_unicode_greek_casei (pattern: \p{Greek}) +---------------------------------------------- +rg 0.170 +/- 0.001 (lines: 103) +pt 14.120 +/- 0.039 (lines: 23) +sift* 0.004 +/- 0.000 (lines: 0)* + +linux_unicode_word (pattern: \wAh) +---------------------------------- +rg (ignore) 0.098 +/- 0.002 (lines: 186) +rg (ignore) (ASCII) 0.096 +/- 0.002 (lines: 174) +ag (ignore) (ASCII) 0.627 +/- 0.038 (lines: 174) +pt (ignore) (ASCII) 14.182 +/- 0.024 (lines: 174) +sift (ignore) (ASCII) 4.135 +/- 0.119 (lines: 174) +git grep (ignore) 4.854 +/- 0.643 (lines: 186) +git grep (ignore) (ASCII) 1.376 +/- 0.035 (lines: 174) +rg (whitelist) 0.081 +/- 0.001 (lines: 180)* +rg (whitelist) (ASCII)* 0.082 +/- 0.002 (lines: 168) +ucg (ASCII) 0.155 +/- 0.003 (lines: 168) + +linux_word (pattern: PM_RESUME) +------------------------------- +rg (ignore) 0.091 +/- 0.002 (lines: 6) +ag (ignore) 0.461 +/- 0.020 (lines: 6) +pt (ignore) 14.223 +/- 0.038 (lines: 6) +sift (ignore) 3.226 +/- 0.043 (lines: 6) +git grep (ignore) 0.173 +/- 0.006 (lines: 6) +rg (whitelist)* 0.076 +/- 0.001 (lines: 6)* +ucg (whitelist) 0.156 +/- 0.003 (lines: 6) + +subtitles_en_alternate (pattern: Sherlock 
Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty) +--------------------------------------------------------------------------------------------------------------- +rg (lines) 0.311 +/- 0.026 (lines: 848) +ag (lines) 2.242 +/- 0.086 (lines: 848) +ucg (lines) 1.132 +/- 0.017 (lines: 848) +grep (lines) 1.828 +/- 0.017 (lines: 848) +rg* 0.226 +/- 0.031 (lines: 848)* +grep 1.528 +/- 0.057 (lines: 848) + +subtitles_en_alternate_casei (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty) +--------------------------------------------------------------------------------------------------------------------- +ag (ASCII) 3.745 +/- 0.035 (lines: 862) +ucg (ASCII) 2.423 +/- 0.030 (lines: 862) +grep (ASCII) 2.969 +/- 0.040 (lines: 862) +rg* 1.952 +/- 0.049 (lines: 862)* +grep 2.928 +/- 0.012 (lines: 862) + +subtitles_en_literal (pattern: Sherlock Holmes) +----------------------------------------------- +rg* 0.130 +/- 0.014 (lines: 629)* +rg (no mmap) 0.243 +/- 0.017 (lines: 629) +pt 1.421 +/- 0.005 (lines: 629) +sift 0.243 +/- 0.015 (lines: 629) +grep 0.486 +/- 0.027 (lines: 629) +rg (lines) 0.190 +/- 0.014 (lines: 629) +ag (lines) 1.573 +/- 0.034 (lines: 629) +ucg (lines) 0.386 +/- 0.014 (lines: 629) +pt (lines) 1.452 +/- 0.055 (lines: 629) +sift (lines) 0.601 +/- 0.015 (lines: 629) +grep (lines) 0.803 +/- 0.031 (lines: 629) + +subtitles_en_literal_casei (pattern: Sherlock Holmes) +----------------------------------------------------- +rg* 0.284 +/- 0.028 (lines: 642)* +grep 1.912 +/- 0.004 (lines: 642) +grep (ASCII) 0.570 +/- 0.018 (lines: 642) +rg (lines) 0.362 +/- 0.028 (lines: 642) +ag (lines) (ASCII) 1.580 +/- 0.036 (lines: 642) +ucg (lines) (ASCII) 0.404 +/- 0.019 (lines: 642) + +subtitles_en_literal_word (pattern: Sherlock Holmes) +---------------------------------------------------- +rg (ASCII)* 0.185 +/- 0.015 (lines: 629) +ag (ASCII) 1.525 +/- 0.009 (lines: 629) +ucg (ASCII) 0.384 +/- 0.015 (lines: 629) +grep 
(ASCII) 0.788 +/- 0.029 (lines: 629) +rg 0.184 +/- 0.006 (lines: 629)* +grep 0.790 +/- 0.028 (lines: 629) + +subtitles_en_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}) +---------------------------------------------------------------------------------------- +rg 1.793 +/- 0.005 (lines: 13) +rg (ASCII)* 1.594 +/- 0.003 (lines: 13)* +ag (ASCII) 6.573 +/- 0.036 (lines: 48) +ucg (ASCII) 5.394 +/- 0.042 (lines: 13) +grep (ASCII) 3.446 +/- 0.050 (lines: 13) + +subtitles_en_surrounding_words (pattern: \w+\s+Holmes\s+\w+) +------------------------------------------------------------ +rg 0.187 +/- 0.003 (lines: 317) +grep 1.095 +/- 0.026 (lines: 317) +rg (ASCII)* 0.184 +/- 0.005 (lines: 317)* +ag (ASCII) 4.543 +/- 0.075 (lines: 323) +ucg (ASCII) 3.567 +/- 0.058 (lines: 317) +grep (ASCII) 1.070 +/- 0.012 (lines: 317) + +subtitles_ru_alternate (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти) +----------------------------------------------------------------------------------------------------------- +rg (lines) 1.007 +/- 0.041 (lines: 691) +ag (lines) 3.830 +/- 0.035 (lines: 691) +ucg (lines) 2.043 +/- 0.034 (lines: 691) +grep (lines) 7.513 +/- 0.020 (lines: 691) +rg* 0.938 +/- 0.046 (lines: 691)* +grep 7.085 +/- 0.030 (lines: 691) + +subtitles_ru_alternate_casei (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти) +----------------------------------------------------------------------------------------------------------------- +ag (ASCII) 3.799 +/- 0.015 (lines: 691) +ucg (ASCII)* 2.038 +/- 0.030 (lines: 691)* +grep (ASCII) 7.484 +/- 0.019 (lines: 691) +rg 11.087 +/- 0.057 (lines: 735) +grep 6.814 +/- 0.020 (lines: 735) + +subtitles_ru_literal (pattern: Шерлок Холмс) +-------------------------------------------- +rg* 0.195 +/- 0.010 (lines: 583)* +rg (no mmap) 0.315 +/- 0.005 (lines: 583) +pt 5.134 +/- 0.023 (lines: 583) +sift 5.835 +/- 0.061 (lines: 583) +grep 0.698 +/- 0.008 
(lines: 583) +rg (lines) 0.267 +/- 0.002 (lines: 583) +ag (lines) 2.862 +/- 0.096 (lines: 583) +ucg (lines) 2.418 +/- 0.045 (lines: 583) +pt (lines) 5.150 +/- 0.036 (lines: 583) +sift (lines) 6.374 +/- 0.056 (lines: 583) +grep (lines) 1.089 +/- 0.028 (lines: 583) + +subtitles_ru_literal_casei (pattern: Шерлок Холмс) +-------------------------------------------------- +rg 0.970 +/- 0.041 (lines: 604) +grep 6.297 +/- 0.037 (lines: 604) +grep (ASCII) 0.725 +/- 0.030 (lines: 583) +rg (lines) 1.032 +/- 0.010 (lines: 604) +ag (lines) (ASCII)* 0.645 +/- 0.022 (lines: 0)* +ucg (lines) (ASCII) 0.774 +/- 0.036 (lines: 583) + +subtitles_ru_literal_word (pattern: Шерлок Холмс) +------------------------------------------------- +rg (ASCII)* 0.188 +/- 0.007 (lines: 0)* +ag (ASCII) 0.668 +/- 0.028 (lines: 0) +ucg (ASCII) 2.404 +/- 0.052 (lines: 583) +grep (ASCII) 1.114 +/- 0.048 (lines: 583) +rg 0.275 +/- 0.011 (lines: 579) +grep 1.076 +/- 0.008 (lines: 579) + +subtitles_ru_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}) +---------------------------------------------------------------------------------------- +rg 3.167 +/- 0.014 (lines: 41) +rg (ASCII) 2.740 +/- 0.014 (lines: 0) +ag (ASCII) 1.963 +/- 0.069 (lines: 0) +ucg (ASCII) 2.055 +/- 0.037 (lines: 0) +grep (ASCII)* 1.634 +/- 0.009 (lines: 0)* + +subtitles_ru_surrounding_words (pattern: \w+\s+Холмс\s+\w+) +----------------------------------------------------------- +rg* 0.302 +/- 0.020 (lines: 278)* +grep 1.432 +/- 0.055 (lines: 278) +ag (ASCII) 1.908 +/- 0.023 (lines: 0) +ucg (ASCII) 2.056 +/- 0.066 (lines: 0) +grep (ASCII) 1.398 +/- 0.044 (lines: 0) diff -Nru ripgrep-0.6.0/build.rs ripgrep-0.10.0.3/build.rs --- ripgrep-0.6.0/build.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/build.rs 2018-09-10 21:10:55.000000000 +0000 @@ -4,23 +4,181 @@ extern crate lazy_static; use std::env; -use std::fs; +use std::fs::{self, File}; +use std::io::{self, Read, Write}; +use std::path::Path; +use 
std::process; use clap::Shell; +use app::{RGArg, RGArgKind}; + #[allow(dead_code)] #[path = "src/app.rs"] mod app; fn main() { + // OUT_DIR is set by Cargo and it's where any additional build artifacts + // are written. let outdir = match env::var_os("OUT_DIR") { - None => return, Some(outdir) => outdir, + None => { + eprintln!( + "OUT_DIR environment variable not defined. \ + Please file a bug: \ + https://github.com/BurntSushi/ripgrep/issues/new"); + process::exit(1); + } }; fs::create_dir_all(&outdir).unwrap(); + let stamp_path = Path::new(&outdir).join("ripgrep-stamp"); + if let Err(err) = File::create(&stamp_path) { + panic!("failed to write {}: {}", stamp_path.display(), err); + } + if let Err(err) = generate_man_page(&outdir) { + eprintln!("failed to generate man page: {}", err); + } + + // Use clap to build completion files. let mut app = app::app(); app.gen_completions("rg", Shell::Bash, &outdir); app.gen_completions("rg", Shell::Fish, &outdir); app.gen_completions("rg", Shell::PowerShell, &outdir); + // Note that we do not use clap's support for zsh. Instead, zsh completions + // are manually maintained in `complete/_rg`. + + // Make the current git hash available to the build. + if let Some(rev) = git_revision_hash() { + println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev); + } +} + +fn git_revision_hash() -> Option { + let result = process::Command::new("git") + .args(&["rev-parse", "--short=10", "HEAD"]) + .output(); + result.ok().and_then(|output| { + let v = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if v.is_empty() { + None + } else { + Some(v) + } + }) +} + +fn generate_man_page>(outdir: P) -> io::Result<()> { + // If asciidoc isn't installed, then don't do anything. + if let Err(err) = process::Command::new("a2x").output() { + eprintln!("Could not run 'a2x' binary, skipping man page generation."); + eprintln!("Error from running 'a2x': {}", err); + return Ok(()); + } + // 1. Read asciidoc template. + // 2. 
Interpolate template with auto-generated docs. + // 3. Save interpolation to disk. + // 4. Use a2x (part of asciidoc) to convert to man page. + let outdir = outdir.as_ref(); + let cwd = env::current_dir()?; + let tpl_path = cwd.join("doc").join("rg.1.txt.tpl"); + let txt_path = outdir.join("rg.1.txt"); + + let mut tpl = String::new(); + File::open(&tpl_path)?.read_to_string(&mut tpl)?; + tpl = tpl.replace("{OPTIONS}", &formatted_options()?); + + let githash = git_revision_hash(); + let githash = githash.as_ref().map(|x| &**x); + tpl = tpl.replace("{VERSION}", &app::long_version(githash)); + + File::create(&txt_path)?.write_all(tpl.as_bytes())?; + let result = process::Command::new("a2x") + .arg("--no-xmllint") + .arg("--doctype").arg("manpage") + .arg("--format").arg("manpage") + .arg(&txt_path) + .spawn()? + .wait()?; + if !result.success() { + let msg = format!("'a2x' failed with exit code {:?}", result.code()); + return Err(ioerr(msg)); + } + Ok(()) +} + +fn formatted_options() -> io::Result { + let mut args = app::all_args_and_flags(); + args.sort_by(|x1, x2| x1.name.cmp(&x2.name)); + + let mut formatted = vec![]; + for arg in args { + if arg.hidden { + continue; + } + // ripgrep only has two positional arguments, and probably will only + // ever have two positional arguments, so we just hardcode them into + // the template. 
+ if let app::RGArgKind::Positional{..} = arg.kind { + continue; + } + formatted.push(formatted_arg(&arg)?); + } + Ok(formatted.join("\n\n")) +} + +fn formatted_arg(arg: &RGArg) -> io::Result { + match arg.kind { + RGArgKind::Positional{..} => panic!("unexpected positional argument"), + RGArgKind::Switch { long, short, multiple } => { + let mut out = vec![]; + + let mut header = format!("--{}", long); + if let Some(short) = short { + header = format!("-{}, {}", short, header); + } + if multiple { + header = format!("*{}* ...::", header); + } else { + header = format!("*{}*::", header); + } + writeln!(out, "{}", header)?; + writeln!(out, "{}", formatted_doc_txt(arg)?)?; + + Ok(String::from_utf8(out).unwrap()) + } + RGArgKind::Flag { long, short, value_name, multiple, .. } => { + let mut out = vec![]; + + let mut header = format!("--{}", long); + if let Some(short) = short { + header = format!("-{}, {}", short, header); + } + if multiple { + header = format!("*{}* _{}_ ...::", header, value_name); + } else { + header = format!("*{}* _{}_::", header, value_name); + } + writeln!(out, "{}", header)?; + writeln!(out, "{}", formatted_doc_txt(arg)?)?; + + Ok(String::from_utf8(out).unwrap()) + } + } +} + +fn formatted_doc_txt(arg: &RGArg) -> io::Result { + let paragraphs: Vec<&str> = arg.doc_long.split("\n\n").collect(); + if paragraphs.is_empty() { + return Err(ioerr(format!("missing docs for --{}", arg.name))); + } + let first = format!(" {}", paragraphs[0].replace("\n", "\n ")); + if paragraphs.len() == 1 { + return Ok(first); + } + Ok(format!("{}\n+\n{}", first, paragraphs[1..].join("\n+\n"))) +} + +fn ioerr(msg: String) -> io::Error { + io::Error::new(io::ErrorKind::Other, msg) } diff -Nru ripgrep-0.6.0/.cargo/config ripgrep-0.10.0.3/.cargo/config --- ripgrep-0.6.0/.cargo/config 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/.cargo/config 2018-09-11 18:16:35.000000000 +0000 @@ -0,0 +1,6 @@ +[source.crates-io] +registry = 
'https://github.com/rust-lang/crates.io-index' +replace-with = 'vendored-sources' + +[source.vendored-sources] +directory = '/home/jferry/misc/rust/packages/ripgrep/vendor' diff -Nru ripgrep-0.6.0/Cargo.lock ripgrep-0.10.0.3/Cargo.lock --- ripgrep-0.6.0/Cargo.lock 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/Cargo.lock 2018-09-10 21:10:55.000000000 +0000 @@ -1,289 +1,635 @@ -[root] -name = "ripgrep" -version = "0.6.0" +[[package]] +name = "aho-corasick" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "bytecount 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.26.0 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", - "env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "grep 0.1.6", - "ignore 0.2.2", - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", - "num_cpus 1.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "termcolor 0.3.2", + "memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "aho-corasick" -version = "0.6.3" +name = "arrayvec" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "ansi_term" -version = "0.9.0" +name = "atty" 
+version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] -name = "atty" -version = "0.2.2" +name = "base64" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.29 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "bitflags" -version = "0.9.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "bytecount" -version = "0.1.7" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "simd 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] +name = "byteorder" +version = "1.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cc" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "cfg-if" -version = "0.1.2" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "clap" -version = "2.26.0" +version = "2.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "textwrap 0.10.0 
(registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-channel" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-epoch 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", - "term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "textwrap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.1.0 
(registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "crossbeam" -version = "0.2.10" +name = "crossbeam-utils" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "encoding_rs" -version = "0.6.11" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "simd 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "env_logger" -version = "0.4.3" +name = "encoding_rs_io" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "fnv" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "fs2" -version = "0.4.2" +name = "fuchsia-zircon" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.29 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] +name = "fuchsia-zircon-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = 
"glob" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "globset" -version = "0.2.0" +version = "0.4.2" dependencies = [ - "aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "grep" -version = "0.1.6" +version = "0.2.3" dependencies = [ - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "grep-cli 0.1.1", + "grep-matcher 0.1.1", + "grep-pcre2 0.1.2", + "grep-printer 0.1.1", + "grep-regex 0.1.1", + "grep-searcher 0.1.1", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "ignore" -version = "0.2.2" +name = "grep-cli" +version = "0.1.1" dependencies = [ - "crossbeam 
0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", - "globset 0.2.0", - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "globset 0.4.2", + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "same-file 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "kernel32-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" +name = "grep-matcher" +version = "0.1.1" +dependencies = [ + "memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "grep-pcre2" +version = "0.1.2" +dependencies = [ + "grep-matcher 0.1.1", + "pcre2 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "grep-printer" +version = "0.1.1" dependencies = [ - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", + "grep-matcher 0.1.1", + "grep-regex 0.1.1", + "grep-searcher 0.1.1", + "serde 1.0.77 
(registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.77 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", + "termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] +name = "grep-regex" +version = "0.1.1" +dependencies = [ + "grep-matcher 0.1.1", + "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "grep-searcher" +version = "0.1.1" +dependencies = [ + "bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs_io 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "grep-matcher 0.1.1", + "grep-regex 0.1.1", + "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ignore" +version = "0.4.4" +dependencies = [ + "crossbeam-channel 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", + "globset 0.4.2", + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "same-file 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tempdir 0.3.7 
(registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "itoa" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "lazy_static" -version = "0.2.8" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "version_check 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "libc" -version = "0.2.29" +version = "0.2.43" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "lock_api" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "log" -version = "0.3.8" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "memchr" -version = "1.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "libc 0.2.29 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "memmap" -version = "0.5.2" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "fs2 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.29 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 
(registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] +name = "memoffset" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "nodrop" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "num_cpus" -version = "1.6.2" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "owning_ref" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lock_api 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot_core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "pcre2" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "pcre2-sys 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "pcre2-sys" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "pkg-config" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "proc-macro2" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quote" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.18 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = 
"0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "redox_syscall" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "redox_termios" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "libc 0.2.29 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "regex" -version = "0.2.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "regex-syntax" -version = "0.4.1" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "remove_dir_all" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ripgrep" +version 
= "0.10.0" +dependencies = [ + "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "grep 0.2.3", + "ignore 0.4.4", + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.77 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.77 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", + "termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ryu" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "safemem" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "same-file" -version = "0.1.3" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "simd" -version = "0.1.1" +name = "scopeguard" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde" +version = "1.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde_derive" +version = "1.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.18 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.1 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_json" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.77 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "simd" -version = "0.2.0" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "smallvec" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "stable_deref_trait" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "strsim" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "term_size" -version = "0.3.0" +name = "syn" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.18 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "tempdir" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.29 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "termcolor" -version = "0.3.2" 
+version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "termion" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "wincolor 0.1.4", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "textwrap" -version = "0.7.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "thread_local" -version = "0.3.4" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "unicode-segmentation" -version = "1.2.0" +name = "ucd-util" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "unicode-width" -version = "0.1.4" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-xid" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -296,12 +642,12 @@ [[package]] name = "utf8-ranges" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "vec_map" -version = 
"0.8.0" +name = "version_check" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -311,67 +657,124 @@ [[package]] name = "walkdir" -version = "1.0.7" +version = "2.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "same-file 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "winapi" -version = "0.2.8" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "winapi-build" +name = "winapi-util" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "wincolor" -version = "0.1.4" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + 
"winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [metadata] -"checksum aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699" -"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6" -"checksum atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d912da0db7fa85514874458ca3651fe2cddace8d0b0505571dbdcd41ab490159" -"checksum bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5" -"checksum bytecount 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4bbeb7c30341fce29f6078b4bdf876ea4779600866e98f5b2d203a534f195050" -"checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de" -"checksum clap 2.26.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2267a8fdd4dce6956ba6649e130f62fb279026e5e84b92aa939ac8f85ce3f9f0" -"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97" -"checksum encoding_rs 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)" = "e00a1b1e95eb46988805ceee6f34cd95c46a6753e290cb3ff0486931989d4a4c" -"checksum env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3ddf21e73e016298f5cb37d6ef8e8da8e39f91f9ec8b0df44b7deb16a9f8cd5b" -"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344" -"checksum fs2 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9ab76cfd2aaa59b7bf6688ad9ba15bbae64bff97f04ea02144cfd3443e5c2866" -"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" -"checksum lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "3b37545ab726dd833ec6420aaba8231c5b320814b9029ad585555d2a03e94fbf" -"checksum libc 0.2.29 (registry+https://github.com/rust-lang/crates.io-index)" = "8a014d9226c2cc402676fbe9ea2e15dd5222cd1dd57f576b5b283178c944a264" -"checksum log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "880f77541efa6e5cc74e76910c9884d9859683118839d6a1dc3b11e63512565b" -"checksum memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1dbccc0e46f1ea47b9f17e6d67c5a96bd27030519c519c9c91327e31275a47b4" -"checksum memmap 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "46f3c7359028b31999287dae4e5047ddfe90a23b7dca2282ce759b491080c99b" -"checksum num_cpus 1.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aec53c34f2d0247c5ca5d32cca1478762f301740468ee9ee6dcb7a0dd7a0c584" -"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b" -"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db" -"checksum same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7" -"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023" -"checksum simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a94d14a2ae1f1f110937de5fb69e494372560181c7e1739a097fcc2cee37ba0" -"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" -"checksum term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"e2b6b55df3198cc93372e85dd2ed817f0e38ce8cc0f22eb32391bfad9c4bf209" -"checksum textwrap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f728584ea33b0ad19318e20557cb0a39097751dbb07171419673502f848c7af6" -"checksum thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14" -"checksum unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a8083c594e02b8ae1654ae26f0ade5158b119bd88ad0e8227a5d8fcd72407946" -"checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f" +"checksum aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "68f56c7353e5a9547cbd76ed90f7bb5ffc3ba09d4ea9bd1d8c06c8b1142eeb5a" +"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef" +"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" +"checksum base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "85415d2594767338a74a30c1d370b2f3262ec1b4ed2d7bba5b3faf4de40467d9" +"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" +"checksum bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8" +"checksum byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "90492c5858dd7d2e78691cfb89f90d273a2800fc11d98f60786e5d87e2f83781" +"checksum cc 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)" = "70f2a88c2e69ceee91c209d8ef25b81fc1a65f42c7f14dfd59d1fed189e514d1" +"checksum cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = 
"0c4e7bb64a8ebb0d856483e1e682ea3422f883c5f5615a90d51a2c82fe87fdd3" +"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" +"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +"checksum crossbeam-channel 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6c0a94250b0278d7fc5a894c3d276b11ea164edc8bf8feb10ca1ea517b44a649" +"checksum crossbeam-epoch 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "30fecfcac6abfef8771151f8be4abc9e4edc112c2bcb233314cafde2680536e9" +"checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015" +"checksum encoding_rs 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2a91912d6f37c6a8fef8a2316a862542d036f13c923ad518b5aca7bcaac7544c" +"checksum encoding_rs_io 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f222ff554d6e172f3569a2d7d0fd8061d54215984ef67b24ce031c1fcbf2c9b3" +"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" +"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" +"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" +"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" +"checksum itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5adb58558dcd1d786b5f0bd15f3226ee23486e24b7b58304b60f64dc68e62606" +"checksum lazy_static 1.1.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "ca488b89a5657b0a2ecd45b95609b3e848cf1755da332a0da46e2b2b1cb371a7" +"checksum libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d" +"checksum lock_api 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "949826a5ccf18c1b3a7c3d57692778d21768b79e46eb9dd07bfc4c2160036c54" +"checksum log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fcce5fa49cc693c312001daf1d13411c4a5283796bac1084299ea3e567113f" +"checksum memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a3b4142ab8738a78c51896f704f83c11df047ff1bda9a92a661aa6361552d93d" +"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" +"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" +"checksum nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "9a2228dca57108069a5262f2ed8bd2e82496d2e074a06d1ccc7ce1687b6ae0a2" +"checksum num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c51a3322e4bca9d212ad9a158a02abc6934d005490c054a2778df73a70aa0a30" +"checksum owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "cdf84f41639e037b484f93433aa3897863b561ed65c6e59c7073d7c561710f37" +"checksum parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0802bff09003b291ba756dc7e79313e51cc31667e94afbe847def490424cde5" +"checksum parking_lot_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "06a2b6aae052309c2fd2161ef58f5067bc17bb758377a0de9d4b279d603fdd8a" +"checksum pcre2 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae0a2682105ec5ca0ee5910bbc7e926386d348a05166348f74007942983c319" +"checksum pcre2-sys 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "a9027f9474e4e13d3b965538aafcaebe48c803488ad76b3c97ef061a8324695f" +"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" +"checksum proc-macro2 0.4.18 (registry+https://github.com/rust-lang/crates.io-index)" = "afa4d377067cc02eb5e0b491d3f7cfbe145ad4da778535bfb13c444413dd35b9" +"checksum quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "dd636425967c33af890042c483632d33fa7a18f19ad1d7ea72e8998c6ef8dea5" +"checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd" +"checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c" +"checksum rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "edecf0f94da5551fc9b492093e30b041a891657db7940ee221f9d2f66e82eef2" +"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1" +"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" +"checksum regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2069749032ea3ec200ca51e4a31df41759190a88edca0d2d86ee8bedf7073341" +"checksum regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "747ba3b235651f6e2f67dfa8bcdcd073ddb7c243cb21c442fc12395dfcac212d" +"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" +"checksum ryu 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7153dd96dade874ab973e098cb62fcdbb89a03682e46b144fd09550998d4a4a7" +"checksum safemem 0.2.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f" +"checksum same-file 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "10f7794e2fda7f594866840e95f5c5962e886e228e68b6505885811a94dd728c" +"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" +"checksum serde 1.0.77 (registry+https://github.com/rust-lang/crates.io-index)" = "c6e67977d7523ce4d9284ed58918af99392de8edb6192c44afefcf634654ab7f" +"checksum serde_derive 1.0.77 (registry+https://github.com/rust-lang/crates.io-index)" = "5569c52faae3e21b9abae2cc5cfbb56ed008bfcac480ad62bc241b828f0b0aee" +"checksum serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)" = "59790990c5115d16027f00913e2e66de23a51f70422e549d2ad68c8c5f268f1c" +"checksum simd 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ed3686dd9418ebcc3a26a0c0ae56deab0681e53fe899af91f5bbcee667ebffb1" +"checksum smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "153ffa32fd170e9944f7e0838edf824a754ec4c1fc64746fcc9fe1f8fa602e5d" +"checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" +"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" +"checksum syn 0.15.1 (registry+https://github.com/rust-lang/crates.io-index)" = "85fb2f7f9b7a4c8df2c913a852de570efdb40f0d2edd39c8245ad573f5c7fbcc" +"checksum tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +"checksum termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ff3bac0e465b59f194e7037ed404b0326e56ff234d767edc4c5cc9cd49e7a2c7" +"checksum termion 1.5.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" +"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" +"checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d" +"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" +"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" -"checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c" +"checksum utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd70f467df6810094968e2fce0ee1bd0e87157aceb026a8c083bcf5e25b9efe4" +"checksum version_check 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7716c242968ee87e5542f8021178248f267f295a5c4803beae8b8b7fd9bc6051" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" -"checksum walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "bb08f9e670fab86099470b97cd2b252d6527f0b3cc1401acdb595ffc9dd288ff" -"checksum winapi 0.2.8 
(registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" -"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" +"checksum walkdir 2.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "af464bc7be7b785c7ac72e266a6b67c4c9070155606f51655a650a6686204e35" +"checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd" +"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "afc5508759c5bf4285e61feb862b6083c8480aec864fa17a81fdec6f69b461ab" +"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +"checksum wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "561ed901ae465d6185fa7864d63fbd5720d0ef718366c9a4dc83cf6170d7e9ba" diff -Nru ripgrep-0.6.0/Cargo.toml ripgrep-0.10.0.3/Cargo.toml --- ripgrep-0.6.0/Cargo.toml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -1,10 +1,11 @@ [package] name = "ripgrep" -version = "0.6.0" #:version +version = "0.10.0" #:version authors = ["Andrew Gallant "] description = """ -Line oriented search tool using Rust's regex library. Combines the raw -performance of grep with the usability of the silver searcher. +ripgrep is a line-oriented search tool that recursively searches your current +directory for a regex pattern while respecting your gitignore rules. 
ripgrep +has first class support on Windows, macOS and Linux """ documentation = "https://github.com/BurntSushi/ripgrep" homepage = "https://github.com/BurntSushi/ripgrep" @@ -12,9 +13,10 @@ readme = "README.md" keywords = ["regex", "grep", "egrep", "search", "pattern"] categories = ["command-line-utilities", "text-processing"] -license = "Unlicense/MIT" +license = "Unlicense OR MIT" exclude = ["HomebrewFormula"] build = "build.rs" +autotests = false [badges] travis-ci = { repository = "BurntSushi/ripgrep" } @@ -29,30 +31,76 @@ name = "integration" path = "tests/tests.rs" +[workspace] +members = [ + "globset", + "grep", + "grep-cli", + "grep-matcher", + "grep-pcre2", + "grep-printer", + "grep-regex", + "grep-searcher", + "ignore", +] + [dependencies] -atty = "0.2.2" -bytecount = "0.1.4" -clap = "2.26" -encoding_rs = "0.6" -env_logger = { version = "0.4", default-features = false } -grep = { version = "0.1.5", path = "grep" } -ignore = { version = "0.2.2", path = "ignore" } -lazy_static = "0.2" -log = "0.3" -memchr = "1" -memmap = "0.5" -num_cpus = "1" -regex = "0.2.1" -same-file = "0.1.1" -termcolor = { version = "0.3.0", path = "termcolor" } +grep = { version = "0.2.3", path = "grep" } +ignore = { version = "0.4.4", path = "ignore" } +lazy_static = "1.1.0" +log = "0.4.5" +num_cpus = "1.8.0" +regex = "1.0.5" +serde_json = "1.0.23" +termcolor = "1.0.3" + +[dependencies.clap] +version = "2.32.0" +default-features = false +features = ["suggestions"] [build-dependencies] -clap = "2.26" -lazy_static = "0.2" +lazy_static = "1.1.0" + +[build-dependencies.clap] +version = "2.32.0" +default-features = false +features = ["suggestions"] + +[dev-dependencies] +serde = "1.0.77" +serde_derive = "1.0.77" [features] -avx-accel = ["bytecount/avx-accel"] -simd-accel = ["bytecount/simd-accel", "regex/simd-accel", "encoding_rs/simd-accel"] +avx-accel = ["grep/avx-accel"] +simd-accel = ["grep/simd-accel"] +pcre2 = ["grep/pcre2"] [profile.release] -debug = true +debug = 1 + 
+[package.metadata.deb] +features = ["pcre2"] +assets = [ + ["target/release/rg", "usr/bin/", "755"], + ["COPYING", "usr/share/doc/ripgrep/", "644"], + ["LICENSE-MIT", "usr/share/doc/ripgrep/", "644"], + ["UNLICENSE", "usr/share/doc/ripgrep/", "644"], + ["CHANGELOG.md", "usr/share/doc/ripgrep/CHANGELOG", "644"], + ["README.md", "usr/share/doc/ripgrep/README", "644"], + ["FAQ.md", "usr/share/doc/ripgrep/FAQ", "644"], + # The man page is automatically generated by ripgrep's build process, so + # this file isn't actually committed. Instead, to create a dpkg, either + # create a deployment/deb directory and copy the man page to it, or use the + # 'ci/build_deb.sh' script. + ["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"], + # Similarly for shell completions. + ["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"], + ["deployment/deb/rg.fish", "usr/share/fish/completions/rg.fish", "644"], + ["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"], +] +extended-description = """\ +ripgrep (rg) recursively searches your current directory for a regex pattern. +By default, ripgrep will respect your .gitignore and automatically skip hidden +files/directories and binary files. +""" diff -Nru ripgrep-0.6.0/CHANGELOG.md ripgrep-0.10.0.3/CHANGELOG.md --- ripgrep-0.6.0/CHANGELOG.md 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/CHANGELOG.md 2018-09-10 21:10:55.000000000 +0000 @@ -1,3 +1,420 @@ +0.10.0 (2018-09-07) +=================== +This is a new minor version release of ripgrep that contains some major new +features, a huge number of bug fixes, and is the first release based on +libripgrep. The entirety of ripgrep's core search and printing code has been +rewritten and generalized so that anyone can make use of it. + +Major new features include PCRE2 support, multi-line search and a JSON output +format.
+ +**BREAKING CHANGES**: + +* The minimum version of Rust required to compile ripgrep has now changed to track the + latest stable version of Rust. Patch releases will continue to compile with + the same version of Rust as the previous patch release, but new minor + versions will use the current stable version of the Rust compiler as its + minimum supported version. +* The match semantics of `-w/--word-regexp` have changed slightly. They used + to be `\b(?:PATTERN)\b`, but now it's + `(?:^|\W)(?:PATTERN)(?:$|\W)`. This matches the behavior of GNU grep + and is believed to be closer to the intended semantics of the flag. See + [#389](https://github.com/BurntSushi/ripgrep/issues/389) for more details. + +Feature enhancements: + +* [FEATURE #162](https://github.com/BurntSushi/ripgrep/issues/162): + libripgrep is now a thing. The primary crate is + [`grep`](https://docs.rs/grep). +* [FEATURE #176](https://github.com/BurntSushi/ripgrep/issues/176): + Add `-U/--multiline` flag that permits matching over multiple lines. +* [FEATURE #188](https://github.com/BurntSushi/ripgrep/issues/188): + Add `-P/--pcre2` flag that gives support for look-around and backreferences. +* [FEATURE #244](https://github.com/BurntSushi/ripgrep/issues/244): + Add `--json` flag that prints results in a JSON Lines format. +* [FEATURE #321](https://github.com/BurntSushi/ripgrep/issues/321): + Add `--one-file-system` flag to skip directories on different file systems. +* [FEATURE #404](https://github.com/BurntSushi/ripgrep/issues/404): + Add `--sort` and `--sortr` flag for more sorting. Deprecate `--sort-files`. +* [FEATURE #416](https://github.com/BurntSushi/ripgrep/issues/416): + Add `--crlf` flag to permit `$` to work with carriage returns on Windows. +* [FEATURE #917](https://github.com/BurntSushi/ripgrep/issues/917): + The `--trim` flag strips prefix whitespace from all lines printed.
+* [FEATURE #993](https://github.com/BurntSushi/ripgrep/issues/993): + Add `--null-data` flag, which makes ripgrep use NUL as a line terminator. +* [FEATURE #997](https://github.com/BurntSushi/ripgrep/issues/997): + The `--passthru` flag now works with the `--replace` flag. +* [FEATURE #1038-1](https://github.com/BurntSushi/ripgrep/issues/1038): + Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy. +* [FEATURE #1038-2](https://github.com/BurntSushi/ripgrep/issues/1038): + Add `--pre-glob` for filtering files through the `--pre` flag. + +Bug fixes: + +* [BUG #2](https://github.com/BurntSushi/ripgrep/issues/2): + Searching with non-zero context can now use memory maps if appropriate. +* [BUG #200](https://github.com/BurntSushi/ripgrep/issues/200): + ripgrep will now stop correctly when its output pipe is closed. +* [BUG #389](https://github.com/BurntSushi/ripgrep/issues/389): + The `-w/--word-regexp` flag now works more intuitively. +* [BUG #643](https://github.com/BurntSushi/ripgrep/issues/643): + Detection of readable stdin has improved on Windows. +* [BUG #441](https://github.com/BurntSushi/ripgrep/issues/441), + [BUG #690](https://github.com/BurntSushi/ripgrep/issues/690), + [BUG #980](https://github.com/BurntSushi/ripgrep/issues/980): + Matching empty lines now works correctly in several corner cases. +* [BUG #764](https://github.com/BurntSushi/ripgrep/issues/764): + Color escape sequences now coalesce, which reduces output size. +* [BUG #842](https://github.com/BurntSushi/ripgrep/issues/842): + Add man page to binary Debian package. +* [BUG #922](https://github.com/BurntSushi/ripgrep/issues/922): + ripgrep is now more robust with respect to memory maps failing. +* [BUG #937](https://github.com/BurntSushi/ripgrep/issues/937): + Color escape sequences are no longer emitted for empty matches. +* [BUG #940](https://github.com/BurntSushi/ripgrep/issues/940): + Context from the `--passthru` flag should not impact process exit status. 
+* [BUG #984](https://github.com/BurntSushi/ripgrep/issues/984): + Fixes bug in `ignore` crate where first path was always treated as a symlink. +* [BUG #990](https://github.com/BurntSushi/ripgrep/issues/990): + Read stderr asynchronously when running a process. +* [BUG #1013](https://github.com/BurntSushi/ripgrep/issues/1013): + Add compile time and runtime CPU features to `--version` output. +* [BUG #1028](https://github.com/BurntSushi/ripgrep/pull/1028): + Don't complete bare pattern after `-f` in zsh. + + +0.9.0 (2018-08-03) +================== +This is a new minor version release of ripgrep that contains some minor new +features and a panoply of bug fixes. + +Releases provided on Github for `x86_64` will now work on all target CPUs, and +will also automatically take advantage of features found on modern CPUs (such +as AVX2) for additional optimizations. + +This release increases the **minimum supported Rust version** from 1.20.0 to +1.23.0. + +It is anticipated that the next release of ripgrep (0.10.0) will provide +multi-line search support and a JSON output format. + +**BREAKING CHANGES**: + +* When `--count` and `--only-matching` are provided simultaneously, the + behavior of ripgrep is as if the `--count-matches` flag was given. That is, + the total number of matches is reported, where there may be multiple matches + per line. Previously, the behavior of ripgrep was to report the total number + of matching lines. (Note that this behavior diverges from the behavior of + GNU grep.) +* Octal syntax is no longer supported. ripgrep previously accepted expressions + like `\1` as syntax for matching `U+0001`, but ripgrep will now report an + error instead. +* The `--line-number-width` flag has been removed. Its functionality was not + carefully considered with all ripgrep output formats. + See [#795](https://github.com/BurntSushi/ripgrep/issues/795) for more + details. 
+ +Feature enhancements: + +* Added or improved file type filtering for Android, Bazel, Fuchsia, Haskell, + Java and Puppet. +* [FEATURE #411](https://github.com/BurntSushi/ripgrep/issues/411): + Add a `--stats` flag, which emits aggregate statistics after search results. +* [FEATURE #646](https://github.com/BurntSushi/ripgrep/issues/646): + Add a `--no-ignore-messages` flag, which suppresses parse errors from reading + `.ignore` and `.gitignore` files. +* [FEATURE #702](https://github.com/BurntSushi/ripgrep/issues/702): + Support `\u{..}` Unicode escape sequences. +* [FEATURE #812](https://github.com/BurntSushi/ripgrep/issues/812): + Add `-b/--byte-offset` flag that shows the byte offset of each matching line. +* [FEATURE #814](https://github.com/BurntSushi/ripgrep/issues/814): + Add `--count-matches` flag, which is like `--count`, but for each match. +* [FEATURE #880](https://github.com/BurntSushi/ripgrep/issues/880): + Add a `--no-column` flag, which disables column numbers in the output. +* [FEATURE #898](https://github.com/BurntSushi/ripgrep/issues/898): + Add support for `lz4` when using the `-z/--search-zip` flag. +* [FEATURE #924](https://github.com/BurntSushi/ripgrep/issues/924): + `termcolor` has moved to its own repository: + https://github.com/BurntSushi/termcolor +* [FEATURE #934](https://github.com/BurntSushi/ripgrep/issues/934): + Add a new flag, `--no-ignore-global`, that permits disabling global + gitignores. +* [FEATURE #967](https://github.com/BurntSushi/ripgrep/issues/967): + Rename `--maxdepth` to `--max-depth` for consistency. Keep `--maxdepth` for + backwards compatibility. +* [FEATURE #978](https://github.com/BurntSushi/ripgrep/issues/978): + Add a `--pre` option to filter inputs with an arbitrary program. +* [FEATURE fca9709d](https://github.com/BurntSushi/ripgrep/commit/fca9709d): + Improve zsh completion. 
+ +Bug fixes: + +* [BUG #135](https://github.com/BurntSushi/ripgrep/issues/135): + Release portable binaries that conditionally use SSSE3, AVX2, etc., at + runtime. +* [BUG #268](https://github.com/BurntSushi/ripgrep/issues/268): + Print descriptive error message when trying to use look-around or + backreferences. +* [BUG #395](https://github.com/BurntSushi/ripgrep/issues/395): + Show comprehensible error messages for regexes like `\s*{`. +* [BUG #526](https://github.com/BurntSushi/ripgrep/issues/526): + Support backslash escapes in globs. +* [BUG #795](https://github.com/BurntSushi/ripgrep/issues/795): + Fix problems with `--line-number-width` by removing it. +* [BUG #832](https://github.com/BurntSushi/ripgrep/issues/832): + Clarify usage instructions for `-f/--file` flag. +* [BUG #835](https://github.com/BurntSushi/ripgrep/issues/835): + Fix small performance regression while crawling very large directory trees. +* [BUG #851](https://github.com/BurntSushi/ripgrep/issues/851): + Fix `-S/--smart-case` detection once and for all. +* [BUG #852](https://github.com/BurntSushi/ripgrep/issues/852): + Be robust with respect to `ENOMEM` errors returned by `mmap`. +* [BUG #853](https://github.com/BurntSushi/ripgrep/issues/853): + Upgrade `grep` crate to `regex-syntax 0.6.0`. +* [BUG #893](https://github.com/BurntSushi/ripgrep/issues/893): + Improve support for git submodules. +* [BUG #900](https://github.com/BurntSushi/ripgrep/issues/900): + When no patterns are given, ripgrep should never match anything. +* [BUG #907](https://github.com/BurntSushi/ripgrep/issues/907): + ripgrep will now stop traversing after the first file when `--quiet --files` + is used. +* [BUG #918](https://github.com/BurntSushi/ripgrep/issues/918): + Don't skip tar archives when `-z/--search-zip` is used. +* [BUG #934](https://github.com/BurntSushi/ripgrep/issues/934): + Don't respect gitignore files when searching outside git repositories. 
+* [BUG #948](https://github.com/BurntSushi/ripgrep/issues/948): + Use exit code 2 to indicate error, and use exit code 1 to indicate no + matches. +* [BUG #951](https://github.com/BurntSushi/ripgrep/issues/951): + Add stdin example to ripgrep usage documentation. +* [BUG #955](https://github.com/BurntSushi/ripgrep/issues/955): + Use buffered writing when not printing to a tty, which fixes a performance + regression. +* [BUG #957](https://github.com/BurntSushi/ripgrep/issues/957): + Improve the error message shown for `--path separator /` in some Windows + shells. +* [BUG #964](https://github.com/BurntSushi/ripgrep/issues/964): + Add a `--no-fixed-strings` flag to disable `-F/--fixed-strings`. +* [BUG #988](https://github.com/BurntSushi/ripgrep/issues/988): + Fix a bug in the `ignore` crate that prevented the use of explicit ignore + files after disabling all other ignore rules. +* [BUG #995](https://github.com/BurntSushi/ripgrep/issues/995): + Respect `$XDG_CONFIG_DIR/git/config` for detecting `core.excludesFile`. + + +0.8.1 (2018-02-20) +================== +This is a patch release of ripgrep that primarily fixes regressions introduced +in 0.8.0 (#820 and #824) in directory traversal on Windows. These regressions +do not impact non-Windows users. + +Feature enhancements: + +* Added or improved file type filtering for csv and VHDL. +* [FEATURE #798](https://github.com/BurntSushi/ripgrep/issues/798): + Add `underline` support to `termcolor` and ripgrep. See documentation on the + `--colors` flag for details. + +Bug fixes: + +* [BUG #684](https://github.com/BurntSushi/ripgrep/issues/684): + Improve documentation for the `--ignore-file` flag. +* [BUG #789](https://github.com/BurntSushi/ripgrep/issues/789): + Don't show `(rev )` if the revision wasn't available during the build. +* [BUG #791](https://github.com/BurntSushi/ripgrep/issues/791): + Add man page to ARM release. 
+* [BUG #797](https://github.com/BurntSushi/ripgrep/issues/797): + Improve documentation for "intense" setting in `termcolor`. +* [BUG #800](https://github.com/BurntSushi/ripgrep/issues/800): + Fix a bug in the `ignore` crate for custom ignore files. This had no impact + on ripgrep. +* [BUG #807](https://github.com/BurntSushi/ripgrep/issues/807): + Fix a bug where `rg --hidden .` behaved differently from `rg --hidden ./`. +* [BUG #815](https://github.com/BurntSushi/ripgrep/issues/815): + Clarify a common failure mode in user guide. +* [BUG #820](https://github.com/BurntSushi/ripgrep/issues/820): + Fixes a bug on Windows where symlinks were followed even if not requested. +* [BUG #824](https://github.com/BurntSushi/ripgrep/issues/824): + Fix a performance regression in directory traversal on Windows. + + +0.8.0 (2018-02-11) +================== +This is a new minor version release of ripgrep that satisfies several popular +feature requests (config files, search compressed files, true colors), fixes +many bugs and improves the quality of life for ripgrep maintainers. This +release also includes greatly improved documentation in the form of a +[User Guide](GUIDE.md) and a [FAQ](FAQ.md). + +This release increases the **minimum supported Rust version** from 1.17 to +1.20. + +**BREAKING CHANGES**: + +Note that these are all very minor and unlikely to impact most users. + +* In order to support configuration files, flag overrides needed to be + rethought. In some cases, this changed ripgrep's behavior. For example, + in ripgrep 0.7.1, `rg foo -s -i` will perform a case sensitive search + since the `-s/--case-sensitive` flag was defined to always take precedence + over the `-i/--ignore-case` flag, regardless of position. In ripgrep 0.8.0 + however, the override rule for all flags has changed to "the most recent + flag wins among competing flags." That is, `rg foo -s -i` now performs a + case insensitive search.
+* The `-M/--max-columns` flag was tweaked so that specifying a value of `0` + now makes ripgrep behave as if the flag was absent. This makes it possible + to set a default value in a configuration file and then override it. The + previous ripgrep behavior was to suppress all matching non-empty lines. +* In all globs, `[^...]` is now equivalent to `[!...]` (indicating class + negation). Previously, `^` had no special significance in a character class. +* For **downstream packagers**, the directory hierarchy in ripgrep's archive + releases has changed. The root directory now only contains the executable, + README and license. There is now a new directory called `doc` which contains + the man page (previously in the root), a user guide (new), a FAQ (new) and + the CHANGELOG (previously not included in release). The `complete` + directory remains the same. + +Feature enhancements: + +* Added or improved file type filtering for + Apache Avro, C++, GN, Google Closure Templates, Jupyter notebooks, man pages, + Protocol Buffers, Smarty and Web IDL. +* [FEATURE #196](https://github.com/BurntSushi/ripgrep/issues/196): + Support a configuration file. See + [the new user guide](GUIDE.md#configuration-file) + for details. +* [FEATURE #261](https://github.com/BurntSushi/ripgrep/issues/261): + Add extended or "true" color support. Works in Windows 10! + [See the FAQ for details.](FAQ.md#colors) +* [FEATURE #539](https://github.com/BurntSushi/ripgrep/issues/539): + Search gzip, bzip2, lzma or xz files when given `-z/--search-zip` flag. +* [FEATURE #544](https://github.com/BurntSushi/ripgrep/issues/544): + Add support for line number alignment via a new `--line-number-width` flag. +* [FEATURE #654](https://github.com/BurntSushi/ripgrep/pull/654): + Support linuxbrew in ripgrep's Brew tap. +* [FEATURE #673](https://github.com/BurntSushi/ripgrep/issues/673): + Bring back `.rgignore` files. (A higher precedence, application-specific + version of `.ignore`.)
+* [FEATURE #676](https://github.com/BurntSushi/ripgrep/issues/676): + Provide ARM binaries. **WARNING:** This will be provided on a best effort + basis. +* [FEATURE #709](https://github.com/BurntSushi/ripgrep/issues/709): + Suggest `-F/--fixed-strings` flag on a regex syntax error. +* [FEATURE #740](https://github.com/BurntSushi/ripgrep/issues/740): + Add a `--passthru` flag that causes ripgrep to print every line it reads. +* [FEATURE #785](https://github.com/BurntSushi/ripgrep/pull/785): + Overhaul documentation. Cleaned up README, added user guide and FAQ. +* [FEATURE 7f5c07](https://github.com/BurntSushi/ripgrep/commit/7f5c07434be92103b5bf7e216b9c7494aed2d8cb): + Add hidden flags for convenient overrides (e.g., `--no-text`). + +Bug fixes: + +* [BUG #553](https://github.com/BurntSushi/ripgrep/issues/553): + Permit flags to be repeated. +* [BUG #633](https://github.com/BurntSushi/ripgrep/issues/633): + Fix a bug where ripgrep would panic on Windows while following symlinks. +* [BUG #649](https://github.com/BurntSushi/ripgrep/issues/649): + Fix handling of `!**/` in `.gitignore`. +* [BUG #663](https://github.com/BurntSushi/ripgrep/issues/663): + **BREAKING CHANGE:** Support `[^...]` glob syntax (as identical to `[!...]`). +* [BUG #693](https://github.com/BurntSushi/ripgrep/issues/693): + Don't display context separators when not printing matches. +* [BUG #705](https://github.com/BurntSushi/ripgrep/issues/705): + Fix a bug that prevented ripgrep from searching OneDrive directories. +* [BUG #717](https://github.com/BurntSushi/ripgrep/issues/717): + Improve `--smart-case` uppercase character detection. +* [BUG #725](https://github.com/BurntSushi/ripgrep/issues/725): + Clarify that globs do not override explicitly given paths to search. +* [BUG #742](https://github.com/BurntSushi/ripgrep/pull/742): + Write ANSI reset code as `\x1B[0m` instead of `\x1B[m`. +* [BUG #747](https://github.com/BurntSushi/ripgrep/issues/747): + Remove `yarn.lock` from YAML file type. 
+* [BUG #760](https://github.com/BurntSushi/ripgrep/issues/760): + ripgrep can now search `/sys/devices/system/cpu/vulnerabilities/*` files. +* [BUG #761](https://github.com/BurntSushi/ripgrep/issues/761): + Fix handling of gitignore patterns that contain a `/`. +* [BUG #776](https://github.com/BurntSushi/ripgrep/pull/776): + **BREAKING CHANGE:** `--max-columns=0` now disables the limit. +* [BUG #779](https://github.com/BurntSushi/ripgrep/issues/779): + Clarify documentation for `--files-without-match`. +* [BUG #780](https://github.com/BurntSushi/ripgrep/issues/780), + [BUG #781](https://github.com/BurntSushi/ripgrep/issues/781): + Fix bug where ripgrep missed some matching lines. + +Maintenance fixes: + +* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772): + Drop `env_logger` in favor of a simpler logger to avoid many new dependencies. +* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772): + Add git revision hash to ripgrep's version string. +* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772): + (Seemingly) improve compile times. +* [MAINT #776](https://github.com/BurntSushi/ripgrep/pull/776): + Automatically generate man page during build. +* [MAINT #786](https://github.com/BurntSushi/ripgrep/pull/786): + Remove use of `unsafe` in `globset`. :tada: +* [MAINT e9d448](https://github.com/BurntSushi/ripgrep/commit/e9d448e93bb4e1fb3b0c1afc29adb5af6ed5283d): + Add an issue template (has already drastically improved bug reports). +* [MAINT ae2d03](https://github.com/BurntSushi/ripgrep/commit/ae2d036dd4ba2a46acac9c2d77c32e7c667eb850): + Remove the `compile` script. + +Friends of ripgrep: + +I'd like to extend my gratitude to +[@balajisivaraman](https://github.com/balajisivaraman) +for their recent hard work in a number of areas, and in particular, for +implementing the "search compressed files" feature. Their work in sketching out +a specification for that and other work has been exemplary.
+ +Thanks +[@balajisivaraman](https://github.com/balajisivaraman)! + + +0.7.1 (2017-10-22) +================== +This is a patch release of ripgrep that includes a fix to a very bad regression +introduced in ripgrep 0.7.0. + +Bug fixes: + +* [BUG #648](https://github.com/BurntSushi/ripgrep/issues/648): + Fix a bug where it was very easy to exceed standard file descriptor limits. + + +0.7.0 (2017-10-20) +================== +This is a new minor version release of ripgrep that includes mostly bug fixes. + +ripgrep continues to require Rust 1.17, and there are no known breaking changes +introduced in this release. + +Feature enhancements: + +* Added or improved file type filtering for config & license files, Elm, + Purescript, Standard ML, sh, systemd, Terraform +* [FEATURE #593](https://github.com/BurntSushi/ripgrep/pull/593): + Using both `-o/--only-matching` and `-r/--replace` does the right thing. + +Bug fixes: + +* [BUG #200](https://github.com/BurntSushi/ripgrep/issues/200): + ripgrep will stop when its pipe is closed. +* [BUG #402](https://github.com/BurntSushi/ripgrep/issues/402): + Fix context printing bug when the `-m/--max-count` flag is used. +* [BUG #521](https://github.com/BurntSushi/ripgrep/issues/521): + Fix interaction between `-r/--replace` and terminal colors. +* [BUG #559](https://github.com/BurntSushi/ripgrep/issues/559): + Ignore test that tried reading a non-UTF-8 file path on macOS. +* [BUG #599](https://github.com/BurntSushi/ripgrep/issues/599): + Fix color escapes on empty matches. +* [BUG #600](https://github.com/BurntSushi/ripgrep/issues/600): + Avoid expensive (on Windows) file handle check when using --files. +* [BUG #618](https://github.com/BurntSushi/ripgrep/issues/618): + Clarify installation instructions for Ubuntu users. +* [BUG #633](https://github.com/BurntSushi/ripgrep/issues/633): + Faster symlink loop checking on Windows.
+ + 0.6.0 (2017-08-23) ================== This is a new minor version release of ripgrep that includes many bug fixes diff -Nru ripgrep-0.6.0/ci/before_deploy.sh ripgrep-0.10.0.3/ci/before_deploy.sh --- ripgrep-0.6.0/ci/before_deploy.sh 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ci/before_deploy.sh 2018-09-10 21:10:55.000000000 +0000 @@ -1,35 +1,56 @@ -# `before_deploy` phase: here we package the build artifacts +#!/bin/bash + +# package the build artifacts set -ex -. $(dirname $0)/utils.sh +. "$(dirname $0)/utils.sh" # Generate artifacts for release mk_artifacts() { - RUSTFLAGS="-C target-feature=+ssse3" \ - cargo build --target $TARGET --release --features simd-accel + if is_arm; then + cargo build --target "$TARGET" --release + else + # Technically, MUSL builds will force PCRE2 to get statically compiled, + # but we also want PCRE2 statically build for macOS binaries. + PCRE2_SYS_STATIC=1 cargo build --target "$TARGET" --release --features 'pcre2' + fi } mk_tarball() { - # create a "staging" directory - local td=$(mktempd) - local out_dir=$(pwd) + # When cross-compiling, use the right `strip` tool on the binary. + local gcc_prefix="$(gcc_prefix)" + # Create a temporary dir that contains our staging area. + # $tmpdir/$name is what eventually ends up as the deployed archive. + local tmpdir="$(mktemp -d)" local name="${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}" - mkdir "$td/$name" - mkdir "$td/$name/complete" + local staging="$tmpdir/$name" + mkdir -p "$staging"/{complete,doc} + # The deployment directory is where the final archive will reside. + # This path is known by the .travis.yml configuration. + local out_dir="$(pwd)/deployment" + mkdir -p "$out_dir" + # Find the correct (most recent) Cargo "out" directory. The out directory + # contains shell completion files and the man page. + local cargo_out_dir="$(cargo_out_dir "target/$TARGET")" + + # Copy the ripgrep binary and strip it. 
+ cp "target/$TARGET/release/rg" "$staging/rg" + "${gcc_prefix}strip" "$staging/rg" + # Copy the licenses and README. + cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$staging/" + # Copy documentation and man page. + cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$staging/doc/" + if command -V a2x 2>&1 > /dev/null; then + # The man page should only exist if we have asciidoc installed. + cp "$cargo_out_dir/rg.1" "$staging/doc/" + fi + # Copy shell completion files. + cp "$cargo_out_dir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/" + cp complete/_rg "$staging/complete/" - cp target/$TARGET/release/rg "$td/$name/rg" - strip "$td/$name/rg" - cp {doc/rg.1,README.md,UNLICENSE,COPYING,LICENSE-MIT} "$td/$name/" - cp \ - target/$TARGET/release/build/ripgrep-*/out/{rg.bash-completion,rg.fish,_rg.ps1} \ - "$td/$name/complete/" - cp complete/_rg "$td/$name/complete/" - - pushd $td - tar czf "$out_dir/$name.tar.gz" * - popd - rm -r $td + (cd "$tmpdir" && tar czf "$out_dir/$name.tar.gz" "$name") + rm -rf "$tmpdir" } main() { diff -Nru ripgrep-0.6.0/ci/build_deb.sh ripgrep-0.10.0.3/ci/build_deb.sh --- ripgrep-0.6.0/ci/build_deb.sh 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/ci/build_deb.sh 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,43 @@ +#!/bin/bash + +set -e + +# This script builds a binary dpkg for Debian based distros. It does not +# currently run in CI, and is instead run manually and the resulting dpkg is +# uploaded to GitHub via the web UI. +# +# Note that this requires 'cargo deb', which can be installed with +# 'cargo install cargo-deb'. +# +# This should be run from the root of the ripgrep repo. + +if ! command -V cargo-deb > /dev/null 2>&1; then + echo "cargo-deb command missing" >&2 + exit 1 +fi + +# 'cargo deb' does not seem to provide a way to specify an asset that is +# created at build time, such as ripgrep's man page. 
To work around this, +# we force a debug build, copy out the man page (and shell completions) +# produced from that build, put it into a predictable location and then build +# the deb, which knows where to look. + +DEPLOY_DIR=deployment/deb +mkdir -p "$DEPLOY_DIR" +cargo build + +# Find and copy man page. +manpage="$(find ./target/debug -name rg.1 -print0 | xargs -0 ls -t | head -n1)" +cp "$manpage" "$DEPLOY_DIR/" + +# Do the same for shell completions. +compbash="$(find ./target/debug -name rg.bash -print0 | xargs -0 ls -t | head -n1)" +cp "$compbash" "$DEPLOY_DIR/" +compfish="$(find ./target/debug -name rg.fish -print0 | xargs -0 ls -t | head -n1)" +cp "$compfish" "$DEPLOY_DIR/" +compzsh="complete/_rg" +cp "$compzsh" "$DEPLOY_DIR/" + +# Since we're distributing the dpkg, we don't know whether the user will have +# PCRE2 installed, so just do a static build. +PCRE2_SYS_STATIC=1 cargo deb diff -Nru ripgrep-0.6.0/ci/install.sh ripgrep-0.10.0.3/ci/install.sh --- ripgrep-0.6.0/ci/install.sh 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ci/install.sh 2018-09-10 21:10:55.000000000 +0000 @@ -1,57 +1,61 @@ -# `install` phase: install stuff needed for the `script` phase +#!/bin/bash -set -ex +# install stuff needed for the `script` phase -. $(dirname $0)/utils.sh +# Where rustup gets installed. +export PATH="$PATH:$HOME/.cargo/bin" -install_c_toolchain() { - case $TARGET in - aarch64-unknown-linux-gnu) - sudo apt-get install -y --no-install-recommends \ - gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross - ;; - *) - # For other targets, this is handled by addons.apt.packages in .travis.yml - ;; - esac -} +set -ex -install_rustup() { - curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain=$TRAVIS_RUST_VERSION +. 
"$(dirname $0)/utils.sh" +install_rustup() { + curl https://sh.rustup.rs -sSf \ + | sh -s -- -y --default-toolchain="$TRAVIS_RUST_VERSION" rustc -V cargo -V } -install_standard_crates() { +install_targets() { if [ $(host) != "$TARGET" ]; then rustup target add $TARGET fi } +install_osx_dependencies() { + if ! is_osx; then + return + fi + + brew install asciidoc docbook-xsl +} + configure_cargo() { local prefix=$(gcc_prefix) + if [ -n "${prefix}" ]; then + local gcc_suffix= + if [ -n "$GCC_VERSION" ]; then + gcc_suffix="-$GCC_VERSION" + fi + local gcc="${prefix}gcc${gcc_suffix}" - if [ ! -z $prefix ]; then # information about the cross compiler - ${prefix}gcc -v + "${gcc}" -v # tell cargo which linker to use for cross compilation mkdir -p .cargo cat >>.cargo/config <&2 'File not found: %s\n' $rg + print -r >&2 "File not found: $rg" return 1 } [[ -e $_rg ]] || { - printf >&2 'File not found: %s\n' $_rg + print -r >&2 "File not found: $_rg" return 1 } - printf 'Comparing options:\n-%s\n+%s\n' $rg $_rg + print -rl - 'Comparing options:' "-$rg" "+$_rg" # 'Parse' options out of the `--help` output. To prevent false positives we - # only look at lines where the first non-white-space character is `-` + # only look at lines where the first non-white-space character is `-`, or + # where a long option starting with certain letters (see `_rg`) is found. + # Occasionally we may have to handle some manually, however help_args=( ${(f)"$( $rg --help | - $rg -- '^\s*-' | - $rg -io -- '[\t ,](-[a-z0-9]|--[a-z0-9-]+)\b' | - tr -d '\t ,' | + $rg -i -- '^\s+--?[a-z0-9]|--[imnp]' | + $rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9]|--[a-z0-9-]+)\\b' | + $rg -v -- --print0 | # False positives sort -u )"} ) # 'Parse' options out of the completion function comp_args=( ${(f)"$( get_comp_args $_rg )"} ) + # Note that we currently exclude hidden (!...) 
options; matching these + # properly against the `--help` output could be irritating comp_args=( ${comp_args#\(*\)} ) # Strip excluded options comp_args=( ${comp_args#\*} ) # Strip repetition indicator comp_args=( ${comp_args%%-[:[]*} ) # Strip everything after -optname- comp_args=( ${comp_args%%[:+=[]*} ) # Strip everything after other optspecs comp_args=( ${comp_args##[^-]*} ) # Remove non-options - - # This probably isn't necessary, but we should ensure the same order - comp_args=( ${(f)"$( printf '%s\n' $comp_args | sort -u )"} ) + comp_args=( ${(f)"$( print -rl - $comp_args | sort -u )"} ) (( $#help_args )) || { - printf >&2 'Failed to get help_args\n' + print -r >&2 'Failed to get help_args' return 1 } (( $#comp_args )) || { - printf >&2 'Failed to get comp_args\n' + print -r >&2 'Failed to get comp_args' return 1 } @@ -73,12 +76,12 @@ diff -U2 \ --label '`rg --help`' \ --label '`_rg`' \ - =( printf '%s\n' $help_args ) =( printf '%s\n' $comp_args ) + =( print -rl - $help_args ) =( print -rl - $comp_args ) else diff -U2 \ -L '`rg --help`' \ -L '`_rg`' \ - =( printf '%s\n' $help_args ) =( printf '%s\n' $comp_args ) + =( print -rl - $help_args ) =( print -rl - $comp_args ) fi )" @@ -91,4 +94,4 @@ return 0 } -main "${@}" +main "$@" diff -Nru ripgrep-0.6.0/ci/utils.sh ripgrep-0.10.0.3/ci/utils.sh --- ripgrep-0.6.0/ci/utils.sh 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ci/utils.sh 2018-09-10 21:10:55.000000000 +0000 @@ -1,5 +1,19 @@ -mktempd() { - echo $(mktemp -d 2>/dev/null || mktemp -d -t tmp) +#!/bin/bash + +# Various utility functions used through CI. + +# Finds Cargo's `OUT_DIR` directory from the most recent build. +# +# This requires one parameter corresponding to the target directory +# to search for the build output. +cargo_out_dir() { + # This works by finding the most recent stamp file, which is produced by + # every ripgrep build. 
+ target_dir="$1" + find "$target_dir" -name ripgrep-stamp -print0 \ + | xargs -0 ls -t \ + | head -n1 \ + | xargs dirname } host() { @@ -13,12 +27,26 @@ esac } -gcc_prefix() { +architecture() { case "$TARGET" in - aarch64-unknown-linux-gnu) - echo aarch64-linux-gnu- + x86_64-*) + echo amd64 + ;; + i686-*|i586-*|i386-*) + echo i386 + ;; + arm*-unknown-linux-gnueabihf) + echo armhf ;; - arm*-gnueabihf) + *) + die "architecture: unexpected target $TARGET" + ;; + esac +} + +gcc_prefix() { + case "$(architecture)" in + armhf) echo arm-linux-gnueabihf- ;; *) @@ -27,30 +55,30 @@ esac } -dobin() { - [ -z $MAKE_DEB ] && die 'dobin: $MAKE_DEB not set' - [ $# -lt 1 ] && die "dobin: at least one argument needed" - - local f prefix=$(gcc_prefix) - for f in "$@"; do - install -m0755 $f $dtd/debian/usr/bin/ - ${prefix}strip -s $dtd/debian/usr/bin/$(basename $f) - done +is_x86() { + case "$(architecture)" in + amd64|i386) return 0 ;; + *) return 1 ;; + esac } -architecture() { - case $1 in - x86_64-unknown-linux-gnu|x86_64-unknown-linux-musl) - echo amd64 - ;; - i686-unknown-linux-gnu|i686-unknown-linux-musl) - echo i386 - ;; - arm*-unknown-linux-gnueabihf) - echo armhf - ;; - *) - die "architecture: unexpected target $TARGET" - ;; +is_arm() { + case "$(architecture)" in + armhf) return 0 ;; + *) return 1 ;; + esac +} + +is_linux() { + case "$TRAVIS_OS_NAME" in + linux) return 0 ;; + *) return 1 ;; + esac +} + +is_osx() { + case "$TRAVIS_OS_NAME" in + osx) return 0 ;; + *) return 1 ;; esac } diff -Nru ripgrep-0.6.0/compile ripgrep-0.10.0.3/compile --- ripgrep-0.6.0/compile 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/compile 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -#!/bin/sh - -# export RUSTFLAGS="-C target-feature=+ssse3" -# cargo build --release --features 'simd-accel' - -export RUSTFLAGS="-C target-cpu=native" -cargo build --release --features 'simd-accel avx-accel' -# cargo build --release --features 'simd-accel avx-accel' --target 
x86_64-unknown-linux-musl diff -Nru ripgrep-0.6.0/complete/_rg ripgrep-0.10.0.3/complete/_rg --- ripgrep-0.6.0/complete/_rg 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/complete/_rg 2018-09-10 21:10:55.000000000 +0000 @@ -6,155 +6,338 @@ # Run ci/test_complete.sh after building to ensure that the options supported by # this function stay in synch with the `rg` binary. # -# @see https://github.com/zsh-users/zsh/blob/master/Etc/completion-style-guide +# For convenience, a completion reference guide is included at the bottom of +# this file. # -# Based on code from the zsh-users project — see copyright notice below. +# Originally based on code from the zsh-users project — see copyright notice +# below. _rg() { - local state_descr ret curcontext="${curcontext:-}" - local -a context line state - local -A opt_args val_args - local -a rg_args - - # Sort by long option name to match `rg --help` - rg_args=( - '(-A -C --after-context --context)'{-A+,--after-context=}'[specify lines to show after each match]:number of lines' - '(-B -C --before-context --context)'{-B+,--before-context=}'[specify lines to show before each match]:number of lines' - '(-i -s -S --ignore-case --case-sensitive --smart-case)'{-s,--case-sensitive}'[search case-sensitively]' - '--color=[specify when to use colors in output]:when:( never auto always ansi )' - '*--colors=[specify color settings and styles]: :->colorspec' - '--column[show column numbers]' - '(-A -B -C --after-context --before-context --context)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines' + local curcontext=$curcontext no='!' descr ret=1 + local -a context line state state_descr args tmp suf + local -A opt_args + + # ripgrep has many options which negate the effect of a more common one — for + # example, `--no-column` to negate `--column`, and `--messages` to negate + # `--no-messages`. 
There are so many of these, and they're so infrequently + # used, that some users will probably find it irritating if they're completed + # indiscriminately, so let's not do that unless either the current prefix + # matches one of those negation options or the user has the `complete-all` + # style set. Note that this prefix check has to be updated manually to account + # for all of the potential negation options listed below! + if + # We also want to list all of these options during testing + [[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] || + # (--[imnp]* => --ignore*, --messages, --no-*, --pcre2-unicode) + [[ $PREFIX$SUFFIX == --[imnp]* ]] || + zstyle -t ":complete:$curcontext:*" complete-all + then + no= + fi + + # We make heavy use of argument groups here to prevent the option specs from + # growing unwieldy. These aren't supported in zsh <5.4, though, so we'll strip + # them out below if necessary. This makes the exclusions inaccurate on those + # older versions, but oh well — it's not that big a deal + args=( + + '(exclusive)' # Misc. 
fully exclusive options + '(: * -)'{-h,--help}'[display help information]' + '(: * -)'{-V,--version}'[display version information]' + + + '(buffered)' # buffering options + '--line-buffered[force line buffering]' + $no"--no-line-buffered[don't force line buffering]" + '--block-buffered[force block buffering]' + $no"--no-block-buffered[don't force block buffering]" + + + '(case)' # Case-sensitivity options + {-i,--ignore-case}'[search case-insensitively]' + {-s,--case-sensitive}'[search case-sensitively]' + {-S,--smart-case}'[search case-insensitively if pattern is all lowercase]' + + + '(context-a)' # Context (after) options + '(context-c)'{-A+,--after-context=}'[specify lines to show after each match]:number of lines' + + + '(context-b)' # Context (before) options + '(context-c)'{-B+,--before-context=}'[specify lines to show before each match]:number of lines' + + + '(context-c)' # Context (combined) options + '(context-a context-b)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines' + + + '(column)' # Column options + '--column[show column numbers for matches]' + $no"--no-column[don't show column numbers for matches]" + + + '(count)' # Counting options + {-c,--count}'[only show count of matching lines for each file]' + '--count-matches[only show count of individual matches for each file]' + + + '(encoding)' # Encoding options + {-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings' + $no'--no-encoding[use default text encoding]' + + + file # File-input options + '(1)*'{-f+,--file=}'[specify file containing patterns to search for]: :_files' + + + '(file-match)' # Files with/without match options + '(stats)'{-l,--files-with-matches}'[only show names of files with matches]' + '(stats)--files-without-match[only show names of files without matches]' + + + '(file-name)' # File-name options + {-H,--with-filename}'[show file name for matches]' + "--no-filename[don't show file name for matches]" + + + 
'(file-system)' # File system options + "--one-file-system[don't descend into directories on other file systems]" + $no'--no-one-file-system[descend into directories on other file systems]' + + + '(fixed)' # Fixed-string options + {-F,--fixed-strings}'[treat pattern as literal string instead of regular expression]' + $no"--no-fixed-strings[don't treat pattern as literal string]" + + + '(follow)' # Symlink-following options + {-L,--follow}'[follow symlinks]' + $no"--no-follow[don't follow symlinks]" + + + glob # File-glob options + '*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob' + '*--iglob=[include/exclude files matching specified case-insensitive glob]:glob' + + + '(heading)' # Heading options + '(pretty-vimgrep)--heading[show matches grouped by file name]' + "(pretty-vimgrep)--no-heading[don't show matches grouped by file name]" + + + '(hidden)' # Hidden-file options + '--hidden[search hidden files and directories]' + $no"--no-hidden[don't search hidden files and directories]" + + + '(ignore)' # Ignore-file options + "(--no-ignore-global --no-ignore-parent --no-ignore-vcs)--no-ignore[don't respect ignore files]" + $no'(--ignore-global --ignore-parent --ignore-vcs)--ignore[respect ignore files]' + + + '(ignore-global)' # Global ignore-file options + "--no-ignore-global[don't respect global ignore files]" + $no'--ignore-global[respect global ignore files]' + + + '(ignore-parent)' # Parent ignore-file options + "--no-ignore-parent[don't respect ignore files in parent directories]" + $no'--ignore-parent[respect ignore files in parent directories]' + + + '(ignore-vcs)' # VCS ignore-file options + "--no-ignore-vcs[don't respect version control ignore files]" + $no'--ignore-vcs[respect version control ignore files]' + + + '(json)' # JSON options + '--json[output results in JSON Lines format]' + $no"--no-json[don't output results in JSON Lines format]" + + + '(line-number)' # Line-number options + {-n,--line-number}'[show line numbers for matches]' 
+ {-N,--no-line-number}"[don't show line numbers for matches]" + + + '(line-terminator)' # Line-terminator options + '--crlf[use CRLF as line terminator]' + $no"--no-crlf[don't use CRLF as line terminator]" + '(text)--null-data[use NUL as line terminator]' + + + '(max-depth)' # Directory-depth options + '--max-depth=[specify max number of directories to descend]:number of directories' + '!--maxdepth=:number of directories' + + + '(messages)' # Error-message options + '(--no-ignore-messages)--no-messages[suppress some error messages]' + $no"--messages[don't suppress error messages affected by --no-messages]" + + + '(messages-ignore)' # Ignore-error message options + "--no-ignore-messages[don't show ignore-file parse error messages]" + $no'--ignore-messages[show ignore-file parse error messages]' + + + '(mmap)' # mmap options + '--mmap[search using memory maps when possible]' + "--no-mmap[don't search using memory maps]" + + + '(multiline)' # Multiline options + {-U,--multiline}'[permit matching across multiple lines]' + $no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]' + + + '(multiline-dotall)' # Multiline DOTALL options + '(--no-multiline)--multiline-dotall[allow "." 
to match newline (with -U)]' + $no"(--no-multiline)--no-multiline-dotall[don't allow \".\" to match newline (with -U)]" + + + '(only)' # Only-match options + {-o,--only-matching}'[show only matching part of each line]' + + + '(passthru)' # Pass-through options + '(--vimgrep)--passthru[show both matching and non-matching lines]' + '!(--vimgrep)--passthrough' + + + '(pcre2)' # PCRE2 options + {-P,--pcre2}'[enable matching with PCRE2]' + $no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]' + + + '(pcre2-unicode)' # PCRE2 Unicode options + $no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]' + '(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]' + + + '(pre)' # Preprocessing options + '(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e' + $no'--no-pre[disable preprocessor utility]' + + + pre-glob # Preprocessing glob options + '*--pre-glob[include/exclude files for preprocessing with --pre]' + + + '(pretty-vimgrep)' # Pretty/vimgrep display options + '(heading)'{-p,--pretty}'[alias for --color=always --heading -n]' + '(heading passthru)--vimgrep[show results in vim-compatible format]' + + + regexp # Explicit pattern options + '(1 file)*'{-e+,--regexp=}'[specify pattern]:pattern' + + + '(replace)' # Replacement options + {-r+,--replace=}'[specify string used to replace matches]:replace string' + + + '(sort)' # File-sorting options + '(threads)--sort=[sort results in ascending order (disables parallelism)]:sort method:(( + none\:"no sorting" + path\:"sort by file path" + modified\:"sort by last modified time" + accessed\:"sort by last accessed time" + created\:"sort by creation time" + ))' + '(threads)--sortr=[sort results in descending order (disables parallelism)]:sort method:(( + none\:"no sorting" + path\:"sort by file path" + modified\:"sort by last modified time" + accessed\:"sort by last accessed time" + created\:"sort by creation time" + ))' + 
'!(threads)--sort-files[sort results by file path (disables parallelism)]' + + + '(stats)' # Statistics options + '(--files file-match)--stats[show search statistics]' + $no"--no-stats[don't show search statistics]" + + + '(text)' # Binary-search options + {-a,--text}'[search binary files as if they were text]' + $no"(--null-data)--no-text[don't search binary files as if they were text]" + + + '(threads)' # Thread-count options + '(sort)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads' + + + '(trim)' # Trim options + '--trim[trim any ASCII whitespace prefix from each line]' + $no"--no-trim[don't trim ASCII whitespace prefix from each line]" + + + type # Type options + '*'{-t+,--type=}'[only search files matching specified type]: :_rg_types' + '*--type-add=[add new glob for specified file type]: :->typespec' + '*--type-clear=[clear globs previously defined for specified file type]: :_rg_types' + # This should actually be exclusive with everything but other type options + '(: *)--type-list[show all supported file types and their associated globs]' + '*'{-T+,--type-not=}"[don't search files matching specified file type]: :_rg_types" + + + '(word-line)' # Whole-word/line match options + {-w,--word-regexp}'[only show matches surrounded by word boundaries]' + {-x,--line-regexp}'[only show matches surrounded by line boundaries]' + + + '(zip)' # Compression options + '(--pre)'{-z,--search-zip}'[search in compressed files]' + $no"--no-search-zip[don't search in compressed files]" + + + misc # Other options — no need to separate these at the moment + '(-b --byte-offset)'{-b,--byte-offset}'[show 0-based byte offset for each matching line]' + '--color=[specify when to use colors in output]:when:(( + never\:"never use colors" + auto\:"use colors or not based on stdout, TERM, etc." 
+ always\:"always use colors" + ansi\:"always use ANSI colors (even on Windows)" + ))' + '*--colors=[specify color and style settings]: :->colorspec' '--context-separator=[specify string used to separate non-continuous context lines in output]:separator' - '(-c --count)'{-c,--count}'[only show count of matches for each file]' '--debug[show debug messages]' - '--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size' - '(-E --encoding)'{-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings' - '*'{-f+,--file=}'[specify file containing patterns to search for]:file:_files' - "(1)--files[show each file that would be searched (but don't search)]" - '(-l --files-with-matches --files-without-match)'{-l,--files-with-matches}'[only show names of files with matches]' - '(-l --files-with-matches --files-without-match)--files-without-match[only show names of files without matches]' - '(-F --fixed-strings)'{-F,--fixed-strings}'[treat pattern as literal string instead of regular expression]' - '(-L --follow)'{-L,--follow}'[follow symlinks]' - '*'{-g+,--glob=}'[include or exclude files for searching that match the specified glob]:glob' - '(: -)'{-h,--help}'[display help information]' - '(-p --no-heading --pretty --vimgrep)--heading[show matches grouped by file name]' - '--hidden[search hidden files and directories]' - '*--iglob=[include or exclude files for searching that match the specified case-insensitive glob]:glob' - '(-i -s -S --case-sensitive --ignore-case --smart-case)'{-i,--ignore-case}'[search case-insensitively]' - '--ignore-file=[specify additional ignore file]:file:_files' + '--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)' + "(1 stats)--files[show each file that would be searched (but don't search)]" + '*--ignore-file=[specify additional ignore file]:ignore file:_files' '(-v --invert-match)'{-v,--invert-match}'[invert matching]' - '(-n -N --line-number --no-line-number)'{-n,--line-number}'[show line 
numbers]' - '(-w -x --line-regexp --word-regexp)'{-x,--line-regexp}'[only show matches surrounded by line boundaries]' '(-M --max-columns)'{-M+,--max-columns=}'[specify max length of lines to print]:number of bytes' '(-m --max-count)'{-m+,--max-count=}'[specify max number of matches per file]:number of matches' - '--max-filesize=[specify size above which files should be ignored]:file size' - '--maxdepth=[specify max number of directories to descend]:number of directories' - '(--mmap --no-mmap)--mmap[search using memory maps when possible]' - '(-H --with-filename --no-filename)--no-filename[suppress all file names]' - "(-p --heading --pretty --vimgrep)--no-heading[don't group matches by file name]" - "(--no-ignore-parent)--no-ignore[don't respect ignore files]" - "--no-ignore-parent[don't respect ignore files in parent directories]" - "--no-ignore-vcs[don't respect version control ignore files]" - '(-n -N --line-number --no-line-number)'{-N,--no-line-number}'[suppress line numbers]' - '--no-messages[suppress all error messages]' - "(--mmap --no-mmap)--no-mmap[don't search using memory maps]" + '--max-filesize=[specify size above which files should be ignored]:file size (bytes)' + "--no-config[don't load configuration files]" '(-0 --null)'{-0,--null}'[print NUL byte after file names]' - '(-o --only-matching -r --replace)'{-o,--only-matching}'[show only matching part of each line]' '--path-separator=[specify path separator to use when printing file names]:separator' - '(-p --heading --no-heading --pretty --vimgrep)'{-p,--pretty}'[alias for --color=always --heading -n]' '(-q --quiet)'{-q,--quiet}'[suppress normal output]' - '--regex-size-limit=[specify upper size limit of compiled regex]:regex size' - '(1 -f --file)*'{-e+,--regexp=}'[specify pattern]:pattern' - '(-o --only-matching -r --replace)'{-r+,--replace=}'[specify string used to replace matches]:replace string' - '(-i -s -S --ignore-case --case-sensitive --smart-case)'{-S,--smart-case}'[search case-insensitively 
if the pattern is all lowercase]' - '(-j --threads)--sort-files[sort results by file path (disables parallelism)]' - '(-a --text)'{-a,--text}'[search binary files as if they were text]' - '(-j --sort-files --threads)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads' - '*'{-t+,--type=}'[only search files matching specified type]: :_rg_types' - '*--type-add=[add new glob for file type]: :->typespec' - '*--type-clear=[clear globs previously defined for specified file type]: :_rg_types' - # This should actually be exclusive with everything but other type options - '(:)--type-list[show all supported file types and their associated globs]' - '*'{-T+,--type-not=}"[don't search files matching specified type]: :_rg_types" + '--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)' '*'{-u,--unrestricted}'[reduce level of "smart" searching]' - '(: -)'{-V,--version}'[display version information]' - '(-p --heading --no-heading --pretty)--vimgrep[show results in vim-compatible format]' - '(-H --no-filename --with-filename)'{-H,--with-filename}'[prefix each match with name of file that contains it]' - '(-w -x --line-regexp --word-regexp)'{-w,--word-regexp}'[only show matches surrounded by word boundaries]' - '(-e -f --file --files --regexp --type-list)1: :_rg_pattern' - '(--type-list)*:file:_files' + + + operand # Operands + '(--files --type-list file regexp)1: :_guard "^-*" pattern' + '(--type-list)*: :_files' ) - [[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && { - printf '%s\n' "${rg_args[@]}" + # This is used with test_complete.sh to verify that there are no options + # listed in the help output that aren't also defined here + [[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] && { + print -rl - $args return 0 } - _arguments -s -S : "${rg_args[@]}" && return 0 + # Strip out argument groups where unsupported (see above) + [[ $ZSH_VERSION == (4|5.<0-3>)(.*)# ]] && + args=( 
${(@)args:#(#i)(+|[a-z0-9][a-z0-9_-]#|\([a-z0-9][a-z0-9_-]#\))} ) + + _arguments -C -s -S : $args && ret=0 + + case $state in + colorspec) + if [[ ${IPREFIX#--*=}$PREFIX == [^:]# ]]; then + suf=( -qS: ) + tmp=( + 'column:specify coloring for column numbers' + 'line:specify coloring for line numbers' + 'match:specify coloring for match text' + 'path:specify coloring for file names' + ) + descr='color/style type' + elif [[ ${IPREFIX#--*=}$PREFIX == (column|line|match|path):[^:]# ]]; then + suf=( -qS: ) + tmp=( + 'none:clear color/style for type' + 'bg:specify background color' + 'fg:specify foreground color' + 'style:specify text style' + ) + descr='color/style attribute' + elif [[ ${IPREFIX#--*=}$PREFIX == [^:]##:(bg|fg):[^:]# ]]; then + tmp=( black blue green red cyan magenta yellow white ) + descr='color name or r,g,b' + elif [[ ${IPREFIX#--*=}$PREFIX == [^:]##:style:[^:]# ]]; then + tmp=( {,no}bold {,no}intense {,no}underline ) + descr='style name' + else + _message -e colorspec 'no more arguments' + fi + + (( $#tmp )) && { + compset -P '*:' + _describe -t colorspec $descr tmp $suf && ret=0 + } + ;; + + typespec) + if compset -P '[^:]##:include:'; then + _sequence -s , _rg_types && ret=0 + # @todo This bit in particular could be better, but it's a little + # complex, and attempting to solve it seems to run us up against a crash + # bug — zsh # 40362 + elif compset -P '[^:]##:'; then + _message 'glob or include directive' && ret=1 + elif [[ ! -prefix *:* ]]; then + _rg_types -qS : && ret=0 + fi + ;; + esac - while (( $#state )); do - case "${state[1]}" in - colorspec) - # @todo I don't like this because it allows you to do weird things like - # `line:line:bg:`. 
Also, i would like the `compadd -q` behaviour - [[ -prefix *:none: ]] && return 1 - [[ -prefix *:*:*:* ]] && return 1 - - _values -S ':' 'color/style type' \ - 'column[specify coloring for column numbers]: :->attribute' \ - 'line[specify coloring for line numbers]: :->attribute' \ - 'match[specify coloring for match text]: :->attribute' \ - 'path[specify color for file names]: :->attribute' && return 0 - - [[ "${state}" == 'attribute' ]] && - _values -S ':' 'color/style attribute' \ - 'none[clear color/style for type]' \ - 'bg[specify background color]: :->color' \ - 'fg[specify foreground color]: :->color' \ - 'style[specify text style]: :->style' && return 0 - - [[ "${state}" == 'color' ]] && - _values -S ':' 'color value' \ - black blue green red cyan magenta yellow white && return 0 - - [[ "${state}" == 'style' ]] && - _values -S ':' 'style value' \ - bold nobold intense nointense && return 0 - ;; - - typespec) - if compset -P '[^:]##:include:'; then - _sequence -s ',' _rg_types && return 0 - # @todo This bit in particular could be better, but it's a little - # complex, and attempting to solve it seems to run us up against a crash - # bug — zsh # 40362 - elif compset -P '[^:]##:'; then - _message 'glob or include directive' && return 1 - elif [[ ! -prefix *:* ]]; then - _rg_types -qS ':' && return 0 - fi - ;; - esac - shift state - done - - return 1 -} - -# zsh 5.1 refuses to complete options if a 'match-less' operand like our pattern -# could be 'completed' instead. We can use _guard() to avoid this problem, but -# it introduces another one: zsh won't print the message if we try to complete -# the pattern after having passed `--`. To work around *that* problem, we can -# use this function to bypass the _guard() when `--` is on the command line. -# This is inaccurate (it'd get confused by e.g. 
`rg -e --`), but zsh's handling -# of `--` isn't accurate anyway -_rg_pattern() { - if (( ${words[(I)--]} )); then - _message 'pattern' - else - _guard '^-*' 'pattern' - fi + return ret } # Complete encodings @@ -190,7 +373,7 @@ x-user-defined auto ) - _wanted rg-encodings expl 'encoding' compadd -a "${@}" - _encodings + _wanted encodings expl encoding compadd -a "$@" - _encodings } # Complete file types @@ -198,12 +381,163 @@ local -a expl local -aU _types - _types=( ${${(f)"$( _call_program rg-types rg --type-list )"}%%:*} ) + _types=( ${(@)${(f)"$( _call_program types rg --type-list )"}%%:*} ) - _wanted rg-types expl 'file type' compadd -a "${@}" - _types + _wanted types expl 'file type' compadd -a "$@" - _types } -_rg "${@}" +_rg "$@" + +################################################################################ +# ZSH COMPLETION REFERENCE +# +# For the convenience of developers who aren't especially familiar with zsh +# completion functions, a brief reference guide follows. This is in no way +# comprehensive; it covers just enough of the basic structure, syntax, and +# conventions to help someone make simple changes like adding new options. For +# more complete documentation regarding zsh completion functions, please see the +# following: +# +# * http://zsh.sourceforge.net/Doc/Release/Completion-System.html +# * https://github.com/zsh-users/zsh/blob/master/Etc/completion-style-guide +# +# OVERVIEW +# +# Most zsh completion functions are defined in terms of `_arguments`, which is a +# shell function that takes a series of argument specifications. The specs for +# `rg` are stored in an array, which is common for more complex functions; the +# elements of the array are passed to `_arguments` on invocation. 
+# +# ARGUMENT-SPECIFICATION SYNTAX +# +# The following is a contrived example of the argument specs for a simple tool: +# +# '(: * -)'{-h,--help}'[display help information]' +# '(-q -v --quiet --verbose)'{-q,--quiet}'[decrease output verbosity]' +# '!(-q -v --quiet --verbose)--silent' +# '(-q -v --quiet --verbose)'{-v,--verbose}'[increase output verbosity]' +# '--color=[specify when to use colors]:when:(always never auto)' +# '*:example file:_files' +# +# Although there may appear to be six specs here, there are actually nine; we +# use brace expansion to combine specs for options that go by multiple names, +# like `-q` and `--quiet`. This is customary, and ties in with the fact that zsh +# merges completion possibilities together when they have the same description. +# +# The first line defines the option `-h`/`--help`. With most tools, it isn't +# useful to complete anything after `--help` because it effectively overrides +# all others; the `(: * -)` at the beginning of the spec tells zsh not to +# complete any other operands (`:` and `*`) or options (`-`) after this one has +# been used. The `[...]` at the end associates a description with `-h`/`--help`; +# as mentioned, zsh will see the identical descriptions and merge these options +# together when offering completion possibilities. +# +# The next line defines `-q`/`--quiet`. Here we don't want to suppress further +# completions entirely, but we don't want to offer `-q` if `--quiet` has been +# given (since they do the same thing), nor do we want to offer `-v` (since it +# doesn't make sense to be quiet and verbose at the same time). We don't need to +# tell zsh not to offer `--quiet` a second time, since that's the default +# behaviour, but since this line expands to two specs describing `-q` *and* +# `--quiet` we do need to explicitly list all of them here. +# +# The next line defines a hidden option `--silent` — maybe it's a deprecated +# synonym for `--quiet`. 
The leading `!` indicates that zsh shouldn't offer this +# option during completion. The benefit of providing a spec for an option that +# shouldn't be completed is that, if someone *does* use it, we can correctly +# suppress completion of other options afterwards. +# +# The next line defines `-v`/`--verbose`; this works just like `-q`/`--quiet`. +# +# The next line defines `--color`. In this example, `--color` doesn't have a +# corresponding short option, so we don't need to use brace expansion. Further, +# there are no other options it's exclusive with (just itself), so we don't need +# to define those at the beginning. However, it does take a mandatory argument. +# The `=` at the end of `--color=` indicates that the argument may appear either +# like `--color always` or like `--color=always`; this is how most GNU-style +# command-line tools work. The corresponding short option would normally use `+` +# — for example, `-c+` would allow either `-c always` or `-calways`. For this +# option, the arguments are known ahead of time, so we can simply list them in +# parentheses at the end (`when` is used as the description for the argument). +# +# The last line defines an operand (a non-option argument). In this example, the +# operand can be used any number of times (the leading `*`), and it should be a +# file path, so we tell zsh to call the `_files` function to complete it. The +# `example file` in the middle is the description to use for this operand; we +# could use a space instead to accept the default provided by `_files`. +# +# GROUPING ARGUMENT SPECIFICATIONS +# +# Newer versions of zsh support grouping argument specs together. All specs +# following a `+` and then a group name are considered to be members of the +# named group. Grouping is useful mostly for organisational purposes; it makes +# the relationship between different options more obvious, and makes it easier +# to specify exclusions. 
+# +# We could rewrite our example above using grouping as follows: +# +# '(: * -)'{-h,--help}'[display help information]' +# '--color=[specify when to use colors]:when:(always never auto)' +# '*:example file:_files' +# + '(verbosity)' +# {-q,--quiet}'[decrease output verbosity]' +# '!--silent' +# {-v,--verbose}'[increase output verbosity]' +# +# Here we take advantage of a useful feature of spec grouping — when the group +# name is surrounded by parentheses, as in `(verbosity)`, it tells zsh that all +# of the options in that group are exclusive with each other. As a result, we +# don't need to manually list out the exclusions at the beginning of each +# option. +# +# Groups can also be referred to by name in other argument specs; for example: +# +# '(xyz)--aaa' '*: :_files' +# + xyz --xxx --yyy --zzz +# +# Here we use the group name `xyz` to tell zsh that `--xxx`, `--yyy`, and +# `--zzz` are not to be completed after `--aaa`. This makes the exclusion list +# much more compact and reusable. +# +# CONVENTIONS +# +# zsh completion functions generally adhere to the following conventions: +# +# * Use two spaces for indentation +# * Combine specs for options with different names using brace expansion +# * In combined specs, list the short option first (as in `{-a,--text}`) +# * Use `+` or `=` as described above for options that take arguments +# * Provide a description for all options, option-arguments, and operands +# * Capitalise/punctuate argument descriptions as phrases, not complete +# sentences — 'display help information', never 'Display help information.' 
+# (but still capitalise acronyms and proper names) +# * Write argument descriptions as verb phrases — 'display x', 'enable y', +# 'use z' +# * Word descriptions to make it clear when an option expects an argument; +# usually this is done with the word 'specify', as in 'specify x' or +# 'use specified x') +# * Write argument descriptions as tersely as possible — for example, articles +# like 'a' and 'the' should be omitted unless it would be confusing +# +# Other conventions currently used by this function: +# +# * Order argument specs alphabetically by group name, then option name +# * Group options that are directly related, mutually exclusive, or frequently +# referenced by other argument specs +# * Use only characters in the set [a-z0-9_-] in group names +# * Order exclusion lists as follows: short options, long options, groups +# * Use American English in descriptions +# * Use 'don't' in descriptions instead of 'do not' +# * Word descriptions for related options as similarly as possible. For example, +# `--foo[enable foo]` and `--no-foo[disable foo]`, or `--foo[use foo]` and +# `--no-foo[don't use foo]` +# * Word descriptions to make it clear when an option only makes sense with +# another option, usually by adding '(with -x)' to the end +# * Don't quote strings or variables unnecessarily. When quotes are required, +# prefer single-quotes to double-quotes +# * Prefix option specs with `$no` when the option serves only to negate the +# behaviour of another option that must be provided explicitly by the user. 
+# This prevents rarely used options from cluttering up the completion menu +################################################################################ # ------------------------------------------------------------------------------ # Copyright (c) 2011 Github zsh-users - http://github.com/zsh-users diff -Nru ripgrep-0.6.0/complete/rg.bash ripgrep-0.10.0.3/complete/rg.bash --- ripgrep-0.6.0/complete/rg.bash 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/complete/rg.bash 2018-09-11 18:28:34.000000000 +0000 @@ -0,0 +1,209 @@ +_rg() { + local i cur prev opts cmds + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + cmd="" + opts="" + + for i in ${COMP_WORDS[@]} + do + case "${i}" in + rg) + cmd="rg" + ;; + + *) + ;; + esac + done + + case "${cmd}" in + rg) + opts=" -b -s -c -l -F -L -i -v -n -N -x -U -0 -o -P -p -q -z -S -a -u -H -w -h -V -A -B -C -E -f -g -M -m -e -r -j -t -T --block-buffered --no-block-buffered --byte-offset --case-sensitive --column --no-column --count --count-matches --crlf --no-crlf --debug --trace --no-encoding --files --files-with-matches --files-without-match --fixed-strings --no-fixed-strings --follow --no-follow --heading --no-heading --hidden --no-hidden --ignore-case --invert-match --json --no-json --line-buffered --no-line-buffered --line-number --no-line-number --line-regexp --mmap --no-mmap --multiline --no-multiline --multiline-dotall --no-multiline-dotall --no-config --no-ignore --ignore --no-ignore-global --ignore-global --no-ignore-messages --ignore-messages --no-ignore-parent --ignore-parent --no-ignore-vcs --ignore-vcs --no-messages --messages --no-pcre2-unicode --pcre2-unicode --null --null-data --one-file-system --no-one-file-system --only-matching --passthru --pcre2 --no-pcre2 --no-pre --pretty --quiet --search-zip --no-search-zip --smart-case --sort-files --no-sort-files --stats --no-stats --text --no-text --trim --no-trim --type-list --unrestricted --vimgrep --with-filename 
--no-filename --word-regexp --help --version --after-context --before-context --color --colors --context --context-separator --dfa-size-limit --encoding --file --glob --iglob --ignore-file --max-columns --max-count --max-depth --max-filesize --path-separator --pre --pre-glob --regex-size-limit --regexp --replace --sort --sortr --threads --type --type-add --type-clear --type-not ... " + if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then + COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) + return 0 + fi + case "${prev}" in + + --after-context) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -A) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --before-context) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -B) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --color) + COMPREPLY=($(compgen -W "never auto always ansi" -- ${cur})) + return 0 + ;; + --colors) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --context) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -C) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --context-separator) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --dfa-size-limit) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --encoding) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -E) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --file) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -f) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --glob) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -g) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --iglob) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --ignore-file) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --max-columns) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -M) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --max-count) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -m) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --max-depth) + COMPREPLY=($(compgen -f ${cur})) + return 0 
+ ;; + --max-filesize) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --path-separator) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --pre) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --pre-glob) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --regex-size-limit) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --regexp) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -e) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --replace) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -r) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --sort) + COMPREPLY=($(compgen -W "path modified accessed created none" -- ${cur})) + return 0 + ;; + --sortr) + COMPREPLY=($(compgen -W "path modified accessed created none" -- ${cur})) + return 0 + ;; + --threads) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -j) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --type) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -t) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --type-add) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --type-clear) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + --type-not) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + -T) + COMPREPLY=($(compgen -f ${cur})) + return 0 + ;; + *) + COMPREPLY=() + ;; + esac + COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) + return 0 + ;; + + esac +} + +complete -F _rg -o bashdefault -o default rg diff -Nru ripgrep-0.6.0/complete/rg.fish ripgrep-0.10.0.3/complete/rg.fish --- ripgrep-0.6.0/complete/rg.fish 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/complete/rg.fish 2018-09-11 18:28:34.000000000 +0000 @@ -0,0 +1,113 @@ +complete -c rg -n "__fish_use_subcommand" -s A -l after-context -d 'Show NUM lines after each match.' +complete -c rg -n "__fish_use_subcommand" -s B -l before-context -d 'Show NUM lines before each match.' +complete -c rg -n "__fish_use_subcommand" -l color -d 'Controls when to use color.' 
-r -f -a "never auto always ansi" +complete -c rg -n "__fish_use_subcommand" -l colors -d 'Configure color settings and styles.' +complete -c rg -n "__fish_use_subcommand" -s C -l context -d 'Show NUM lines before and after each match.' +complete -c rg -n "__fish_use_subcommand" -l context-separator -d 'Set the context separator string.' +complete -c rg -n "__fish_use_subcommand" -l dfa-size-limit -d 'The upper size limit of the regex DFA.' +complete -c rg -n "__fish_use_subcommand" -s E -l encoding -d 'Specify the text encoding of files to search.' +complete -c rg -n "__fish_use_subcommand" -s f -l file -d 'Search for patterns from the given file.' +complete -c rg -n "__fish_use_subcommand" -s g -l glob -d 'Include or exclude files.' +complete -c rg -n "__fish_use_subcommand" -l iglob -d 'Include or exclude files case insensitively.' +complete -c rg -n "__fish_use_subcommand" -l ignore-file -d 'Specify additional ignore files.' +complete -c rg -n "__fish_use_subcommand" -s M -l max-columns -d 'Don\'t print lines longer than this limit.' +complete -c rg -n "__fish_use_subcommand" -s m -l max-count -d 'Limit the number of matches.' +complete -c rg -n "__fish_use_subcommand" -l max-depth -d 'Descend at most NUM directories.' +complete -c rg -n "__fish_use_subcommand" -l max-filesize -d 'Ignore files larger than NUM in size.' +complete -c rg -n "__fish_use_subcommand" -l path-separator -d 'Set the path separator.' +complete -c rg -n "__fish_use_subcommand" -l pre -d 'search outputs of COMMAND FILE for each FILE' +complete -c rg -n "__fish_use_subcommand" -l pre-glob -d 'Include or exclude files from a preprocessing command.' +complete -c rg -n "__fish_use_subcommand" -l regex-size-limit -d 'The upper size limit of the compiled regex.' +complete -c rg -n "__fish_use_subcommand" -s e -l regexp -d 'A pattern to search for.' +complete -c rg -n "__fish_use_subcommand" -s r -l replace -d 'Replace matches with the given text.' 
+complete -c rg -n "__fish_use_subcommand" -l sort -d 'Sort results in ascending order. Implies --threads=1.' -r -f -a "path modified accessed created none" +complete -c rg -n "__fish_use_subcommand" -l sortr -d 'Sort results in descending order. Implies --threads=1.' -r -f -a "path modified accessed created none" +complete -c rg -n "__fish_use_subcommand" -s j -l threads -d 'The approximate number of threads to use.' +complete -c rg -n "__fish_use_subcommand" -s t -l type -d 'Only search files matching TYPE.' +complete -c rg -n "__fish_use_subcommand" -l type-add -d 'Add a new glob for a file type.' +complete -c rg -n "__fish_use_subcommand" -l type-clear -d 'Clear globs for a file type.' +complete -c rg -n "__fish_use_subcommand" -s T -l type-not -d 'Do not search files matching TYPE.' +complete -c rg -n "__fish_use_subcommand" -l block-buffered -d 'Force block buffering.' +complete -c rg -n "__fish_use_subcommand" -l no-block-buffered +complete -c rg -n "__fish_use_subcommand" -s b -l byte-offset -d 'Print the 0-based byte offset for each matching line.' +complete -c rg -n "__fish_use_subcommand" -s s -l case-sensitive -d 'Search case sensitively (default).' +complete -c rg -n "__fish_use_subcommand" -l column -d 'Show column numbers.' +complete -c rg -n "__fish_use_subcommand" -l no-column +complete -c rg -n "__fish_use_subcommand" -s c -l count -d 'Only show the count of matching lines for each file.' +complete -c rg -n "__fish_use_subcommand" -l count-matches -d 'Only show the count of individual matches for each file.' +complete -c rg -n "__fish_use_subcommand" -l crlf -d 'Support CRLF line terminators (useful on Windows).' +complete -c rg -n "__fish_use_subcommand" -l no-crlf +complete -c rg -n "__fish_use_subcommand" -l debug -d 'Show debug messages.' +complete -c rg -n "__fish_use_subcommand" -l trace +complete -c rg -n "__fish_use_subcommand" -l no-encoding +complete -c rg -n "__fish_use_subcommand" -l files -d 'Print each file that would be searched.' 
+complete -c rg -n "__fish_use_subcommand" -s l -l files-with-matches -d 'Only print the paths with at least one match.' +complete -c rg -n "__fish_use_subcommand" -l files-without-match -d 'Only print the paths that contain zero matches.' +complete -c rg -n "__fish_use_subcommand" -s F -l fixed-strings -d 'Treat the pattern as a literal string.' +complete -c rg -n "__fish_use_subcommand" -l no-fixed-strings +complete -c rg -n "__fish_use_subcommand" -s L -l follow -d 'Follow symbolic links.' +complete -c rg -n "__fish_use_subcommand" -l no-follow +complete -c rg -n "__fish_use_subcommand" -l heading -d 'Print matches grouped by each file.' +complete -c rg -n "__fish_use_subcommand" -l no-heading -d 'Don\'t group matches by each file.' +complete -c rg -n "__fish_use_subcommand" -l hidden -d 'Search hidden files and directories.' +complete -c rg -n "__fish_use_subcommand" -l no-hidden +complete -c rg -n "__fish_use_subcommand" -s i -l ignore-case -d 'Case insensitive search.' +complete -c rg -n "__fish_use_subcommand" -s v -l invert-match -d 'Invert matching.' +complete -c rg -n "__fish_use_subcommand" -l json -d 'Show search results in a JSON Lines format.' +complete -c rg -n "__fish_use_subcommand" -l no-json +complete -c rg -n "__fish_use_subcommand" -l line-buffered -d 'Force line buffering.' +complete -c rg -n "__fish_use_subcommand" -l no-line-buffered +complete -c rg -n "__fish_use_subcommand" -s n -l line-number -d 'Show line numbers.' +complete -c rg -n "__fish_use_subcommand" -s N -l no-line-number -d 'Suppress line numbers.' +complete -c rg -n "__fish_use_subcommand" -s x -l line-regexp -d 'Only show matches surrounded by line boundaries.' +complete -c rg -n "__fish_use_subcommand" -l mmap -d 'Search using memory maps when possible.' +complete -c rg -n "__fish_use_subcommand" -l no-mmap -d 'Never use memory maps.' +complete -c rg -n "__fish_use_subcommand" -s U -l multiline -d 'Enable matching across multiple lines.' 
+complete -c rg -n "__fish_use_subcommand" -l no-multiline +complete -c rg -n "__fish_use_subcommand" -l multiline-dotall -d 'Make \'.\' match new lines when multiline is enabled.' +complete -c rg -n "__fish_use_subcommand" -l no-multiline-dotall +complete -c rg -n "__fish_use_subcommand" -l no-config -d 'Never read configuration files.' +complete -c rg -n "__fish_use_subcommand" -l no-ignore -d 'Don\'t respect ignore files.' +complete -c rg -n "__fish_use_subcommand" -l ignore +complete -c rg -n "__fish_use_subcommand" -l no-ignore-global -d 'Don\'t respect global ignore files.' +complete -c rg -n "__fish_use_subcommand" -l ignore-global +complete -c rg -n "__fish_use_subcommand" -l no-ignore-messages -d 'Suppress gitignore parse error messages.' +complete -c rg -n "__fish_use_subcommand" -l ignore-messages +complete -c rg -n "__fish_use_subcommand" -l no-ignore-parent -d 'Don\'t respect ignore files in parent directories.' +complete -c rg -n "__fish_use_subcommand" -l ignore-parent +complete -c rg -n "__fish_use_subcommand" -l no-ignore-vcs -d 'Don\'t respect VCS ignore files.' +complete -c rg -n "__fish_use_subcommand" -l ignore-vcs +complete -c rg -n "__fish_use_subcommand" -l no-messages -d 'Suppress some error messages.' +complete -c rg -n "__fish_use_subcommand" -l messages +complete -c rg -n "__fish_use_subcommand" -l no-pcre2-unicode -d 'Disable Unicode mode for PCRE2 matching.' +complete -c rg -n "__fish_use_subcommand" -l pcre2-unicode +complete -c rg -n "__fish_use_subcommand" -s 0 -l null -d 'Print a NUL byte after file paths.' +complete -c rg -n "__fish_use_subcommand" -l null-data -d 'Use NUL as a line terminator instead of \\n.' +complete -c rg -n "__fish_use_subcommand" -l one-file-system -d 'Do not descend into directories on other file systems.' +complete -c rg -n "__fish_use_subcommand" -l no-one-file-system +complete -c rg -n "__fish_use_subcommand" -s o -l only-matching -d 'Print only matches parts of a line.' 
+complete -c rg -n "__fish_use_subcommand" -l passthru -d 'Print both matching and non-matching lines.' +complete -c rg -n "__fish_use_subcommand" -s P -l pcre2 -d 'Enable PCRE2 matching.' +complete -c rg -n "__fish_use_subcommand" -l no-pcre2 +complete -c rg -n "__fish_use_subcommand" -l no-pre +complete -c rg -n "__fish_use_subcommand" -s p -l pretty -d 'Alias for --color always --heading --line-number.' +complete -c rg -n "__fish_use_subcommand" -s q -l quiet -d 'Do not print anything to stdout.' +complete -c rg -n "__fish_use_subcommand" -s z -l search-zip -d 'Search in compressed files.' +complete -c rg -n "__fish_use_subcommand" -l no-search-zip +complete -c rg -n "__fish_use_subcommand" -s S -l smart-case -d 'Smart case search.' +complete -c rg -n "__fish_use_subcommand" -l sort-files -d 'DEPRECATED' +complete -c rg -n "__fish_use_subcommand" -l no-sort-files +complete -c rg -n "__fish_use_subcommand" -l stats -d 'Print statistics about this ripgrep search.' +complete -c rg -n "__fish_use_subcommand" -l no-stats +complete -c rg -n "__fish_use_subcommand" -s a -l text -d 'Search binary files as if they were text.' +complete -c rg -n "__fish_use_subcommand" -l no-text +complete -c rg -n "__fish_use_subcommand" -l trim -d 'Trim prefixed whitespace from matches.' +complete -c rg -n "__fish_use_subcommand" -l no-trim +complete -c rg -n "__fish_use_subcommand" -l type-list -d 'Show all supported file types.' +complete -c rg -n "__fish_use_subcommand" -s u -l unrestricted -d 'Reduce the level of "smart" searching.' +complete -c rg -n "__fish_use_subcommand" -l vimgrep -d 'Show results in vim compatible format.' +complete -c rg -n "__fish_use_subcommand" -s H -l with-filename -d 'Print the file path with the matched lines.' +complete -c rg -n "__fish_use_subcommand" -l no-filename -d 'Never print the file path with the matched lines.' +complete -c rg -n "__fish_use_subcommand" -s w -l word-regexp -d 'Only show matches surrounded by word boundaries.' 
+complete -c rg -n "__fish_use_subcommand" -s h -l help -d 'Prints help information. Use --help for more details.' +complete -c rg -n "__fish_use_subcommand" -s V -l version -d 'Prints version information' diff -Nru ripgrep-0.6.0/debian/changelog ripgrep-0.10.0.3/debian/changelog --- ripgrep-0.6.0/debian/changelog 2017-10-17 21:50:48.000000000 +0000 +++ ripgrep-0.10.0.3/debian/changelog 2018-09-11 20:11:10.000000000 +0000 @@ -1,35 +1,5 @@ -ripgrep (0.6.0-2) zesty; urgency=medium +ripgrep (0.10.0.3) cosmic; urgency=medium - * Fix location of zsh completion + * Initial release - -- Armin Grodon Tue, 17 Oct 2017 23:50:48 +0200 - -ripgrep (0.6.0-1) zesty; urgency=medium - - * Update to upstream (0.6.0) - - -- Armin Grodon Tue, 17 Oct 2017 21:23:58 +0200 - -ripgrep (0.5.2-1) zesty; urgency=medium - - * Update to upstream (0.5.2) - - -- Armin Grodon Sat, 13 May 2017 16:57:36 +0200 - -ripgrep (0.5.1-3) zesty; urgency=low - - * Fixed license file. - - -- Armin Grodon Sun, 30 Apr 2017 01:14:25 +0200 - -ripgrep (0.5.1-2) zesty; urgency=medium - - * Prefetch sources with cargo-vendor. - - -- Armin Grodon Sat, 29 Apr 2017 23:50:23 +0200 - -ripgrep (0.5.1-1) zesty; urgency=medium - - * Initial release. 
- - -- Armin Grodon Sat, 22 Apr 2017 02:35:37 +0200 + -- Jérémie Ferry Mon, 10 Sep 2018 23:47:01 -0100 diff -Nru ripgrep-0.6.0/debian/control ripgrep-0.10.0.3/debian/control --- ripgrep-0.6.0/debian/control 2017-10-17 21:50:48.000000000 +0000 +++ ripgrep-0.10.0.3/debian/control 2018-09-10 21:44:46.000000000 +0000 @@ -1,19 +1,19 @@ Source: ripgrep -Section: universe/utils -Priority: extra -Maintainer: Armin Grodon +Section: utils +Priority: optional +Maintainer: Jérémie Ferry Build-Depends: - cargo, - debhelper (>= 9), - quilt, -Standards-Version: 3.9.8 -Homepage: https://github.com/BurntSushi/ripgrep -Vcs-Browser: https://github.com/BurntSushi/ripgrep.git -Vcs-Git: https://github.com/BurntSushi/ripgrep.git + debhelper (>=9), + cargo, + quilt +Standards-Version: 4.1.1 Package: ripgrep +Version: 0.10 +Section: utils +Priority: optional +Maintainer: Jérémie Ferry Architecture: any Pre-Depends: dpkg (>= 1.17.14) -Depends: ${shlibs:Depends} -Description: A search tool that combines the usability of ag with the raw speed of grep - ripgrep is a line oriented search tool that combines the usability of The Silver Searcher (similar to ack) with the raw speed of GNU grep. ripgrep works by recursively searching your current directory for a regex pattern. +Depends: ${shlib:Depends} +Description: ripgrep is a line-oriented search tool that recursively searches your current directory for a regex pattern while respecting your gitignore rules. ripgrep has first class support on Windows, macOS and Linux, with binary downloads available for every release. ripgrep is similar to other popular search tools like The Silver Searcher, ack and grep. 
diff -Nru ripgrep-0.6.0/debian/copyright ripgrep-0.10.0.3/debian/copyright --- ripgrep-0.6.0/debian/copyright 2017-10-17 21:50:48.000000000 +0000 +++ ripgrep-0.10.0.3/debian/copyright 2018-05-27 13:00:02.000000000 +0000 @@ -1,84 +1,201 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Upstream-Name: ripgrep -Upstream-Contact: jamslam@gmail.com -Source: https://github.com/BurntSushi/ripgrep - -Files: * -Copyright: 2015-2017 Andrew Gallant -License: MIT/Unlicense - -Files: debian/* -Copyright: 2017 Armin Grodon -License: MIT - -License: Unlicense - This is free and unencumbered software released into the public domain. - . - Anyone is free to copy, modify, publish, use, compile, sell, or - distribute this software, either in source code form or as a compiled - binary, for any purpose, commercial or non-commercial, and by any - means. - . - In jurisdictions that recognize copyright laws, the author or authors - of this software dedicate any and all copyright interest in the - software to the public domain. We make this dedication for the benefit - of the public at large and to the detriment of our heirs and - successors. We intend this dedication to be an overt act of - relinquishment in perpetuity of all present and future rights to this - software under copyright law. - . - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. - . - For more information, please refer to - -License: MIT - The MIT License (MIT) - . - Copyright (c) 2015 Andrew Gallant - . 
- Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - . - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - . - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - -License: MIT - MIT License - . - Copyright (c) 2017 Armin Grodon - . - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - . - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - . - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, 
in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright 2017 fd developers + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff -Nru ripgrep-0.6.0/debian/LICENSE-MIT ripgrep-0.10.0.3/debian/LICENSE-MIT --- ripgrep-0.6.0/debian/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/debian/LICENSE-MIT 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff -Nru ripgrep-0.6.0/debian/ripgrep.install ripgrep-0.10.0.3/debian/ripgrep.install --- ripgrep-0.6.0/debian/ripgrep.install 2017-10-17 21:50:48.000000000 +0000 +++ ripgrep-0.10.0.3/debian/ripgrep.install 2018-09-11 18:53:14.000000000 +0000 @@ -1,12 +1,10 @@ -target/release/rg usr/bin +target/release/rg usr/bin -target/release/bash/rg usr/share/bash-completion/completions -complete/_rg usr/share/zsh/site-functions -target/release/build/ripgrep-*/out/rg.fish usr/share/fish/completions +complete/rg.bash usr/share/bash-completion/completions +complete/rg.fish usr/share/fish/completions +complete/_rg usr/share/zsh/vendor-completions -doc/rg.1 usr/share/man/man1 -README.md usr/share/doc/ripgrep +README.md usr/share/doc/ripgrep -COPYING usr/share/licenses/ripgrep -LICENSE-MIT usr/share/licenses/ripgrep -UNLICENSE usr/share/licenses/ripgrep +debian/LICENSE-MIT usr/share/doc/ripgrep +debian/UNLICENSE usr/share/doc/ripgrep diff -Nru ripgrep-0.6.0/debian/rules ripgrep-0.10.0.3/debian/rules --- ripgrep-0.6.0/debian/rules 2017-10-17 21:50:48.000000000 +0000 +++ ripgrep-0.10.0.3/debian/rules 2018-03-21 22:17:08.000000000 +0000 @@ -13,5 +13,3 @@ sed 's@$$PWD@$(CURDIR)@' debian/cargo.config > .cargo/config tar xzf debian/vendor.tar.gz cargo build --release --frozen --verbose - mkdir -p target/release/bash - cp target/release/build/ripgrep-*/out/rg.bash-completion target/release/bash/rg diff -Nru ripgrep-0.6.0/debian/source/format ripgrep-0.10.0.3/debian/source/format --- ripgrep-0.6.0/debian/source/format 2017-10-17 21:50:48.000000000 +0000 +++ ripgrep-0.10.0.3/debian/source/format 2018-03-18 16:22:26.000000000 +0000 @@ -1 +1 @@ -3.0 (quilt) +3.0 (native) diff -Nru ripgrep-0.6.0/debian/source/include-binaries ripgrep-0.10.0.3/debian/source/include-binaries --- ripgrep-0.6.0/debian/source/include-binaries 2017-10-17 21:50:48.000000000 +0000 +++ ripgrep-0.10.0.3/debian/source/include-binaries 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -debian/vendor.tar.gz diff -Nru 
ripgrep-0.6.0/debian/UNLICENSE ripgrep-0.10.0.3/debian/UNLICENSE --- ripgrep-0.6.0/debian/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/debian/UNLICENSE 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to Binary files /tmp/tmpWb_x3N/8wrkwDuYlx/ripgrep-0.6.0/debian/vendor.tar.gz and /tmp/tmpWb_x3N/i6IbzxLPq8/ripgrep-0.10.0.3/debian/vendor.tar.gz differ diff -Nru ripgrep-0.6.0/doc/convert-to-man ripgrep-0.10.0.3/doc/convert-to-man --- ripgrep-0.6.0/doc/convert-to-man 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/doc/convert-to-man 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ -#!/bin/sh -e - -pandoc -s -t man rg.1.md -o rg.1 -sed -i.bak 's/\.TH.*/.TH "rg" "1"/g' rg.1 -rm -f rg.1.bak # BSD `sed` requires the creation of a back-up file diff -Nru ripgrep-0.6.0/doc/rg.1 ripgrep-0.10.0.3/doc/rg.1 --- ripgrep-0.6.0/doc/rg.1 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/doc/rg.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,566 +0,0 @@ -.\" Automatically generated by Pandoc 1.19.2.1 -.\" -.TH "rg" "1" -.hy -.SH NAME -.PP -rg \- recursively search current directory for lines matching a pattern -.SH SYNOPSIS -.PP -rg [\f[I]options\f[]] \f[I]PATTERN\f[] [\f[I]path\f[] ...] -.PP -rg [\f[I]options\f[]] [\-e \f[I]PATTERN\f[] ...] [\-f \f[I]FILE\f[] ...] -[\f[I]path\f[] ...] -.PP -rg [\f[I]options\f[]] \-\-files [\f[I]path\f[] ...] -.PP -rg [\f[I]options\f[]] \-\-type\-list -.PP -rg [\f[I]options\f[]] \-\-help -.PP -rg [\f[I]options\f[]] \-\-version -.SH DESCRIPTION -.PP -ripgrep (rg) combines the usability of The Silver Searcher (an ack -clone) with the raw speed of grep. -.PP -ripgrep\[aq]s regex engine uses finite automata and guarantees linear -time searching. -Because of this, features like backreferences and arbitrary lookaround -are not supported. -.PP -Note that ripgrep may abort unexpectedly when using default settings if -it searches a file that is simultaneously truncated. -This behavior can be avoided by passing the \-\-no\-mmap flag. -.PP -Project home page: https://github.com/BurntSushi/ripgrep -.SH COMMON OPTIONS -.TP -.B \-a, \-\-text -Search binary files as if they were text. 
-.RS -.RE -.TP -.B \-c, \-\-count -Only show count of line matches for each file. -.RS -.RE -.TP -.B \-\-color \f[I]WHEN\f[] -Whether to use color in the output. -Valid values are never, auto, always or ansi. -The default is auto. -When always is used, coloring is attempted based on your environment. -When ansi is used, coloring is forcefully done using ANSI escape color -codes. -.RS -.RE -.TP -.B \-e, \-\-regexp \f[I]PATTERN\f[] ... -Use PATTERN to search. -This option can be provided multiple times, where all patterns given are -searched. -This is also useful when searching for patterns that start with a dash. -.RS -.RE -.TP -.B \-F, \-\-fixed\-strings -Treat the pattern as a literal string instead of a regular expression. -.RS -.RE -.TP -.B \-g, \-\-glob \f[I]GLOB\f[] ... -Include or exclude files for searching that match the given glob. -This always overrides any other ignore logic if there is a conflict, but -is otherwise applied in addition to ignore files (e.g., .gitignore or -\&.ignore). -Multiple glob flags may be used. -Globbing rules match .gitignore globs. -Precede a glob with a \[aq]!\[aq] to exclude it. -.RS -.PP -The \-\-glob flag subsumes the functionality of both the \-\-include and -\-\-exclude flags commonly found in other tools. -.PP -Values given to \-g must be quoted or your shell will expand them and -result in unexpected behavior. -.PP -Combine with the \-\-files flag to return matched filenames (i.e., to -replicate ack/ag\[aq]s \-g flag). -For example: -.IP -.nf -\f[C] -rg\ \-g\ \[aq]*.foo\[aq]\ \-\-files -\f[] -.fi -.RE -.TP -.B \-h, \-\-help -Show this usage message. -.RS -.RE -.TP -.B \-i, \-\-ignore\-case -Case insensitive search. -Overridden by \-\-case\-sensitive. -.RS -.RE -.TP -.B \-n, \-\-line\-number -Show line numbers (1\-based). -This is enabled by default at a tty. -.RS -.RE -.TP -.B \-N, \-\-no\-line\-number -Suppress line numbers. -.RS -.RE -.TP -.B \-q, \-\-quiet -Do not print anything to stdout. 
-If a match is found in a file, stop searching that file. -.RS -.RE -.TP -.B \-t, \-\-type \f[I]TYPE\f[] ... -Only search files matching TYPE. -Multiple type flags may be provided. -Use the \-\-type\-list flag to list all available types. -.RS -.RE -.TP -.B \-T, \-\-type\-not \f[I]TYPE\f[] ... -Do not search files matching TYPE. -Multiple not\-type flags may be provided. -.RS -.RE -.TP -.B \-u, \-\-unrestricted ... -Reduce the level of \[aq]smart\[aq] searching. -A single \-u doesn\[aq]t respect .gitignore (etc.) files. -Two \-u flags will search hidden files and directories. -Three \-u flags will search binary files. -\-uu is equivalent to \f[C]grep\ \-r\f[], and \-uuu is equivalent to -\f[C]grep\ \-a\ \-r\f[]. -.RS -.PP -Note that the \-u flags are convenient aliases for other combinations of -flags. -\-u aliases \-\-no\-ignore. -\-uu aliases \-\-no\-ignore \-\-hidden. -\-uuu aliases \-\-no\-ignore \-\-hidden \-\-text. -.RE -.TP -.B \-v, \-\-invert\-match -Invert matching. -.RS -.RE -.TP -.B \-w, \-\-word\-regexp -Only show matches surrounded by word boundaries. -This is equivalent to putting \\b before and after the search pattern. -.RS -.RE -.TP -.B \-x, \-\-line\-regexp -Only show matches surrounded by line boundaries. -This is equivalent to putting ^...$ around the search pattern. -.RS -.RE -.SH LESS COMMON OPTIONS -.TP -.B \-A, \-\-after\-context \f[I]NUM\f[] -Show NUM lines after each match. -.RS -.RE -.TP -.B \-B, \-\-before\-context \f[I]NUM\f[] -Show NUM lines before each match. -.RS -.RE -.TP -.B \-C, \-\-context \f[I]NUM\f[] -Show NUM lines before and after each match. -.RS -.RE -.TP -.B \-\-colors \f[I]SPEC\f[] ... -This flag specifies color settings for use in the output. -This flag may be provided multiple times. -Settings are applied iteratively. -Colors are limited to one of eight choices: red, blue, green, cyan, -magenta, yellow, white and black. -Styles are limited to nobold, bold, nointense or intense. 
-.RS -.PP -The format of the flag is {type}:{attribute}:{value}. -{type} should be one of path, line, column or match. -{attribute} can be fg, bg or style. -Value is either a color (for fg and bg) or a text style. -A special format, {type}:none, will clear all color settings for {type}. -.PP -For example, the following command will change the match color to -magenta and the background color for line numbers to yellow: -.IP -.nf -\f[C] -rg\ \-\-colors\ \[aq]match:fg:magenta\[aq]\ \-\-colors\ \[aq]line:bg:yellow\[aq]\ foo. -\f[] -.fi -.RE -.TP -.B \-\-column -Show column numbers (1 based) in output. -This only shows the column numbers for the first match on each line. -Note that this doesn\[aq]t try to account for Unicode. -One byte is equal to one column. -This implies \-\-line\-number. -.RS -.RE -.TP -.B \-\-context\-separator \f[I]SEPARATOR\f[] -The string to use when separating non\-continuous context lines. -Escape sequences may be used. -[default: \-\-] -.RS -.RE -.TP -.B \-\-debug -Show debug messages. -.RS -.RE -.TP -.B \-E, \-\-encoding \f[I]ENCODING\f[] -Specify the text encoding that ripgrep will use on all files searched. -The default value is \[aq]auto\[aq], which will cause ripgrep to do a -best effort automatic detection of encoding on a per\-file basis. -Other supported values can be found in the list of labels here: -https://encoding.spec.whatwg.org/#concept\-encoding\-get -.RS -.RE -.TP -.B \-f, \-\-file \f[I]FILE\f[] ... -Search for patterns from the given file, with one pattern per line. -When this flag is used or multiple times or in combination with the -\-e/\-\-regexp flag, then all patterns provided are searched. -Empty pattern lines will match all input lines, and the newline is not -counted as part of the pattern. -.RS -.RE -.TP -.B \-\-files -Print each file that would be searched (but don\[aq]t search). 
-.RS -.PP -Combine with the \-g flag to return matched paths, for example: -.IP -.nf -\f[C] -rg\ \-g\ \[aq]*.foo\[aq]\ \-\-files -\f[] -.fi -.RE -.TP -.B \-l, \-\-files\-with\-matches -Only show path of each file with matches. -.RS -.RE -.TP -.B \-\-files\-without\-match -Only show path of each file with no matches. -.RS -.RE -.TP -.B \-H, \-\-with\-filename -Prefix each match with the file name that contains it. -This is the default when more than one file is searched. -.RS -.RE -.TP -.B \-\-no\-filename -Never show the filename for a match. -This is the default when one file is searched. -.RS -.RE -.TP -.B \-\-heading -Show the file name above clusters of matches from each file instead of -showing the file name for every match. -This is the default mode at a tty. -.RS -.RE -.TP -.B \-\-no\-heading -Don\[aq]t group matches by each file. -If \-H/\-\-with\-filename is enabled, then file names will be shown for -every line matched. -This is the default mode when not at a tty. -.RS -.RE -.TP -.B \-\-hidden -Search hidden directories and files. -(Hidden directories and files are skipped by default.) -.RS -.RE -.TP -.B \-\-iglob \f[I]GLOB\f[] ... -Include or exclude files/directories case insensitively. -This always overrides any other ignore logic if there is a conflict, but -is otherwise applied in addition to ignore files (e.g., .gitignore or -\&.ignore). -Multiple glob flags may be used. -Globbing rules match .gitignore globs. -Precede a glob with a \[aq]!\[aq] to exclude it. -.RS -.RE -.TP -.B \-\-ignore\-file \f[I]FILE\f[] ... -Specify additional ignore files for filtering file paths. -Ignore files should be in the gitignore format and are matched relative -to the current working directory. -These ignore files have lower precedence than all other ignore files. -When specifying multiple ignore files, earlier files have lower -precedence than later files. -.RS -.RE -.TP -.B \-L, \-\-follow -Follow symlinks. 
-.RS -.RE -.TP -.B \-M, \-\-max\-columns \f[I]NUM\f[] -Don\[aq]t print lines longer than this limit in bytes. -Longer lines are omitted, and only the number of matches in that line is -printed. -.RS -.RE -.TP -.B \-m, \-\-max\-count \f[I]NUM\f[] -Limit the number of matching lines per file searched to NUM. -.RS -.RE -.TP -.B \-\-max\-filesize \f[I]NUM\f[]+\f[I]SUFFIX\f[]? -Ignore files larger than \f[I]NUM\f[] in size. -Directories will never be ignored. -.RS -.PP -\f[I]SUFFIX\f[] is optional and may be one of K, M or G. -These correspond to kilobytes, megabytes and gigabytes respectively. -If omitted the input is treated as bytes. -.RE -.TP -.B \-\-maxdepth \f[I]NUM\f[] -Descend at most NUM directories below the command line arguments. -A value of zero searches only the starting\-points themselves. -.RS -.RE -.TP -.B \-\-mmap -Search using memory maps when possible. -This is enabled by default when ripgrep thinks it will be faster. -(Note that mmap searching doesn\[aq]t currently support the various -context related options.) -.RS -.RE -.TP -.B \-\-no\-messages -Suppress all error messages. -.RS -.RE -.TP -.B \-\-no\-mmap -Never use memory maps, even when they might be faster. -.RS -.RE -.TP -.B \-\-no\-ignore -Don\[aq]t respect ignore files (.gitignore, .ignore, etc.) This implies -\-\-no\-ignore\-parent. -.RS -.RE -.TP -.B \-\-no\-ignore\-parent -Don\[aq]t respect ignore files in parent directories. -.RS -.RE -.TP -.B \-\-no\-ignore\-vcs -Don\[aq]t respect version control ignore files (e.g., .gitignore). -Note that .ignore files will continue to be respected. -.RS -.RE -.TP -.B \-0, \-\-null -Whenever a file name is printed, follow it with a NUL byte. -This includes printing filenames before matches, and when printing a -list of matching files such as with \-\-count, \-\-files\-with\-matches -and \-\-files. -.RS -.RE -.TP -.B \-o, \-\-only\-matching -Print only the matched (non\-empty) parts of a matching line, with each -such part on a separate output line. 
-.RS -.RE -.TP -.B \-\-path\-separator \f[I]SEPARATOR\f[] -The path separator to use when printing file paths. -This defaults to your platform\[aq]s path separator, which is / on Unix -and \\ on Windows. -This flag is intended for overriding the default when the environment -demands it (e.g., cygwin). -A path separator is limited to a single byte. -.RS -.RE -.TP -.B \-p, \-\-pretty -Alias for \-\-color=always \-\-heading \-\-line\-number. -.RS -.RE -.TP -.B \-r, \-\-replace \f[I]ARG\f[] -Replace every match with the string given when printing search results. -Neither this flag nor any other flag will modify your files. -.RS -.PP -Capture group indices (e.g., $5) and names (e.g., $foo) are supported in -the replacement string. -.PP -Note that the replacement by default replaces each match, and NOT the -entire line. -To replace the entire line, you should match the entire line. -For example, to emit only the first phone numbers in each line: -.IP -.nf -\f[C] -rg\ \[aq]^.*([0\-9]{3}\-[0\-9]{3}\-[0\-9]{4}).*$\[aq]\ \-\-replace\ \[aq]$1\[aq] -\f[] -.fi -.RE -.TP -.B \-s, \-\-case\-sensitive -Search case sensitively. -This overrides \-\-ignore\-case and \-\-smart\-case. -.RS -.RE -.TP -.B \-S, \-\-smart\-case -Search case insensitively if the pattern is all lowercase. -Search case sensitively otherwise. -This is overridden by either \-\-case\-sensitive or \-\-ignore\-case. -.RS -.RE -.TP -.B \-\-sort\-files -Sort results by file path. -Note that this currently disables all parallelism and runs search in a -single thread. -.RS -.RE -.TP -.B \-j, \-\-threads \f[I]ARG\f[] -The number of threads to use. -0 means use the number of logical CPUs (capped at 12). -[default: 0] -.RS -.RE -.TP -.B \-\-version -Show the version number of ripgrep and exit. -.RS -.RE -.TP -.B \-\-vimgrep -Show results with every match on its own line, including line numbers -and column numbers. -With this option, a line with more than one match will be printed more -than once. 
-.RS -.PP -Recommended .vimrc configuration: -.IP -.nf -\f[C] -\ \ set\ grepprg=rg\\\ \-\-vimgrep -\ \ set\ grepformat^=%f:%l:%c:%m -\f[] -.fi -.PP -Use :grep to grep for something, then :cn and :cp to navigate through -the matches. -.RE -.SH FILE TYPE MANAGEMENT OPTIONS -.TP -.B \-\-type\-list -Show all supported file types and their associated globs. -.RS -.RE -.TP -.B \-\-type\-add \f[I]ARG\f[] ... -Add a new glob for a particular file type. -Only one glob can be added at a time. -Multiple \-\-type\-add flags can be provided. -Unless \-\-type\-clear is used, globs are added to any existing globs -inside of ripgrep. -Note that this must be passed to every invocation of rg. -Type settings are NOT persisted. -Example: -.RS -.IP -.nf -\f[C] -\ \ rg\ \-\-type\-add\ \[aq]foo:*.foo\[aq]\ \-tfoo\ PATTERN -\f[] -.fi -.PP -\-\-type\-add can also be used to include rules from other types with -the special include directive. -The include directive permits specifying one or more other type names -(separated by a comma) that have been defined and its rules will -automatically be imported into the type specified. -For example, to create a type called src that matches C++, Python and -Markdown files, one can use: -.IP -.nf -\f[C] -\ \ \-\-type\-add\ \[aq]src:include:cpp,py,md\[aq] -\f[] -.fi -.PP -Additional glob rules can still be added to the src type by using the -\-\-type\-add flag again: -.IP -.nf -\f[C] -\ \ \-\-type\-add\ \[aq]src:include:cpp,py,md\[aq]\ \-\-type\-add\ \[aq]src:*.foo\[aq] -\f[] -.fi -.PP -Note that type names must consist only of Unicode letters or numbers. -Punctuation characters are not allowed. -.RE -.TP -.B \-\-type\-clear \f[I]TYPE\f[] ... -Clear the file type globs previously defined for TYPE. -This only clears the default type definitions that are found inside of -ripgrep. -Note that this must be passed to every invocation of rg. 
-.RS -.RE -.SH SHELL COMPLETION -.PP -Shell completion files are included in the release tarball for Bash, -Fish, Zsh and PowerShell. -.PP -For \f[B]bash\f[], move \f[C]rg.bash\-completion\f[] to -\f[C]$XDG_CONFIG_HOME/bash_completion\f[] or -\f[C]/etc/bash_completion.d/\f[]. -.PP -For \f[B]fish\f[], move \f[C]rg.fish\f[] to -\f[C]$HOME/.config/fish/completions\f[]. diff -Nru ripgrep-0.6.0/doc/rg.1.md ripgrep-0.10.0.3/doc/rg.1.md --- ripgrep-0.6.0/doc/rg.1.md 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/doc/rg.1.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,372 +0,0 @@ -# NAME - -rg - recursively search current directory for lines matching a pattern - -# SYNOPSIS - -rg [*options*] *PATTERN* [*path* ...] - -rg [*options*] [-e *PATTERN* ...] [-f *FILE* ...] [*path* ...] - -rg [*options*] --files [*path* ...] - -rg [*options*] --type-list - -rg [*options*] --help - -rg [*options*] --version - -# DESCRIPTION - -ripgrep (rg) combines the usability of The Silver Searcher (an ack clone) with -the raw speed of grep. - -ripgrep's regex engine uses finite automata and guarantees linear time -searching. Because of this, features like backreferences and arbitrary -lookaround are not supported. - -Note that ripgrep may abort unexpectedly when using default settings if it -searches a file that is simultaneously truncated. This behavior can be avoided -by passing the --no-mmap flag. - -Project home page: https://github.com/BurntSushi/ripgrep - -# COMMON OPTIONS - --a, --text -: Search binary files as if they were text. - --c, --count -: Only show count of line matches for each file. - ---color *WHEN* -: Whether to use color in the output. Valid values are never, auto, always or - ansi. The default is auto. When always is used, coloring is attempted based - on your environment. When ansi is used, coloring is forcefully done using - ANSI escape color codes. - --e, --regexp *PATTERN* ... -: Use PATTERN to search. 
This option can be provided multiple times, where all - patterns given are searched. This is also useful when searching for patterns - that start with a dash. - --F, --fixed-strings -: Treat the pattern as a literal string instead of a regular expression. - --g, --glob *GLOB* ... -: Include or exclude files for searching that match the given glob. This always - overrides any other ignore logic if there is a conflict, but is otherwise - applied in addition to ignore files (e.g., .gitignore or .ignore). Multiple - glob flags may be used. Globbing rules match .gitignore globs. Precede a - glob with a '!' to exclude it. - - The --glob flag subsumes the functionality of both the --include and - --exclude flags commonly found in other tools. - - Values given to -g must be quoted or your shell will expand them and result - in unexpected behavior. - - Combine with the --files flag to return matched filenames - (i.e., to replicate ack/ag's -g flag). For example: - - rg -g '*.foo' --files - --h, --help -: Show this usage message. - --i, --ignore-case -: Case insensitive search. Overridden by --case-sensitive. - --n, --line-number -: Show line numbers (1-based). This is enabled by default at a tty. - --N, --no-line-number -: Suppress line numbers. - --q, --quiet -: Do not print anything to stdout. If a match is found in a file, stop - searching that file. - --t, --type *TYPE* ... -: Only search files matching TYPE. Multiple type flags may be provided. Use the - --type-list flag to list all available types. - --T, --type-not *TYPE* ... -: Do not search files matching TYPE. Multiple not-type flags may be provided. - --u, --unrestricted ... -: Reduce the level of 'smart' searching. A single -u doesn't respect .gitignore - (etc.) files. Two -u flags will search hidden files and directories. Three - -u flags will search binary files. -uu is equivalent to `grep -r`, and -uuu - is equivalent to `grep -a -r`. 
- - Note that the -u flags are convenient aliases for other combinations of - flags. -u aliases --no-ignore. -uu aliases --no-ignore --hidden. - -uuu aliases --no-ignore --hidden --text. - --v, --invert-match -: Invert matching. - --w, --word-regexp -: Only show matches surrounded by word boundaries. This is equivalent to - putting \\b before and after the search pattern. - --x, --line-regexp -: Only show matches surrounded by line boundaries. This is equivalent to - putting ^...$ around the search pattern. - -# LESS COMMON OPTIONS - --A, --after-context *NUM* -: Show NUM lines after each match. - --B, --before-context *NUM* -: Show NUM lines before each match. - --C, --context *NUM* -: Show NUM lines before and after each match. - ---colors *SPEC* ... -: This flag specifies color settings for use in the output. This flag may be - provided multiple times. Settings are applied iteratively. Colors are limited - to one of eight choices: red, blue, green, cyan, magenta, yellow, white and - black. Styles are limited to nobold, bold, nointense or intense. - - The format of the flag is {type}:{attribute}:{value}. {type} should be one - of path, line, column or match. {attribute} can be fg, bg or style. Value - is either a color (for fg and bg) or a text style. A special format, - {type}:none, will clear all color settings for {type}. - - For example, the following command will change the match color to magenta - and the background color for line numbers to yellow: - - rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo. - ---column -: Show column numbers (1 based) in output. This only shows the column - numbers for the first match on each line. Note that this doesn't try - to account for Unicode. One byte is equal to one column. This implies - --line-number. - ---context-separator *SEPARATOR* -: The string to use when separating non-continuous context lines. Escape - sequences may be used. [default: --] - ---debug -: Show debug messages. 
- --E, --encoding *ENCODING* -: Specify the text encoding that ripgrep will use on all files - searched. The default value is 'auto', which will cause ripgrep to do - a best effort automatic detection of encoding on a per-file basis. - Other supported values can be found in the list of labels here: - https://encoding.spec.whatwg.org/#concept-encoding-get - --f, --file *FILE* ... -: Search for patterns from the given file, with one pattern per line. When this - flag is used or multiple times or in combination with the -e/--regexp flag, - then all patterns provided are searched. Empty pattern lines will match all - input lines, and the newline is not counted as part of the pattern. - ---files -: Print each file that would be searched (but don't search). - - Combine with the -g flag to return matched paths, for example: - - rg -g '*.foo' --files - --l, --files-with-matches -: Only show path of each file with matches. - ---files-without-match -: Only show path of each file with no matches. - --H, --with-filename -: Prefix each match with the file name that contains it. This is the - default when more than one file is searched. - ---no-filename -: Never show the filename for a match. This is the default when - one file is searched. - ---heading -: Show the file name above clusters of matches from each file instead of - showing the file name for every match. This is the default mode at a tty. - ---no-heading -: Don't group matches by each file. If -H/--with-filename is enabled, then - file names will be shown for every line matched. This is the default mode - when not at a tty. - ---hidden -: Search hidden directories and files. (Hidden directories and files are - skipped by default.) - ---iglob *GLOB* ... -: Include or exclude files/directories case insensitively. This always - overrides any other ignore logic if there is a conflict, but is otherwise - applied in addition to ignore files (e.g., .gitignore or .ignore). Multiple - glob flags may be used. 
Globbing rules match .gitignore globs. Precede a - glob with a '!' to exclude it. - ---ignore-file *FILE* ... -: Specify additional ignore files for filtering file paths. - Ignore files should be in the gitignore format and are matched - relative to the current working directory. These ignore files - have lower precedence than all other ignore files. When - specifying multiple ignore files, earlier files have lower - precedence than later files. - --L, --follow -: Follow symlinks. - --M, --max-columns *NUM* -: Don't print lines longer than this limit in bytes. Longer lines are omitted, - and only the number of matches in that line is printed. - --m, --max-count *NUM* -: Limit the number of matching lines per file searched to NUM. - ---max-filesize *NUM*+*SUFFIX*? -: Ignore files larger than *NUM* in size. Directories will never be ignored. - - *SUFFIX* is optional and may be one of K, M or G. These correspond to - kilobytes, megabytes and gigabytes respectively. If omitted the input is - treated as bytes. - ---maxdepth *NUM* -: Descend at most NUM directories below the command line arguments. - A value of zero searches only the starting-points themselves. - ---mmap -: Search using memory maps when possible. This is enabled by default - when ripgrep thinks it will be faster. (Note that mmap searching - doesn't currently support the various context related options.) - ---no-messages -: Suppress all error messages. - ---no-mmap -: Never use memory maps, even when they might be faster. - ---no-ignore -: Don't respect ignore files (.gitignore, .ignore, etc.) - This implies --no-ignore-parent. - ---no-ignore-parent -: Don't respect ignore files in parent directories. - ---no-ignore-vcs -: Don't respect version control ignore files (e.g., .gitignore). - Note that .ignore files will continue to be respected. - --0, --null -: Whenever a file name is printed, follow it with a NUL byte. 
- This includes printing filenames before matches, and when printing - a list of matching files such as with --count, --files-with-matches - and --files. - --o, --only-matching -: Print only the matched (non-empty) parts of a matching line, with each such - part on a separate output line. - ---path-separator *SEPARATOR* -: The path separator to use when printing file paths. This defaults to your - platform's path separator, which is / on Unix and \\ on Windows. This flag is - intended for overriding the default when the environment demands it (e.g., - cygwin). A path separator is limited to a single byte. - --p, --pretty -: Alias for --color=always --heading --line-number. - --r, --replace *ARG* -: Replace every match with the string given when printing search results. - Neither this flag nor any other flag will modify your files. - - Capture group indices (e.g., $5) and names (e.g., $foo) are supported - in the replacement string. - - Note that the replacement by default replaces each match, and NOT the - entire line. To replace the entire line, you should match the entire line. - For example, to emit only the first phone numbers in each line: - - rg '^.*([0-9]{3}-[0-9]{3}-[0-9]{4}).*$' --replace '$1' - --s, --case-sensitive -: Search case sensitively. This overrides --ignore-case and --smart-case. - --S, --smart-case -: Search case insensitively if the pattern is all lowercase. - Search case sensitively otherwise. This is overridden by either - --case-sensitive or --ignore-case. - ---sort-files -: Sort results by file path. Note that this currently - disables all parallelism and runs search in a single thread. - --j, --threads *ARG* -: The number of threads to use. 0 means use the number of logical CPUs - (capped at 12). [default: 0] - ---version -: Show the version number of ripgrep and exit. - ---vimgrep -: Show results with every match on its own line, including - line numbers and column numbers. 
With this option, a line with - more than one match will be printed more than once. - - Recommended .vimrc configuration: - - set grepprg=rg\ --vimgrep - set grepformat^=%f:%l:%c:%m - - Use :grep to grep for something, then :cn and :cp to navigate through the - matches. - -# FILE TYPE MANAGEMENT OPTIONS - ---type-list -: Show all supported file types and their associated globs. - ---type-add *ARG* ... -: Add a new glob for a particular file type. Only one glob can be added - at a time. Multiple --type-add flags can be provided. Unless --type-clear - is used, globs are added to any existing globs inside of ripgrep. Note that - this must be passed to every invocation of rg. Type settings are NOT - persisted. Example: - - rg --type-add 'foo:*.foo' -tfoo PATTERN - - --type-add can also be used to include rules from other types - with the special include directive. The include directive - permits specifying one or more other type names (separated by a - comma) that have been defined and its rules will automatically - be imported into the type specified. For example, to create a - type called src that matches C++, Python and Markdown files, one - can use: - - --type-add 'src:include:cpp,py,md' - - Additional glob rules can still be added to the src type by - using the --type-add flag again: - - --type-add 'src:include:cpp,py,md' --type-add 'src:*.foo' - - Note that type names must consist only of Unicode letters or - numbers. Punctuation characters are not allowed. - ---type-clear *TYPE* ... -: Clear the file type globs previously defined for TYPE. This only clears - the default type definitions that are found inside of ripgrep. Note - that this must be passed to every invocation of rg. - -# SHELL COMPLETION - -Shell completion files are included in the release tarball for Bash, Fish, Zsh -and PowerShell. - -For **bash**, move `rg.bash-completion` to `$XDG_CONFIG_HOME/bash_completion` -or `/etc/bash_completion.d/`. 
- -For **fish**, move `rg.fish` to `$HOME/.config/fish/completions`. diff -Nru ripgrep-0.6.0/doc/rg.1.txt.tpl ripgrep-0.10.0.3/doc/rg.1.txt.tpl --- ripgrep-0.6.0/doc/rg.1.txt.tpl 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/doc/rg.1.txt.tpl 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,188 @@ +rg(1) +===== + +Name +---- +rg - recursively search current directory for lines matching a pattern + + +Synopsis +-------- +*rg* [_OPTIONS_] _PATTERN_ [_PATH_...] + +*rg* [_OPTIONS_] *-e* _PATTERN_... [_PATH_...] + +*rg* [_OPTIONS_] *-f* _PATTERNFILE_... [_PATH_...] + +*rg* [_OPTIONS_] *--files* [_PATH_...] + +*rg* [_OPTIONS_] *--type-list* + +*command* | *rg* [_OPTIONS_] _PATTERN_ + +*rg* [_OPTIONS_] *--help* + +*rg* [_OPTIONS_] *--version* + + +DESCRIPTION +----------- +ripgrep (rg) recursively searches your current directory for a regex pattern. +By default, ripgrep will respect your .gitignore and automatically skip hidden +files/directories and binary files. + +ripgrep's default regex engine uses finite automata and guarantees linear +time searching. Because of this, features like backreferences and arbitrary +look-around are not supported. However, if ripgrep is built with PCRE2, then +the --pcre2 flag can be used to enable backreferences and look-around. + +ripgrep supports configuration files. Set RIPGREP_CONFIG_PATH to a +configuration file. The file can specify one shell argument per line. Lines +starting with '#' are ignored. For more details, see the man page or the +README. + + +REGEX SYNTAX +------------ +ripgrep uses Rust's regex engine by default, which documents its syntax: +https://docs.rs/regex/*/regex/#syntax + +ripgrep uses byte-oriented regexes, which has some additional documentation: +https://docs.rs/regex/*/regex/bytes/index.html#syntax + +To a first approximation, ripgrep uses Perl-like regexes without look-around or +backreferences. 
This makes them very similar to the "extended" (ERE) regular +expressions supported by `egrep`, but with a few additional features like +Unicode character classes. + +If you're using ripgrep with the --pcre2 flag, then please consult +https://www.pcre.org or the PCRE2 man pages for documentation on the supported +syntax. + + +POSITIONAL ARGUMENTS +-------------------- +_PATTERN_:: + A regular expression used for searching. To match a pattern beginning with a + dash, use the -e/--regexp option. + +_PATH_:: + A file or directory to search. Directories are searched recursively. Paths + specified explicitly on the command line override glob and ignore rules. + + +OPTIONS +------- +{OPTIONS} + + +EXIT STATUS +----------- +If ripgrep finds a match, then the exit status of the program is 0. If no match +could be found, then the exit status is non-zero. + + +CONFIGURATION FILES +------------------- +ripgrep supports reading configuration files that change ripgrep's default +behavior. The format of the configuration file is an "rc" style and is very +simple. It is defined by two rules: + + 1. Every line is a shell argument, after trimming ASCII whitespace. + 2. Lines starting with _#_ (optionally preceded by any amount of + ASCII whitespace) are ignored. + +ripgrep will look for a single configuration file if and only if the +_RIPGREP_CONFIG_PATH_ environment variable is set and is non-empty. +ripgrep will parse shell arguments from this file on startup and will +behave as if the arguments in this file were prepended to any explicit +arguments given to ripgrep on the command line. 
+ +For example, if your ripgreprc file contained a single line: + + --smart-case + +then the following command + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo + +would behave identically to the following command + + rg --smart-case foo + +another example is adding types + + --type-add + web:*.{html,css,js}* + +would behave identically to the following command + + rg --type-add 'web:*.{html,css,js}*' foo + +same with using globs + + --glob=!git/* + +or + + --glob + !git/* + +would behave identically to the following command + + rg --glob '!git/*' foo + +ripgrep also provides a flag, *--no-config*, that when present will suppress +any and all support for configuration. This includes any future support +for auto-loading configuration files from pre-determined paths. + +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. That is, +this command: + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo --case-sensitive + +is exactly equivalent to + + rg --smart-case foo --case-sensitive + +in which case, the *--case-sensitive* flag would override the *--smart-case* +flag. + + +SHELL COMPLETION +---------------- +Shell completion files are included in the release tarball for Bash, Fish, Zsh +and PowerShell. + +For *bash*, move `rg.bash` to `$XDG_CONFIG_HOME/bash_completion` +or `/etc/bash_completion.d/`. + +For *fish*, move `rg.fish` to `$HOME/.config/fish/completions`. + +For *zsh*, move `_rg` to one of your `$fpath` directories. + + +CAVEATS +------- +ripgrep may abort unexpectedly when using default settings if it searches a +file that is simultaneously truncated. This behavior can be avoided by passing +the --no-mmap flag which will forcefully disable the use of memory maps in all +cases. + + +VERSION +------- +{VERSION} + + +HOMEPAGE +-------- +https://github.com/BurntSushi/ripgrep + +Please report bugs and feature requests in the issue tracker. 
+ + +AUTHORS +------- +Andrew Gallant diff -Nru ripgrep-0.6.0/FAQ.md ripgrep-0.10.0.3/FAQ.md --- ripgrep-0.6.0/FAQ.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/FAQ.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,984 @@ +## FAQ + +* [Does ripgrep support configuration files?](#config) +* [What's changed in ripgrep recently?](#changelog) +* [When is the next release?](#release) +* [Does ripgrep have a man page?](#manpage) +* [Does ripgrep have support for shell auto-completion?](#complete) +* [How do I use lookaround and/or backreferences?](#fancy) +* [How do I configure ripgrep's colors?](#colors) +* [How do I enable true colors on Windows?](#truecolors-windows) +* [How do I stop ripgrep from messing up colors when I kill it?](#stop-ripgrep) +* [How can I get results in a consistent order?](#order) +* [How do I search files that aren't UTF-8?](#encoding) +* [How do I search compressed files?](#compressed) +* [How do I search over multiple lines?](#multiline) +* [How do I get around the regex size limit?](#size-limit) +* [How do I make the `-f/--file` flag faster?](#dfa-size) +* [How do I make the output look like The Silver Searcher's output?](#silver-searcher-output) +* [Why does ripgrep get slower when I enable PCRE2 regexes?](#pcre2-slow) +* [When I run `rg`, why does it execute some other command?](#rg-other-cmd) +* [How do I create an alias for ripgrep on Windows?](#rg-alias-windows) +* [How do I create a PowerShell profile?](#powershell-profile) +* [How do I pipe non-ASCII content to ripgrep on Windows?](#pipe-non-ascii-windows) +* [How can I search and replace with ripgrep?](#search-and-replace) +* [How is ripgrep licensed?](#license) +* [Can ripgrep replace grep?](#posix4ever) +* [What does the "rip" in ripgrep mean?](#intentcountsforsomething) + + +

+Does ripgrep support configuration files? +

+ +Yes. See the +[guide's section on configuration files](GUIDE.md#configuration-file). + + +

+What's changed in ripgrep recently? +

+ +Please consult ripgrep's [CHANGELOG](CHANGELOG.md). + + +

+When is the next release? +

+ +ripgrep is a project whose contributors are volunteers. A release schedule +adds undue stress to said volunteers. Therefore, releases are made on a best +effort basis and no dates **will ever be given**. + +One exception to this is high impact bugs. If a ripgrep release contains a +significant regression, then there will generally be a strong push to get a +patch release out with a fix. + + +

+Does ripgrep have a man page? +

+ +Yes! Whenever ripgrep is compiled on a system with `asciidoc` present, then a +man page is generated from ripgrep's argv parser. After compiling ripgrep, you +can find the man page like so from the root of the repository: + +``` +$ find ./target -name rg.1 -print0 | xargs -0 ls -t | head -n1 +./target/debug/build/ripgrep-79899d0edd4129ca/out/rg.1 +``` + +Running `man -l ./target/debug/build/ripgrep-79899d0edd4129ca/out/rg.1` will +show the man page in your normal pager. + +Note that the man page's documentation for options is equivalent to the output +shown in `rg --help`. To see more condensed documentation (one line per flag), +run `rg -h`. + +The man page is also included in all +[ripgrep binary releases](https://github.com/BurntSushi/ripgrep/releases). + + +

+Does ripgrep have support for shell auto-completion? +

+ +Yes! Shell completions can be found in the +[same directory as the man page](#manpage) +after building ripgrep. Zsh completions are maintained separately and committed +to the repository in `complete/_rg`. + +Shell completions are also included in all +[ripgrep binary releases](https://github.com/BurntSushi/ripgrep/releases). + +For **bash**, move `rg.bash` to +`$XDG_CONFIG_HOME/bash_completion` or `/etc/bash_completion.d/`. + +For **fish**, move `rg.fish` to `$HOME/.config/fish/completions/`. + +For **zsh**, move `_rg` to one of your `$fpath` directories. + +For **PowerShell**, add `. _rg.ps1` to your PowerShell +[profile](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx) +(note the leading period). If the `_rg.ps1` file is not on your `PATH`, do +`. /path/to/_rg.ps1` instead. + + +

+How can I get results in a consistent order? +

+ +By default, ripgrep uses parallelism to execute its search because this makes +the search much faster on most modern systems. This in turn means that ripgrep +has a non-deterministic aspect to it, since the interleaving of threads during +the execution of the program is itself non-deterministic. This has the effect +of printing results in a somewhat arbitrary order, and this order can change +from run to run of ripgrep. + +The only way to make the order of results consistent is to ask ripgrep to +sort the output. Currently, this will disable all parallelism. (On smaller +repositories, you might not notice much of a performance difference!) You +can achieve this with the `--sort-files` flag. + +There is more discussion on this topic here: +https://github.com/BurntSushi/ripgrep/issues/152 + + +

+How do I search files that aren't UTF-8? +

+ +See the [guide's section on file encoding](GUIDE.md#file-encoding). + + +

+How do I search compressed files? +

+ +ripgrep's `-z/--search-zip` flag will cause it to search compressed files +automatically. Currently, this supports gzip, bzip2, lzma, lz4 and xz only and +requires the corresponding `gzip`, `bzip2` and `xz` binaries to be installed on +your system. (That is, ripgrep does decompression by shelling out to another +process.) + +ripgrep currently does not search archive formats, so `*.tar.gz` files, for +example, are skipped. + + +

+How do I search over multiple lines? +

+ +This isn't currently possible. ripgrep is fundamentally a line-oriented search +tool. With that said, +[multiline search is a planned opt-in feature](https://github.com/BurntSushi/ripgrep/issues/176). + + +

+How do I use lookaround and/or backreferences? +

+ +ripgrep's default regex engine does not support lookaround or backreferences. +This is primarily because the default regex engine is implemented using finite +state machines in order to guarantee a linear worst case time complexity on all +inputs. Backreferences are not possible to implement in this paradigm, and +lookaround appears difficult to do efficiently. + +However, ripgrep optionally supports using PCRE2 as the regex engine instead of +the default one based on finite state machines. You can enable PCRE2 with the +`-P/--pcre2` flag. For example, in the root of the ripgrep repo, you can easily +find all runs of ten word characters that are immediately repeated: + +``` +$ rg -P '(\w{10})\1' +tests/misc.rs +483: cmd.arg("--max-filesize").arg("44444444444444444444"); +globset/src/glob.rs +1206: matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); +``` + +If your version of ripgrep doesn't support PCRE2, then you'll get an error +message when you try to use the `-P/--pcre2` flag: + +``` +$ rg -P '(\w{10})\1' +PCRE2 is not available in this build of ripgrep +``` + +Most of the releases distributed by the ripgrep project here on GitHub will +come bundled with PCRE2 enabled. If you installed ripgrep through a different +means (like your system's package manager), then please reach out to the +maintainer of that package to see whether it's possible to enable the PCRE2 +feature. + + +

+How do I configure ripgrep's colors? +

+ +ripgrep has two flags related to colors: + +* `--color` controls *when* to use colors. +* `--colors` controls *which* colors to use. + +The `--color` flag accepts one of the following possible values: `never`, +`auto`, `always` or `ansi`. The `auto` value is the default and will cause +ripgrep to only enable colors when it is printing to a terminal. But if you +pipe ripgrep to a file or some other process, then it will suppress colors. + +The `--colors` flag is a bit more complicated. The general format is: + +``` +--colors '{type}:{attribute}:{value}' +``` + +* `{type}` should be one of `path`, `line`, `column` or `match`. These + correspond to the four different types of things that ripgrep will add color + to in its output. Select the type whose color you want to change. +* `{attribute}` should be one of `fg`, `bg` or `style`, corresponding to + foreground color, background color, or miscellaneous styling (such as whether + to bold the output or not). +* `{value}` is determined by the value of `{attribute}`. If + `{attribute}` is `style`, then `{value}` should be one of `nobold`, + `bold`, `nointense`, `intense`, `nounderline` or `underline`. If + `{attribute}` is `fg` or `bg`, then `{value}` should be a color. + +A color is specified by one of eight English names, a single 256-color +number or an RGB triple (with over 16 million possible values, or "true +color"). + +The color names are `red`, `blue`, `green`, `cyan`, `magenta`, `yellow`, +`white` or `black`. + +A single 256-color number is a value in the range 0-255 (inclusive). It can +either be in decimal format (e.g., `62`) or hexadecimal format (e.g., `0x3E`). + +An RGB triple corresponds to three numbers (decimal or hexadecimal) separated +by commas. + +As a special case, `--colors '{type}:none'` will clear all colors and styles +associated with `{type}`, which lets you start with a clean slate (instead of +building on top of ripgrep's default color settings). 
+ +Here's an example that highlights the matches with a nice blue background +with bolded white text: + +``` +$ rg somepattern \ + --colors 'match:none' \ + --colors 'match:bg:0x33,0x66,0xFF' \ + --colors 'match:fg:white' \ + --colors 'match:style:bold' +``` + +Colors are an ideal candidate to set in your +[configuration file](GUIDE.md#configuration-file). See the +[question on emulating The Silver Searcher's output style](#silver-searcher-output) +for an example specific to colors. + + +

+How do I enable true colors on Windows? +

+ +First, see the previous question's +[answer on configuring colors](#colors). + +Secondly, coloring on Windows is a bit complicated. If you're using a terminal +like Cygwin, then it's likely true color support already works out of the box. +However, if you are using a normal Windows console (`cmd` or `PowerShell`) and +a version of Windows prior to 10, then there is no known way to get true +color support. If you are on Windows 10 and using a Windows console, then +true colors should work out of the box with one caveat: you might need to +clear ripgrep's default color settings first. That is, instead of this: + +``` +$ rg somepattern --colors 'match:fg:0x33,0x66,0xFF' +``` + +you should do this + +``` +$ rg somepattern --colors 'match:none' --colors 'match:fg:0x33,0x66,0xFF' +``` + +This is because ripgrep might set the default style for `match` to `bold`, and +it seems like Windows 10's VT100 support doesn't permit bold and true color +ANSI escapes to be used simultaneously. The work-around above will clear +ripgrep's default styling, allowing you to craft it exactly as desired. + + +

+How do I stop ripgrep from messing up colors when I kill it? +

+ +Type in `color` in cmd.exe (Command Prompt) and `echo -ne "\033[0m"` on +Unix-like systems to restore your original foreground color. + +In PowerShell, you can add the following code to your profile which will +restore the original foreground color when `Reset-ForegroundColor` is called. +Including the `Set-Alias` line will allow you to call it with simply `color`. + +```powershell +$OrigFgColor = $Host.UI.RawUI.ForegroundColor +function Reset-ForegroundColor { + $Host.UI.RawUI.ForegroundColor = $OrigFgColor +} +Set-Alias -Name color -Value Reset-ForegroundColor +``` + +PR [#187](https://github.com/BurntSushi/ripgrep/pull/187) fixed this, and it +was later deprecated in +[#281](https://github.com/BurntSushi/ripgrep/issues/281). A full explanation is +available +[here](https://github.com/BurntSushi/ripgrep/issues/281#issuecomment-269093893). + + +

+How do I get around the regex size limit? +

+ +If you've given ripgrep a particularly large pattern (or a large number of +smaller patterns), then it is possible that it will fail to compile because it +hit a pre-set limit. For example: + +``` +$ rg '\pL{1000}' +Compiled regex exceeds size limit of 10485760 bytes. +``` + +(Note: `\pL{1000}` may look small, but `\pL` is the character class containing +all Unicode letters, which is quite large. *And* it's repeated 1000 times.) + +In this case, you can work around by simply increasing the limit: + +``` +$ rg '\pL{1000}' --regex-size-limit 1G +``` + +Increasing the limit to 1GB does not necessarily mean that ripgrep will use +that much memory. The limit just says that it's allowed to (approximately) use +that much memory for constructing the regular expression. + + +

+How do I make the -f/--file flag faster? +

+ +The `-f/--file` flag permits one to give a file to ripgrep which contains a pattern +on each line. ripgrep will then report any line that matches any of the +patterns. + +If this pattern file gets too big, then it is possible ripgrep will slow down +dramatically. *Typically* this is because an internal cache is too small, and +will cause ripgrep to spill over to a slower but more robust regular expression +engine. If this is indeed the problem, then it is possible to increase this +cache and regain speed. The cache can be controlled via the `--dfa-size-limit` +flag. For example, using `--dfa-size-limit 1G` will set the cache size to 1GB. +(Note that this doesn't mean ripgrep will use 1GB of memory automatically, but +it will allow the regex engine to if it needs to.) + + +

+How do I make the output look like The Silver Searcher's output? +

+ +Use the `--colors` flag, like so: + +``` +rg --colors line:fg:yellow \ + --colors line:style:bold \ + --colors path:fg:green \ + --colors path:style:bold \ + --colors match:fg:black \ + --colors match:bg:yellow \ + --colors match:style:nobold \ + foo +``` + +Alternatively, add your color configuration to your ripgrep config file (which +is activated by setting the `RIPGREP_CONFIG_PATH` environment variable to point +to your config file). For example: + +``` +$ cat $HOME/.config/ripgrep/rc +--colors=line:fg:yellow +--colors=line:style:bold +--colors=path:fg:green +--colors=path:style:bold +--colors=match:fg:black +--colors=match:bg:yellow +--colors=match:style:nobold +$ RIPGREP_CONFIG_PATH=$HOME/.config/ripgrep/rc rg foo +``` + + +

+Why does ripgrep get slower when I enable PCRE2 regexes? +

+ +When you use the `--pcre2` (`-P` for short) flag, ripgrep will use the PCRE2 +regex engine instead of the default. Both regex engines are quite fast, +but PCRE2 provides a number of additional features such as look-around and +backreferences that many enjoy using. This is largely because PCRE2 uses +a backtracking implementation where as the default regex engine uses a finite +automaton based implementation. The former provides the ability to add lots of +bells and whistles over the latter, but the latter executes with worst case +linear time complexity. + +With that out of the way, if you've used `-P` with ripgrep, you may have +noticed that it can be slower. The reasons for why this is are quite complex, +and they are complex because the optimizations that ripgrep uses to implement +fast search are complex. + +The task ripgrep has before it is somewhat simple; all it needs to do is search +a file for occurrences of some pattern and then print the lines containing +those occurrences. The problem lies in what is considered a valid match and how +exactly we read the bytes from a file. + +In terms of what is considered a valid match, remember that ripgrep will only +report matches spanning a single line by default. The problem here is that +some patterns can match across multiple lines, and ripgrep needs to prevent +that from happening. For example, `foo\sbar` will match `foo\nbar`. The most +obvious way to achieve this is to read the data from a file, and then apply +the pattern search to that data for each line. The problem with this approach +is that it can be quite slow; it would be much faster to let the pattern +search across as much data as possible. It's faster because it gets rid of the +overhead of finding the boundaries of every line, and also because it gets rid +of the overhead of starting and stopping the pattern search for every single +line. 
(This is operating under the general assumption that matching lines are +much rarer than non-matching lines.) + +It turns out that we can use the faster approach by applying a very simple +restriction to the pattern: *statically prevent* the pattern from matching +through a `\n` character. Namely, when given a pattern like `foo\sbar`, +ripgrep will remove `\n` from the `\s` character class automatically. In some +cases, a simple removal is not so easy. For example, ripgrep will return an +error when your pattern includes a `\n` literal: + +``` +$ rg '\n' +the literal '"\n"' is not allowed in a regex +``` + +So what does this have to do with PCRE2? Well, ripgrep's default regex engine +exposes APIs for doing syntactic analysis on the pattern in a way that makes +it quite easy to strip `\n` from the pattern (or otherwise detect it and report +an error if stripping isn't possible). PCRE2 seemingly does not provide a +similar API, so ripgrep does not do any stripping when PCRE2 is enabled. This +forces ripgrep to use the "slow" search strategy of searching each line +individually. + +OK, so if enabling PCRE2 slows down the default method of searching because it +forces matches to be limited to a single line, then why is PCRE2 also sometimes +slower when performing multiline searches? Well, that's because there are +*multiple* reasons why using PCRE2 in ripgrep can be slower than the default +regex engine. This time, blame PCRE2's Unicode support, which ripgrep enables +by default. In particular, PCRE2 cannot simultaneously enable Unicode support +and search arbitrary data. That is, when PCRE2's Unicode support is enabled, +the data **must** be valid UTF-8 (to do otherwise is to invoke undefined +behavior). This is in contrast to ripgrep's default regex engine, which can +enable Unicode support and still search arbitrary data. ripgrep's default +regex engine simply won't match invalid UTF-8 for a pattern that can otherwise +only match valid UTF-8. 
Why doesn't PCRE2 do the same? This author isn't +familiar with its internals, so we can't comment on it here. + +The bottom line here is that we can't enable PCRE2's Unicode support without +simultaneously incurring a performance penalty for ensuring that we are +searching valid UTF-8. In particular, ripgrep will transcode the contents +of each file to UTF-8 while replacing invalid UTF-8 data with the Unicode +replacement codepoint. ripgrep then disables PCRE2's own internal UTF-8 +checking, since we've guaranteed the data we hand it will be valid UTF-8. The +reason why ripgrep takes this approach is because if we do hand PCRE2 invalid +UTF-8, then it will report a match error if it comes across an invalid UTF-8 +sequence. This is not good news for ripgrep, since it will stop it from +searching the rest of the file, and will also print potentially undesirable +error messages to users. + +All right, the above is a lot of information to swallow if you aren't already +familiar with ripgrep internals. Let's make this concrete with some examples. +First, let's get some data big enough to magnify the performance differences: + +``` +$ curl -O 'https://burntsushi.net/stuff/subtitles2016-sample.gz' +$ gzip -d subtitles2016-sample +$ md5sum subtitles2016-sample +e3cb796a20bbc602fbfd6bb43bda45f5 subtitles2016-sample +``` + +To search this data, we will use the pattern `^\w{42}$`, which contains exactly +one hit in the file and has no literals. Having no literals is important, +because it ensures that the regex engine won't use literal optimizations to +speed up the search. In other words, it lets us reason coherently about the +actual task that the regex engine is performing. + +Let's now walk through a few examples in light of the information above. 
First, +let's consider the default search using ripgrep's default regex engine and +then the same search with PCRE2: + +``` +$ time rg '^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.783s +user 0m1.731s +sys 0m0.051s + +$ time rg -P '^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m2.458s +user 0m2.419s +sys 0m0.038s +``` + +In this particular example, both pattern searches are using a Unicode aware +`\w` character class and both are counting lines in order to report line +numbers. The key difference here is that the first search will not search +line by line, but the second one will. We can observe which strategy ripgrep +uses by passing the `--trace` flag: + +``` +$ rg '^\w{42}$' subtitles2016-sample --trace +[... snip ...] +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:622: Some("subtitles2016-sample"): searching via memory map +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:712: slice reader: searching via slice-by-line strategy +TRACE|grep_searcher::searcher::core|grep-searcher/src/searcher/core.rs:61: searcher core: will use fast line searcher +[... snip ...] + +$ rg -P '^\w{42}$' subtitles2016-sample --trace +[... snip ...] +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:622: Some("subtitles2016-sample"): searching via memory map +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:705: slice reader: needs transcoding, using generic reader +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:685: generic reader: searching via roll buffer strategy +TRACE|grep_searcher::searcher::core|grep-searcher/src/searcher/core.rs:63: searcher core: will use slow line searcher +[... snip ...] +``` + +The first says it is using the "fast line searcher" where as the latter says +it is using the "slow line searcher." 
The latter also shows that we are +decoding the contents of the file, which also impacts performance. + +Interestingly, in this case, the pattern does not match a `\n` and the file +we're searching is valid UTF-8, so neither the slow line-by-line search +strategy nor the decoding are necessary. We could fix the former issue with +better PCRE2 introspection APIs. We can actually fix the latter issue with +ripgrep's `--no-encoding` flag, which prevents the automatic UTF-8 decoding, +but will enable PCRE2's own UTF-8 validity checking. Unfortunately, it's slower +in my build of ripgrep: + +``` +$ time rg -P '^\w{42}$' subtitles2016-sample --no-encoding +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m3.074s +user 0m3.021s +sys 0m0.051s +``` + +(Tip: use the `--trace` flag to verify that no decoding in ripgrep is +happening.) + +A possible reason why PCRE2's UTF-8 checking is slower is because it might +not be better than the highly optimized UTF-8 checking routines found in the +[`encoding_rs`](https://github.com/hsivonen/encoding_rs) library, which is what +ripgrep uses for UTF-8 decoding. Moreover, my build of ripgrep enables +`encoding_rs`'s SIMD optimizations, which may be in play here. + +Also, note that using the `--no-encoding` flag can cause PCRE2 to report +invalid UTF-8 errors, which causes ripgrep to stop searching the file: + +``` +$ cat invalid-utf8 +foobar + +$ xxd invalid-utf8 +00000000: 666f 6fff 6261 720a foo.bar. + +$ rg foo invalid-utf8 +1:foobar + +$ rg -P foo invalid-utf8 +1:foo�bar + +$ rg -P foo invalid-utf8 --no-encoding +invalid-utf8: PCRE2: error matching: UTF-8 error: illegal byte (0xfe or 0xff) +``` + +All right, so at this point, you might think that we could remove the penalty +for line-by-line searching by enabling multiline search. After all, our +particular pattern can't match across multiple lines anyway, so we'll still get +the results we want. 
Let's try it: + +``` +$ time rg -U '^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.803s +user 0m1.748s +sys 0m0.054s + +$ time rg -P -U '^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m2.962s +user 0m2.246s +sys 0m0.713s +``` + +Search times remain the same with the default regex engine, but the PCRE2 +search gets _slower_. What happened? The secrets can be revealed with the +`--trace` flag once again. In the former case, ripgrep actually detects that +the pattern can't match across multiple lines, and so will fall back to the +"fast line search" strategy as with our search without `-U`. + +However, for PCRE2, things are much worse. Namely, since Unicode mode is still +enabled, ripgrep is still going to decode UTF-8 to ensure that it hands only +valid UTF-8 to PCRE2. Unfortunately, one key downside of multiline search is +that ripgrep cannot do it incrementally. Since matches can be arbitrarily long, +ripgrep actually needs the entire file in memory at once. Normally, we can use +a memory map for this, but because we need to UTF-8 decode the file before +searching it, ripgrep winds up reading the entire contents of the file on to +the heap before executing a search. Owch. + +OK, so Unicode is killing us here. The file we're searching is _mostly_ ASCII, +so maybe we're OK with missing some data. (Try `rg '[\w--\p{ascii}]'` to see +non-ASCII word characters that an ASCII-only `\w` character class would miss.) 
+We can disable Unicode in both searches, but this is done differently depending +on the regex engine we use: + +``` +$ time rg '(?-u)^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.714s +user 0m1.669s +sys 0m0.044s + +$ time rg -P '^\w{42}$' subtitles2016-sample --no-pcre2-unicode +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.997s +user 0m1.958s +sys 0m0.037s +``` + +For the most part, ripgrep's default regex engine performs about the same. +PCRE2 does improve a little bit, and is now almost as fast as the default +regex engine. If you look at the output of `--trace`, you'll see that ripgrep +will no longer perform UTF-8 decoding, but it does still use the slow +line-by-line searcher. + +At this point, we can combine all of our insights above: let's try to get off +of the slow line-by-line searcher by enabling multiline mode, and let's stop +UTF-8 decoding by disabling Unicode support: + +``` +$ time rg -U '(?-u)^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.714s +user 0m1.655s +sys 0m0.058s + +$ time rg -P -U '^\w{42}$' subtitles2016-sample --no-pcre2-unicode +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.121s +user 0m1.071s +sys 0m0.048s +``` + +Ah, there's PCRE2's JIT shining! ripgrep's default regex engine once again +remains about the same, but PCRE2 no longer needs to search line-by-line and it +no longer needs to do any kind of UTF-8 checks. This allows the file to get +memory mapped and passed right through PCRE2's JIT at impressive speeds. (As +a brief and interesting historical note, the configuration of "memory map + +multiline + no-Unicode" is exactly the configuration used by The Silver +Searcher. This analysis perhaps sheds some reasoning as to why that +configuration is useful!) 
+ +In summary, if you want PCRE2 to go as fast as possible and you don't care +about Unicode and you don't care about matches possibly spanning across +multiple lines, then enable multiline mode with `-U` and disable PCRE2's +Unicode support with the `--no-pcre2-unicode` flag. + +Caveat emptor: This author is not a PCRE2 expert, so there may be APIs that can +improve performance that the author missed. Similarly, there may be alternative +designs for a searching tool that are more amenable to how PCRE2 works. + + +

+When I run rg, why does it execute some other command? +

+ +It's likely that you have a shell alias or even another tool called `rg` which +is interfering with ripgrep. Run `which rg` to see what it is. + +(Notably, the Rails plug-in for +[Oh My Zsh](https://github.com/robbyrussell/oh-my-zsh/wiki/Plugins#rails) sets +up an `rg` alias for `rails generate`.) + +Problems like this can be resolved in one of several ways: + +* If you're using the OMZ Rails plug-in, disable it by editing the `plugins` + array in your zsh configuration. +* Temporarily bypass an existing `rg` alias by calling ripgrep as + `command rg`, `\rg`, or `'rg'`. +* Temporarily bypass an existing alias or another tool named `rg` by calling + ripgrep by its full path (e.g., `/usr/bin/rg` or `/usr/local/bin/rg`). +* Permanently disable an existing `rg` alias by adding `unalias rg` to the + bottom of your shell configuration file (e.g., `.bash_profile` or `.zshrc`). +* Give ripgrep its own alias that doesn't conflict with other tools/aliases by + adding a line like the following to the bottom of your shell configuration + file: `alias ripgrep='command rg'`. + + +

+How do I create an alias for ripgrep on Windows? +

+ +You will often want to make an alias for a command you use a lot that sets +certain flags. But PowerShell function aliases do not behave like your typical +Linux shell alias. You always need to propagate arguments and `stdin` input. +But it cannot be done simply as +`function grep() { $input | rg.exe --hidden $args }` + +Use the example below as a reference for how to set up an alias in PowerShell. + +```powershell +function grep { + $count = @($input).Count + $input.Reset() + + if ($count) { + $input | rg.exe --hidden $args + } + else { + rg.exe --hidden $args + } +} +``` + +PowerShell special variables: + +* input - is the PowerShell `stdin` object that allows you to access its content. +* args - is the array of arguments passed to this function. + +This alias checks whether there is `stdin` input and propagates it only if there +are some lines. Otherwise an empty `$input` will make PowerShell trigger `rg` to +search an empty `stdin`. + + +

+How do I create a PowerShell profile? +

+ +To customize PowerShell on start-up, there is a special PowerShell script that +has to be created. In order to find its location, type `$profile`. +See +[Microsoft's documentation](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx) +for more details. + +Any PowerShell code in this file gets evaluated at the start of the console session. This +way you can have your own aliases created at start-up. + + +

+How do I pipe non-ASCII content to ripgrep on Windows? +

+ +When piping input into native executables in PowerShell, the encoding of the +input is controlled by the `$OutputEncoding` variable. By default, this is set +to US-ASCII, and any characters in the pipeline that don't have encodings in +US-ASCII are converted to `?` (question mark) characters. + +To change this setting, set `$OutputEncoding` to a different encoding, as +represented by a .NET encoding object. Some common examples are below. The +value of this variable is reset when PowerShell restarts, so to make this +change take effect every time PowerShell is started add a line setting the +variable into your PowerShell profile. + +Example `$OutputEncoding` settings: + +* UTF-8 without BOM: `$OutputEncoding = [System.Text.UTF8Encoding]::new()` +* The console's output encoding: + `$OutputEncoding = [System.Console]::OutputEncoding` + +If you continue to have encoding problems, you can also force the encoding +that the console will use for printing to UTF-8 with +`[System.Console]::OutputEncoding = [System.Text.Encoding]::UTF8`. This +will also reset when PowerShell is restarted, so you can add that line +to your profile as well if you want to make the setting permanent. + +

+How can I search and replace with ripgrep? +

+ +Using ripgrep alone, you can't. ripgrep is a search tool that will never +touch your files. However, the output of ripgrep can be piped to other tools +that do modify files on disk. See +[this issue](https://github.com/BurntSushi/ripgrep/issues/74) for more +information. + +sed is one such tool that can modify files on disk. sed can take a filename +and a substitution command to search and replace in the specified file. +Files containing matching patterns can be provided to sed using + +``` +rg foo --files-with-matches +``` + +The output of this command is a list of filenames that contain a match for +the `foo` pattern. + +This list can be piped into `xargs`, which will split the filenames from +standard input into arguments for the command following xargs. You can use this +combination to pipe a list of filenames into sed for replacement. For example: + +``` +rg foo --files-with-matches | xargs sed -i 's/foo/bar/g' +``` + +will replace all instances of 'foo' with 'bar' in the files in which +ripgrep finds the foo pattern. The `-i` flag to sed indicates that you are +editing files in place, and `s/foo/bar/g` says that you are performing a +**s**ubstitution of the pattern `foo` for `bar`, and that you are doing this +substitution **g**lobally (all occurrences of the pattern in each file). + +Note: the above command assumes that you are using GNU sed. If you are using +BSD sed (the default on macOS and FreeBSD) then you must modify the above +command to be the following: + +``` +rg foo --files-with-matches | xargs sed -i '' 's/foo/bar/g' +``` + +The `-i` flag in BSD sed requires a file extension to be given to make backups +for all modified files. Specifying the empty string prevents file backups from +being made. + +Finally, if any of your file paths contain whitespace in them, then you might +need to delimit your file paths with a NUL terminator. 
This requires telling +ripgrep to output NUL bytes between each path, and telling xargs to read paths +delimited by NUL bytes: + +``` +rg foo --files-with-matches -0 | xargs -0 sed -i 's/foo/bar/g' +``` + +To learn more about sed, see the sed manual +[here](https://www.gnu.org/software/sed/manual/sed.html). + +Additionally, Facebook has a tool called +[fastmod](https://github.com/facebookincubator/fastmod) +that uses some of the same libraries as ripgrep and might provide a more +ergonomic search-and-replace experience. + + +

+How is ripgrep licensed? +

+ +ripgrep is dual licensed under the +[Unlicense](https://unlicense.org/) +and MIT licenses. Specifically, you may use ripgrep under the terms of either +license. + +The reason why ripgrep is dual licensed this way is two-fold: + +1. I, as ripgrep's author, would like to participate in a small bit of + ideological activism by promoting the Unlicense's goal: to disclaim + copyright monopoly interest. +2. I, as ripgrep's author, would like as many people to use ripgrep as + possible. Since the Unlicense is not a proven or well known license, ripgrep + is also offered under the MIT license, which is ubiquitous and accepted by + almost everyone. + +More specifically, ripgrep and all its dependencies are compatible with this +licensing choice. In particular, ripgrep's dependencies (direct and transitive) +will always be limited to permissive licenses. That is, ripgrep will never +depend on code that is not permissively licensed. This means rejecting any +dependency that uses a copyleft license such as the GPL, LGPL, MPL or any of +the Creative Commons ShareAlike licenses. Whether the license is "weak" +copyleft or not does not matter; ripgrep will **not** depend on it. + + +

+Can ripgrep replace grep? +

+ +Yes and no. + +If, upon hearing that "ripgrep can replace grep," you *actually* hear, "ripgrep +can be used in every instance grep can be used, in exactly the same way, for +the same use cases, with exactly the same bug-for-bug behavior," then no, +ripgrep trivially *cannot* replace grep. Moreover, ripgrep will *never* replace +grep. + +If, upon hearing that "ripgrep can replace grep," you *actually* hear, "ripgrep +can replace grep in some cases and not in other use cases," then yes, that is +indeed true! + +Let's go over some of those use cases in favor of ripgrep. Some of these may +not apply to you. That's OK. There may be other use cases not listed here that +do apply to you. That's OK too. + +(For all claims related to performance in the following words, see my +[blog post](https://blog.burntsushi.net/ripgrep/) +introducing ripgrep.) + +* Are you frequently searching a repository of code? If so, ripgrep might be a + good choice since there's likely a good chunk of your repository that you + don't want to search. grep, can, of course, be made to filter files using + recursive search, and if you don't mind writing out the requisite `--exclude` + rules or writing wrapper scripts, then grep might be sufficient. (I'm not + kidding, I myself did this with grep for almost a decade before writing + ripgrep.) But if you instead enjoy having a search tool respect your + `.gitignore`, then ripgrep might be perfect for you! +* Are you frequently searching non-ASCII text that is UTF-8 encoded? One of + ripgrep's key features is that it can handle Unicode features in your + patterns in a way that tends to be faster than GNU grep. Unicode features + in ripgrep are enabled by default; there is no need to configure your locale + settings to use ripgrep properly because ripgrep doesn't respect your locale + settings. +* Do you need to search UTF-16 files and you don't want to bother explicitly + transcoding them? Great. ripgrep does this for you automatically. 
No need + to enable it. +* Do you need to search a large directory of large files? ripgrep uses + parallelism by default, which tends to make it faster than a standard + `grep -r` search. However, if you're OK writing the occasional + `find ./ -print0 | xargs -P8 -0 grep` command, then maybe grep is good + enough. + +Here are some cases where you might *not* want to use ripgrep. The same caveats +for the previous section apply. + +* Are you writing portable shell scripts intended to work in a variety of + environments? Great, probably not a good idea to use ripgrep! ripgrep has + nowhere near the ubiquity of grep, so if you do use ripgrep, you might need + to futz with the installation process more than you would with grep. +* Do you care about POSIX compatibility? If so, then you can't use ripgrep + because it never was, isn't and never will be POSIX compatible. +* Do you hate tools that try to do something smart? If so, ripgrep is all about + being smart, so you might prefer to just stick with grep. +* Is there a particular feature of grep you rely on that ripgrep either doesn't + have or never will have? If the former, file a bug report, maybe ripgrep can + do it! If the latter, well, then, just use grep. + + +

+What does the "rip" in ripgrep mean? +

+ +When I first started writing ripgrep, I called it `rep`, intending it to be a +shorter variant of `grep`. Soon after, I renamed it to `xrep` since `rep` +wasn't obvious enough of a name for my taste. And also because adding `x` to +anything always makes it better, right? + +Before ripgrep's first public release, I decided that I didn't like `xrep`. I +thought it was slightly awkward to type, and despite my previous praise of the +letter `x`, I kind of thought it was pretty lame. Being someone who really +likes Rust, I wanted to call it "rustgrep" or maybe "rgrep" for short. But I +thought that was just as lame, and maybe a little too in-your-face. But I +wanted to continue using `r` so I could at least pretend Rust had something to +do with it. + +I spent a couple of days trying to think of very short words that began with +the letter `r` that were even somewhat related to the task of searching. I +don't remember how it popped into my head, but "rip" came up as something that +meant "fast," as in, "to rip through your text." The fact that RIP is also +an initialism for "Rest in Peace" (as in, "ripgrep kills grep") never really +dawned on me. Perhaps the coincidence is too striking to believe that, but +I didn't realize it until someone explicitly pointed it out to me after the +initial public release. I admit that I found it mildly amusing, but if I had +realized it myself before the public release, I probably would have pressed on +and chosen a different name. Alas, renaming things after a release is hard, so I +decided to mush on. + +Given the fact that +[ripgrep never was, is or will be a 100% drop-in replacement for +grep](#posix4ever), +ripgrep is neither actually a "grep killer" nor was it ever intended to be. It +certainly does eat into some of its use cases, but that's nothing that other +tools like ack or The Silver Searcher weren't already doing. 
diff -Nru ripgrep-0.6.0/.gitignore ripgrep-0.10.0.3/.gitignore --- ripgrep-0.6.0/.gitignore 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/.gitignore 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -.*.swp -tags -target -/grep/Cargo.lock -/globset/Cargo.lock -/ignore/Cargo.lock -/termcolor/Cargo.lock -/wincolor/Cargo.lock - -# Snapcraft files -stage -prime -parts -*.snap -*.pyc -ripgrep*_source.tar.bz2 \ No newline at end of file diff -Nru ripgrep-0.6.0/globset/Cargo.toml ripgrep-0.10.0.3/globset/Cargo.toml --- ripgrep-0.6.0/globset/Cargo.toml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/globset/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -1,6 +1,6 @@ [package] name = "globset" -version = "0.2.0" #:version +version = "0.4.2" #:version authors = ["Andrew Gallant "] description = """ Cross platform single glob and glob set matching. Glob set matching is the @@ -19,14 +19,14 @@ bench = false [dependencies] -aho-corasick = "0.6.0" -fnv = "1.0" -log = "0.3" -memchr = "1" -regex = "0.2.1" +aho-corasick = "0.6.8" +fnv = "1.0.6" +log = "0.4.5" +memchr = "2.0.2" +regex = "1.0.5" [dev-dependencies] -glob = "0.2" +glob = "0.2.11" [features] -simd-accel = ["regex/simd-accel"] +simd-accel = [] diff -Nru ripgrep-0.6.0/globset/README.md ripgrep-0.10.0.3/globset/README.md --- ripgrep-0.6.0/globset/README.md 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/globset/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -4,7 +4,7 @@ process of matching one or more glob patterns against a single candidate path simultaneously, and returning all of the globs that matched. 
-[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![](https://img.shields.io/crates/v/globset.svg)](https://crates.io/crates/globset) @@ -20,7 +20,7 @@ ```toml [dependencies] -globset = "0.1" +globset = "0.3" ``` and this to your crate root: @@ -36,7 +36,7 @@ ```rust use globset::Glob; -let glob = try!(Glob::new("*.rs")).compile_matcher(); +let glob = Glob::new("*.rs")?.compile_matcher(); assert!(glob.is_match("foo.rs")); assert!(glob.is_match("foo/bar.rs")); @@ -51,8 +51,8 @@ ```rust use globset::GlobBuilder; -let glob = try!(GlobBuilder::new("*.rs") - .literal_separator(true).build()).compile_matcher(); +let glob = GlobBuilder::new("*.rs") + .literal_separator(true).build()?.compile_matcher(); assert!(glob.is_match("foo.rs")); assert!(!glob.is_match("foo/bar.rs")); // no longer matches @@ -69,10 +69,10 @@ let mut builder = GlobSetBuilder::new(); // A GlobBuilder can be used to configure each glob's match semantics // independently. 
-builder.add(try!(Glob::new("*.rs"))); -builder.add(try!(Glob::new("src/lib.rs"))); -builder.add(try!(Glob::new("src/**/foo.rs"))); -let set = try!(builder.build()); +builder.add(Glob::new("*.rs")?); +builder.add(Glob::new("src/lib.rs")?); +builder.add(Glob::new("src/**/foo.rs")?); +let set = builder.build()?; assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]); ``` diff -Nru ripgrep-0.6.0/globset/src/glob.rs ripgrep-0.10.0.3/globset/src/glob.rs --- ripgrep-0.6.0/globset/src/glob.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/globset/src/glob.rs 2018-09-10 21:10:55.000000000 +0000 @@ -1,4 +1,3 @@ -use std::ffi::{OsStr, OsString}; use std::fmt; use std::hash; use std::iter; @@ -28,7 +27,7 @@ BasenameLiteral(String), /// A pattern matches if and only if the file path's extension matches this /// literal string. - Extension(OsString), + Extension(String), /// A pattern matches if and only if this prefix literal is a prefix of the /// candidate file path. Prefix(String), @@ -47,7 +46,7 @@ /// extension. Note that this is a necessary but NOT sufficient criterion. /// Namely, if the extension matches, then a full regex search is still /// required. - RequiredExtension(OsString), + RequiredExtension(String), /// A regex needs to be used for matching. 
Regex, } @@ -154,7 +153,7 @@ lit.as_bytes() == &*candidate.basename } MatchStrategy::Extension(ref ext) => { - candidate.ext == ext + ext.as_bytes() == &*candidate.ext } MatchStrategy::Prefix(ref pre) => { starts_with(pre.as_bytes(), byte_path) @@ -166,7 +165,8 @@ ends_with(suffix.as_bytes(), byte_path) } MatchStrategy::RequiredExtension(ref ext) => { - candidate.ext == ext && self.re.is_match(byte_path) + let ext = ext.as_bytes(); + &*candidate.ext == ext && self.re.is_match(byte_path) } MatchStrategy::Regex => self.re.is_match(byte_path), } @@ -187,13 +187,26 @@ opts: GlobOptions, } -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] struct GlobOptions { /// Whether to match case insensitively. case_insensitive: bool, /// Whether to require a literal separator to match a separator in a file /// path. e.g., when enabled, `*` won't match `/`. literal_separator: bool, + /// Whether or not to use `\` to escape special characters. + /// e.g., when enabled, `\*` will match a literal `*`. + backslash_escape: bool, +} + +impl GlobOptions { + fn default() -> GlobOptions { + GlobOptions { + case_insensitive: false, + literal_separator: false, + backslash_escape: !is_separator('\\'), + } + } } #[derive(Clone, Debug, Default, Eq, PartialEq)] @@ -262,6 +275,19 @@ } /// Returns the regular expression string for this glob. + /// + /// Note that regular expressions for globs are intended to be matched on + /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In + /// particular, globs are frequently used on file paths, where there is no + /// general guarantee that file paths are themselves valid UTF-8. As a + /// result, callers will need to ensure that they are using a regex API + /// that can match on arbitrary bytes. 
For example, the + /// [`regex`](https://crates.io/regex) + /// crate's + /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html) + /// API is not suitable for this since it matches on `&str`, but its + /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html) + /// API is suitable for this. pub fn regex(&self) -> &str { &self.re } @@ -295,7 +321,7 @@ /// std::path::Path::extension returns. Namely, this extension includes /// the '.'. Also, paths like `.rs` are considered to have an extension /// of `.rs`. - fn ext(&self) -> Option { + fn ext(&self) -> Option { if self.opts.case_insensitive { return None; } @@ -319,11 +345,11 @@ Some(&Token::Literal('.')) => {} _ => return None, } - let mut lit = OsStr::new(".").to_os_string(); + let mut lit = ".".to_string(); for t in self.tokens[start + 2..].iter() { match *t { Token::Literal('.') | Token::Literal('/') => return None, - Token::Literal(c) => lit.push(c.to_string()), + Token::Literal(c) => lit.push(c), _ => return None, } } @@ -337,7 +363,7 @@ /// This is like `ext`, but returns an extension even if it isn't sufficent /// to imply a match. Namely, if an extension is returned, then it is /// necessary but not sufficient for a match. - fn required_ext(&self) -> Option { + fn required_ext(&self) -> Option { if self.opts.case_insensitive { return None; } @@ -360,7 +386,7 @@ None } else { ext.reverse(); - Some(OsString::from(ext.into_iter().collect::())) + Some(ext.into_iter().collect()) } } @@ -509,7 +535,7 @@ Some(&self.tokens[start..]) } - /// Returns the pattern as a literal if and only if the pattern exclusiely + /// Returns the pattern as a literal if and only if the pattern exclusively /// matches the basename of a file path *and* is a literal. 
/// /// The basic format of these patterns is `**/{literal}`, where `{literal}` @@ -549,8 +575,9 @@ chars: self.glob.chars().peekable(), prev: None, cur: None, + opts: &self.opts, }; - try!(p.parse()); + p.parse()?; if p.stack.is_empty() { Err(Error { glob: Some(self.glob.to_string()), @@ -585,6 +612,19 @@ self.opts.literal_separator = yes; self } + + /// When enabled, a back slash (`\`) may be used to escape + /// special characters in a glob pattern. Additionally, this will + /// prevent `\` from being interpreted as a path separator on all + /// platforms. + /// + /// This is enabled by default on platforms where `\` is not a + /// path separator and disabled by default on platforms where `\` + /// is a path separator. + pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> { + self.opts.backslash_escape = yes; + self + } } impl Tokens { @@ -710,6 +750,7 @@ chars: iter::Peekable>, prev: Option, cur: Option, + opts: &'a GlobOptions, } impl<'a> Parser<'a> { @@ -720,20 +761,14 @@ fn parse(&mut self) -> Result<(), Error> { while let Some(c) = self.bump() { match c { - '?' => try!(self.push_token(Token::Any)), - '*' => try!(self.parse_star()), - '[' => try!(self.parse_class()), - '{' => try!(self.push_alternate()), - '}' => try!(self.pop_alternate()), - ',' => try!(self.parse_comma()), - c => { - if is_separator(c) { - // Normalize all patterns to use / as a separator. - try!(self.push_token(Token::Literal('/'))) - } else { - try!(self.push_token(Token::Literal(c))) - } - } + '?' 
=> self.push_token(Token::Any)?, + '*' => self.parse_star()?, + '[' => self.parse_class()?, + '{' => self.push_alternate()?, + '}' => self.pop_alternate()?, + ',' => self.parse_comma()?, + '\\' => self.parse_backslash()?, + c => self.push_token(Token::Literal(c))?, } } Ok(()) @@ -786,22 +821,36 @@ } } + fn parse_backslash(&mut self) -> Result<(), Error> { + if self.opts.backslash_escape { + match self.bump() { + None => Err(self.error(ErrorKind::DanglingEscape)), + Some(c) => self.push_token(Token::Literal(c)), + } + } else if is_separator('\\') { + // Normalize all patterns to use / as a separator. + self.push_token(Token::Literal('/')) + } else { + self.push_token(Token::Literal('\\')) + } + } + fn parse_star(&mut self) -> Result<(), Error> { let prev = self.prev; if self.chars.peek() != Some(&'*') { - try!(self.push_token(Token::ZeroOrMore)); + self.push_token(Token::ZeroOrMore)?; return Ok(()); } assert!(self.bump() == Some('*')); - if !try!(self.have_tokens()) { - try!(self.push_token(Token::RecursivePrefix)); + if !self.have_tokens()? { + self.push_token(Token::RecursivePrefix)?; let next = self.bump(); if !next.map(is_separator).unwrap_or(true) { return Err(self.error(ErrorKind::InvalidRecursive)); } return Ok(()); } - try!(self.pop_token()); + self.pop_token()?; if !prev.map(is_separator).unwrap_or(false) { if self.stack.len() <= 1 || (prev != Some(',') && prev != Some('{')) { @@ -840,12 +889,15 @@ Ok(()) } } - let mut negated = false; let mut ranges = vec![]; - if self.chars.peek() == Some(&'!') { - assert!(self.bump() == Some('!')); - negated = true; - } + let negated = match self.chars.peek() { + Some(&'!') | Some(&'^') => { + let bump = self.bump(); + assert!(bump == Some('!') || bump == Some('^')); + true + } + _ => false, + }; let mut first = true; let mut in_range = false; loop { @@ -870,7 +922,7 @@ // invariant: in_range is only set when there is // already at least one character seen. 
let r = ranges.last_mut().unwrap(); - try!(add_to_last_range(&self.glob, r, '-')); + add_to_last_range(&self.glob, r, '-')?; in_range = false; } else { assert!(!ranges.is_empty()); @@ -881,8 +933,8 @@ if in_range { // invariant: in_range is only set when there is // already at least one character seen. - try!(add_to_last_range( - &self.glob, ranges.last_mut().unwrap(), c)); + add_to_last_range( + &self.glob, ranges.last_mut().unwrap(), c)?; } else { ranges.push((c, c)); } @@ -924,16 +976,15 @@ #[cfg(test)] mod tests { - use std::ffi::{OsStr, OsString}; - use {GlobSetBuilder, ErrorKind}; use super::{Glob, GlobBuilder, Token}; use super::Token::*; #[derive(Clone, Copy, Debug, Default)] struct Options { - casei: bool, - litsep: bool, + casei: Option, + litsep: Option, + bsesc: Option, } macro_rules! syntax { @@ -963,11 +1014,17 @@ ($name:ident, $pat:expr, $re:expr, $options:expr) => { #[test] fn $name() { - let pat = GlobBuilder::new($pat) - .case_insensitive($options.casei) - .literal_separator($options.litsep) - .build() - .unwrap(); + let mut builder = GlobBuilder::new($pat); + if let Some(casei) = $options.casei { + builder.case_insensitive(casei); + } + if let Some(litsep) = $options.litsep { + builder.literal_separator(litsep); + } + if let Some(bsesc) = $options.bsesc { + builder.backslash_escape(bsesc); + } + let pat = builder.build().unwrap(); assert_eq!(format!("(?-u){}", $re), pat.regex()); } }; @@ -980,11 +1037,17 @@ ($name:ident, $pat:expr, $path:expr, $options:expr) => { #[test] fn $name() { - let pat = GlobBuilder::new($pat) - .case_insensitive($options.casei) - .literal_separator($options.litsep) - .build() - .unwrap(); + let mut builder = GlobBuilder::new($pat); + if let Some(casei) = $options.casei { + builder.case_insensitive(casei); + } + if let Some(litsep) = $options.litsep { + builder.literal_separator(litsep); + } + if let Some(bsesc) = $options.bsesc { + builder.backslash_escape(bsesc); + } + let pat = builder.build().unwrap(); let matcher = 
pat.compile_matcher(); let strategic = pat.compile_strategic_matcher(); let set = GlobSetBuilder::new().add(pat).build().unwrap(); @@ -1002,11 +1065,17 @@ ($name:ident, $pat:expr, $path:expr, $options:expr) => { #[test] fn $name() { - let pat = GlobBuilder::new($pat) - .case_insensitive($options.casei) - .literal_separator($options.litsep) - .build() - .unwrap(); + let mut builder = GlobBuilder::new($pat); + if let Some(casei) = $options.casei { + builder.case_insensitive(casei); + } + if let Some(litsep) = $options.litsep { + builder.literal_separator(litsep); + } + if let Some(bsesc) = $options.bsesc { + builder.backslash_escape(bsesc); + } + let pat = builder.build().unwrap(); let matcher = pat.compile_matcher(); let strategic = pat.compile_strategic_matcher(); let set = GlobSetBuilder::new().add(pat).build().unwrap(); @@ -1018,7 +1087,6 @@ } fn s(string: &str) -> String { string.to_string() } - fn os(string: &str) -> OsString { OsStr::new(string).to_os_string() } fn class(s: char, e: char) -> Token { Class { negated: false, ranges: vec![(s, e)] } @@ -1073,6 +1141,8 @@ syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]); syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]); syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]); + syntax!(cls20, "[^a]", vec![classn('a', 'a')]); + syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]); syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive); syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive); @@ -1089,12 +1159,24 @@ syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-')); const CASEI: Options = Options { - casei: true, - litsep: false, + casei: Some(true), + litsep: None, + bsesc: None, }; const SLASHLIT: Options = Options { - casei: false, - litsep: true, + casei: None, + litsep: Some(true), + bsesc: None, + }; + const NOBSESC: Options = Options { + casei: None, + litsep: None, + bsesc: Some(false), + }; + const BSESC: Options = Options { + casei: 
None, + litsep: None, + bsesc: Some(true), }; toregex!(re_casei, "a", "(?i)^a$", &CASEI); @@ -1150,6 +1232,7 @@ matches!(matchrec22, ".*/**", ".abc/abc"); matches!(matchrec23, "foo/**", "foo"); matches!(matchrec24, "**/foo/bar", "foo/bar"); + matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt"); matches!(matchrange1, "a[0-9]b", "a0b"); matches!(matchrange2, "a[0-9]b", "a9b"); @@ -1162,6 +1245,7 @@ matches!(matchrange9, "[-a-c]", "b"); matches!(matchrange10, "[a-c-]", "b"); matches!(matchrange11, "[-]", "-"); + matches!(matchrange12, "a[^0-9]b", "a_b"); matches!(matchpat1, "*hello.txt", "hello.txt"); matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt"); @@ -1205,6 +1289,17 @@ #[cfg(not(unix))] matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT); + matches!(matchbackslash1, "\\[", "[", BSESC); + matches!(matchbackslash2, "\\?", "?", BSESC); + matches!(matchbackslash3, "\\*", "*", BSESC); + matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC); + matches!(matchbackslash5, "\\?", "\\a", NOBSESC); + matches!(matchbackslash6, "\\*", "\\\\", NOBSESC); + #[cfg(unix)] + matches!(matchbackslash7, "\\a", "a"); + #[cfg(not(unix))] + matches!(matchbackslash8, "\\a", "/a"); + nmatches!(matchnot1, "a*b*c", "abcd"); nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca"); nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt"); @@ -1234,18 +1329,35 @@ nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT); nmatches!(matchnot26, "**/m4/ltoptions.m4", "csharp/src/packages/repositories.config", SLASHLIT); + nmatches!(matchnot27, "a[^0-9]b", "a0b"); + nmatches!(matchnot28, "a[^0-9]b", "a9b"); + nmatches!(matchnot29, "[^-]", "-"); + nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt"); + nmatches!( + matchrec31, + "some/*/needle.txt", "some/one/two/needle.txt", SLASHLIT); + nmatches!( + matchrec32, + "some/*/needle.txt", "some/one/two/three/needle.txt", SLASHLIT); macro_rules! 
extract { ($which:ident, $name:ident, $pat:expr, $expect:expr) => { extract!($which, $name, $pat, $expect, Options::default()); }; - ($which:ident, $name:ident, $pat:expr, $expect:expr, $opts:expr) => { + ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => { #[test] fn $name() { - let pat = GlobBuilder::new($pat) - .case_insensitive($opts.casei) - .literal_separator($opts.litsep) - .build().unwrap(); + let mut builder = GlobBuilder::new($pat); + if let Some(casei) = $options.casei { + builder.case_insensitive(casei); + } + if let Some(litsep) = $options.litsep { + builder.literal_separator(litsep); + } + if let Some(bsesc) = $options.bsesc { + builder.backslash_escape(bsesc); + } + let pat = builder.build().unwrap(); assert_eq!($expect, pat.$which()); } }; @@ -1302,19 +1414,19 @@ Literal('f'), Literal('o'), ZeroOrMore, Literal('o'), ]), SLASHLIT); - ext!(extract_ext1, "**/*.rs", Some(os(".rs"))); + ext!(extract_ext1, "**/*.rs", Some(s(".rs"))); ext!(extract_ext2, "**/*.rs.bak", None); - ext!(extract_ext3, "*.rs", Some(os(".rs"))); + ext!(extract_ext3, "*.rs", Some(s(".rs"))); ext!(extract_ext4, "a*.rs", None); ext!(extract_ext5, "/*.c", None); ext!(extract_ext6, "*.c", None, SLASHLIT); - ext!(extract_ext7, "*.c", Some(os(".c"))); + ext!(extract_ext7, "*.c", Some(s(".c"))); - required_ext!(extract_req_ext1, "*.rs", Some(os(".rs"))); - required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(os(".rs"))); - required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(os(".rs"))); - required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(os(".rs"))); - required_ext!(extract_req_ext5, ".rs", Some(os(".rs"))); + required_ext!(extract_req_ext1, "*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs"))); + required_ext!(extract_req_ext5, ".rs", Some(s(".rs"))); required_ext!(extract_req_ext6, "./rs", 
None); required_ext!(extract_req_ext7, "foo", None); required_ext!(extract_req_ext8, ".foo/", None); diff -Nru ripgrep-0.6.0/globset/src/lib.rs ripgrep-0.10.0.3/globset/src/lib.rs --- ripgrep-0.6.0/globset/src/lib.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/globset/src/lib.rs 2018-09-10 21:10:55.000000000 +0000 @@ -22,7 +22,7 @@ # fn example() -> Result<(), globset::Error> { use globset::Glob; -let glob = try!(Glob::new("*.rs")).compile_matcher(); +let glob = Glob::new("*.rs")?.compile_matcher(); assert!(glob.is_match("foo.rs")); assert!(glob.is_match("foo/bar.rs")); @@ -39,8 +39,8 @@ # fn example() -> Result<(), globset::Error> { use globset::GlobBuilder; -let glob = try!(GlobBuilder::new("*.rs") - .literal_separator(true).build()).compile_matcher(); +let glob = GlobBuilder::new("*.rs") + .literal_separator(true).build()?.compile_matcher(); assert!(glob.is_match("foo.rs")); assert!(!glob.is_match("foo/bar.rs")); // no longer matches @@ -59,10 +59,10 @@ let mut builder = GlobSetBuilder::new(); // A GlobBuilder can be used to configure each glob's match semantics // independently. -builder.add(try!(Glob::new("*.rs"))); -builder.add(try!(Glob::new("src/lib.rs"))); -builder.add(try!(Glob::new("src/**/foo.rs"))); -let set = try!(builder.build()); +builder.add(Glob::new("*.rs")?); +builder.add(Glob::new("src/lib.rs")?); +builder.add(Glob::new("src/**/foo.rs")?); +let set = builder.build()?; assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]); # Ok(()) } example().unwrap(); @@ -91,6 +91,11 @@ `[!ab]` to match any character except for `a` and `b`. * Metacharacters such as `*` and `?` can be escaped with character class notation. e.g., `[*]` matches `*`. +* When backslash escapes are enabled, a backslash (`\`) will escape all meta + characters in a glob. If it precedes a non-meta character, then the slash is + ignored. A `\\` will match a literal `\\`. 
Note that this mode is only + enabled on Unix platforms by default, but can be enabled on any platform + via the `backslash_escape` setting on `Glob`. A `GlobBuilder` can be used to prevent wildcards from matching path separators, or to enable case insensitive matching. @@ -108,7 +113,7 @@ use std::borrow::Cow; use std::collections::{BTreeMap, HashMap}; use std::error::Error as StdError; -use std::ffi::{OsStr, OsString}; +use std::ffi::OsStr; use std::fmt; use std::hash; use std::path::Path; @@ -154,8 +159,17 @@ /// Occurs when an alternating group is nested inside another alternating /// group, e.g., `{{a,b},{c,d}}`. NestedAlternates, + /// Occurs when an unescaped '\' is found at the end of a glob. + DanglingEscape, /// An error associated with parsing or compiling a regex. Regex(String), + /// Hints that destructuring should not be exhaustive. + /// + /// This enum may grow additional variants, so this makes sure clients + /// don't count on exhaustive matching. (Otherwise, adding a new variant + /// could break existing code.) + #[doc(hidden)] + __Nonexhaustive, } impl StdError for Error { @@ -199,7 +213,11 @@ ErrorKind::NestedAlternates => { "nested alternate groups are not allowed" } + ErrorKind::DanglingEscape => { + "dangling '\\'" + } ErrorKind::Regex(ref err) => err, + ErrorKind::__Nonexhaustive => unreachable!(), } } } @@ -223,12 +241,14 @@ | ErrorKind::UnopenedAlternates | ErrorKind::UnclosedAlternates | ErrorKind::NestedAlternates + | ErrorKind::DanglingEscape | ErrorKind::Regex(_) => { write!(f, "{}", self.description()) } ErrorKind::InvalidRange(s, e) => { write!(f, "invalid range; '{}' > '{}'", s, e) } + ErrorKind::__Nonexhaustive => unreachable!(), } } } @@ -268,6 +288,14 @@ } impl GlobSet { + /// Create an empty `GlobSet`. An empty set matches nothing. + pub fn empty() -> GlobSet { + GlobSet { + len: 0, + strats: vec![], + } + } + /// Returns true if this set is empty, and therefore matches nothing. 
pub fn is_empty(&self) -> bool { self.len == 0 @@ -412,8 +440,8 @@ GlobSetMatchStrategy::Suffix(suffixes.suffix()), GlobSetMatchStrategy::Prefix(prefixes.prefix()), GlobSetMatchStrategy::RequiredExtension( - try!(required_exts.build())), - GlobSetMatchStrategy::Regex(try!(regexes.regex_set())), + required_exts.build()?), + GlobSetMatchStrategy::Regex(regexes.regex_set()?), ], }) } @@ -421,6 +449,7 @@ /// GlobSetBuilder builds a group of patterns that can be used to /// simultaneously match a file path. +#[derive(Clone, Debug)] pub struct GlobSetBuilder { pats: Vec, } @@ -441,7 +470,6 @@ } /// Add a new pattern to this set. - #[allow(dead_code)] pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder { self.pats.push(pat); self @@ -458,7 +486,7 @@ pub struct Candidate<'a> { path: Cow<'a, [u8]>, basename: Cow<'a, [u8]>, - ext: &'a OsStr, + ext: Cow<'a, [u8]>, } impl<'a> Candidate<'a> { @@ -469,7 +497,7 @@ Candidate { path: normalize_path(path_bytes(path)), basename: os_str_bytes(basename), - ext: file_name_ext(basename).unwrap_or(OsStr::new("")), + ext: file_name_ext(basename).unwrap_or(Cow::Borrowed(b"")), } } @@ -584,22 +612,22 @@ } #[derive(Clone, Debug)] -struct ExtensionStrategy(HashMap, Fnv>); +struct ExtensionStrategy(HashMap, Vec, Fnv>); impl ExtensionStrategy { fn new() -> ExtensionStrategy { ExtensionStrategy(HashMap::with_hasher(Fnv::default())) } - fn add(&mut self, global_index: usize, ext: OsString) { - self.0.entry(ext).or_insert(vec![]).push(global_index); + fn add(&mut self, global_index: usize, ext: String) { + self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index); } fn is_match(&self, candidate: &Candidate) -> bool { if candidate.ext.is_empty() { return false; } - self.0.contains_key(candidate.ext) + self.0.contains_key(&*candidate.ext) } #[inline(never)] @@ -607,7 +635,7 @@ if candidate.ext.is_empty() { return; } - if let Some(hits) = self.0.get(candidate.ext) { + if let Some(hits) = self.0.get(&*candidate.ext) { 
matches.extend(hits); } } @@ -670,14 +698,14 @@ } #[derive(Clone, Debug)] -struct RequiredExtensionStrategy(HashMap, Fnv>); +struct RequiredExtensionStrategy(HashMap, Vec<(usize, Regex)>, Fnv>); impl RequiredExtensionStrategy { fn is_match(&self, candidate: &Candidate) -> bool { if candidate.ext.is_empty() { return false; } - match self.0.get(candidate.ext) { + match self.0.get(&*candidate.ext) { None => false, Some(regexes) => { for &(_, ref re) in regexes { @@ -695,7 +723,7 @@ if candidate.ext.is_empty() { return; } - if let Some(regexes) = self.0.get(candidate.ext) { + if let Some(regexes) = self.0.get(&*candidate.ext) { for &(global_index, ref re) in regexes { if re.is_match(&*candidate.path) { matches.push(global_index); @@ -767,7 +795,7 @@ fn regex_set(self) -> Result { Ok(RegexSetStrategy { - matcher: try!(new_regex_set(self.literals)), + matcher: new_regex_set(self.literals)?, map: self.map, }) } @@ -775,7 +803,7 @@ #[derive(Clone, Debug)] struct RequiredExtensionStrategyBuilder( - HashMap>, + HashMap, Vec<(usize, String)>>, ); impl RequiredExtensionStrategyBuilder { @@ -783,8 +811,11 @@ RequiredExtensionStrategyBuilder(HashMap::new()) } - fn add(&mut self, global_index: usize, ext: OsString, regex: String) { - self.0.entry(ext).or_insert(vec![]).push((global_index, regex)); + fn add(&mut self, global_index: usize, ext: String, regex: String) { + self.0 + .entry(ext.into_bytes()) + .or_insert(vec![]) + .push((global_index, regex)); } fn build(self) -> Result { @@ -792,7 +823,7 @@ for (ext, regexes) in self.0.into_iter() { exts.insert(ext.clone(), vec![]); for (global_index, regex) in regexes { - let compiled = try!(new_regex(®ex)); + let compiled = new_regex(®ex)?; exts.get_mut(&ext).unwrap().push((global_index, compiled)); } } diff -Nru ripgrep-0.6.0/globset/src/pathutil.rs ripgrep-0.10.0.3/globset/src/pathutil.rs --- ripgrep-0.6.0/globset/src/pathutil.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/globset/src/pathutil.rs 2018-09-10 
21:10:55.000000000 +0000 @@ -54,34 +54,28 @@ /// a pattern like `*.rs` is obviously trying to match files with a `rs` /// extension, but it also matches files like `.rs`, which doesn't have an /// extension according to std::path::Path::extension. -pub fn file_name_ext(name: &OsStr) -> Option<&OsStr> { - // Yes, these functions are awful, and yes, we are completely violating - // the abstraction barrier of std::ffi. The barrier we're violating is - // that an OsStr's encoding is *ASCII compatible*. While this is obviously - // true on Unix systems, it's also true on Windows because an OsStr uses - // WTF-8 internally: https://simonsapin.github.io/wtf-8/ - // - // We should consider doing the same for the other path utility functions. - // Right now, we don't break any barriers, but Windows users are paying - // for it. - // - // Got any better ideas that don't cost anything? Hit me up. ---AG - unsafe fn os_str_as_u8_slice(s: &OsStr) -> &[u8] { - ::std::mem::transmute(s) - } - unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr { - ::std::mem::transmute(s) - } +pub fn file_name_ext(name: &OsStr) -> Option> { if name.is_empty() { return None; } - let name = unsafe { os_str_as_u8_slice(name) }; - for (i, &b) in name.iter().enumerate().rev() { - if b == b'.' { - return Some(unsafe { u8_slice_as_os_str(&name[i..]) }); + let name = os_str_bytes(name); + let last_dot_at = { + let result = name + .iter().enumerate().rev() + .find(|&(_, &b)| b == b'.') + .map(|(i, _)| i); + match result { + None => return None, + Some(i) => i, } - } - None + }; + Some(match name { + Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]), + Cow::Owned(mut name) => { + name.drain(..last_dot_at); + Cow::Owned(name) + } + }) } /// Return raw bytes of a path, transcoded to UTF-8 if necessary. 
@@ -144,7 +138,7 @@ #[test] fn $name() { let got = file_name_ext(OsStr::new($file_name)); - assert_eq!($ext.map(OsStr::new), got); + assert_eq!($ext.map(|s| Cow::Borrowed(s.as_bytes())), got); } }; } diff -Nru ripgrep-0.6.0/grep/Cargo.toml ripgrep-0.10.0.3/grep/Cargo.toml --- ripgrep-0.6.0/grep/Cargo.toml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/grep/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -1,6 +1,6 @@ [package] name = "grep" -version = "0.1.6" #:version +version = "0.2.3" #:version authors = ["Andrew Gallant "] description = """ Fast line oriented regex searching as a library. @@ -13,7 +13,25 @@ license = "Unlicense/MIT" [dependencies] -log = "0.3" -memchr = "1" -regex = "0.2.1" -regex-syntax = "0.4.0" +grep-cli = { version = "0.1.1", path = "../grep-cli" } +grep-matcher = { version = "0.1.1", path = "../grep-matcher" } +grep-pcre2 = { version = "0.1.2", path = "../grep-pcre2", optional = true } +grep-printer = { version = "0.1.1", path = "../grep-printer" } +grep-regex = { version = "0.1.1", path = "../grep-regex" } +grep-searcher = { version = "0.1.1", path = "../grep-searcher" } + +[dev-dependencies] +atty = "0.2.11" +regex = "1" +termcolor = "1" +walkdir = "2.2.2" + +[dev-dependencies.clap] +version = "2.32.0" +default-features = false +features = ["suggestions"] + +[features] +avx-accel = ["grep-searcher/avx-accel"] +simd-accel = ["grep-searcher/simd-accel"] +pcre2 = ["grep-pcre2"] diff -Nru ripgrep-0.6.0/grep/examples/simplegrep.rs ripgrep-0.10.0.3/grep/examples/simplegrep.rs --- ripgrep-0.6.0/grep/examples/simplegrep.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep/examples/simplegrep.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,74 @@ +extern crate grep; +extern crate termcolor; +extern crate walkdir; + +use std::env; +use std::error::Error; +use std::ffi::OsString; +use std::process; + +use grep::cli; +use grep::printer::{ColorSpecs, StandardBuilder}; +use grep::regex::RegexMatcher; +use 
grep::searcher::{BinaryDetection, SearcherBuilder}; +use termcolor::ColorChoice; +use walkdir::WalkDir; + +fn main() { + if let Err(err) = try_main() { + eprintln!("{}", err); + process::exit(1); + } +} + +fn try_main() -> Result<(), Box> { + let mut args: Vec = env::args_os().collect(); + if args.len() < 2 { + return Err("Usage: simplegrep [ ...]".into()); + } + if args.len() == 2 { + args.push(OsString::from("./")); + } + search(cli::pattern_from_os(&args[1])?, &args[2..]) +} + +fn search(pattern: &str, paths: &[OsString]) -> Result<(), Box> { + let matcher = RegexMatcher::new_line_matcher(&pattern)?; + let mut searcher = SearcherBuilder::new() + .binary_detection(BinaryDetection::quit(b'\x00')) + .line_number(false) + .build(); + let mut printer = StandardBuilder::new() + .color_specs(ColorSpecs::default_with_color()) + .build(cli::stdout( + if cli::is_tty_stdout() { + ColorChoice::Auto + } else { + ColorChoice::Never + } + )); + + for path in paths { + for result in WalkDir::new(path) { + let dent = match result { + Ok(dent) => dent, + Err(err) => { + eprintln!("{}", err); + continue; + } + }; + if !dent.file_type().is_file() { + continue; + } + let result = searcher.search_path( + &matcher, + dent.path(), + printer.sink_with_path(&matcher, dent.path()), + ); + if let Err(err) = result { + eprintln!("{}: {}", dent.path().display(), err); + } + } + } + Ok(()) +} diff -Nru ripgrep-0.6.0/grep/README.md ripgrep-0.10.0.3/grep/README.md --- ripgrep-0.6.0/grep/README.md 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/grep/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -1,4 +1,41 @@ grep ---- -This is a *library* that provides grep-style line-by-line regex searching (with -comparable performance to `grep` itself). +ripgrep, as a library. 
+ +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep.svg)](https://crates.io/crates/grep) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + + +### Documentation + +[https://docs.rs/grep](https://docs.rs/grep) + +NOTE: This crate isn't ready for wide use yet. Ambitious individuals can +probably piece together the parts, but there is no high level documentation +describing how all of the pieces fit together. + + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep = "0.2" +``` + +and this to your crate root: + +```rust +extern crate grep; +``` + + +### Features + +This crate provides a `pcre2` feature (disabled by default) which, when +enabled, re-exports the `grep-pcre2` crate as an alternative `Matcher` +implementation to the standard `grep-regex` implementation. diff -Nru ripgrep-0.6.0/grep/src/data/sherlock.txt ripgrep-0.10.0.3/grep/src/data/sherlock.txt --- ripgrep-0.6.0/grep/src/data/sherlock.txt 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/grep/src/data/sherlock.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,13052 +0,0 @@ -Project Gutenberg's The Adventures of Sherlock Holmes, by Arthur Conan Doyle - -This eBook is for the use of anyone anywhere at no cost and with -almost no restrictions whatsoever. 
You may copy it, give it away or -re-use it under the terms of the Project Gutenberg License included -with this eBook or online at www.gutenberg.net - - -Title: The Adventures of Sherlock Holmes - -Author: Arthur Conan Doyle - -Posting Date: April 18, 2011 [EBook #1661] -First Posted: November 29, 2002 - -Language: English - - -*** START OF THIS PROJECT GUTENBERG EBOOK THE ADVENTURES OF SHERLOCK HOLMES *** - - - - -Produced by an anonymous Project Gutenberg volunteer and Jose Menendez - - - - - - - - - -THE ADVENTURES OF SHERLOCK HOLMES - -by - -SIR ARTHUR CONAN DOYLE - - - - I. A Scandal in Bohemia - II. The Red-headed League - III. A Case of Identity - IV. The Boscombe Valley Mystery - V. The Five Orange Pips - VI. The Man with the Twisted Lip - VII. The Adventure of the Blue Carbuncle -VIII. The Adventure of the Speckled Band - IX. The Adventure of the Engineer's Thumb - X. The Adventure of the Noble Bachelor - XI. The Adventure of the Beryl Coronet - XII. The Adventure of the Copper Beeches - - - - -ADVENTURE I. A SCANDAL IN BOHEMIA - -I. - -To Sherlock Holmes she is always THE woman. I have seldom heard -him mention her under any other name. In his eyes she eclipses -and predominates the whole of her sex. It was not that he felt -any emotion akin to love for Irene Adler. All emotions, and that -one particularly, were abhorrent to his cold, precise but -admirably balanced mind. He was, I take it, the most perfect -reasoning and observing machine that the world has seen, but as a -lover he would have placed himself in a false position. He never -spoke of the softer passions, save with a gibe and a sneer. They -were admirable things for the observer--excellent for drawing the -veil from men's motives and actions. But for the trained reasoner -to admit such intrusions into his own delicate and finely -adjusted temperament was to introduce a distracting factor which -might throw a doubt upon all his mental results. 
Grit in a -sensitive instrument, or a crack in one of his own high-power -lenses, would not be more disturbing than a strong emotion in a -nature such as his. And yet there was but one woman to him, and -that woman was the late Irene Adler, of dubious and questionable -memory. - -I had seen little of Holmes lately. My marriage had drifted us -away from each other. My own complete happiness, and the -home-centred interests which rise up around the man who first -finds himself master of his own establishment, were sufficient to -absorb all my attention, while Holmes, who loathed every form of -society with his whole Bohemian soul, remained in our lodgings in -Baker Street, buried among his old books, and alternating from -week to week between cocaine and ambition, the drowsiness of the -drug, and the fierce energy of his own keen nature. He was still, -as ever, deeply attracted by the study of crime, and occupied his -immense faculties and extraordinary powers of observation in -following out those clues, and clearing up those mysteries which -had been abandoned as hopeless by the official police. From time -to time I heard some vague account of his doings: of his summons -to Odessa in the case of the Trepoff murder, of his clearing up -of the singular tragedy of the Atkinson brothers at Trincomalee, -and finally of the mission which he had accomplished so -delicately and successfully for the reigning family of Holland. -Beyond these signs of his activity, however, which I merely -shared with all the readers of the daily press, I knew little of -my former friend and companion. - -One night--it was on the twentieth of March, 1888--I was -returning from a journey to a patient (for I had now returned to -civil practice), when my way led me through Baker Street. 
As I -passed the well-remembered door, which must always be associated -in my mind with my wooing, and with the dark incidents of the -Study in Scarlet, I was seized with a keen desire to see Holmes -again, and to know how he was employing his extraordinary powers. -His rooms were brilliantly lit, and, even as I looked up, I saw -his tall, spare figure pass twice in a dark silhouette against -the blind. He was pacing the room swiftly, eagerly, with his head -sunk upon his chest and his hands clasped behind him. To me, who -knew his every mood and habit, his attitude and manner told their -own story. He was at work again. He had risen out of his -drug-created dreams and was hot upon the scent of some new -problem. I rang the bell and was shown up to the chamber which -had formerly been in part my own. - -His manner was not effusive. It seldom was; but he was glad, I -think, to see me. With hardly a word spoken, but with a kindly -eye, he waved me to an armchair, threw across his case of cigars, -and indicated a spirit case and a gasogene in the corner. Then he -stood before the fire and looked me over in his singular -introspective fashion. - -"Wedlock suits you," he remarked. "I think, Watson, that you have -put on seven and a half pounds since I saw you." - -"Seven!" I answered. - -"Indeed, I should have thought a little more. Just a trifle more, -I fancy, Watson. And in practice again, I observe. You did not -tell me that you intended to go into harness." - -"Then, how do you know?" - -"I see it, I deduce it. How do I know that you have been getting -yourself very wet lately, and that you have a most clumsy and -careless servant girl?" - -"My dear Holmes," said I, "this is too much. You would certainly -have been burned, had you lived a few centuries ago. It is true -that I had a country walk on Thursday and came home in a dreadful -mess, but as I have changed my clothes I can't imagine how you -deduce it. 
As to Mary Jane, she is incorrigible, and my wife has -given her notice, but there, again, I fail to see how you work it -out." - -He chuckled to himself and rubbed his long, nervous hands -together. - -"It is simplicity itself," said he; "my eyes tell me that on the -inside of your left shoe, just where the firelight strikes it, -the leather is scored by six almost parallel cuts. Obviously they -have been caused by someone who has very carelessly scraped round -the edges of the sole in order to remove crusted mud from it. -Hence, you see, my double deduction that you had been out in vile -weather, and that you had a particularly malignant boot-slitting -specimen of the London slavey. As to your practice, if a -gentleman walks into my rooms smelling of iodoform, with a black -mark of nitrate of silver upon his right forefinger, and a bulge -on the right side of his top-hat to show where he has secreted -his stethoscope, I must be dull, indeed, if I do not pronounce -him to be an active member of the medical profession." - -I could not help laughing at the ease with which he explained his -process of deduction. "When I hear you give your reasons," I -remarked, "the thing always appears to me to be so ridiculously -simple that I could easily do it myself, though at each -successive instance of your reasoning I am baffled until you -explain your process. And yet I believe that my eyes are as good -as yours." - -"Quite so," he answered, lighting a cigarette, and throwing -himself down into an armchair. "You see, but you do not observe. -The distinction is clear. For example, you have frequently seen -the steps which lead up from the hall to this room." - -"Frequently." - -"How often?" - -"Well, some hundreds of times." - -"Then how many are there?" - -"How many? I don't know." - -"Quite so! You have not observed. And yet you have seen. That is -just my point. Now, I know that there are seventeen steps, -because I have both seen and observed. 
By-the-way, since you are -interested in these little problems, and since you are good -enough to chronicle one or two of my trifling experiences, you -may be interested in this." He threw over a sheet of thick, -pink-tinted note-paper which had been lying open upon the table. -"It came by the last post," said he. "Read it aloud." - -The note was undated, and without either signature or address. - -"There will call upon you to-night, at a quarter to eight -o'clock," it said, "a gentleman who desires to consult you upon a -matter of the very deepest moment. Your recent services to one of -the royal houses of Europe have shown that you are one who may -safely be trusted with matters which are of an importance which -can hardly be exaggerated. This account of you we have from all -quarters received. Be in your chamber then at that hour, and do -not take it amiss if your visitor wear a mask." - -"This is indeed a mystery," I remarked. "What do you imagine that -it means?" - -"I have no data yet. It is a capital mistake to theorize before -one has data. Insensibly one begins to twist facts to suit -theories, instead of theories to suit facts. But the note itself. -What do you deduce from it?" - -I carefully examined the writing, and the paper upon which it was -written. - -"The man who wrote it was presumably well to do," I remarked, -endeavouring to imitate my companion's processes. "Such paper -could not be bought under half a crown a packet. It is peculiarly -strong and stiff." - -"Peculiar--that is the very word," said Holmes. "It is not an -English paper at all. Hold it up to the light." - -I did so, and saw a large "E" with a small "g," a "P," and a -large "G" with a small "t" woven into the texture of the paper. - -"What do you make of that?" asked Holmes. - -"The name of the maker, no doubt; or his monogram, rather." - -"Not at all. The 'G' with the small 't' stands for -'Gesellschaft,' which is the German for 'Company.' 
It is a -customary contraction like our 'Co.' 'P,' of course, stands for -'Papier.' Now for the 'Eg.' Let us glance at our Continental -Gazetteer." He took down a heavy brown volume from his shelves. -"Eglow, Eglonitz--here we are, Egria. It is in a German-speaking -country--in Bohemia, not far from Carlsbad. 'Remarkable as being -the scene of the death of Wallenstein, and for its numerous -glass-factories and paper-mills.' Ha, ha, my boy, what do you -make of that?" His eyes sparkled, and he sent up a great blue -triumphant cloud from his cigarette. - -"The paper was made in Bohemia," I said. - -"Precisely. And the man who wrote the note is a German. Do you -note the peculiar construction of the sentence--'This account of -you we have from all quarters received.' A Frenchman or Russian -could not have written that. It is the German who is so -uncourteous to his verbs. It only remains, therefore, to discover -what is wanted by this German who writes upon Bohemian paper and -prefers wearing a mask to showing his face. And here he comes, if -I am not mistaken, to resolve all our doubts." - -As he spoke there was the sharp sound of horses' hoofs and -grating wheels against the curb, followed by a sharp pull at the -bell. Holmes whistled. - -"A pair, by the sound," said he. "Yes," he continued, glancing -out of the window. "A nice little brougham and a pair of -beauties. A hundred and fifty guineas apiece. There's money in -this case, Watson, if there is nothing else." - -"I think that I had better go, Holmes." - -"Not a bit, Doctor. Stay where you are. I am lost without my -Boswell. And this promises to be interesting. It would be a pity -to miss it." - -"But your client--" - -"Never mind him. I may want your help, and so may he. Here he -comes. Sit down in that armchair, Doctor, and give us your best -attention." - -A slow and heavy step, which had been heard upon the stairs and -in the passage, paused immediately outside the door. 
Then there -was a loud and authoritative tap. - -"Come in!" said Holmes. - -A man entered who could hardly have been less than six feet six -inches in height, with the chest and limbs of a Hercules. His -dress was rich with a richness which would, in England, be looked -upon as akin to bad taste. Heavy bands of astrakhan were slashed -across the sleeves and fronts of his double-breasted coat, while -the deep blue cloak which was thrown over his shoulders was lined -with flame-coloured silk and secured at the neck with a brooch -which consisted of a single flaming beryl. Boots which extended -halfway up his calves, and which were trimmed at the tops with -rich brown fur, completed the impression of barbaric opulence -which was suggested by his whole appearance. He carried a -broad-brimmed hat in his hand, while he wore across the upper -part of his face, extending down past the cheekbones, a black -vizard mask, which he had apparently adjusted that very moment, -for his hand was still raised to it as he entered. From the lower -part of the face he appeared to be a man of strong character, -with a thick, hanging lip, and a long, straight chin suggestive -of resolution pushed to the length of obstinacy. - -"You had my note?" he asked with a deep harsh voice and a -strongly marked German accent. "I told you that I would call." He -looked from one to the other of us, as if uncertain which to -address. - -"Pray take a seat," said Holmes. "This is my friend and -colleague, Dr. Watson, who is occasionally good enough to help me -in my cases. Whom have I the honour to address?" - -"You may address me as the Count Von Kramm, a Bohemian nobleman. -I understand that this gentleman, your friend, is a man of honour -and discretion, whom I may trust with a matter of the most -extreme importance. If not, I should much prefer to communicate -with you alone." - -I rose to go, but Holmes caught me by the wrist and pushed me -back into my chair. "It is both, or none," said he. 
"You may say -before this gentleman anything which you may say to me." - -The Count shrugged his broad shoulders. "Then I must begin," said -he, "by binding you both to absolute secrecy for two years; at -the end of that time the matter will be of no importance. At -present it is not too much to say that it is of such weight it -may have an influence upon European history." - -"I promise," said Holmes. - -"And I." - -"You will excuse this mask," continued our strange visitor. "The -august person who employs me wishes his agent to be unknown to -you, and I may confess at once that the title by which I have -just called myself is not exactly my own." - -"I was aware of it," said Holmes dryly. - -"The circumstances are of great delicacy, and every precaution -has to be taken to quench what might grow to be an immense -scandal and seriously compromise one of the reigning families of -Europe. To speak plainly, the matter implicates the great House -of Ormstein, hereditary kings of Bohemia." - -"I was also aware of that," murmured Holmes, settling himself -down in his armchair and closing his eyes. - -Our visitor glanced with some apparent surprise at the languid, -lounging figure of the man who had been no doubt depicted to him -as the most incisive reasoner and most energetic agent in Europe. -Holmes slowly reopened his eyes and looked impatiently at his -gigantic client. - -"If your Majesty would condescend to state your case," he -remarked, "I should be better able to advise you." - -The man sprang from his chair and paced up and down the room in -uncontrollable agitation. Then, with a gesture of desperation, he -tore the mask from his face and hurled it upon the ground. "You -are right," he cried; "I am the King. Why should I attempt to -conceal it?" - -"Why, indeed?" murmured Holmes. "Your Majesty had not spoken -before I was aware that I was addressing Wilhelm Gottsreich -Sigismond von Ormstein, Grand Duke of Cassel-Felstein, and -hereditary King of Bohemia." 
- -"But you can understand," said our strange visitor, sitting down -once more and passing his hand over his high white forehead, "you -can understand that I am not accustomed to doing such business in -my own person. Yet the matter was so delicate that I could not -confide it to an agent without putting myself in his power. I -have come incognito from Prague for the purpose of consulting -you." - -"Then, pray consult," said Holmes, shutting his eyes once more. - -"The facts are briefly these: Some five years ago, during a -lengthy visit to Warsaw, I made the acquaintance of the well-known -adventuress, Irene Adler. The name is no doubt familiar to you." - -"Kindly look her up in my index, Doctor," murmured Holmes without -opening his eyes. For many years he had adopted a system of -docketing all paragraphs concerning men and things, so that it -was difficult to name a subject or a person on which he could not -at once furnish information. In this case I found her biography -sandwiched in between that of a Hebrew rabbi and that of a -staff-commander who had written a monograph upon the deep-sea -fishes. - -"Let me see!" said Holmes. "Hum! Born in New Jersey in the year -1858. Contralto--hum! La Scala, hum! Prima donna Imperial Opera -of Warsaw--yes! Retired from operatic stage--ha! Living in -London--quite so! Your Majesty, as I understand, became entangled -with this young person, wrote her some compromising letters, and -is now desirous of getting those letters back." - -"Precisely so. But how--" - -"Was there a secret marriage?" - -"None." - -"No legal papers or certificates?" - -"None." - -"Then I fail to follow your Majesty. If this young person should -produce her letters for blackmailing or other purposes, how is -she to prove their authenticity?" - -"There is the writing." - -"Pooh, pooh! Forgery." - -"My private note-paper." - -"Stolen." - -"My own seal." - -"Imitated." - -"My photograph." - -"Bought." - -"We were both in the photograph." - -"Oh, dear! 
That is very bad! Your Majesty has indeed committed an -indiscretion." - -"I was mad--insane." - -"You have compromised yourself seriously." - -"I was only Crown Prince then. I was young. I am but thirty now." - -"It must be recovered." - -"We have tried and failed." - -"Your Majesty must pay. It must be bought." - -"She will not sell." - -"Stolen, then." - -"Five attempts have been made. Twice burglars in my pay ransacked -her house. Once we diverted her luggage when she travelled. Twice -she has been waylaid. There has been no result." - -"No sign of it?" - -"Absolutely none." - -Holmes laughed. "It is quite a pretty little problem," said he. - -"But a very serious one to me," returned the King reproachfully. - -"Very, indeed. And what does she propose to do with the -photograph?" - -"To ruin me." - -"But how?" - -"I am about to be married." - -"So I have heard." - -"To Clotilde Lothman von Saxe-Meningen, second daughter of the -King of Scandinavia. You may know the strict principles of her -family. She is herself the very soul of delicacy. A shadow of a -doubt as to my conduct would bring the matter to an end." - -"And Irene Adler?" - -"Threatens to send them the photograph. And she will do it. I -know that she will do it. You do not know her, but she has a soul -of steel. She has the face of the most beautiful of women, and -the mind of the most resolute of men. Rather than I should marry -another woman, there are no lengths to which she would not -go--none." - -"You are sure that she has not sent it yet?" - -"I am sure." - -"And why?" - -"Because she has said that she would send it on the day when the -betrothal was publicly proclaimed. That will be next Monday." - -"Oh, then we have three days yet," said Holmes with a yawn. "That -is very fortunate, as I have one or two matters of importance to -look into just at present. Your Majesty will, of course, stay in -London for the present?" - -"Certainly. 
You will find me at the Langham under the name of the -Count Von Kramm." - -"Then I shall drop you a line to let you know how we progress." - -"Pray do so. I shall be all anxiety." - -"Then, as to money?" - -"You have carte blanche." - -"Absolutely?" - -"I tell you that I would give one of the provinces of my kingdom -to have that photograph." - -"And for present expenses?" - -The King took a heavy chamois leather bag from under his cloak -and laid it on the table. - -"There are three hundred pounds in gold and seven hundred in -notes," he said. - -Holmes scribbled a receipt upon a sheet of his note-book and -handed it to him. - -"And Mademoiselle's address?" he asked. - -"Is Briony Lodge, Serpentine Avenue, St. John's Wood." - -Holmes took a note of it. "One other question," said he. "Was the -photograph a cabinet?" - -"It was." - -"Then, good-night, your Majesty, and I trust that we shall soon -have some good news for you. And good-night, Watson," he added, -as the wheels of the royal brougham rolled down the street. "If -you will be good enough to call to-morrow afternoon at three -o'clock I should like to chat this little matter over with you." - - -II. - -At three o'clock precisely I was at Baker Street, but Holmes had -not yet returned. The landlady informed me that he had left the -house shortly after eight o'clock in the morning. I sat down -beside the fire, however, with the intention of awaiting him, -however long he might be. I was already deeply interested in his -inquiry, for, though it was surrounded by none of the grim and -strange features which were associated with the two crimes which -I have already recorded, still, the nature of the case and the -exalted station of his client gave it a character of its own. 
-Indeed, apart from the nature of the investigation which my -friend had on hand, there was something in his masterly grasp of -a situation, and his keen, incisive reasoning, which made it a -pleasure to me to study his system of work, and to follow the -quick, subtle methods by which he disentangled the most -inextricable mysteries. So accustomed was I to his invariable -success that the very possibility of his failing had ceased to -enter into my head. - -It was close upon four before the door opened, and a -drunken-looking groom, ill-kempt and side-whiskered, with an -inflamed face and disreputable clothes, walked into the room. -Accustomed as I was to my friend's amazing powers in the use of -disguises, I had to look three times before I was certain that it -was indeed he. With a nod he vanished into the bedroom, whence he -emerged in five minutes tweed-suited and respectable, as of old. -Putting his hands into his pockets, he stretched out his legs in -front of the fire and laughed heartily for some minutes. - -"Well, really!" he cried, and then he choked and laughed again -until he was obliged to lie back, limp and helpless, in the -chair. - -"What is it?" - -"It's quite too funny. I am sure you could never guess how I -employed my morning, or what I ended by doing." - -"I can't imagine. I suppose that you have been watching the -habits, and perhaps the house, of Miss Irene Adler." - -"Quite so; but the sequel was rather unusual. I will tell you, -however. I left the house a little after eight o'clock this -morning in the character of a groom out of work. There is a -wonderful sympathy and freemasonry among horsey men. Be one of -them, and you will know all that there is to know. I soon found -Briony Lodge. It is a bijou villa, with a garden at the back, but -built out in front right up to the road, two stories. Chubb lock -to the door. 
Large sitting-room on the right side, well -furnished, with long windows almost to the floor, and those -preposterous English window fasteners which a child could open. -Behind there was nothing remarkable, save that the passage window -could be reached from the top of the coach-house. I walked round -it and examined it closely from every point of view, but without -noting anything else of interest. - -"I then lounged down the street and found, as I expected, that -there was a mews in a lane which runs down by one wall of the -garden. I lent the ostlers a hand in rubbing down their horses, -and received in exchange twopence, a glass of half and half, two -fills of shag tobacco, and as much information as I could desire -about Miss Adler, to say nothing of half a dozen other people in -the neighbourhood in whom I was not in the least interested, but -whose biographies I was compelled to listen to." - -"And what of Irene Adler?" I asked. - -"Oh, she has turned all the men's heads down in that part. She is -the daintiest thing under a bonnet on this planet. So say the -Serpentine-mews, to a man. She lives quietly, sings at concerts, -drives out at five every day, and returns at seven sharp for -dinner. Seldom goes out at other times, except when she sings. -Has only one male visitor, but a good deal of him. He is dark, -handsome, and dashing, never calls less than once a day, and -often twice. He is a Mr. Godfrey Norton, of the Inner Temple. See -the advantages of a cabman as a confidant. They had driven him -home a dozen times from Serpentine-mews, and knew all about him. -When I had listened to all they had to tell, I began to walk up -and down near Briony Lodge once more, and to think over my plan -of campaign. - -"This Godfrey Norton was evidently an important factor in the -matter. He was a lawyer. That sounded ominous. What was the -relation between them, and what the object of his repeated -visits? Was she his client, his friend, or his mistress? 
If the -former, she had probably transferred the photograph to his -keeping. If the latter, it was less likely. On the issue of this -question depended whether I should continue my work at Briony -Lodge, or turn my attention to the gentleman's chambers in the -Temple. It was a delicate point, and it widened the field of my -inquiry. I fear that I bore you with these details, but I have to -let you see my little difficulties, if you are to understand the -situation." - -"I am following you closely," I answered. - -"I was still balancing the matter in my mind when a hansom cab -drove up to Briony Lodge, and a gentleman sprang out. He was a -remarkably handsome man, dark, aquiline, and moustached--evidently -the man of whom I had heard. He appeared to be in a -great hurry, shouted to the cabman to wait, and brushed past the -maid who opened the door with the air of a man who was thoroughly -at home. - -"He was in the house about half an hour, and I could catch -glimpses of him in the windows of the sitting-room, pacing up and -down, talking excitedly, and waving his arms. Of her I could see -nothing. Presently he emerged, looking even more flurried than -before. As he stepped up to the cab, he pulled a gold watch from -his pocket and looked at it earnestly, 'Drive like the devil,' he -shouted, 'first to Gross & Hankey's in Regent Street, and then to -the Church of St. Monica in the Edgeware Road. Half a guinea if -you do it in twenty minutes!' - -"Away they went, and I was just wondering whether I should not do -well to follow them when up the lane came a neat little landau, -the coachman with his coat only half-buttoned, and his tie under -his ear, while all the tags of his harness were sticking out of -the buckles. It hadn't pulled up before she shot out of the hall -door and into it. I only caught a glimpse of her at the moment, -but she was a lovely woman, with a face that a man might die for. - -"'The Church of St. 
Monica, John,' she cried, 'and half a -sovereign if you reach it in twenty minutes.' - -"This was quite too good to lose, Watson. I was just balancing -whether I should run for it, or whether I should perch behind her -landau when a cab came through the street. The driver looked -twice at such a shabby fare, but I jumped in before he could -object. 'The Church of St. Monica,' said I, 'and half a sovereign -if you reach it in twenty minutes.' It was twenty-five minutes to -twelve, and of course it was clear enough what was in the wind. - -"My cabby drove fast. I don't think I ever drove faster, but the -others were there before us. The cab and the landau with their -steaming horses were in front of the door when I arrived. I paid -the man and hurried into the church. There was not a soul there -save the two whom I had followed and a surpliced clergyman, who -seemed to be expostulating with them. They were all three -standing in a knot in front of the altar. I lounged up the side -aisle like any other idler who has dropped into a church. -Suddenly, to my surprise, the three at the altar faced round to -me, and Godfrey Norton came running as hard as he could towards -me. - -"'Thank God,' he cried. 'You'll do. Come! Come!' - -"'What then?' I asked. - -"'Come, man, come, only three minutes, or it won't be legal.' - -"I was half-dragged up to the altar, and before I knew where I was -I found myself mumbling responses which were whispered in my ear, -and vouching for things of which I knew nothing, and generally -assisting in the secure tying up of Irene Adler, spinster, to -Godfrey Norton, bachelor. It was all done in an instant, and -there was the gentleman thanking me on the one side and the lady -on the other, while the clergyman beamed on me in front. It was -the most preposterous position in which I ever found myself in my -life, and it was the thought of it that started me laughing just -now. 
It seems that there had been some informality about their -license, that the clergyman absolutely refused to marry them -without a witness of some sort, and that my lucky appearance -saved the bridegroom from having to sally out into the streets in -search of a best man. The bride gave me a sovereign, and I mean -to wear it on my watch-chain in memory of the occasion." - -"This is a very unexpected turn of affairs," said I; "and what -then?" - -"Well, I found my plans very seriously menaced. It looked as if -the pair might take an immediate departure, and so necessitate -very prompt and energetic measures on my part. At the church -door, however, they separated, he driving back to the Temple, and -she to her own house. 'I shall drive out in the park at five as -usual,' she said as she left him. I heard no more. They drove -away in different directions, and I went off to make my own -arrangements." - -"Which are?" - -"Some cold beef and a glass of beer," he answered, ringing the -bell. "I have been too busy to think of food, and I am likely to -be busier still this evening. By the way, Doctor, I shall want -your co-operation." - -"I shall be delighted." - -"You don't mind breaking the law?" - -"Not in the least." - -"Nor running a chance of arrest?" - -"Not in a good cause." - -"Oh, the cause is excellent!" - -"Then I am your man." - -"I was sure that I might rely on you." - -"But what is it you wish?" - -"When Mrs. Turner has brought in the tray I will make it clear to -you. Now," he said as he turned hungrily on the simple fare that -our landlady had provided, "I must discuss it while I eat, for I -have not much time. It is nearly five now. In two hours we must -be on the scene of action. Miss Irene, or Madame, rather, returns -from her drive at seven. We must be at Briony Lodge to meet her." - -"And what then?" - -"You must leave that to me. I have already arranged what is to -occur. There is only one point on which I must insist. 
You must -not interfere, come what may. You understand?" - -"I am to be neutral?" - -"To do nothing whatever. There will probably be some small -unpleasantness. Do not join in it. It will end in my being -conveyed into the house. Four or five minutes afterwards the -sitting-room window will open. You are to station yourself close -to that open window." - -"Yes." - -"You are to watch me, for I will be visible to you." - -"Yes." - -"And when I raise my hand--so--you will throw into the room what -I give you to throw, and will, at the same time, raise the cry of -fire. You quite follow me?" - -"Entirely." - -"It is nothing very formidable," he said, taking a long cigar-shaped -roll from his pocket. "It is an ordinary plumber's smoke-rocket, -fitted with a cap at either end to make it self-lighting. -Your task is confined to that. When you raise your cry of fire, -it will be taken up by quite a number of people. You may then -walk to the end of the street, and I will rejoin you in ten -minutes. I hope that I have made myself clear?" - -"I am to remain neutral, to get near the window, to watch you, -and at the signal to throw in this object, then to raise the cry -of fire, and to wait you at the corner of the street." - -"Precisely." - -"Then you may entirely rely on me." - -"That is excellent. I think, perhaps, it is almost time that I -prepare for the new role I have to play." - -He disappeared into his bedroom and returned in a few minutes in -the character of an amiable and simple-minded Nonconformist -clergyman. His broad black hat, his baggy trousers, his white -tie, his sympathetic smile, and general look of peering and -benevolent curiosity were such as Mr. John Hare alone could have -equalled. It was not merely that Holmes changed his costume. His -expression, his manner, his very soul seemed to vary with every -fresh part that he assumed. The stage lost a fine actor, even as -science lost an acute reasoner, when he became a specialist in -crime. 
- -It was a quarter past six when we left Baker Street, and it still -wanted ten minutes to the hour when we found ourselves in -Serpentine Avenue. It was already dusk, and the lamps were just -being lighted as we paced up and down in front of Briony Lodge, -waiting for the coming of its occupant. The house was just such -as I had pictured it from Sherlock Holmes' succinct description, -but the locality appeared to be less private than I expected. On -the contrary, for a small street in a quiet neighbourhood, it was -remarkably animated. There was a group of shabbily dressed men -smoking and laughing in a corner, a scissors-grinder with his -wheel, two guardsmen who were flirting with a nurse-girl, and -several well-dressed young men who were lounging up and down with -cigars in their mouths. - -"You see," remarked Holmes, as we paced to and fro in front of -the house, "this marriage rather simplifies matters. The -photograph becomes a double-edged weapon now. The chances are -that she would be as averse to its being seen by Mr. Godfrey -Norton, as our client is to its coming to the eyes of his -princess. Now the question is, Where are we to find the -photograph?" - -"Where, indeed?" - -"It is most unlikely that she carries it about with her. It is -cabinet size. Too large for easy concealment about a woman's -dress. She knows that the King is capable of having her waylaid -and searched. Two attempts of the sort have already been made. We -may take it, then, that she does not carry it about with her." - -"Where, then?" - -"Her banker or her lawyer. There is that double possibility. But -I am inclined to think neither. Women are naturally secretive, -and they like to do their own secreting. Why should she hand it -over to anyone else? She could trust her own guardianship, but -she could not tell what indirect or political influence might be -brought to bear upon a business man. Besides, remember that she -had resolved to use it within a few days. 
It must be where she -can lay her hands upon it. It must be in her own house." - -"But it has twice been burgled." - -"Pshaw! They did not know how to look." - -"But how will you look?" - -"I will not look." - -"What then?" - -"I will get her to show me." - -"But she will refuse." - -"She will not be able to. But I hear the rumble of wheels. It is -her carriage. Now carry out my orders to the letter." - -As he spoke the gleam of the side-lights of a carriage came round -the curve of the avenue. It was a smart little landau which -rattled up to the door of Briony Lodge. As it pulled up, one of -the loafing men at the corner dashed forward to open the door in -the hope of earning a copper, but was elbowed away by another -loafer, who had rushed up with the same intention. A fierce -quarrel broke out, which was increased by the two guardsmen, who -took sides with one of the loungers, and by the scissors-grinder, -who was equally hot upon the other side. A blow was struck, and -in an instant the lady, who had stepped from her carriage, was -the centre of a little knot of flushed and struggling men, who -struck savagely at each other with their fists and sticks. Holmes -dashed into the crowd to protect the lady; but just as he reached -her he gave a cry and dropped to the ground, with the blood -running freely down his face. At his fall the guardsmen took to -their heels in one direction and the loungers in the other, while -a number of better-dressed people, who had watched the scuffle -without taking part in it, crowded in to help the lady and to -attend to the injured man. Irene Adler, as I will still call her, -had hurried up the steps; but she stood at the top with her -superb figure outlined against the lights of the hall, looking -back into the street. - -"Is the poor gentleman much hurt?" she asked. - -"He is dead," cried several voices. - -"No, no, there's life in him!" shouted another. "But he'll be -gone before you can get him to hospital." 
- -"He's a brave fellow," said a woman. "They would have had the -lady's purse and watch if it hadn't been for him. They were a -gang, and a rough one, too. Ah, he's breathing now." - -"He can't lie in the street. May we bring him in, marm?" - -"Surely. Bring him into the sitting-room. There is a comfortable -sofa. This way, please!" - -Slowly and solemnly he was borne into Briony Lodge and laid out -in the principal room, while I still observed the proceedings -from my post by the window. The lamps had been lit, but the -blinds had not been drawn, so that I could see Holmes as he lay -upon the couch. I do not know whether he was seized with -compunction at that moment for the part he was playing, but I -know that I never felt more heartily ashamed of myself in my life -than when I saw the beautiful creature against whom I was -conspiring, or the grace and kindliness with which she waited -upon the injured man. And yet it would be the blackest treachery -to Holmes to draw back now from the part which he had intrusted -to me. I hardened my heart, and took the smoke-rocket from under -my ulster. After all, I thought, we are not injuring her. We are -but preventing her from injuring another. - -Holmes had sat up upon the couch, and I saw him motion like a man -who is in need of air. A maid rushed across and threw open the -window. At the same instant I saw him raise his hand and at the -signal I tossed my rocket into the room with a cry of "Fire!" The -word was no sooner out of my mouth than the whole crowd of -spectators, well dressed and ill--gentlemen, ostlers, and -servant-maids--joined in a general shriek of "Fire!" Thick clouds -of smoke curled through the room and out at the open window. I -caught a glimpse of rushing figures, and a moment later the voice -of Holmes from within assuring them that it was a false alarm. 
-Slipping through the shouting crowd I made my way to the corner -of the street, and in ten minutes was rejoiced to find my -friend's arm in mine, and to get away from the scene of uproar. -He walked swiftly and in silence for some few minutes until we -had turned down one of the quiet streets which lead towards the -Edgeware Road. - -"You did it very nicely, Doctor," he remarked. "Nothing could -have been better. It is all right." - -"You have the photograph?" - -"I know where it is." - -"And how did you find out?" - -"She showed me, as I told you she would." - -"I am still in the dark." - -"I do not wish to make a mystery," said he, laughing. "The matter -was perfectly simple. You, of course, saw that everyone in the -street was an accomplice. They were all engaged for the evening." - -"I guessed as much." - -"Then, when the row broke out, I had a little moist red paint in -the palm of my hand. I rushed forward, fell down, clapped my hand -to my face, and became a piteous spectacle. It is an old trick." - -"That also I could fathom." - -"Then they carried me in. She was bound to have me in. What else -could she do? And into her sitting-room, which was the very room -which I suspected. It lay between that and her bedroom, and I was -determined to see which. They laid me on a couch, I motioned for -air, they were compelled to open the window, and you had your -chance." - -"How did that help you?" - -"It was all-important. When a woman thinks that her house is on -fire, her instinct is at once to rush to the thing which she -values most. It is a perfectly overpowering impulse, and I have -more than once taken advantage of it. In the case of the -Darlington substitution scandal it was of use to me, and also in -the Arnsworth Castle business. A married woman grabs at her baby; -an unmarried one reaches for her jewel-box. Now it was clear to -me that our lady of to-day had nothing in the house more precious -to her than what we are in quest of. 
She would rush to secure it. -The alarm of fire was admirably done. The smoke and shouting were -enough to shake nerves of steel. She responded beautifully. The -photograph is in a recess behind a sliding panel just above the -right bell-pull. She was there in an instant, and I caught a -glimpse of it as she half-drew it out. When I cried out that it -was a false alarm, she replaced it, glanced at the rocket, rushed -from the room, and I have not seen her since. I rose, and, making -my excuses, escaped from the house. I hesitated whether to -attempt to secure the photograph at once; but the coachman had -come in, and as he was watching me narrowly it seemed safer to -wait. A little over-precipitance may ruin all." - -"And now?" I asked. - -"Our quest is practically finished. I shall call with the King -to-morrow, and with you, if you care to come with us. We will be -shown into the sitting-room to wait for the lady, but it is -probable that when she comes she may find neither us nor the -photograph. It might be a satisfaction to his Majesty to regain -it with his own hands." - -"And when will you call?" - -"At eight in the morning. She will not be up, so that we shall -have a clear field. Besides, we must be prompt, for this marriage -may mean a complete change in her life and habits. I must wire to -the King without delay." - -We had reached Baker Street and had stopped at the door. He was -searching his pockets for the key when someone passing said: - -"Good-night, Mister Sherlock Holmes." - -There were several people on the pavement at the time, but the -greeting appeared to come from a slim youth in an ulster who had -hurried by. - -"I've heard that voice before," said Holmes, staring down the -dimly lit street. "Now, I wonder who the deuce that could have -been." - - -III. - -I slept at Baker Street that night, and we were engaged upon our -toast and coffee in the morning when the King of Bohemia rushed -into the room. - -"You have really got it!" 
he cried, grasping Sherlock Holmes by -either shoulder and looking eagerly into his face. - -"Not yet." - -"But you have hopes?" - -"I have hopes." - -"Then, come. I am all impatience to be gone." - -"We must have a cab." - -"No, my brougham is waiting." - -"Then that will simplify matters." We descended and started off -once more for Briony Lodge. - -"Irene Adler is married," remarked Holmes. - -"Married! When?" - -"Yesterday." - -"But to whom?" - -"To an English lawyer named Norton." - -"But she could not love him." - -"I am in hopes that she does." - -"And why in hopes?" - -"Because it would spare your Majesty all fear of future -annoyance. If the lady loves her husband, she does not love your -Majesty. If she does not love your Majesty, there is no reason -why she should interfere with your Majesty's plan." - -"It is true. And yet--Well! I wish she had been of my own -station! What a queen she would have made!" He relapsed into a -moody silence, which was not broken until we drew up in -Serpentine Avenue. - -The door of Briony Lodge was open, and an elderly woman stood -upon the steps. She watched us with a sardonic eye as we stepped -from the brougham. - -"Mr. Sherlock Holmes, I believe?" said she. - -"I am Mr. Holmes," answered my companion, looking at her with a -questioning and rather startled gaze. - -"Indeed! My mistress told me that you were likely to call. She -left this morning with her husband by the 5:15 train from Charing -Cross for the Continent." - -"What!" Sherlock Holmes staggered back, white with chagrin and -surprise. "Do you mean that she has left England?" - -"Never to return." - -"And the papers?" asked the King hoarsely. "All is lost." - -"We shall see." He pushed past the servant and rushed into the -drawing-room, followed by the King and myself. The furniture was -scattered about in every direction, with dismantled shelves and -open drawers, as if the lady had hurriedly ransacked them before -her flight. 
Holmes rushed at the bell-pull, tore back a small -sliding shutter, and, plunging in his hand, pulled out a -photograph and a letter. The photograph was of Irene Adler -herself in evening dress, the letter was superscribed to -"Sherlock Holmes, Esq. To be left till called for." My friend -tore it open and we all three read it together. It was dated at -midnight of the preceding night and ran in this way: - -"MY DEAR MR. SHERLOCK HOLMES,--You really did it very well. You -took me in completely. Until after the alarm of fire, I had not a -suspicion. But then, when I found how I had betrayed myself, I -began to think. I had been warned against you months ago. I had -been told that if the King employed an agent it would certainly -be you. And your address had been given me. Yet, with all this, -you made me reveal what you wanted to know. Even after I became -suspicious, I found it hard to think evil of such a dear, kind -old clergyman. But, you know, I have been trained as an actress -myself. Male costume is nothing new to me. I often take advantage -of the freedom which it gives. I sent John, the coachman, to -watch you, ran up stairs, got into my walking-clothes, as I call -them, and came down just as you departed. - -"Well, I followed you to your door, and so made sure that I was -really an object of interest to the celebrated Mr. Sherlock -Holmes. Then I, rather imprudently, wished you good-night, and -started for the Temple to see my husband. - -"We both thought the best resource was flight, when pursued by -so formidable an antagonist; so you will find the nest empty when -you call to-morrow. As to the photograph, your client may rest in -peace. I love and am loved by a better man than he. The King may -do what he will without hindrance from one whom he has cruelly -wronged. I keep it only to safeguard myself, and to preserve a -weapon which will always secure me from any steps which he might -take in the future. 
I leave a photograph which he might care to -possess; and I remain, dear Mr. Sherlock Holmes, - - "Very truly yours, - "IRENE NORTON, née ADLER." - -"What a woman--oh, what a woman!" cried the King of Bohemia, when -we had all three read this epistle. "Did I not tell you how quick -and resolute she was? Would she not have made an admirable queen? -Is it not a pity that she was not on my level?" - -"From what I have seen of the lady she seems indeed to be on a -very different level to your Majesty," said Holmes coldly. "I am -sorry that I have not been able to bring your Majesty's business -to a more successful conclusion." - -"On the contrary, my dear sir," cried the King; "nothing could be -more successful. I know that her word is inviolate. The -photograph is now as safe as if it were in the fire." - -"I am glad to hear your Majesty say so." - -"I am immensely indebted to you. Pray tell me in what way I can -reward you. This ring--" He slipped an emerald snake ring from -his finger and held it out upon the palm of his hand. - -"Your Majesty has something which I should value even more -highly," said Holmes. - -"You have but to name it." - -"This photograph!" - -The King stared at him in amazement. - -"Irene's photograph!" he cried. "Certainly, if you wish it." - -"I thank your Majesty. Then there is no more to be done in the -matter. I have the honour to wish you a very good-morning." He -bowed, and, turning away without observing the hand which the -King had stretched out to him, he set off in my company for his -chambers. - -And that was how a great scandal threatened to affect the kingdom -of Bohemia, and how the best plans of Mr. Sherlock Holmes were -beaten by a woman's wit. He used to make merry over the -cleverness of women, but I have not heard him do it of late. And -when he speaks of Irene Adler, or when he refers to her -photograph, it is always under the honourable title of the woman. - - - -ADVENTURE II. 
THE RED-HEADED LEAGUE - -I had called upon my friend, Mr. Sherlock Holmes, one day in the -autumn of last year and found him in deep conversation with a -very stout, florid-faced, elderly gentleman with fiery red hair. -With an apology for my intrusion, I was about to withdraw when -Holmes pulled me abruptly into the room and closed the door -behind me. - -"You could not possibly have come at a better time, my dear -Watson," he said cordially. - -"I was afraid that you were engaged." - -"So I am. Very much so." - -"Then I can wait in the next room." - -"Not at all. This gentleman, Mr. Wilson, has been my partner and -helper in many of my most successful cases, and I have no -doubt that he will be of the utmost use to me in yours also." - -The stout gentleman half rose from his chair and gave a bob of -greeting, with a quick little questioning glance from his small -fat-encircled eyes. - -"Try the settee," said Holmes, relapsing into his armchair and -putting his fingertips together, as was his custom when in -judicial moods. "I know, my dear Watson, that you share my love -of all that is bizarre and outside the conventions and humdrum -routine of everyday life. You have shown your relish for it by -the enthusiasm which has prompted you to chronicle, and, if you -will excuse my saying so, somewhat to embellish so many of my own -little adventures." - -"Your cases have indeed been of the greatest interest to me," I -observed. - -"You will remember that I remarked the other day, just before we -went into the very simple problem presented by Miss Mary -Sutherland, that for strange effects and extraordinary -combinations we must go to life itself, which is always far more -daring than any effort of the imagination." - -"A proposition which I took the liberty of doubting." 
- -"You did, Doctor, but none the less you must come round to my -view, for otherwise I shall keep on piling fact upon fact on you -until your reason breaks down under them and acknowledges me to -be right. Now, Mr. Jabez Wilson here has been good enough to call -upon me this morning, and to begin a narrative which promises to -be one of the most singular which I have listened to for some -time. You have heard me remark that the strangest and most unique -things are very often connected not with the larger but with the -smaller crimes, and occasionally, indeed, where there is room for -doubt whether any positive crime has been committed. As far as I -have heard it is impossible for me to say whether the present -case is an instance of crime or not, but the course of events is -certainly among the most singular that I have ever listened to. -Perhaps, Mr. Wilson, you would have the great kindness to -recommence your narrative. I ask you not merely because my friend -Dr. Watson has not heard the opening part but also because the -peculiar nature of the story makes me anxious to have every -possible detail from your lips. As a rule, when I have heard some -slight indication of the course of events, I am able to guide -myself by the thousands of other similar cases which occur to my -memory. In the present instance I am forced to admit that the -facts are, to the best of my belief, unique." - -The portly client puffed out his chest with an appearance of some -little pride and pulled a dirty and wrinkled newspaper from the -inside pocket of his greatcoat. As he glanced down the -advertisement column, with his head thrust forward and the paper -flattened out upon his knee, I took a good look at the man and -endeavoured, after the fashion of my companion, to read the -indications which might be presented by his dress or appearance. - -I did not gain very much, however, by my inspection. 
Our visitor -bore every mark of being an average commonplace British -tradesman, obese, pompous, and slow. He wore rather baggy grey -shepherd's check trousers, a not over-clean black frock-coat, -unbuttoned in the front, and a drab waistcoat with a heavy brassy -Albert chain, and a square pierced bit of metal dangling down as -an ornament. A frayed top-hat and a faded brown overcoat with a -wrinkled velvet collar lay upon a chair beside him. Altogether, -look as I would, there was nothing remarkable about the man save -his blazing red head, and the expression of extreme chagrin and -discontent upon his features. - -Sherlock Holmes' quick eye took in my occupation, and he shook -his head with a smile as he noticed my questioning glances. -"Beyond the obvious facts that he has at some time done manual -labour, that he takes snuff, that he is a Freemason, that he has -been in China, and that he has done a considerable amount of -writing lately, I can deduce nothing else." - -Mr. Jabez Wilson started up in his chair, with his forefinger -upon the paper, but his eyes upon my companion. - -"How, in the name of good-fortune, did you know all that, Mr. -Holmes?" he asked. "How did you know, for example, that I did -manual labour. It's as true as gospel, for I began as a ship's -carpenter." - -"Your hands, my dear sir. Your right hand is quite a size larger -than your left. You have worked with it, and the muscles are more -developed." - -"Well, the snuff, then, and the Freemasonry?" - -"I won't insult your intelligence by telling you how I read that, -especially as, rather against the strict rules of your order, you -use an arc-and-compass breastpin." - -"Ah, of course, I forgot that. But the writing?" - -"What else can be indicated by that right cuff so very shiny for -five inches, and the left one with the smooth patch near the -elbow where you rest it upon the desk?" - -"Well, but China?" 
- -"The fish that you have tattooed immediately above your right -wrist could only have been done in China. I have made a small -study of tattoo marks and have even contributed to the literature -of the subject. That trick of staining the fishes' scales of a -delicate pink is quite peculiar to China. When, in addition, I -see a Chinese coin hanging from your watch-chain, the matter -becomes even more simple." - -Mr. Jabez Wilson laughed heavily. "Well, I never!" said he. "I -thought at first that you had done something clever, but I see -that there was nothing in it, after all." - -"I begin to think, Watson," said Holmes, "that I make a mistake -in explaining. 'Omne ignotum pro magnifico,' you know, and my -poor little reputation, such as it is, will suffer shipwreck if I -am so candid. Can you not find the advertisement, Mr. Wilson?" - -"Yes, I have got it now," he answered with his thick red finger -planted halfway down the column. "Here it is. This is what began -it all. You just read it for yourself, sir." - -I took the paper from him and read as follows: - -"TO THE RED-HEADED LEAGUE: On account of the bequest of the late -Ezekiah Hopkins, of Lebanon, Pennsylvania, U. S. A., there is now -another vacancy open which entitles a member of the League to a -salary of 4 pounds a week for purely nominal services. All -red-headed men who are sound in body and mind and above the age -of twenty-one years, are eligible. Apply in person on Monday, at -eleven o'clock, to Duncan Ross, at the offices of the League, 7 -Pope's Court, Fleet Street." - -"What on earth does this mean?" I ejaculated after I had twice -read over the extraordinary announcement. - -Holmes chuckled and wriggled in his chair, as was his habit when -in high spirits. "It is a little off the beaten track, isn't it?" -said he. "And now, Mr. Wilson, off you go at scratch and tell us -all about yourself, your household, and the effect which this -advertisement had upon your fortunes. 
You will first make a note, -Doctor, of the paper and the date." - -"It is The Morning Chronicle of April 27, 1890. Just two months -ago." - -"Very good. Now, Mr. Wilson?" - -"Well, it is just as I have been telling you, Mr. Sherlock -Holmes," said Jabez Wilson, mopping his forehead; "I have a small -pawnbroker's business at Coburg Square, near the City. It's not a -very large affair, and of late years it has not done more than -just give me a living. I used to be able to keep two assistants, -but now I only keep one; and I would have a job to pay him but -that he is willing to come for half wages so as to learn the -business." - -"What is the name of this obliging youth?" asked Sherlock Holmes. - -"His name is Vincent Spaulding, and he's not such a youth, -either. It's hard to say his age. I should not wish a smarter -assistant, Mr. Holmes; and I know very well that he could better -himself and earn twice what I am able to give him. But, after -all, if he is satisfied, why should I put ideas in his head?" - -"Why, indeed? You seem most fortunate in having an employé who -comes under the full market price. It is not a common experience -among employers in this age. I don't know that your assistant is -not as remarkable as your advertisement." - -"Oh, he has his faults, too," said Mr. Wilson. "Never was such a -fellow for photography. Snapping away with a camera when he ought -to be improving his mind, and then diving down into the cellar -like a rabbit into its hole to develop his pictures. That is his -main fault, but on the whole he's a good worker. There's no vice -in him." - -"He is still with you, I presume?" - -"Yes, sir. He and a girl of fourteen, who does a bit of simple -cooking and keeps the place clean--that's all I have in the -house, for I am a widower and never had any family. We live very -quietly, sir, the three of us; and we keep a roof over our heads -and pay our debts, if we do nothing more. 
- -"The first thing that put us out was that advertisement. -Spaulding, he came down into the office just this day eight -weeks, with this very paper in his hand, and he says: - -"'I wish to the Lord, Mr. Wilson, that I was a red-headed man.' - -"'Why that?' I asks. - -"'Why,' says he, 'here's another vacancy on the League of the -Red-headed Men. It's worth quite a little fortune to any man who -gets it, and I understand that there are more vacancies than -there are men, so that the trustees are at their wits' end what -to do with the money. If my hair would only change colour, here's -a nice little crib all ready for me to step into.' - -"'Why, what is it, then?' I asked. You see, Mr. Holmes, I am a -very stay-at-home man, and as my business came to me instead of -my having to go to it, I was often weeks on end without putting -my foot over the door-mat. In that way I didn't know much of what -was going on outside, and I was always glad of a bit of news. - -"'Have you never heard of the League of the Red-headed Men?' he -asked with his eyes open. - -"'Never.' - -"'Why, I wonder at that, for you are eligible yourself for one -of the vacancies.' - -"'And what are they worth?' I asked. - -"'Oh, merely a couple of hundred a year, but the work is slight, -and it need not interfere very much with one's other -occupations.' - -"Well, you can easily think that that made me prick up my ears, -for the business has not been over-good for some years, and an -extra couple of hundred would have been very handy. - -"'Tell me all about it,' said I. - -"'Well,' said he, showing me the advertisement, 'you can see for -yourself that the League has a vacancy, and there is the address -where you should apply for particulars. As far as I can make out, -the League was founded by an American millionaire, Ezekiah -Hopkins, who was very peculiar in his ways. 
He was himself -red-headed, and he had a great sympathy for all red-headed men; -so when he died it was found that he had left his enormous -fortune in the hands of trustees, with instructions to apply the -interest to the providing of easy berths to men whose hair is of -that colour. From all I hear it is splendid pay and very little to -do.' - -"'But,' said I, 'there would be millions of red-headed men who -would apply.' - -"'Not so many as you might think,' he answered. 'You see it is -really confined to Londoners, and to grown men. This American had -started from London when he was young, and he wanted to do the -old town a good turn. Then, again, I have heard it is no use your -applying if your hair is light red, or dark red, or anything but -real bright, blazing, fiery red. Now, if you cared to apply, Mr. -Wilson, you would just walk in; but perhaps it would hardly be -worth your while to put yourself out of the way for the sake of a -few hundred pounds.' - -"Now, it is a fact, gentlemen, as you may see for yourselves, -that my hair is of a very full and rich tint, so that it seemed -to me that if there was to be any competition in the matter I -stood as good a chance as any man that I had ever met. Vincent -Spaulding seemed to know so much about it that I thought he might -prove useful, so I just ordered him to put up the shutters for -the day and to come right away with me. He was very willing to -have a holiday, so we shut the business up and started off for -the address that was given us in the advertisement. - -"I never hope to see such a sight as that again, Mr. Holmes. From -north, south, east, and west every man who had a shade of red in -his hair had tramped into the city to answer the advertisement. -Fleet Street was choked with red-headed folk, and Pope's Court -looked like a coster's orange barrow. I should not have thought -there were so many in the whole country as were brought together -by that single advertisement. 
Every shade of colour they -were--straw, lemon, orange, brick, Irish-setter, liver, clay; -but, as Spaulding said, there were not many who had the real -vivid flame-coloured tint. When I saw how many were waiting, I -would have given it up in despair; but Spaulding would not hear -of it. How he did it I could not imagine, but he pushed and -pulled and butted until he got me through the crowd, and right up -to the steps which led to the office. There was a double stream -upon the stair, some going up in hope, and some coming back -dejected; but we wedged in as well as we could and soon found -ourselves in the office." - -"Your experience has been a most entertaining one," remarked -Holmes as his client paused and refreshed his memory with a huge -pinch of snuff. "Pray continue your very interesting statement." - -"There was nothing in the office but a couple of wooden chairs -and a deal table, behind which sat a small man with a head that -was even redder than mine. He said a few words to each candidate -as he came up, and then he always managed to find some fault in -them which would disqualify them. Getting a vacancy did not seem -to be such a very easy matter, after all. However, when our turn -came the little man was much more favourable to me than to any of -the others, and he closed the door as we entered, so that he -might have a private word with us. - -"'This is Mr. Jabez Wilson,' said my assistant, 'and he is -willing to fill a vacancy in the League.' - -"'And he is admirably suited for it,' the other answered. 'He has -every requirement. I cannot recall when I have seen anything so -fine.' He took a step backward, cocked his head on one side, and -gazed at my hair until I felt quite bashful. Then suddenly he -plunged forward, wrung my hand, and congratulated me warmly on my -success. - -"'It would be injustice to hesitate,' said he. 'You will, -however, I am sure, excuse me for taking an obvious precaution.' 
-With that he seized my hair in both his hands, and tugged until I -yelled with the pain. 'There is water in your eyes,' said he as -he released me. 'I perceive that all is as it should be. But we -have to be careful, for we have twice been deceived by wigs and -once by paint. I could tell you tales of cobbler's wax which -would disgust you with human nature.' He stepped over to the -window and shouted through it at the top of his voice that the -vacancy was filled. A groan of disappointment came up from below, -and the folk all trooped away in different directions until there -was not a red-head to be seen except my own and that of the -manager. - -"'My name,' said he, 'is Mr. Duncan Ross, and I am myself one of -the pensioners upon the fund left by our noble benefactor. Are -you a married man, Mr. Wilson? Have you a family?' - -"I answered that I had not. - -"His face fell immediately. - -"'Dear me!' he said gravely, 'that is very serious indeed! I am -sorry to hear you say that. The fund was, of course, for the -propagation and spread of the red-heads as well as for their -maintenance. It is exceedingly unfortunate that you should be a -bachelor.' - -"My face lengthened at this, Mr. Holmes, for I thought that I was -not to have the vacancy after all; but after thinking it over for -a few minutes he said that it would be all right. - -"'In the case of another,' said he, 'the objection might be -fatal, but we must stretch a point in favour of a man with such a -head of hair as yours. When shall you be able to enter upon your -new duties?' - -"'Well, it is a little awkward, for I have a business already,' -said I. - -"'Oh, never mind about that, Mr. Wilson!' said Vincent Spaulding. -'I should be able to look after that for you.' - -"'What would be the hours?' I asked. - -"'Ten to two.' - -"Now a pawnbroker's business is mostly done of an evening, Mr. 
-Holmes, especially Thursday and Friday evening, which is just -before pay-day; so it would suit me very well to earn a little in -the mornings. Besides, I knew that my assistant was a good man, -and that he would see to anything that turned up. - -"'That would suit me very well,' said I. 'And the pay?' - -"'Is 4 pounds a week.' - -"'And the work?' - -"'Is purely nominal.' - -"'What do you call purely nominal?' - -"'Well, you have to be in the office, or at least in the -building, the whole time. If you leave, you forfeit your whole -position forever. The will is very clear upon that point. You -don't comply with the conditions if you budge from the office -during that time.' - -"'It's only four hours a day, and I should not think of leaving,' -said I. - -"'No excuse will avail,' said Mr. Duncan Ross; 'neither sickness -nor business nor anything else. There you must stay, or you lose -your billet.' - -"'And the work?' - -"'Is to copy out the "Encyclopaedia Britannica." There is the first -volume of it in that press. You must find your own ink, pens, and -blotting-paper, but we provide this table and chair. Will you be -ready to-morrow?' - -"'Certainly,' I answered. - -"'Then, good-bye, Mr. Jabez Wilson, and let me congratulate you -once more on the important position which you have been fortunate -enough to gain.' He bowed me out of the room and I went home with -my assistant, hardly knowing what to say or do, I was so pleased -at my own good fortune. - -"Well, I thought over the matter all day, and by evening I was in -low spirits again; for I had quite persuaded myself that the -whole affair must be some great hoax or fraud, though what its -object might be I could not imagine. It seemed altogether past -belief that anyone could make such a will, or that they would pay -such a sum for doing anything so simple as copying out the -'Encyclopaedia Britannica.' 
Vincent Spaulding did what he could to -cheer me up, but by bedtime I had reasoned myself out of the -whole thing. However, in the morning I determined to have a look -at it anyhow, so I bought a penny bottle of ink, and with a -quill-pen, and seven sheets of foolscap paper, I started off for -Pope's Court. - -"Well, to my surprise and delight, everything was as right as -possible. The table was set out ready for me, and Mr. Duncan Ross -was there to see that I got fairly to work. He started me off -upon the letter A, and then he left me; but he would drop in from -time to time to see that all was right with me. At two o'clock he -bade me good-day, complimented me upon the amount that I had -written, and locked the door of the office after me. - -"This went on day after day, Mr. Holmes, and on Saturday the -manager came in and planked down four golden sovereigns for my -week's work. It was the same next week, and the same the week -after. Every morning I was there at ten, and every afternoon I -left at two. By degrees Mr. Duncan Ross took to coming in only -once of a morning, and then, after a time, he did not come in at -all. Still, of course, I never dared to leave the room for an -instant, for I was not sure when he might come, and the billet -was such a good one, and suited me so well, that I would not risk -the loss of it. - -"Eight weeks passed away like this, and I had written about -Abbots and Archery and Armour and Architecture and Attica, and -hoped with diligence that I might get on to the B's before very -long. It cost me something in foolscap, and I had pretty nearly -filled a shelf with my writings. And then suddenly the whole -business came to an end." - -"To an end?" - -"Yes, sir. And no later than this morning. I went to my work as -usual at ten o'clock, but the door was shut and locked, with a -little square of cardboard hammered on to the middle of the -panel with a tack. Here it is, and you can read for yourself." 
- -He held up a piece of white cardboard about the size of a sheet -of note-paper. It read in this fashion: - - THE RED-HEADED LEAGUE - - IS - - DISSOLVED. - - October 9, 1890. - -Sherlock Holmes and I surveyed this curt announcement and the -rueful face behind it, until the comical side of the affair so -completely overtopped every other consideration that we both -burst out into a roar of laughter. - -"I cannot see that there is anything very funny," cried our -client, flushing up to the roots of his flaming head. "If you can -do nothing better than laugh at me, I can go elsewhere." - -"No, no," cried Holmes, shoving him back into the chair from -which he had half risen. "I really wouldn't miss your case for -the world. It is most refreshingly unusual. But there is, if you -will excuse my saying so, something just a little funny about it. -Pray what steps did you take when you found the card upon the -door?" - -"I was staggered, sir. I did not know what to do. Then I called -at the offices round, but none of them seemed to know anything -about it. Finally, I went to the landlord, who is an accountant -living on the ground-floor, and I asked him if he could tell me -what had become of the Red-headed League. He said that he had -never heard of any such body. Then I asked him who Mr. Duncan -Ross was. He answered that the name was new to him. - -"'Well,' said I, 'the gentleman at No. 4.' - -"'What, the red-headed man?' - -"'Yes.' - -"'Oh,' said he, 'his name was William Morris. He was a solicitor -and was using my room as a temporary convenience until his new -premises were ready. He moved out yesterday.' - -"'Where could I find him?' - -"'Oh, at his new offices. He did tell me the address. Yes, 17 -King Edward Street, near St. Paul's.' - -"I started off, Mr. Holmes, but when I got to that address it was -a manufactory of artificial knee-caps, and no one in it had ever -heard of either Mr. William Morris or Mr. Duncan Ross." - -"And what did you do then?" 
asked Holmes. - -"I went home to Saxe-Coburg Square, and I took the advice of my -assistant. But he could not help me in any way. He could only say -that if I waited I should hear by post. But that was not quite -good enough, Mr. Holmes. I did not wish to lose such a place -without a struggle, so, as I had heard that you were good enough -to give advice to poor folk who were in need of it, I came right -away to you." - -"And you did very wisely," said Holmes. "Your case is an -exceedingly remarkable one, and I shall be happy to look into it. -From what you have told me I think that it is possible that -graver issues hang from it than might at first sight appear." - -"Grave enough!" said Mr. Jabez Wilson. "Why, I have lost four -pound a week." - -"As far as you are personally concerned," remarked Holmes, "I do -not see that you have any grievance against this extraordinary -league. On the contrary, you are, as I understand, richer by some -30 pounds, to say nothing of the minute knowledge which you have -gained on every subject which comes under the letter A. You have -lost nothing by them." - -"No, sir. But I want to find out about them, and who they are, -and what their object was in playing this prank--if it was a -prank--upon me. It was a pretty expensive joke for them, for it -cost them two and thirty pounds." - -"We shall endeavour to clear up these points for you. And, first, -one or two questions, Mr. Wilson. This assistant of yours who -first called your attention to the advertisement--how long had he -been with you?" - -"About a month then." - -"How did he come?" - -"In answer to an advertisement." - -"Was he the only applicant?" - -"No, I had a dozen." - -"Why did you pick him?" - -"Because he was handy and would come cheap." - -"At half-wages, in fact." - -"Yes." - -"What is he like, this Vincent Spaulding?" - -"Small, stout-built, very quick in his ways, no hair on his face, -though he's not short of thirty. 
Has a white splash of acid upon -his forehead." - -Holmes sat up in his chair in considerable excitement. "I thought -as much," said he. "Have you ever observed that his ears are -pierced for earrings?" - -"Yes, sir. He told me that a gipsy had done it for him when he -was a lad." - -"Hum!" said Holmes, sinking back in deep thought. "He is still -with you?" - -"Oh, yes, sir; I have only just left him." - -"And has your business been attended to in your absence?" - -"Nothing to complain of, sir. There's never very much to do of a -morning." - -"That will do, Mr. Wilson. I shall be happy to give you an -opinion upon the subject in the course of a day or two. To-day is -Saturday, and I hope that by Monday we may come to a conclusion." - -"Well, Watson," said Holmes when our visitor had left us, "what -do you make of it all?" - -"I make nothing of it," I answered frankly. "It is a most -mysterious business." - -"As a rule," said Holmes, "the more bizarre a thing is the less -mysterious it proves to be. It is your commonplace, featureless -crimes which are really puzzling, just as a commonplace face is -the most difficult to identify. But I must be prompt over this -matter." - -"What are you going to do, then?" I asked. - -"To smoke," he answered. "It is quite a three pipe problem, and I -beg that you won't speak to me for fifty minutes." He curled -himself up in his chair, with his thin knees drawn up to his -hawk-like nose, and there he sat with his eyes closed and his -black clay pipe thrusting out like the bill of some strange bird. -I had come to the conclusion that he had dropped asleep, and -indeed was nodding myself, when he suddenly sprang out of his -chair with the gesture of a man who has made up his mind and put -his pipe down upon the mantelpiece. - -"Sarasate plays at the St. James's Hall this afternoon," he -remarked. "What do you think, Watson? Could your patients spare -you for a few hours?" - -"I have nothing to do to-day. 
My practice is never very -absorbing." - -"Then put on your hat and come. I am going through the City -first, and we can have some lunch on the way. I observe that -there is a good deal of German music on the programme, which is -rather more to my taste than Italian or French. It is -introspective, and I want to introspect. Come along!" - -We travelled by the Underground as far as Aldersgate; and a short -walk took us to Saxe-Coburg Square, the scene of the singular -story which we had listened to in the morning. It was a poky, -little, shabby-genteel place, where four lines of dingy -two-storied brick houses looked out into a small railed-in -enclosure, where a lawn of weedy grass and a few clumps of faded -laurel-bushes made a hard fight against a smoke-laden and -uncongenial atmosphere. Three gilt balls and a brown board with -"JABEZ WILSON" in white letters, upon a corner house, announced -the place where our red-headed client carried on his business. -Sherlock Holmes stopped in front of it with his head on one side -and looked it all over, with his eyes shining brightly between -puckered lids. Then he walked slowly up the street, and then down -again to the corner, still looking keenly at the houses. Finally -he returned to the pawnbroker's, and, having thumped vigorously -upon the pavement with his stick two or three times, he went up -to the door and knocked. It was instantly opened by a -bright-looking, clean-shaven young fellow, who asked him to step -in. - -"Thank you," said Holmes, "I only wished to ask you how you would -go from here to the Strand." - -"Third right, fourth left," answered the assistant promptly, -closing the door. - -"Smart fellow, that," observed Holmes as we walked away. "He is, -in my judgment, the fourth smartest man in London, and for daring -I am not sure that he has not a claim to be third. I have known -something of him before." - -"Evidently," said I, "Mr. 
Wilson's assistant counts for a good -deal in this mystery of the Red-headed League. I am sure that you -inquired your way merely in order that you might see him." - -"Not him." - -"What then?" - -"The knees of his trousers." - -"And what did you see?" - -"What I expected to see." - -"Why did you beat the pavement?" - -"My dear doctor, this is a time for observation, not for talk. We -are spies in an enemy's country. We know something of Saxe-Coburg -Square. Let us now explore the parts which lie behind it." - -The road in which we found ourselves as we turned round the -corner from the retired Saxe-Coburg Square presented as great a -contrast to it as the front of a picture does to the back. It was -one of the main arteries which conveyed the traffic of the City -to the north and west. The roadway was blocked with the immense -stream of commerce flowing in a double tide inward and outward, -while the footpaths were black with the hurrying swarm of -pedestrians. It was difficult to realise as we looked at the line -of fine shops and stately business premises that they really -abutted on the other side upon the faded and stagnant square -which we had just quitted. - -"Let me see," said Holmes, standing at the corner and glancing -along the line, "I should like just to remember the order of the -houses here. It is a hobby of mine to have an exact knowledge of -London. There is Mortimer's, the tobacconist, the little -newspaper shop, the Coburg branch of the City and Suburban Bank, -the Vegetarian Restaurant, and McFarlane's carriage-building -depot. That carries us right on to the other block. And now, -Doctor, we've done our work, so it's time we had some play. A -sandwich and a cup of coffee, and then off to violin-land, where -all is sweetness and delicacy and harmony, and there are no -red-headed clients to vex us with their conundrums." 
- -My friend was an enthusiastic musician, being himself not only a -very capable performer but a composer of no ordinary merit. All -the afternoon he sat in the stalls wrapped in the most perfect -happiness, gently waving his long, thin fingers in time to the -music, while his gently smiling face and his languid, dreamy eyes -were as unlike those of Holmes the sleuth-hound, Holmes the -relentless, keen-witted, ready-handed criminal agent, as it was -possible to conceive. In his singular character the dual nature -alternately asserted itself, and his extreme exactness and -astuteness represented, as I have often thought, the reaction -against the poetic and contemplative mood which occasionally -predominated in him. The swing of his nature took him from -extreme languor to devouring energy; and, as I knew well, he was -never so truly formidable as when, for days on end, he had been -lounging in his armchair amid his improvisations and his -black-letter editions. Then it was that the lust of the chase -would suddenly come upon him, and that his brilliant reasoning -power would rise to the level of intuition, until those who were -unacquainted with his methods would look askance at him as on a -man whose knowledge was not that of other mortals. When I saw him -that afternoon so enwrapped in the music at St. James's Hall I -felt that an evil time might be coming upon those whom he had set -himself to hunt down. - -"You want to go home, no doubt, Doctor," he remarked as we -emerged. - -"Yes, it would be as well." - -"And I have some business to do which will take some hours. This -business at Coburg Square is serious." - -"Why serious?" - -"A considerable crime is in contemplation. I have every reason to -believe that we shall be in time to stop it. But to-day being -Saturday rather complicates matters. I shall want your help -to-night." - -"At what time?" - -"Ten will be early enough." - -"I shall be at Baker Street at ten." - -"Very well. 
And, I say, Doctor, there may be some little danger, -so kindly put your army revolver in your pocket." He waved his -hand, turned on his heel, and disappeared in an instant among the -crowd. - -I trust that I am not more dense than my neighbours, but I was -always oppressed with a sense of my own stupidity in my dealings -with Sherlock Holmes. Here I had heard what he had heard, I had -seen what he had seen, and yet from his words it was evident that -he saw clearly not only what had happened but what was about to -happen, while to me the whole business was still confused and -grotesque. As I drove home to my house in Kensington I thought -over it all, from the extraordinary story of the red-headed -copier of the "Encyclopaedia" down to the visit to Saxe-Coburg -Square, and the ominous words with which he had parted from me. -What was this nocturnal expedition, and why should I go armed? -Where were we going, and what were we to do? I had the hint from -Holmes that this smooth-faced pawnbroker's assistant was a -formidable man--a man who might play a deep game. I tried to -puzzle it out, but gave it up in despair and set the matter aside -until night should bring an explanation. - -It was a quarter-past nine when I started from home and made my -way across the Park, and so through Oxford Street to Baker -Street. Two hansoms were standing at the door, and as I entered -the passage I heard the sound of voices from above. On entering -his room I found Holmes in animated conversation with two men, -one of whom I recognised as Peter Jones, the official police -agent, while the other was a long, thin, sad-faced man, with a -very shiny hat and oppressively respectable frock-coat. - -"Ha! Our party is complete," said Holmes, buttoning up his -pea-jacket and taking his heavy hunting crop from the rack. -"Watson, I think you know Mr. Jones, of Scotland Yard? Let me -introduce you to Mr. Merryweather, who is to be our companion in -to-night's adventure." 
- -"We're hunting in couples again, Doctor, you see," said Jones in -his consequential way. "Our friend here is a wonderful man for -starting a chase. All he wants is an old dog to help him to do -the running down." - -"I hope a wild goose may not prove to be the end of our chase," -observed Mr. Merryweather gloomily. - -"You may place considerable confidence in Mr. Holmes, sir," said -the police agent loftily. "He has his own little methods, which -are, if he won't mind my saying so, just a little too theoretical -and fantastic, but he has the makings of a detective in him. It -is not too much to say that once or twice, as in that business of -the Sholto murder and the Agra treasure, he has been more nearly -correct than the official force." - -"Oh, if you say so, Mr. Jones, it is all right," said the -stranger with deference. "Still, I confess that I miss my rubber. -It is the first Saturday night for seven-and-twenty years that I -have not had my rubber." - -"I think you will find," said Sherlock Holmes, "that you will -play for a higher stake to-night than you have ever done yet, and -that the play will be more exciting. For you, Mr. Merryweather, -the stake will be some 30,000 pounds; and for you, Jones, it will -be the man upon whom you wish to lay your hands." - -"John Clay, the murderer, thief, smasher, and forger. He's a -young man, Mr. Merryweather, but he is at the head of his -profession, and I would rather have my bracelets on him than on -any criminal in London. He's a remarkable man, is young John -Clay. His grandfather was a royal duke, and he himself has been -to Eton and Oxford. His brain is as cunning as his fingers, and -though we meet signs of him at every turn, we never know where to -find the man himself. He'll crack a crib in Scotland one week, -and be raising money to build an orphanage in Cornwall the next. -I've been on his track for years and have never set eyes on him -yet." 
- -"I hope that I may have the pleasure of introducing you to-night. -I've had one or two little turns also with Mr. John Clay, and I -agree with you that he is at the head of his profession. It is -past ten, however, and quite time that we started. If you two -will take the first hansom, Watson and I will follow in the -second." - -Sherlock Holmes was not very communicative during the long drive -and lay back in the cab humming the tunes which he had heard in -the afternoon. We rattled through an endless labyrinth of gas-lit -streets until we emerged into Farrington Street. - -"We are close there now," my friend remarked. "This fellow -Merryweather is a bank director, and personally interested in the -matter. I thought it as well to have Jones with us also. He is -not a bad fellow, though an absolute imbecile in his profession. -He has one positive virtue. He is as brave as a bulldog and as -tenacious as a lobster if he gets his claws upon anyone. Here we -are, and they are waiting for us." - -We had reached the same crowded thoroughfare in which we had -found ourselves in the morning. Our cabs were dismissed, and, -following the guidance of Mr. Merryweather, we passed down a -narrow passage and through a side door, which he opened for us. -Within there was a small corridor, which ended in a very massive -iron gate. This also was opened, and led down a flight of winding -stone steps, which terminated at another formidable gate. Mr. -Merryweather stopped to light a lantern, and then conducted us -down a dark, earth-smelling passage, and so, after opening a -third door, into a huge vault or cellar, which was piled all -round with crates and massive boxes. - -"You are not very vulnerable from above," Holmes remarked as he -held up the lantern and gazed about him. - -"Nor from below," said Mr. Merryweather, striking his stick upon -the flags which lined the floor. "Why, dear me, it sounds quite -hollow!" he remarked, looking up in surprise. 
- -"I must really ask you to be a little more quiet!" said Holmes -severely. "You have already imperilled the whole success of our -expedition. Might I beg that you would have the goodness to sit -down upon one of those boxes, and not to interfere?" - -The solemn Mr. Merryweather perched himself upon a crate, with a -very injured expression upon his face, while Holmes fell upon his -knees upon the floor and, with the lantern and a magnifying lens, -began to examine minutely the cracks between the stones. A few -seconds sufficed to satisfy him, for he sprang to his feet again -and put his glass in his pocket. - -"We have at least an hour before us," he remarked, "for they can -hardly take any steps until the good pawnbroker is safely in bed. -Then they will not lose a minute, for the sooner they do their -work the longer time they will have for their escape. We are at -present, Doctor--as no doubt you have divined--in the cellar of -the City branch of one of the principal London banks. Mr. -Merryweather is the chairman of directors, and he will explain to -you that there are reasons why the more daring criminals of -London should take a considerable interest in this cellar at -present." - -"It is our French gold," whispered the director. "We have had -several warnings that an attempt might be made upon it." - -"Your French gold?" - -"Yes. We had occasion some months ago to strengthen our resources -and borrowed for that purpose 30,000 napoleons from the Bank of -France. It has become known that we have never had occasion to -unpack the money, and that it is still lying in our cellar. The -crate upon which I sit contains 2,000 napoleons packed between -layers of lead foil. Our reserve of bullion is much larger at -present than is usually kept in a single branch office, and the -directors have had misgivings upon the subject." - -"Which were very well justified," observed Holmes. "And now it is -time that we arranged our little plans. 
I expect that within an -hour matters will come to a head. In the meantime Mr. -Merryweather, we must put the screen over that dark lantern." - -"And sit in the dark?" - -"I am afraid so. I had brought a pack of cards in my pocket, and -I thought that, as we were a partie carrée, you might have your -rubber after all. But I see that the enemy's preparations have -gone so far that we cannot risk the presence of a light. And, -first of all, we must choose our positions. These are daring men, -and though we shall take them at a disadvantage, they may do us -some harm unless we are careful. I shall stand behind this crate, -and do you conceal yourselves behind those. Then, when I flash a -light upon them, close in swiftly. If they fire, Watson, have no -compunction about shooting them down." - -I placed my revolver, cocked, upon the top of the wooden case -behind which I crouched. Holmes shot the slide across the front -of his lantern and left us in pitch darkness--such an absolute -darkness as I have never before experienced. The smell of hot -metal remained to assure us that the light was still there, ready -to flash out at a moment's notice. To me, with my nerves worked -up to a pitch of expectancy, there was something depressing and -subduing in the sudden gloom, and in the cold dank air of the -vault. - -"They have but one retreat," whispered Holmes. "That is back -through the house into Saxe-Coburg Square. I hope that you have -done what I asked you, Jones?" - -"I have an inspector and two officers waiting at the front door." - -"Then we have stopped all the holes. And now we must be silent -and wait." - -What a time it seemed! From comparing notes afterwards it was but -an hour and a quarter, yet it appeared to me that the night must -have almost gone and the dawn be breaking above us. 
My limbs -were weary and stiff, for I feared to change my position; yet my -nerves were worked up to the highest pitch of tension, and my -hearing was so acute that I could not only hear the gentle -breathing of my companions, but I could distinguish the deeper, -heavier in-breath of the bulky Jones from the thin, sighing note -of the bank director. From my position I could look over the case -in the direction of the floor. Suddenly my eyes caught the glint -of a light. - -At first it was but a lurid spark upon the stone pavement. Then -it lengthened out until it became a yellow line, and then, -without any warning or sound, a gash seemed to open and a hand -appeared, a white, almost womanly hand, which felt about in the -centre of the little area of light. For a minute or more the -hand, with its writhing fingers, protruded out of the floor. Then -it was withdrawn as suddenly as it appeared, and all was dark -again save the single lurid spark which marked a chink between -the stones. - -Its disappearance, however, was but momentary. With a rending, -tearing sound, one of the broad, white stones turned over upon -its side and left a square, gaping hole, through which streamed -the light of a lantern. Over the edge there peeped a clean-cut, -boyish face, which looked keenly about it, and then, with a hand -on either side of the aperture, drew itself shoulder-high and -waist-high, until one knee rested upon the edge. In another -instant he stood at the side of the hole and was hauling after -him a companion, lithe and small like himself, with a pale face -and a shock of very red hair. - -"It's all clear," he whispered. "Have you the chisel and the -bags? Great Scott! Jump, Archie, jump, and I'll swing for it!" - -Sherlock Holmes had sprung out and seized the intruder by the -collar. The other dived down the hole, and I heard the sound of -rending cloth as Jones clutched at his skirts. 
The light flashed -upon the barrel of a revolver, but Holmes' hunting crop came -down on the man's wrist, and the pistol clinked upon the stone -floor. - -"It's no use, John Clay," said Holmes blandly. "You have no -chance at all." - -"So I see," the other answered with the utmost coolness. "I fancy -that my pal is all right, though I see you have got his -coat-tails." - -"There are three men waiting for him at the door," said Holmes. - -"Oh, indeed! You seem to have done the thing very completely. I -must compliment you." - -"And I you," Holmes answered. "Your red-headed idea was very new -and effective." - -"You'll see your pal again presently," said Jones. "He's quicker -at climbing down holes than I am. Just hold out while I fix the -derbies." - -"I beg that you will not touch me with your filthy hands," -remarked our prisoner as the handcuffs clattered upon his wrists. -"You may not be aware that I have royal blood in my veins. Have -the goodness, also, when you address me always to say 'sir' and -'please.'" - -"All right," said Jones with a stare and a snigger. "Well, would -you please, sir, march upstairs, where we can get a cab to carry -your Highness to the police-station?" - -"That is better," said John Clay serenely. He made a sweeping bow -to the three of us and walked quietly off in the custody of the -detective. - -"Really, Mr. Holmes," said Mr. Merryweather as we followed them -from the cellar, "I do not know how the bank can thank you or -repay you. There is no doubt that you have detected and defeated -in the most complete manner one of the most determined attempts -at bank robbery that have ever come within my experience." - -"I have had one or two little scores of my own to settle with Mr. -John Clay," said Holmes. 
"I have been at some small expense over -this matter, which I shall expect the bank to refund, but beyond -that I am amply repaid by having had an experience which is in -many ways unique, and by hearing the very remarkable narrative of -the Red-headed League." - - -"You see, Watson," he explained in the early hours of the morning -as we sat over a glass of whisky and soda in Baker Street, "it -was perfectly obvious from the first that the only possible -object of this rather fantastic business of the advertisement of -the League, and the copying of the 'Encyclopaedia,' must be to get -this not over-bright pawnbroker out of the way for a number of -hours every day. It was a curious way of managing it, but, -really, it would be difficult to suggest a better. The method was -no doubt suggested to Clay's ingenious mind by the colour of his -accomplice's hair. The 4 pounds a week was a lure which must draw -him, and what was it to them, who were playing for thousands? -They put in the advertisement, one rogue has the temporary -office, the other rogue incites the man to apply for it, and -together they manage to secure his absence every morning in the -week. From the time that I heard of the assistant having come for -half wages, it was obvious to me that he had some strong motive -for securing the situation." - -"But how could you guess what the motive was?" - -"Had there been women in the house, I should have suspected a -mere vulgar intrigue. That, however, was out of the question. The -man's business was a small one, and there was nothing in his -house which could account for such elaborate preparations, and -such an expenditure as they were at. It must, then, be something -out of the house. What could it be? I thought of the assistant's -fondness for photography, and his trick of vanishing into the -cellar. The cellar! There was the end of this tangled clue. 
Then -I made inquiries as to this mysterious assistant and found that I -had to deal with one of the coolest and most daring criminals in -London. He was doing something in the cellar--something which -took many hours a day for months on end. What could it be, once -more? I could think of nothing save that he was running a tunnel -to some other building. - -"So far I had got when we went to visit the scene of action. I -surprised you by beating upon the pavement with my stick. I was -ascertaining whether the cellar stretched out in front or behind. -It was not in front. Then I rang the bell, and, as I hoped, the -assistant answered it. We have had some skirmishes, but we had -never set eyes upon each other before. I hardly looked at his -face. His knees were what I wished to see. You must yourself have -remarked how worn, wrinkled, and stained they were. They spoke of -those hours of burrowing. The only remaining point was what they -were burrowing for. I walked round the corner, saw the City and -Suburban Bank abutted on our friend's premises, and felt that I -had solved my problem. When you drove home after the concert I -called upon Scotland Yard and upon the chairman of the bank -directors, with the result that you have seen." - -"And how could you tell that they would make their attempt -to-night?" I asked. - -"Well, when they closed their League offices that was a sign that -they cared no longer about Mr. Jabez Wilson's presence--in other -words, that they had completed their tunnel. But it was essential -that they should use it soon, as it might be discovered, or the -bullion might be removed. Saturday would suit them better than -any other day, as it would give them two days for their escape. -For all these reasons I expected them to come to-night." - -"You reasoned it out beautifully," I exclaimed in unfeigned -admiration. "It is so long a chain, and yet every link rings -true." - -"It saved me from ennui," he answered, yawning. "Alas! 
I already -feel it closing in upon me. My life is spent in one long effort -to escape from the commonplaces of existence. These little -problems help me to do so." - -"And you are a benefactor of the race," said I. - -He shrugged his shoulders. "Well, perhaps, after all, it is of -some little use," he remarked. "'L'homme c'est rien--l'oeuvre -c'est tout,' as Gustave Flaubert wrote to George Sand." - - - -ADVENTURE III. A CASE OF IDENTITY - -"My dear fellow," said Sherlock Holmes as we sat on either side -of the fire in his lodgings at Baker Street, "life is infinitely -stranger than anything which the mind of man could invent. We -would not dare to conceive the things which are really mere -commonplaces of existence. If we could fly out of that window -hand in hand, hover over this great city, gently remove the -roofs, and peep in at the queer things which are going on, the -strange coincidences, the plannings, the cross-purposes, the -wonderful chains of events, working through generations, and -leading to the most outré results, it would make all fiction with -its conventionalities and foreseen conclusions most stale and -unprofitable." - -"And yet I am not convinced of it," I answered. "The cases which -come to light in the papers are, as a rule, bald enough, and -vulgar enough. We have in our police reports realism pushed to -its extreme limits, and yet the result is, it must be confessed, -neither fascinating nor artistic." - -"A certain selection and discretion must be used in producing a -realistic effect," remarked Holmes. "This is wanting in the -police report, where more stress is laid, perhaps, upon the -platitudes of the magistrate than upon the details, which to an -observer contain the vital essence of the whole matter. Depend -upon it, there is nothing so unnatural as the commonplace." - -I smiled and shook my head. "I can quite understand your thinking -so," I said. 
"Of course, in your position of unofficial adviser -and helper to everybody who is absolutely puzzled, throughout -three continents, you are brought in contact with all that is -strange and bizarre. But here"--I picked up the morning paper -from the ground--"let us put it to a practical test. Here is the -first heading upon which I come. 'A husband's cruelty to his -wife.' There is half a column of print, but I know without -reading it that it is all perfectly familiar to me. There is, of -course, the other woman, the drink, the push, the blow, the -bruise, the sympathetic sister or landlady. The crudest of -writers could invent nothing more crude." - -"Indeed, your example is an unfortunate one for your argument," -said Holmes, taking the paper and glancing his eye down it. "This -is the Dundas separation case, and, as it happens, I was engaged -in clearing up some small points in connection with it. The -husband was a teetotaler, there was no other woman, and the -conduct complained of was that he had drifted into the habit of -winding up every meal by taking out his false teeth and hurling -them at his wife, which, you will allow, is not an action likely -to occur to the imagination of the average story-teller. Take a -pinch of snuff, Doctor, and acknowledge that I have scored over -you in your example." - -He held out his snuffbox of old gold, with a great amethyst in -the centre of the lid. Its splendour was in such contrast to his -homely ways and simple life that I could not help commenting upon -it. - -"Ah," said he, "I forgot that I had not seen you for some weeks. -It is a little souvenir from the King of Bohemia in return for my -assistance in the case of the Irene Adler papers." - -"And the ring?" I asked, glancing at a remarkable brilliant which -sparkled upon his finger. 
- -"It was from the reigning family of Holland, though the matter in -which I served them was of such delicacy that I cannot confide it -even to you, who have been good enough to chronicle one or two of -my little problems." - -"And have you any on hand just now?" I asked with interest. - -"Some ten or twelve, but none which present any feature of -interest. They are important, you understand, without being -interesting. Indeed, I have found that it is usually in -unimportant matters that there is a field for the observation, -and for the quick analysis of cause and effect which gives the -charm to an investigation. The larger crimes are apt to be the -simpler, for the bigger the crime the more obvious, as a rule, is -the motive. In these cases, save for one rather intricate matter -which has been referred to me from Marseilles, there is nothing -which presents any features of interest. It is possible, however, -that I may have something better before very many minutes are -over, for this is one of my clients, or I am much mistaken." - -He had risen from his chair and was standing between the parted -blinds gazing down into the dull neutral-tinted London street. -Looking over his shoulder, I saw that on the pavement opposite -there stood a large woman with a heavy fur boa round her neck, -and a large curling red feather in a broad-brimmed hat which was -tilted in a coquettish Duchess of Devonshire fashion over her -ear. From under this great panoply she peeped up in a nervous, -hesitating fashion at our windows, while her body oscillated -backward and forward, and her fingers fidgeted with her glove -buttons. Suddenly, with a plunge, as of the swimmer who leaves -the bank, she hurried across the road, and we heard the sharp -clang of the bell. - -"I have seen those symptoms before," said Holmes, throwing his -cigarette into the fire. "Oscillation upon the pavement always -means an affaire de coeur. 
She would like advice, but is not sure -that the matter is not too delicate for communication. And yet -even here we may discriminate. When a woman has been seriously -wronged by a man she no longer oscillates, and the usual symptom -is a broken bell wire. Here we may take it that there is a love -matter, but that the maiden is not so much angry as perplexed, or -grieved. But here she comes in person to resolve our doubts." - -As he spoke there was a tap at the door, and the boy in buttons -entered to announce Miss Mary Sutherland, while the lady herself -loomed behind his small black figure like a full-sailed -merchant-man behind a tiny pilot boat. Sherlock Holmes welcomed -her with the easy courtesy for which he was remarkable, and, -having closed the door and bowed her into an armchair, he looked -her over in the minute and yet abstracted fashion which was -peculiar to him. - -"Do you not find," he said, "that with your short sight it is a -little trying to do so much typewriting?" - -"I did at first," she answered, "but now I know where the letters -are without looking." Then, suddenly realising the full purport -of his words, she gave a violent start and looked up, with fear -and astonishment upon her broad, good-humoured face. "You've -heard about me, Mr. Holmes," she cried, "else how could you know -all that?" - -"Never mind," said Holmes, laughing; "it is my business to know -things. Perhaps I have trained myself to see what others -overlook. If not, why should you come to consult me?" - -"I came to you, sir, because I heard of you from Mrs. Etherege, -whose husband you found so easy when the police and everyone had -given him up for dead. Oh, Mr. Holmes, I wish you would do as -much for me. I'm not rich, but still I have a hundred a year in -my own right, besides the little that I make by the machine, and -I would give it all to know what has become of Mr. Hosmer Angel." - -"Why did you come away to consult me in such a hurry?" 
asked -Sherlock Holmes, with his finger-tips together and his eyes to -the ceiling. - -Again a startled look came over the somewhat vacuous face of Miss -Mary Sutherland. "Yes, I did bang out of the house," she said, -"for it made me angry to see the easy way in which Mr. -Windibank--that is, my father--took it all. He would not go to -the police, and he would not go to you, and so at last, as he -would do nothing and kept on saying that there was no harm done, -it made me mad, and I just on with my things and came right away -to you." - -"Your father," said Holmes, "your stepfather, surely, since the -name is different." - -"Yes, my stepfather. I call him father, though it sounds funny, -too, for he is only five years and two months older than myself." - -"And your mother is alive?" - -"Oh, yes, mother is alive and well. I wasn't best pleased, Mr. -Holmes, when she married again so soon after father's death, and -a man who was nearly fifteen years younger than herself. Father -was a plumber in the Tottenham Court Road, and he left a tidy -business behind him, which mother carried on with Mr. Hardy, the -foreman; but when Mr. Windibank came he made her sell the -business, for he was very superior, being a traveller in wines. -They got 4700 pounds for the goodwill and interest, which wasn't -near as much as father could have got if he had been alive." - -I had expected to see Sherlock Holmes impatient under this -rambling and inconsequential narrative, but, on the contrary, he -had listened with the greatest concentration of attention. - -"Your own little income," he asked, "does it come out of the -business?" - -"Oh, no, sir. It is quite separate and was left me by my uncle -Ned in Auckland. It is in New Zealand stock, paying 4 1/2 per -cent. Two thousand five hundred pounds was the amount, but I can -only touch the interest." - -"You interest me extremely," said Holmes. 
"And since you draw so -large a sum as a hundred a year, with what you earn into the -bargain, you no doubt travel a little and indulge yourself in -every way. I believe that a single lady can get on very nicely -upon an income of about 60 pounds." - -"I could do with much less than that, Mr. Holmes, but you -understand that as long as I live at home I don't wish to be a -burden to them, and so they have the use of the money just while -I am staying with them. Of course, that is only just for the -time. Mr. Windibank draws my interest every quarter and pays it -over to mother, and I find that I can do pretty well with what I -earn at typewriting. It brings me twopence a sheet, and I can -often do from fifteen to twenty sheets in a day." - -"You have made your position very clear to me," said Holmes. -"This is my friend, Dr. Watson, before whom you can speak as -freely as before myself. Kindly tell us now all about your -connection with Mr. Hosmer Angel." - -A flush stole over Miss Sutherland's face, and she picked -nervously at the fringe of her jacket. "I met him first at the -gasfitters' ball," she said. "They used to send father tickets -when he was alive, and then afterwards they remembered us, and -sent them to mother. Mr. Windibank did not wish us to go. He -never did wish us to go anywhere. He would get quite mad if I -wanted so much as to join a Sunday-school treat. But this time I -was set on going, and I would go; for what right had he to -prevent? He said the folk were not fit for us to know, when all -father's friends were to be there. And he said that I had nothing -fit to wear, when I had my purple plush that I had never so much -as taken out of the drawer. At last, when nothing else would do, -he went off to France upon the business of the firm, but we went, -mother and I, with Mr. Hardy, who used to be our foreman, and it -was there I met Mr. Hosmer Angel." - -"I suppose," said Holmes, "that when Mr. 
Windibank came back from -France he was very annoyed at your having gone to the ball." - -"Oh, well, he was very good about it. He laughed, I remember, and -shrugged his shoulders, and said there was no use denying -anything to a woman, for she would have her way." - -"I see. Then at the gasfitters' ball you met, as I understand, a -gentleman called Mr. Hosmer Angel." - -"Yes, sir. I met him that night, and he called next day to ask if -we had got home all safe, and after that we met him--that is to -say, Mr. Holmes, I met him twice for walks, but after that father -came back again, and Mr. Hosmer Angel could not come to the house -any more." - -"No?" - -"Well, you know father didn't like anything of the sort. He -wouldn't have any visitors if he could help it, and he used to -say that a woman should be happy in her own family circle. But -then, as I used to say to mother, a woman wants her own circle to -begin with, and I had not got mine yet." - -"But how about Mr. Hosmer Angel? Did he make no attempt to see -you?" - -"Well, father was going off to France again in a week, and Hosmer -wrote and said that it would be safer and better not to see each -other until he had gone. We could write in the meantime, and he -used to write every day. I took the letters in in the morning, so -there was no need for father to know." - -"Were you engaged to the gentleman at this time?" - -"Oh, yes, Mr. Holmes. We were engaged after the first walk that -we took. Hosmer--Mr. Angel--was a cashier in an office in -Leadenhall Street--and--" - -"What office?" - -"That's the worst of it, Mr. Holmes, I don't know." - -"Where did he live, then?" - -"He slept on the premises." - -"And you don't know his address?" - -"No--except that it was Leadenhall Street." - -"Where did you address your letters, then?" - -"To the Leadenhall Street Post Office, to be left till called -for. 
He said that if they were sent to the office he would be -chaffed by all the other clerks about having letters from a lady, -so I offered to typewrite them, like he did his, but he wouldn't -have that, for he said that when I wrote them they seemed to come -from me, but when they were typewritten he always felt that the -machine had come between us. That will just show you how fond he -was of me, Mr. Holmes, and the little things that he would think -of." - -"It was most suggestive," said Holmes. "It has long been an axiom -of mine that the little things are infinitely the most important. -Can you remember any other little things about Mr. Hosmer Angel?" - -"He was a very shy man, Mr. Holmes. He would rather walk with me -in the evening than in the daylight, for he said that he hated to -be conspicuous. Very retiring and gentlemanly he was. Even his -voice was gentle. He'd had the quinsy and swollen glands when he -was young, he told me, and it had left him with a weak throat, -and a hesitating, whispering fashion of speech. He was always -well dressed, very neat and plain, but his eyes were weak, just -as mine are, and he wore tinted glasses against the glare." - -"Well, and what happened when Mr. Windibank, your stepfather, -returned to France?" - -"Mr. Hosmer Angel came to the house again and proposed that we -should marry before father came back. He was in dreadful earnest -and made me swear, with my hands on the Testament, that whatever -happened I would always be true to him. Mother said he was quite -right to make me swear, and that it was a sign of his passion. -Mother was all in his favour from the first and was even fonder -of him than I was. Then, when they talked of marrying within the -week, I began to ask about father; but they both said never to -mind about father, but just to tell him afterwards, and mother -said she would make it all right with him. I didn't quite like -that, Mr. Holmes. 
It seemed funny that I should ask his leave, as -he was only a few years older than me; but I didn't want to do -anything on the sly, so I wrote to father at Bordeaux, where the -company has its French offices, but the letter came back to me on -the very morning of the wedding." - -"It missed him, then?" - -"Yes, sir; for he had started to England just before it arrived." - -"Ha! that was unfortunate. Your wedding was arranged, then, for -the Friday. Was it to be in church?" - -"Yes, sir, but very quietly. It was to be at St. Saviour's, near -King's Cross, and we were to have breakfast afterwards at the St. -Pancras Hotel. Hosmer came for us in a hansom, but as there were -two of us he put us both into it and stepped himself into a -four-wheeler, which happened to be the only other cab in the -street. We got to the church first, and when the four-wheeler -drove up we waited for him to step out, but he never did, and -when the cabman got down from the box and looked there was no one -there! The cabman said that he could not imagine what had become -of him, for he had seen him get in with his own eyes. That was -last Friday, Mr. Holmes, and I have never seen or heard anything -since then to throw any light upon what became of him." - -"It seems to me that you have been very shamefully treated," said -Holmes. - -"Oh, no, sir! He was too good and kind to leave me so. Why, all -the morning he was saying to me that, whatever happened, I was to -be true; and that even if something quite unforeseen occurred to -separate us, I was always to remember that I was pledged to him, -and that he would claim his pledge sooner or later. It seemed -strange talk for a wedding-morning, but what has happened since -gives a meaning to it." - -"Most certainly it does. Your own opinion is, then, that some -unforeseen catastrophe has occurred to him?" - -"Yes, sir. I believe that he foresaw some danger, or else he -would not have talked so. And then I think that what he foresaw -happened." 
- -"But you have no notion as to what it could have been?" - -"None." - -"One more question. How did your mother take the matter?" - -"She was angry, and said that I was never to speak of the matter -again." - -"And your father? Did you tell him?" - -"Yes; and he seemed to think, with me, that something had -happened, and that I should hear of Hosmer again. As he said, -what interest could anyone have in bringing me to the doors of -the church, and then leaving me? Now, if he had borrowed my -money, or if he had married me and got my money settled on him, -there might be some reason, but Hosmer was very independent about -money and never would look at a shilling of mine. And yet, what -could have happened? And why could he not write? Oh, it drives me -half-mad to think of it, and I can't sleep a wink at night." She -pulled a little handkerchief out of her muff and began to sob -heavily into it. - -"I shall glance into the case for you," said Holmes, rising, "and -I have no doubt that we shall reach some definite result. Let the -weight of the matter rest upon me now, and do not let your mind -dwell upon it further. Above all, try to let Mr. Hosmer Angel -vanish from your memory, as he has done from your life." - -"Then you don't think I'll see him again?" - -"I fear not." - -"Then what has happened to him?" - -"You will leave that question in my hands. I should like an -accurate description of him and any letters of his which you can -spare." - -"I advertised for him in last Saturday's Chronicle," said she. -"Here is the slip and here are four letters from him." - -"Thank you. And your address?" - -"No. 31 Lyon Place, Camberwell." - -"Mr. Angel's address you never had, I understand. Where is your -father's place of business?" - -"He travels for Westhouse & Marbank, the great claret importers -of Fenchurch Street." - -"Thank you. You have made your statement very clearly. You will -leave the papers here, and remember the advice which I have given -you. 
Let the whole incident be a sealed book, and do not allow it -to affect your life." - -"You are very kind, Mr. Holmes, but I cannot do that. I shall be -true to Hosmer. He shall find me ready when he comes back." - -For all the preposterous hat and the vacuous face, there was -something noble in the simple faith of our visitor which -compelled our respect. She laid her little bundle of papers upon -the table and went her way, with a promise to come again whenever -she might be summoned. - -Sherlock Holmes sat silent for a few minutes with his fingertips -still pressed together, his legs stretched out in front of him, -and his gaze directed upward to the ceiling. Then he took down -from the rack the old and oily clay pipe, which was to him as a -counsellor, and, having lit it, he leaned back in his chair, with -the thick blue cloud-wreaths spinning up from him, and a look of -infinite languor in his face. - -"Quite an interesting study, that maiden," he observed. "I found -her more interesting than her little problem, which, by the way, -is rather a trite one. You will find parallel cases, if you -consult my index, in Andover in '77, and there was something of -the sort at The Hague last year. Old as is the idea, however, -there were one or two details which were new to me. But the -maiden herself was most instructive." - -"You appeared to read a good deal upon her which was quite -invisible to me," I remarked. - -"Not invisible but unnoticed, Watson. You did not know where to -look, and so you missed all that was important. I can never bring -you to realise the importance of sleeves, the suggestiveness of -thumb-nails, or the great issues that may hang from a boot-lace. -Now, what did you gather from that woman's appearance? Describe -it." - -"Well, she had a slate-coloured, broad-brimmed straw hat, with a -feather of a brickish red. Her jacket was black, with black beads -sewn upon it, and a fringe of little black jet ornaments. 
Her -dress was brown, rather darker than coffee colour, with a little -purple plush at the neck and sleeves. Her gloves were greyish and -were worn through at the right forefinger. Her boots I didn't -observe. She had small round, hanging gold earrings, and a -general air of being fairly well-to-do in a vulgar, comfortable, -easy-going way." - -Sherlock Holmes clapped his hands softly together and chuckled. - -"'Pon my word, Watson, you are coming along wonderfully. You have -really done very well indeed. It is true that you have missed -everything of importance, but you have hit upon the method, and -you have a quick eye for colour. Never trust to general -impressions, my boy, but concentrate yourself upon details. My -first glance is always at a woman's sleeve. In a man it is -perhaps better first to take the knee of the trouser. As you -observe, this woman had plush upon her sleeves, which is a most -useful material for showing traces. The double line a little -above the wrist, where the typewritist presses against the table, -was beautifully defined. The sewing-machine, of the hand type, -leaves a similar mark, but only on the left arm, and on the side -of it farthest from the thumb, instead of being right across the -broadest part, as this was. I then glanced at her face, and, -observing the dint of a pince-nez at either side of her nose, I -ventured a remark upon short sight and typewriting, which seemed -to surprise her." - -"It surprised me." - -"But, surely, it was obvious. I was then much surprised and -interested on glancing down to observe that, though the boots -which she was wearing were not unlike each other, they were -really odd ones; the one having a slightly decorated toe-cap, and -the other a plain one. One was buttoned only in the two lower -buttons out of five, and the other at the first, third, and -fifth. 
Now, when you see that a young lady, otherwise neatly -dressed, has come away from home with odd boots, half-buttoned, -it is no great deduction to say that she came away in a hurry." - -"And what else?" I asked, keenly interested, as I always was, by -my friend's incisive reasoning. - -"I noted, in passing, that she had written a note before leaving -home but after being fully dressed. You observed that her right -glove was torn at the forefinger, but you did not apparently see -that both glove and finger were stained with violet ink. She had -written in a hurry and dipped her pen too deep. It must have been -this morning, or the mark would not remain clear upon the finger. -All this is amusing, though rather elementary, but I must go back -to business, Watson. Would you mind reading me the advertised -description of Mr. Hosmer Angel?" - -I held the little printed slip to the light. - -"Missing," it said, "on the morning of the fourteenth, a gentleman -named Hosmer Angel. About five ft. seven in. in height; -strongly built, sallow complexion, black hair, a little bald in -the centre, bushy, black side-whiskers and moustache; tinted -glasses, slight infirmity of speech. Was dressed, when last seen, -in black frock-coat faced with silk, black waistcoat, gold Albert -chain, and grey Harris tweed trousers, with brown gaiters over -elastic-sided boots. Known to have been employed in an office in -Leadenhall Street. Anybody bringing--" - -"That will do," said Holmes. "As to the letters," he continued, -glancing over them, "they are very commonplace. Absolutely no -clue in them to Mr. Angel, save that he quotes Balzac once. There -is one remarkable point, however, which will no doubt strike -you." - -"They are typewritten," I remarked. - -"Not only that, but the signature is typewritten. Look at the -neat little 'Hosmer Angel' at the bottom. There is a date, you -see, but no superscription except Leadenhall Street, which is -rather vague. 
The point about the signature is very suggestive--in -fact, we may call it conclusive." - -"Of what?" - -"My dear fellow, is it possible you do not see how strongly it -bears upon the case?" - -"I cannot say that I do unless it were that he wished to be able -to deny his signature if an action for breach of promise were -instituted." - -"No, that was not the point. However, I shall write two letters, -which should settle the matter. One is to a firm in the City, the -other is to the young lady's stepfather, Mr. Windibank, asking -him whether he could meet us here at six o'clock tomorrow -evening. It is just as well that we should do business with the -male relatives. And now, Doctor, we can do nothing until the -answers to those letters come, so we may put our little problem -upon the shelf for the interim." - -I had had so many reasons to believe in my friend's subtle powers -of reasoning and extraordinary energy in action that I felt that -he must have some solid grounds for the assured and easy -demeanour with which he treated the singular mystery which he had -been called upon to fathom. Once only had I known him to fail, in -the case of the King of Bohemia and of the Irene Adler -photograph; but when I looked back to the weird business of the -Sign of Four, and the extraordinary circumstances connected with -the Study in Scarlet, I felt that it would be a strange tangle -indeed which he could not unravel. - -I left him then, still puffing at his black clay pipe, with the -conviction that when I came again on the next evening I would -find that he held in his hands all the clues which would lead up -to the identity of the disappearing bridegroom of Miss Mary -Sutherland. - -A professional case of great gravity was engaging my own -attention at the time, and the whole of next day I was busy at -the bedside of the sufferer. 
It was not until close upon six -o'clock that I found myself free and was able to spring into a -hansom and drive to Baker Street, half afraid that I might be too -late to assist at the dénouement of the little mystery. I found -Sherlock Holmes alone, however, half asleep, with his long, thin -form curled up in the recesses of his armchair. A formidable -array of bottles and test-tubes, with the pungent cleanly smell -of hydrochloric acid, told me that he had spent his day in the -chemical work which was so dear to him. - -"Well, have you solved it?" I asked as I entered. - -"Yes. It was the bisulphate of baryta." - -"No, no, the mystery!" I cried. - -"Oh, that! I thought of the salt that I have been working upon. -There was never any mystery in the matter, though, as I said -yesterday, some of the details are of interest. The only drawback -is that there is no law, I fear, that can touch the scoundrel." - -"Who was he, then, and what was his object in deserting Miss -Sutherland?" - -The question was hardly out of my mouth, and Holmes had not yet -opened his lips to reply, when we heard a heavy footfall in the -passage and a tap at the door. - -"This is the girl's stepfather, Mr. James Windibank," said -Holmes. "He has written to me to say that he would be here at -six. Come in!" - -The man who entered was a sturdy, middle-sized fellow, some -thirty years of age, clean-shaven, and sallow-skinned, with a -bland, insinuating manner, and a pair of wonderfully sharp and -penetrating grey eyes. He shot a questioning glance at each of -us, placed his shiny top-hat upon the sideboard, and with a -slight bow sidled down into the nearest chair. - -"Good-evening, Mr. James Windibank," said Holmes. "I think that -this typewritten letter is from you, in which you made an -appointment with me for six o'clock?" - -"Yes, sir. I am afraid that I am a little late, but I am not -quite my own master, you know. 
I am sorry that Miss Sutherland -has troubled you about this little matter, for I think it is far -better not to wash linen of the sort in public. It was quite -against my wishes that she came, but she is a very excitable, -impulsive girl, as you may have noticed, and she is not easily -controlled when she has made up her mind on a point. Of course, I -did not mind you so much, as you are not connected with the -official police, but it is not pleasant to have a family -misfortune like this noised abroad. Besides, it is a useless -expense, for how could you possibly find this Hosmer Angel?" - -"On the contrary," said Holmes quietly; "I have every reason to -believe that I will succeed in discovering Mr. Hosmer Angel." - -Mr. Windibank gave a violent start and dropped his gloves. "I am -delighted to hear it," he said. - -"It is a curious thing," remarked Holmes, "that a typewriter has -really quite as much individuality as a man's handwriting. Unless -they are quite new, no two of them write exactly alike. Some -letters get more worn than others, and some wear only on one -side. Now, you remark in this note of yours, Mr. Windibank, that -in every case there is some little slurring over of the 'e,' and -a slight defect in the tail of the 'r.' There are fourteen other -characteristics, but those are the more obvious." - -"We do all our correspondence with this machine at the office, -and no doubt it is a little worn," our visitor answered, glancing -keenly at Holmes with his bright little eyes. - -"And now I will show you what is really a very interesting study, -Mr. Windibank," Holmes continued. "I think of writing another -little monograph some of these days on the typewriter and its -relation to crime. It is a subject to which I have devoted some -little attention. I have here four letters which purport to come -from the missing man. They are all typewritten. 
In each case, not -only are the 'e's' slurred and the 'r's' tailless, but you will -observe, if you care to use my magnifying lens, that the fourteen -other characteristics to which I have alluded are there as well." - -Mr. Windibank sprang out of his chair and picked up his hat. "I -cannot waste time over this sort of fantastic talk, Mr. Holmes," -he said. "If you can catch the man, catch him, and let me know -when you have done it." - -"Certainly," said Holmes, stepping over and turning the key in -the door. "I let you know, then, that I have caught him!" - -"What! where?" shouted Mr. Windibank, turning white to his lips -and glancing about him like a rat in a trap. - -"Oh, it won't do--really it won't," said Holmes suavely. "There -is no possible getting out of it, Mr. Windibank. It is quite too -transparent, and it was a very bad compliment when you said that -it was impossible for me to solve so simple a question. That's -right! Sit down and let us talk it over." - -Our visitor collapsed into a chair, with a ghastly face and a -glitter of moisture on his brow. "It--it's not actionable," he -stammered. - -"I am very much afraid that it is not. But between ourselves, -Windibank, it was as cruel and selfish and heartless a trick in a -petty way as ever came before me. Now, let me just run over the -course of events, and you will contradict me if I go wrong." - -The man sat huddled up in his chair, with his head sunk upon his -breast, like one who is utterly crushed. Holmes stuck his feet up -on the corner of the mantelpiece and, leaning back with his hands -in his pockets, began talking, rather to himself, as it seemed, -than to us. - -"The man married a woman very much older than himself for her -money," said he, "and he enjoyed the use of the money of the -daughter as long as she lived with them. It was a considerable -sum, for people in their position, and the loss of it would have -made a serious difference. It was worth an effort to preserve it. 
-The daughter was of a good, amiable disposition, but affectionate -and warm-hearted in her ways, so that it was evident that with -her fair personal advantages, and her little income, she would -not be allowed to remain single long. Now her marriage would -mean, of course, the loss of a hundred a year, so what does her -stepfather do to prevent it? He takes the obvious course of -keeping her at home and forbidding her to seek the company of -people of her own age. But soon he found that that would not -answer forever. She became restive, insisted upon her rights, and -finally announced her positive intention of going to a certain -ball. What does her clever stepfather do then? He conceives an -idea more creditable to his head than to his heart. With the -connivance and assistance of his wife he disguised himself, -covered those keen eyes with tinted glasses, masked the face with -a moustache and a pair of bushy whiskers, sunk that clear voice -into an insinuating whisper, and doubly secure on account of the -girl's short sight, he appears as Mr. Hosmer Angel, and keeps off -other lovers by making love himself." - -"It was only a joke at first," groaned our visitor. "We never -thought that she would have been so carried away." - -"Very likely not. However that may be, the young lady was very -decidedly carried away, and, having quite made up her mind that -her stepfather was in France, the suspicion of treachery never -for an instant entered her mind. She was flattered by the -gentleman's attentions, and the effect was increased by the -loudly expressed admiration of her mother. Then Mr. Angel began -to call, for it was obvious that the matter should be pushed as -far as it would go if a real effect were to be produced. There -were meetings, and an engagement, which would finally secure the -girl's affections from turning towards anyone else. But the -deception could not be kept up forever. These pretended journeys -to France were rather cumbrous. 
The thing to do was clearly to -bring the business to an end in such a dramatic manner that it -would leave a permanent impression upon the young lady's mind and -prevent her from looking upon any other suitor for some time to -come. Hence those vows of fidelity exacted upon a Testament, and -hence also the allusions to a possibility of something happening -on the very morning of the wedding. James Windibank wished Miss -Sutherland to be so bound to Hosmer Angel, and so uncertain as to -his fate, that for ten years to come, at any rate, she would not -listen to another man. As far as the church door he brought her, -and then, as he could go no farther, he conveniently vanished -away by the old trick of stepping in at one door of a -four-wheeler and out at the other. I think that was the chain of -events, Mr. Windibank!" - -Our visitor had recovered something of his assurance while Holmes -had been talking, and he rose from his chair now with a cold -sneer upon his pale face. - -"It may be so, or it may not, Mr. Holmes," said he, "but if you -are so very sharp you ought to be sharp enough to know that it is -you who are breaking the law now, and not me. I have done nothing -actionable from the first, but as long as you keep that door -locked you lay yourself open to an action for assault and illegal -constraint." - -"The law cannot, as you say, touch you," said Holmes, unlocking -and throwing open the door, "yet there never was a man who -deserved punishment more. If the young lady has a brother or a -friend, he ought to lay a whip across your shoulders. By Jove!" -he continued, flushing up at the sight of the bitter sneer upon -the man's face, "it is not part of my duties to my client, but -here's a hunting crop handy, and I think I shall just treat -myself to--" He took two swift steps to the whip, but before he -could grasp it there was a wild clatter of steps upon the stairs, -the heavy hall door banged, and from the window we could see Mr. 
-James Windibank running at the top of his speed down the road. - -"There's a cold-blooded scoundrel!" said Holmes, laughing, as he -threw himself down into his chair once more. "That fellow will -rise from crime to crime until he does something very bad, and -ends on a gallows. The case has, in some respects, been not -entirely devoid of interest." - -"I cannot now entirely see all the steps of your reasoning," I -remarked. - -"Well, of course it was obvious from the first that this Mr. -Hosmer Angel must have some strong object for his curious -conduct, and it was equally clear that the only man who really -profited by the incident, as far as we could see, was the -stepfather. Then the fact that the two men were never together, -but that the one always appeared when the other was away, was -suggestive. So were the tinted spectacles and the curious voice, -which both hinted at a disguise, as did the bushy whiskers. My -suspicions were all confirmed by his peculiar action in -typewriting his signature, which, of course, inferred that his -handwriting was so familiar to her that she would recognise even -the smallest sample of it. You see all these isolated facts, -together with many minor ones, all pointed in the same -direction." - -"And how did you verify them?" - -"Having once spotted my man, it was easy to get corroboration. I -knew the firm for which this man worked. Having taken the printed -description. I eliminated everything from it which could be the -result of a disguise--the whiskers, the glasses, the voice, and I -sent it to the firm, with a request that they would inform me -whether it answered to the description of any of their -travellers. I had already noticed the peculiarities of the -typewriter, and I wrote to the man himself at his business -address asking him if he would come here. As I expected, his -reply was typewritten and revealed the same trivial but -characteristic defects. 
The same post brought me a letter from -Westhouse & Marbank, of Fenchurch Street, to say that the -description tallied in every respect with that of their employé, -James Windibank. Voilà tout!" - -"And Miss Sutherland?" - -"If I tell her she will not believe me. You may remember the old -Persian saying, 'There is danger for him who taketh the tiger -cub, and danger also for whoso snatches a delusion from a woman.' -There is as much sense in Hafiz as in Horace, and as much -knowledge of the world." - - - -ADVENTURE IV. THE BOSCOMBE VALLEY MYSTERY - -We were seated at breakfast one morning, my wife and I, when the -maid brought in a telegram. It was from Sherlock Holmes and ran -in this way: - -"Have you a couple of days to spare? Have just been wired for from -the west of England in connection with Boscombe Valley tragedy. -Shall be glad if you will come with me. Air and scenery perfect. -Leave Paddington by the 11:15." - -"What do you say, dear?" said my wife, looking across at me. -"Will you go?" - -"I really don't know what to say. I have a fairly long list at -present." - -"Oh, Anstruther would do your work for you. You have been looking -a little pale lately. I think that the change would do you good, -and you are always so interested in Mr. Sherlock Holmes' cases." - -"I should be ungrateful if I were not, seeing what I gained -through one of them," I answered. "But if I am to go, I must pack -at once, for I have only half an hour." - -My experience of camp life in Afghanistan had at least had the -effect of making me a prompt and ready traveller. My wants were -few and simple, so that in less than the time stated I was in a -cab with my valise, rattling away to Paddington Station. Sherlock -Holmes was pacing up and down the platform, his tall, gaunt -figure made even gaunter and taller by his long grey -travelling-cloak and close-fitting cloth cap. - -"It is really very good of you to come, Watson," said he. 
"It -makes a considerable difference to me, having someone with me on -whom I can thoroughly rely. Local aid is always either worthless -or else biassed. If you will keep the two corner seats I shall -get the tickets." - -We had the carriage to ourselves save for an immense litter of -papers which Holmes had brought with him. Among these he rummaged -and read, with intervals of note-taking and of meditation, until -we were past Reading. Then he suddenly rolled them all into a -gigantic ball and tossed them up onto the rack. - -"Have you heard anything of the case?" he asked. - -"Not a word. I have not seen a paper for some days." - -"The London press has not had very full accounts. I have just -been looking through all the recent papers in order to master the -particulars. It seems, from what I gather, to be one of those -simple cases which are so extremely difficult." - -"That sounds a little paradoxical." - -"But it is profoundly true. Singularity is almost invariably a -clue. The more featureless and commonplace a crime is, the more -difficult it is to bring it home. In this case, however, they -have established a very serious case against the son of the -murdered man." - -"It is a murder, then?" - -"Well, it is conjectured to be so. I shall take nothing for -granted until I have the opportunity of looking personally into -it. I will explain the state of things to you, as far as I have -been able to understand it, in a very few words. - -"Boscombe Valley is a country district not very far from Ross, in -Herefordshire. The largest landed proprietor in that part is a -Mr. John Turner, who made his money in Australia and returned -some years ago to the old country. One of the farms which he -held, that of Hatherley, was let to Mr. Charles McCarthy, who was -also an ex-Australian. The men had known each other in the -colonies, so that it was not unnatural that when they came to -settle down they should do so as near each other as possible. 
-Turner was apparently the richer man, so McCarthy became his -tenant but still remained, it seems, upon terms of perfect -equality, as they were frequently together. McCarthy had one son, -a lad of eighteen, and Turner had an only daughter of the same -age, but neither of them had wives living. They appear to have -avoided the society of the neighbouring English families and to -have led retired lives, though both the McCarthys were fond of -sport and were frequently seen at the race-meetings of the -neighbourhood. McCarthy kept two servants--a man and a girl. -Turner had a considerable household, some half-dozen at the -least. That is as much as I have been able to gather about the -families. Now for the facts. - -"On June 3rd, that is, on Monday last, McCarthy left his house at -Hatherley about three in the afternoon and walked down to the -Boscombe Pool, which is a small lake formed by the spreading out -of the stream which runs down the Boscombe Valley. He had been -out with his serving-man in the morning at Ross, and he had told -the man that he must hurry, as he had an appointment of -importance to keep at three. From that appointment he never came -back alive. - -"From Hatherley Farm-house to the Boscombe Pool is a quarter of a -mile, and two people saw him as he passed over this ground. One -was an old woman, whose name is not mentioned, and the other was -William Crowder, a game-keeper in the employ of Mr. Turner. Both -these witnesses depose that Mr. McCarthy was walking alone. The -game-keeper adds that within a few minutes of his seeing Mr. -McCarthy pass he had seen his son, Mr. James McCarthy, going the -same way with a gun under his arm. To the best of his belief, the -father was actually in sight at the time, and the son was -following him. He thought no more of the matter until he heard in -the evening of the tragedy that had occurred. - -"The two McCarthys were seen after the time when William Crowder, -the game-keeper, lost sight of them. 
The Boscombe Pool is thickly -wooded round, with just a fringe of grass and of reeds round the -edge. A girl of fourteen, Patience Moran, who is the daughter of -the lodge-keeper of the Boscombe Valley estate, was in one of the -woods picking flowers. She states that while she was there she -saw, at the border of the wood and close by the lake, Mr. -McCarthy and his son, and that they appeared to be having a -violent quarrel. She heard Mr. McCarthy the elder using very -strong language to his son, and she saw the latter raise up his -hand as if to strike his father. She was so frightened by their -violence that she ran away and told her mother when she reached -home that she had left the two McCarthys quarrelling near -Boscombe Pool, and that she was afraid that they were going to -fight. She had hardly said the words when young Mr. McCarthy came -running up to the lodge to say that he had found his father dead -in the wood, and to ask for the help of the lodge-keeper. He was -much excited, without either his gun or his hat, and his right -hand and sleeve were observed to be stained with fresh blood. On -following him they found the dead body stretched out upon the -grass beside the pool. The head had been beaten in by repeated -blows of some heavy and blunt weapon. The injuries were such as -might very well have been inflicted by the butt-end of his son's -gun, which was found lying on the grass within a few paces of the -body. Under these circumstances the young man was instantly -arrested, and a verdict of 'wilful murder' having been returned -at the inquest on Tuesday, he was on Wednesday brought before the -magistrates at Ross, who have referred the case to the next -Assizes. Those are the main facts of the case as they came out -before the coroner and the police-court." - -"I could hardly imagine a more damning case," I remarked. "If -ever circumstantial evidence pointed to a criminal it does so -here." 
- -"Circumstantial evidence is a very tricky thing," answered Holmes -thoughtfully. "It may seem to point very straight to one thing, -but if you shift your own point of view a little, you may find it -pointing in an equally uncompromising manner to something -entirely different. It must be confessed, however, that the case -looks exceedingly grave against the young man, and it is very -possible that he is indeed the culprit. There are several people -in the neighbourhood, however, and among them Miss Turner, the -daughter of the neighbouring landowner, who believe in his -innocence, and who have retained Lestrade, whom you may recollect -in connection with the Study in Scarlet, to work out the case in -his interest. Lestrade, being rather puzzled, has referred the -case to me, and hence it is that two middle-aged gentlemen are -flying westward at fifty miles an hour instead of quietly -digesting their breakfasts at home." - -"I am afraid," said I, "that the facts are so obvious that you -will find little credit to be gained out of this case." - -"There is nothing more deceptive than an obvious fact," he -answered, laughing. "Besides, we may chance to hit upon some -other obvious facts which may have been by no means obvious to -Mr. Lestrade. You know me too well to think that I am boasting -when I say that I shall either confirm or destroy his theory by -means which he is quite incapable of employing, or even of -understanding. To take the first example to hand, I very clearly -perceive that in your bedroom the window is upon the right-hand -side, and yet I question whether Mr. Lestrade would have noted -even so self-evident a thing as that." - -"How on earth--" - -"My dear fellow, I know you well. I know the military neatness -which characterises you. 
You shave every morning, and in this -season you shave by the sunlight; but since your shaving is less -and less complete as we get farther back on the left side, until -it becomes positively slovenly as we get round the angle of the -jaw, it is surely very clear that that side is less illuminated -than the other. I could not imagine a man of your habits looking -at himself in an equal light and being satisfied with such a -result. I only quote this as a trivial example of observation and -inference. Therein lies my métier, and it is just possible that -it may be of some service in the investigation which lies before -us. There are one or two minor points which were brought out in -the inquest, and which are worth considering." - -"What are they?" - -"It appears that his arrest did not take place at once, but after -the return to Hatherley Farm. On the inspector of constabulary -informing him that he was a prisoner, he remarked that he was not -surprised to hear it, and that it was no more than his deserts. -This observation of his had the natural effect of removing any -traces of doubt which might have remained in the minds of the -coroner's jury." - -"It was a confession," I ejaculated. - -"No, for it was followed by a protestation of innocence." - -"Coming on the top of such a damning series of events, it was at -least a most suspicious remark." - -"On the contrary," said Holmes, "it is the brightest rift which I -can at present see in the clouds. However innocent he might be, -he could not be such an absolute imbecile as not to see that the -circumstances were very black against him. Had he appeared -surprised at his own arrest, or feigned indignation at it, I -should have looked upon it as highly suspicious, because such -surprise or anger would not be natural under the circumstances, -and yet might appear to be the best policy to a scheming man. 
His -frank acceptance of the situation marks him as either an innocent -man, or else as a man of considerable self-restraint and -firmness. As to his remark about his deserts, it was also not -unnatural if you consider that he stood beside the dead body of -his father, and that there is no doubt that he had that very day -so far forgotten his filial duty as to bandy words with him, and -even, according to the little girl whose evidence is so -important, to raise his hand as if to strike him. The -self-reproach and contrition which are displayed in his remark -appear to me to be the signs of a healthy mind rather than of a -guilty one." - -I shook my head. "Many men have been hanged on far slighter -evidence," I remarked. - -"So they have. And many men have been wrongfully hanged." - -"What is the young man's own account of the matter?" - -"It is, I am afraid, not very encouraging to his supporters, -though there are one or two points in it which are suggestive. -You will find it here, and may read it for yourself." - -He picked out from his bundle a copy of the local Herefordshire -paper, and having turned down the sheet he pointed out the -paragraph in which the unfortunate young man had given his own -statement of what had occurred. I settled myself down in the -corner of the carriage and read it very carefully. It ran in this -way: - -"Mr. James McCarthy, the only son of the deceased, was then called -and gave evidence as follows: 'I had been away from home for -three days at Bristol, and had only just returned upon the -morning of last Monday, the 3rd. My father was absent from home at -the time of my arrival, and I was informed by the maid that he -had driven over to Ross with John Cobb, the groom. Shortly after -my return I heard the wheels of his trap in the yard, and, -looking out of my window, I saw him get out and walk rapidly out -of the yard, though I was not aware in which direction he was -going. 
I then took my gun and strolled out in the direction of -the Boscombe Pool, with the intention of visiting the rabbit -warren which is upon the other side. On my way I saw William -Crowder, the game-keeper, as he had stated in his evidence; but -he is mistaken in thinking that I was following my father. I had -no idea that he was in front of me. When about a hundred yards -from the pool I heard a cry of "Cooee!" which was a usual signal -between my father and myself. I then hurried forward, and found -him standing by the pool. He appeared to be much surprised at -seeing me and asked me rather roughly what I was doing there. A -conversation ensued which led to high words and almost to blows, -for my father was a man of a very violent temper. Seeing that his -passion was becoming ungovernable, I left him and returned -towards Hatherley Farm. I had not gone more than 150 yards, -however, when I heard a hideous outcry behind me, which caused me -to run back again. I found my father expiring upon the ground, -with his head terribly injured. I dropped my gun and held him in -my arms, but he almost instantly expired. I knelt beside him for -some minutes, and then made my way to Mr. Turner's lodge-keeper, -his house being the nearest, to ask for assistance. I saw no one -near my father when I returned, and I have no idea how he came by -his injuries. He was not a popular man, being somewhat cold and -forbidding in his manners, but he had, as far as I know, no -active enemies. I know nothing further of the matter.' - -"The Coroner: Did your father make any statement to you before -he died? - -"Witness: He mumbled a few words, but I could only catch some -allusion to a rat. - -"The Coroner: What did you understand by that? - -"Witness: It conveyed no meaning to me. I thought that he was -delirious. - -"The Coroner: What was the point upon which you and your father -had this final quarrel? - -"Witness: I should prefer not to answer. 
- -"The Coroner: I am afraid that I must press it. - -"Witness: It is really impossible for me to tell you. I can -assure you that it has nothing to do with the sad tragedy which -followed. - -"The Coroner: That is for the court to decide. I need not point -out to you that your refusal to answer will prejudice your case -considerably in any future proceedings which may arise. - -"Witness: I must still refuse. - -"The Coroner: I understand that the cry of 'Cooee' was a common -signal between you and your father? - -"Witness: It was. - -"The Coroner: How was it, then, that he uttered it before he saw -you, and before he even knew that you had returned from Bristol? - -"Witness (with considerable confusion): I do not know. - -"A Juryman: Did you see nothing which aroused your suspicions -when you returned on hearing the cry and found your father -fatally injured? - -"Witness: Nothing definite. - -"The Coroner: What do you mean? - -"Witness: I was so disturbed and excited as I rushed out into -the open, that I could think of nothing except of my father. Yet -I have a vague impression that as I ran forward something lay -upon the ground to the left of me. It seemed to me to be -something grey in colour, a coat of some sort, or a plaid perhaps. -When I rose from my father I looked round for it, but it was -gone. - -"'Do you mean that it disappeared before you went for help?' - -"'Yes, it was gone.' - -"'You cannot say what it was?' - -"'No, I had a feeling something was there.' - -"'How far from the body?' - -"'A dozen yards or so.' - -"'And how far from the edge of the wood?' - -"'About the same.' - -"'Then if it was removed it was while you were within a dozen -yards of it?' - -"'Yes, but with my back towards it.' - -"This concluded the examination of the witness." - -"I see," said I as I glanced down the column, "that the coroner -in his concluding remarks was rather severe upon young McCarthy. 
-He calls attention, and with reason, to the discrepancy about his -father having signalled to him before seeing him, also to his -refusal to give details of his conversation with his father, and -his singular account of his father's dying words. They are all, -as he remarks, very much against the son." - -Holmes laughed softly to himself and stretched himself out upon -the cushioned seat. "Both you and the coroner have been at some -pains," said he, "to single out the very strongest points in the -young man's favour. Don't you see that you alternately give him -credit for having too much imagination and too little? Too -little, if he could not invent a cause of quarrel which would -give him the sympathy of the jury; too much, if he evolved from -his own inner consciousness anything so outré as a dying -reference to a rat, and the incident of the vanishing cloth. No, -sir, I shall approach this case from the point of view that what -this young man says is true, and we shall see whither that -hypothesis will lead us. And now here is my pocket Petrarch, and -not another word shall I say of this case until we are on the -scene of action. We lunch at Swindon, and I see that we shall be -there in twenty minutes." - -It was nearly four o'clock when we at last, after passing through -the beautiful Stroud Valley, and over the broad gleaming Severn, -found ourselves at the pretty little country-town of Ross. A -lean, ferret-like man, furtive and sly-looking, was waiting for -us upon the platform. In spite of the light brown dustcoat and -leather-leggings which he wore in deference to his rustic -surroundings, I had no difficulty in recognising Lestrade, of -Scotland Yard. With him we drove to the Hereford Arms where a -room had already been engaged for us. - -"I have ordered a carriage," said Lestrade as we sat over a cup -of tea. "I knew your energetic nature, and that you would not be -happy until you had been on the scene of the crime." 
- -"It was very nice and complimentary of you," Holmes answered. "It -is entirely a question of barometric pressure." - -Lestrade looked startled. "I do not quite follow," he said. - -"How is the glass? Twenty-nine, I see. No wind, and not a cloud -in the sky. I have a caseful of cigarettes here which need -smoking, and the sofa is very much superior to the usual country -hotel abomination. I do not think that it is probable that I -shall use the carriage to-night." - -Lestrade laughed indulgently. "You have, no doubt, already formed -your conclusions from the newspapers," he said. "The case is as -plain as a pikestaff, and the more one goes into it the plainer -it becomes. Still, of course, one can't refuse a lady, and such a -very positive one, too. She has heard of you, and would have your -opinion, though I repeatedly told her that there was nothing -which you could do which I had not already done. Why, bless my -soul! here is her carriage at the door." - -He had hardly spoken before there rushed into the room one of the -most lovely young women that I have ever seen in my life. Her -violet eyes shining, her lips parted, a pink flush upon her -cheeks, all thought of her natural reserve lost in her -overpowering excitement and concern. - -"Oh, Mr. Sherlock Holmes!" she cried, glancing from one to the -other of us, and finally, with a woman's quick intuition, -fastening upon my companion, "I am so glad that you have come. I -have driven down to tell you so. I know that James didn't do it. -I know it, and I want you to start upon your work knowing it, -too. Never let yourself doubt upon that point. We have known each -other since we were little children, and I know his faults as no -one else does; but he is too tender-hearted to hurt a fly. Such a -charge is absurd to anyone who really knows him." - -"I hope we may clear him, Miss Turner," said Sherlock Holmes. -"You may rely upon my doing all that I can." - -"But you have read the evidence. 
You have formed some conclusion? -Do you not see some loophole, some flaw? Do you not yourself -think that he is innocent?" - -"I think that it is very probable." - -"There, now!" she cried, throwing back her head and looking -defiantly at Lestrade. "You hear! He gives me hopes." - -Lestrade shrugged his shoulders. "I am afraid that my colleague -has been a little quick in forming his conclusions," he said. - -"But he is right. Oh! I know that he is right. James never did -it. And about his quarrel with his father, I am sure that the -reason why he would not speak about it to the coroner was because -I was concerned in it." - -"In what way?" asked Holmes. - -"It is no time for me to hide anything. James and his father had -many disagreements about me. Mr. McCarthy was very anxious that -there should be a marriage between us. James and I have always -loved each other as brother and sister; but of course he is young -and has seen very little of life yet, and--and--well, he -naturally did not wish to do anything like that yet. So there -were quarrels, and this, I am sure, was one of them." - -"And your father?" asked Holmes. "Was he in favour of such a -union?" - -"No, he was averse to it also. No one but Mr. McCarthy was in -favour of it." A quick blush passed over her fresh young face as -Holmes shot one of his keen, questioning glances at her. - -"Thank you for this information," said he. "May I see your father -if I call to-morrow?" - -"I am afraid the doctor won't allow it." - -"The doctor?" - -"Yes, have you not heard? Poor father has never been strong for -years back, but this has broken him down completely. He has taken -to his bed, and Dr. Willows says that he is a wreck and that his -nervous system is shattered. Mr. McCarthy was the only man alive -who had known dad in the old days in Victoria." - -"Ha! In Victoria! That is important." - -"Yes, at the mines." - -"Quite so; at the gold-mines, where, as I understand, Mr. Turner -made his money." 
- -"Yes, certainly." - -"Thank you, Miss Turner. You have been of material assistance to -me." - -"You will tell me if you have any news to-morrow. No doubt you -will go to the prison to see James. Oh, if you do, Mr. Holmes, do -tell him that I know him to be innocent." - -"I will, Miss Turner." - -"I must go home now, for dad is very ill, and he misses me so if -I leave him. Good-bye, and God help you in your undertaking." She -hurried from the room as impulsively as she had entered, and we -heard the wheels of her carriage rattle off down the street. - -"I am ashamed of you, Holmes," said Lestrade with dignity after a -few minutes' silence. "Why should you raise up hopes which you -are bound to disappoint? I am not over-tender of heart, but I -call it cruel." - -"I think that I see my way to clearing James McCarthy," said -Holmes. "Have you an order to see him in prison?" - -"Yes, but only for you and me." - -"Then I shall reconsider my resolution about going out. We have -still time to take a train to Hereford and see him to-night?" - -"Ample." - -"Then let us do so. Watson, I fear that you will find it very -slow, but I shall only be away a couple of hours." - -I walked down to the station with them, and then wandered through -the streets of the little town, finally returning to the hotel, -where I lay upon the sofa and tried to interest myself in a -yellow-backed novel. The puny plot of the story was so thin, -however, when compared to the deep mystery through which we were -groping, and I found my attention wander so continually from the -action to the fact, that I at last flung it across the room and -gave myself up entirely to a consideration of the events of the -day. Supposing that this unhappy young man's story were -absolutely true, then what hellish thing, what absolutely -unforeseen and extraordinary calamity could have occurred between -the time when he parted from his father, and the moment when, -drawn back by his screams, he rushed into the glade? 
It was -something terrible and deadly. What could it be? Might not the -nature of the injuries reveal something to my medical instincts? -I rang the bell and called for the weekly county paper, which -contained a verbatim account of the inquest. In the surgeon's -deposition it was stated that the posterior third of the left -parietal bone and the left half of the occipital bone had been -shattered by a heavy blow from a blunt weapon. I marked the spot -upon my own head. Clearly such a blow must have been struck from -behind. That was to some extent in favour of the accused, as when -seen quarrelling he was face to face with his father. Still, it -did not go for very much, for the older man might have turned his -back before the blow fell. Still, it might be worth while to call -Holmes' attention to it. Then there was the peculiar dying -reference to a rat. What could that mean? It could not be -delirium. A man dying from a sudden blow does not commonly become -delirious. No, it was more likely to be an attempt to explain how -he met his fate. But what could it indicate? I cudgelled my -brains to find some possible explanation. And then the incident -of the grey cloth seen by young McCarthy. If that were true the -murderer must have dropped some part of his dress, presumably his -overcoat, in his flight, and must have had the hardihood to -return and to carry it away at the instant when the son was -kneeling with his back turned not a dozen paces off. What a -tissue of mysteries and improbabilities the whole thing was! I -did not wonder at Lestrade's opinion, and yet I had so much faith -in Sherlock Holmes' insight that I could not lose hope as long -as every fresh fact seemed to strengthen his conviction of young -McCarthy's innocence. - -It was late before Sherlock Holmes returned. He came back alone, -for Lestrade was staying in lodgings in the town. - -"The glass still keeps very high," he remarked as he sat down. 
-"It is of importance that it should not rain before we are able -to go over the ground. On the other hand, a man should be at his -very best and keenest for such nice work as that, and I did not -wish to do it when fagged by a long journey. I have seen young -McCarthy." - -"And what did you learn from him?" - -"Nothing." - -"Could he throw no light?" - -"None at all. I was inclined to think at one time that he knew -who had done it and was screening him or her, but I am convinced -now that he is as puzzled as everyone else. He is not a very -quick-witted youth, though comely to look at and, I should think, -sound at heart." - -"I cannot admire his taste," I remarked, "if it is indeed a fact -that he was averse to a marriage with so charming a young lady as -this Miss Turner." - -"Ah, thereby hangs a rather painful tale. This fellow is madly, -insanely, in love with her, but some two years ago, when he was -only a lad, and before he really knew her, for she had been away -five years at a boarding-school, what does the idiot do but get -into the clutches of a barmaid in Bristol and marry her at a -registry office? No one knows a word of the matter, but you can -imagine how maddening it must be to him to be upbraided for not -doing what he would give his very eyes to do, but what he knows -to be absolutely impossible. It was sheer frenzy of this sort -which made him throw his hands up into the air when his father, -at their last interview, was goading him on to propose to Miss -Turner. On the other hand, he had no means of supporting himself, -and his father, who was by all accounts a very hard man, would -have thrown him over utterly had he known the truth. It was with -his barmaid wife that he had spent the last three days in -Bristol, and his father did not know where he was. Mark that -point. It is of importance. 
Good has come out of evil, however, -for the barmaid, finding from the papers that he is in serious -trouble and likely to be hanged, has thrown him over utterly and -has written to him to say that she has a husband already in the -Bermuda Dockyard, so that there is really no tie between them. I -think that that bit of news has consoled young McCarthy for all -that he has suffered." - -"But if he is innocent, who has done it?" - -"Ah! who? I would call your attention very particularly to two -points. One is that the murdered man had an appointment with -someone at the pool, and that the someone could not have been his -son, for his son was away, and he did not know when he would -return. The second is that the murdered man was heard to cry -'Cooee!' before he knew that his son had returned. Those are the -crucial points upon which the case depends. And now let us talk -about George Meredith, if you please, and we shall leave all -minor matters until to-morrow." - -There was no rain, as Holmes had foretold, and the morning broke -bright and cloudless. At nine o'clock Lestrade called for us with -the carriage, and we set off for Hatherley Farm and the Boscombe -Pool. - -"There is serious news this morning," Lestrade observed. "It is -said that Mr. Turner, of the Hall, is so ill that his life is -despaired of." - -"An elderly man, I presume?" said Holmes. - -"About sixty; but his constitution has been shattered by his life -abroad, and he has been in failing health for some time. This -business has had a very bad effect upon him. He was an old friend -of McCarthy's, and, I may add, a great benefactor to him, for I -have learned that he gave him Hatherley Farm rent free." - -"Indeed! That is interesting," said Holmes. - -"Oh, yes! In a hundred other ways he has helped him. Everybody -about here speaks of his kindness to him." - -"Really! 
Does it not strike you as a little singular that this -McCarthy, who appears to have had little of his own, and to have -been under such obligations to Turner, should still talk of -marrying his son to Turner's daughter, who is, presumably, -heiress to the estate, and that in such a very cocksure manner, -as if it were merely a case of a proposal and all else would -follow? It is the more strange, since we know that Turner himself -was averse to the idea. The daughter told us as much. Do you not -deduce something from that?" - -"We have got to the deductions and the inferences," said -Lestrade, winking at me. "I find it hard enough to tackle facts, -Holmes, without flying away after theories and fancies." - -"You are right," said Holmes demurely; "you do find it very hard -to tackle the facts." - -"Anyhow, I have grasped one fact which you seem to find it -difficult to get hold of," replied Lestrade with some warmth. - -"And that is--" - -"That McCarthy senior met his death from McCarthy junior and that -all theories to the contrary are the merest moonshine." - -"Well, moonshine is a brighter thing than fog," said Holmes, -laughing. "But I am very much mistaken if this is not Hatherley -Farm upon the left." - -"Yes, that is it." It was a widespread, comfortable-looking -building, two-storied, slate-roofed, with great yellow blotches -of lichen upon the grey walls. The drawn blinds and the smokeless -chimneys, however, gave it a stricken look, as though the weight -of this horror still lay heavy upon it. We called at the door, -when the maid, at Holmes' request, showed us the boots which her -master wore at the time of his death, and also a pair of the -son's, though not the pair which he had then had. Having measured -these very carefully from seven or eight different points, Holmes -desired to be led to the court-yard, from which we all followed -the winding track which led to Boscombe Pool. 
- -Sherlock Holmes was transformed when he was hot upon such a scent -as this. Men who had only known the quiet thinker and logician of -Baker Street would have failed to recognise him. His face flushed -and darkened. His brows were drawn into two hard black lines, -while his eyes shone out from beneath them with a steely glitter. -His face was bent downward, his shoulders bowed, his lips -compressed, and the veins stood out like whipcord in his long, -sinewy neck. His nostrils seemed to dilate with a purely animal -lust for the chase, and his mind was so absolutely concentrated -upon the matter before him that a question or remark fell -unheeded upon his ears, or, at the most, only provoked a quick, -impatient snarl in reply. Swiftly and silently he made his way -along the track which ran through the meadows, and so by way of -the woods to the Boscombe Pool. It was damp, marshy ground, as is -all that district, and there were marks of many feet, both upon -the path and amid the short grass which bounded it on either -side. Sometimes Holmes would hurry on, sometimes stop dead, and -once he made quite a little detour into the meadow. Lestrade and -I walked behind him, the detective indifferent and contemptuous, -while I watched my friend with the interest which sprang from the -conviction that every one of his actions was directed towards a -definite end. - -The Boscombe Pool, which is a little reed-girt sheet of water -some fifty yards across, is situated at the boundary between the -Hatherley Farm and the private park of the wealthy Mr. Turner. -Above the woods which lined it upon the farther side we could see -the red, jutting pinnacles which marked the site of the rich -landowner's dwelling. On the Hatherley side of the pool the woods -grew very thick, and there was a narrow belt of sodden grass -twenty paces across between the edge of the trees and the reeds -which lined the lake. 
Lestrade showed us the exact spot at which -the body had been found, and, indeed, so moist was the ground, -that I could plainly see the traces which had been left by the -fall of the stricken man. To Holmes, as I could see by his eager -face and peering eyes, very many other things were to be read -upon the trampled grass. He ran round, like a dog who is picking -up a scent, and then turned upon my companion. - -"What did you go into the pool for?" he asked. - -"I fished about with a rake. I thought there might be some weapon -or other trace. But how on earth--" - -"Oh, tut, tut! I have no time! That left foot of yours with its -inward twist is all over the place. A mole could trace it, and -there it vanishes among the reeds. Oh, how simple it would all -have been had I been here before they came like a herd of buffalo -and wallowed all over it. Here is where the party with the -lodge-keeper came, and they have covered all tracks for six or -eight feet round the body. But here are three separate tracks of -the same feet." He drew out a lens and lay down upon his -waterproof to have a better view, talking all the time rather to -himself than to us. "These are young McCarthy's feet. Twice he -was walking, and once he ran swiftly, so that the soles are -deeply marked and the heels hardly visible. That bears out his -story. He ran when he saw his father on the ground. Then here are -the father's feet as he paced up and down. What is this, then? It -is the butt-end of the gun as the son stood listening. And this? -Ha, ha! What have we here? Tiptoes! tiptoes! Square, too, quite -unusual boots! They come, they go, they come again--of course -that was for the cloak. Now where did they come from?" He ran up -and down, sometimes losing, sometimes finding the track until we -were well within the edge of the wood and under the shadow of a -great beech, the largest tree in the neighbourhood. 
Holmes traced -his way to the farther side of this and lay down once more upon -his face with a little cry of satisfaction. For a long time he -remained there, turning over the leaves and dried sticks, -gathering up what seemed to me to be dust into an envelope and -examining with his lens not only the ground but even the bark of -the tree as far as he could reach. A jagged stone was lying among -the moss, and this also he carefully examined and retained. Then -he followed a pathway through the wood until he came to the -highroad, where all traces were lost. - -"It has been a case of considerable interest," he remarked, -returning to his natural manner. "I fancy that this grey house on -the right must be the lodge. I think that I will go in and have a -word with Moran, and perhaps write a little note. Having done -that, we may drive back to our luncheon. You may walk to the cab, -and I shall be with you presently." - -It was about ten minutes before we regained our cab and drove -back into Ross, Holmes still carrying with him the stone which he -had picked up in the wood. - -"This may interest you, Lestrade," he remarked, holding it out. -"The murder was done with it." - -"I see no marks." - -"There are none." - -"How do you know, then?" - -"The grass was growing under it. It had only lain there a few -days. There was no sign of a place whence it had been taken. It -corresponds with the injuries. There is no sign of any other -weapon." - -"And the murderer?" - -"Is a tall man, left-handed, limps with the right leg, wears -thick-soled shooting-boots and a grey cloak, smokes Indian -cigars, uses a cigar-holder, and carries a blunt pen-knife in his -pocket. There are several other indications, but these may be -enough to aid us in our search." - -Lestrade laughed. "I am afraid that I am still a sceptic," he -said. "Theories are all very well, but we have to deal with a -hard-headed British jury." - -"Nous verrons," answered Holmes calmly. 
"You work your own -method, and I shall work mine. I shall be busy this afternoon, -and shall probably return to London by the evening train." - -"And leave your case unfinished?" - -"No, finished." - -"But the mystery?" - -"It is solved." - -"Who was the criminal, then?" - -"The gentleman I describe." - -"But who is he?" - -"Surely it would not be difficult to find out. This is not such a -populous neighbourhood." - -Lestrade shrugged his shoulders. "I am a practical man," he said, -"and I really cannot undertake to go about the country looking -for a left-handed gentleman with a game leg. I should become the -laughing-stock of Scotland Yard." - -"All right," said Holmes quietly. "I have given you the chance. -Here are your lodgings. Good-bye. I shall drop you a line before -I leave." - -Having left Lestrade at his rooms, we drove to our hotel, where -we found lunch upon the table. Holmes was silent and buried in -thought with a pained expression upon his face, as one who finds -himself in a perplexing position. - -"Look here, Watson," he said when the cloth was cleared "just sit -down in this chair and let me preach to you for a little. I don't -know quite what to do, and I should value your advice. Light a -cigar and let me expound." - - "Pray do so." - -"Well, now, in considering this case there are two points about -young McCarthy's narrative which struck us both instantly, -although they impressed me in his favour and you against him. One -was the fact that his father should, according to his account, -cry 'Cooee!' before seeing him. The other was his singular dying -reference to a rat. He mumbled several words, you understand, but -that was all that caught the son's ear. Now from this double -point our research must commence, and we will begin it by -presuming that what the lad says is absolutely true." - -"What of this 'Cooee!' then?" - -"Well, obviously it could not have been meant for the son. The -son, as far as he knew, was in Bristol. 
It was mere chance that -he was within earshot. The 'Cooee!' was meant to attract the -attention of whoever it was that he had the appointment with. But -'Cooee' is a distinctly Australian cry, and one which is used -between Australians. There is a strong presumption that the -person whom McCarthy expected to meet him at Boscombe Pool was -someone who had been in Australia." - -"What of the rat, then?" - -Sherlock Holmes took a folded paper from his pocket and flattened -it out on the table. "This is a map of the Colony of Victoria," -he said. "I wired to Bristol for it last night." He put his hand -over part of the map. "What do you read?" - -"ARAT," I read. - -"And now?" He raised his hand. - -"BALLARAT." - -"Quite so. That was the word the man uttered, and of which his -son only caught the last two syllables. He was trying to utter -the name of his murderer. So and so, of Ballarat." - -"It is wonderful!" I exclaimed. - -"It is obvious. And now, you see, I had narrowed the field down -considerably. The possession of a grey garment was a third point -which, granting the son's statement to be correct, was a -certainty. We have come now out of mere vagueness to the definite -conception of an Australian from Ballarat with a grey cloak." - -"Certainly." - -"And one who was at home in the district, for the pool can only -be approached by the farm or by the estate, where strangers could -hardly wander." - -"Quite so." - -"Then comes our expedition of to-day. By an examination of the -ground I gained the trifling details which I gave to that -imbecile Lestrade, as to the personality of the criminal." - -"But how did you gain them?" - -"You know my method. It is founded upon the observation of -trifles." - -"His height I know that you might roughly judge from the length -of his stride. His boots, too, might be told from their traces." - -"Yes, they were peculiar boots." - -"But his lameness?" - -"The impression of his right foot was always less distinct than -his left. 
He put less weight upon it. Why? Because he limped--he -was lame." - -"But his left-handedness." - -"You were yourself struck by the nature of the injury as recorded -by the surgeon at the inquest. The blow was struck from -immediately behind, and yet was upon the left side. Now, how can -that be unless it were by a left-handed man? He had stood behind -that tree during the interview between the father and son. He had -even smoked there. I found the ash of a cigar, which my special -knowledge of tobacco ashes enables me to pronounce as an Indian -cigar. I have, as you know, devoted some attention to this, and -written a little monograph on the ashes of 140 different -varieties of pipe, cigar, and cigarette tobacco. Having found the -ash, I then looked round and discovered the stump among the moss -where he had tossed it. It was an Indian cigar, of the variety -which are rolled in Rotterdam." - -"And the cigar-holder?" - -"I could see that the end had not been in his mouth. Therefore he -used a holder. The tip had been cut off, not bitten off, but the -cut was not a clean one, so I deduced a blunt pen-knife." - -"Holmes," I said, "you have drawn a net round this man from which -he cannot escape, and you have saved an innocent human life as -truly as if you had cut the cord which was hanging him. I see the -direction in which all this points. The culprit is--" - -"Mr. John Turner," cried the hotel waiter, opening the door of -our sitting-room, and ushering in a visitor. - -The man who entered was a strange and impressive figure. His -slow, limping step and bowed shoulders gave the appearance of -decrepitude, and yet his hard, deep-lined, craggy features, and -his enormous limbs showed that he was possessed of unusual -strength of body and of character. 
His tangled beard, grizzled -hair, and outstanding, drooping eyebrows combined to give an air -of dignity and power to his appearance, but his face was of an -ashen white, while his lips and the corners of his nostrils were -tinged with a shade of blue. It was clear to me at a glance that -he was in the grip of some deadly and chronic disease. - -"Pray sit down on the sofa," said Holmes gently. "You had my -note?" - -"Yes, the lodge-keeper brought it up. You said that you wished to -see me here to avoid scandal." - -"I thought people would talk if I went to the Hall." - -"And why did you wish to see me?" He looked across at my -companion with despair in his weary eyes, as though his question -was already answered. - -"Yes," said Holmes, answering the look rather than the words. "It -is so. I know all about McCarthy." - -The old man sank his face in his hands. "God help me!" he cried. -"But I would not have let the young man come to harm. I give you -my word that I would have spoken out if it went against him at -the Assizes." - -"I am glad to hear you say so," said Holmes gravely. - -"I would have spoken now had it not been for my dear girl. It -would break her heart--it will break her heart when she hears -that I am arrested." - -"It may not come to that," said Holmes. - -"What?" - -"I am no official agent. I understand that it was your daughter -who required my presence here, and I am acting in her interests. -Young McCarthy must be got off, however." - -"I am a dying man," said old Turner. "I have had diabetes for -years. My doctor says it is a question whether I shall live a -month. Yet I would rather die under my own roof than in a gaol." - -Holmes rose and sat down at the table with his pen in his hand -and a bundle of paper before him. "Just tell us the truth," he -said. "I shall jot down the facts. You will sign it, and Watson -here can witness it. Then I could produce your confession at the -last extremity to save young McCarthy. 
I promise you that I shall -not use it unless it is absolutely needed." - -"It's as well," said the old man; "it's a question whether I -shall live to the Assizes, so it matters little to me, but I -should wish to spare Alice the shock. And now I will make the -thing clear to you; it has been a long time in the acting, but -will not take me long to tell. - -"You didn't know this dead man, McCarthy. He was a devil -incarnate. I tell you that. God keep you out of the clutches of -such a man as he. His grip has been upon me these twenty years, -and he has blasted my life. I'll tell you first how I came to be -in his power. - -"It was in the early '60's at the diggings. I was a young chap -then, hot-blooded and reckless, ready to turn my hand at -anything; I got among bad companions, took to drink, had no luck -with my claim, took to the bush, and in a word became what you -would call over here a highway robber. There were six of us, and -we had a wild, free life of it, sticking up a station from time -to time, or stopping the wagons on the road to the diggings. -Black Jack of Ballarat was the name I went under, and our party -is still remembered in the colony as the Ballarat Gang. - -"One day a gold convoy came down from Ballarat to Melbourne, and -we lay in wait for it and attacked it. There were six troopers -and six of us, so it was a close thing, but we emptied four of -their saddles at the first volley. Three of our boys were killed, -however, before we got the swag. I put my pistol to the head of -the wagon-driver, who was this very man McCarthy. I wish to the -Lord that I had shot him then, but I spared him, though I saw his -wicked little eyes fixed on my face, as though to remember every -feature. We got away with the gold, became wealthy men, and made -our way over to England without being suspected. There I parted -from my old pals and determined to settle down to a quiet and -respectable life. 
I bought this estate, which chanced to be in -the market, and I set myself to do a little good with my money, -to make up for the way in which I had earned it. I married, too, -and though my wife died young she left me my dear little Alice. -Even when she was just a baby her wee hand seemed to lead me down -the right path as nothing else had ever done. In a word, I turned -over a new leaf and did my best to make up for the past. All was -going well when McCarthy laid his grip upon me. - -"I had gone up to town about an investment, and I met him in -Regent Street with hardly a coat to his back or a boot to his -foot. - -"'Here we are, Jack,' says he, touching me on the arm; 'we'll be -as good as a family to you. There's two of us, me and my son, and -you can have the keeping of us. If you don't--it's a fine, -law-abiding country is England, and there's always a policeman -within hail.' - -"Well, down they came to the west country, there was no shaking -them off, and there they have lived rent free on my best land -ever since. There was no rest for me, no peace, no forgetfulness; -turn where I would, there was his cunning, grinning face at my -elbow. It grew worse as Alice grew up, for he soon saw I was more -afraid of her knowing my past than of the police. Whatever he -wanted he must have, and whatever it was I gave him without -question, land, money, houses, until at last he asked a thing -which I could not give. He asked for Alice. - -"His son, you see, had grown up, and so had my girl, and as I was -known to be in weak health, it seemed a fine stroke to him that -his lad should step into the whole property. But there I was -firm. I would not have his cursed stock mixed with mine; not that -I had any dislike to the lad, but his blood was in him, and that -was enough. I stood firm. McCarthy threatened. I braved him to do -his worst. We were to meet at the pool midway between our houses -to talk it over. 
- -"When I went down there I found him talking with his son, so I -smoked a cigar and waited behind a tree until he should be alone. -But as I listened to his talk all that was black and bitter in -me seemed to come uppermost. He was urging his son to marry my -daughter with as little regard for what she might think as if she -were a slut from off the streets. It drove me mad to think that I -and all that I held most dear should be in the power of such a -man as this. Could I not snap the bond? I was already a dying and -a desperate man. Though clear of mind and fairly strong of limb, -I knew that my own fate was sealed. But my memory and my girl! -Both could be saved if I could but silence that foul tongue. I -did it, Mr. Holmes. I would do it again. Deeply as I have sinned, -I have led a life of martyrdom to atone for it. But that my girl -should be entangled in the same meshes which held me was more -than I could suffer. I struck him down with no more compunction -than if he had been some foul and venomous beast. His cry brought -back his son; but I had gained the cover of the wood, though I -was forced to go back to fetch the cloak which I had dropped in -my flight. That is the true story, gentlemen, of all that -occurred." - -"Well, it is not for me to judge you," said Holmes as the old man -signed the statement which had been drawn out. "I pray that we -may never be exposed to such a temptation." - -"I pray not, sir. And what do you intend to do?" - -"In view of your health, nothing. You are yourself aware that you -will soon have to answer for your deed at a higher court than the -Assizes. I will keep your confession, and if McCarthy is -condemned I shall be forced to use it. If not, it shall never be -seen by mortal eye; and your secret, whether you be alive or -dead, shall be safe with us." - -"Farewell, then," said the old man solemnly. "Your own deathbeds, -when they come, will be the easier for the thought of the peace -which you have given to mine." 
Tottering and shaking in all his -giant frame, he stumbled slowly from the room. - -"God help us!" said Holmes after a long silence. "Why does fate -play such tricks with poor, helpless worms? I never hear of such -a case as this that I do not think of Baxter's words, and say, -'There, but for the grace of God, goes Sherlock Holmes.'" - -James McCarthy was acquitted at the Assizes on the strength of a -number of objections which had been drawn out by Holmes and -submitted to the defending counsel. Old Turner lived for seven -months after our interview, but he is now dead; and there is -every prospect that the son and daughter may come to live happily -together in ignorance of the black cloud which rests upon their -past. - - - -ADVENTURE V. THE FIVE ORANGE PIPS - -When I glance over my notes and records of the Sherlock Holmes -cases between the years '82 and '90, I am faced by so many which -present strange and interesting features that it is no easy -matter to know which to choose and which to leave. Some, however, -have already gained publicity through the papers, and others have -not offered a field for those peculiar qualities which my friend -possessed in so high a degree, and which it is the object of -these papers to illustrate. Some, too, have baffled his -analytical skill, and would be, as narratives, beginnings without -an ending, while others have been but partially cleared up, and -have their explanations founded rather upon conjecture and -surmise than on that absolute logical proof which was so dear to -him. There is, however, one of these last which was so remarkable -in its details and so startling in its results that I am tempted -to give some account of it in spite of the fact that there are -points in connection with it which never have been, and probably -never will be, entirely cleared up. - -The year '87 furnished us with a long series of cases of greater -or less interest, of which I retain the records. 
Among my -headings under this one twelve months I find an account of the -adventure of the Paradol Chamber, of the Amateur Mendicant -Society, who held a luxurious club in the lower vault of a -furniture warehouse, of the facts connected with the loss of the -British barque "Sophy Anderson", of the singular adventures of the -Grice Patersons in the island of Uffa, and finally of the -Camberwell poisoning case. In the latter, as may be remembered, -Sherlock Holmes was able, by winding up the dead man's watch, to -prove that it had been wound up two hours before, and that -therefore the deceased had gone to bed within that time--a -deduction which was of the greatest importance in clearing up the -case. All these I may sketch out at some future date, but none of -them present such singular features as the strange train of -circumstances which I have now taken up my pen to describe. - -It was in the latter days of September, and the equinoctial gales -had set in with exceptional violence. All day the wind had -screamed and the rain had beaten against the windows, so that -even here in the heart of great, hand-made London we were forced -to raise our minds for the instant from the routine of life and -to recognise the presence of those great elemental forces which -shriek at mankind through the bars of his civilisation, like -untamed beasts in a cage. As evening drew in, the storm grew -higher and louder, and the wind cried and sobbed like a child in -the chimney. Sherlock Holmes sat moodily at one side of the -fireplace cross-indexing his records of crime, while I at the -other was deep in one of Clark Russell's fine sea-stories until -the howl of the gale from without seemed to blend with the text, -and the splash of the rain to lengthen out into the long swash of -the sea waves. My wife was on a visit to her mother's, and for a -few days I was a dweller once more in my old quarters at Baker -Street. 
- -"Why," said I, glancing up at my companion, "that was surely the -bell. Who could come to-night? Some friend of yours, perhaps?" - -"Except yourself I have none," he answered. "I do not encourage -visitors." - -"A client, then?" - -"If so, it is a serious case. Nothing less would bring a man out -on such a day and at such an hour. But I take it that it is more -likely to be some crony of the landlady's." - -Sherlock Holmes was wrong in his conjecture, however, for there -came a step in the passage and a tapping at the door. He -stretched out his long arm to turn the lamp away from himself and -towards the vacant chair upon which a newcomer must sit. - -"Come in!" said he. - -The man who entered was young, some two-and-twenty at the -outside, well-groomed and trimly clad, with something of -refinement and delicacy in his bearing. The streaming umbrella -which he held in his hand, and his long shining waterproof told -of the fierce weather through which he had come. He looked about -him anxiously in the glare of the lamp, and I could see that his -face was pale and his eyes heavy, like those of a man who is -weighed down with some great anxiety. - -"I owe you an apology," he said, raising his golden pince-nez to -his eyes. "I trust that I am not intruding. I fear that I have -brought some traces of the storm and rain into your snug -chamber." - -"Give me your coat and umbrella," said Holmes. "They may rest -here on the hook and will be dry presently. You have come up from -the south-west, I see." - -"Yes, from Horsham." - -"That clay and chalk mixture which I see upon your toe caps is -quite distinctive." - -"I have come for advice." - -"That is easily got." - -"And help." - -"That is not always so easy." - -"I have heard of you, Mr. Holmes. I heard from Major Prendergast -how you saved him in the Tankerville Club scandal." - -"Ah, of course. He was wrongfully accused of cheating at cards." - -"He said that you could solve anything." - -"He said too much." 
- -"That you are never beaten." - -"I have been beaten four times--three times by men, and once by a -woman." - -"But what is that compared with the number of your successes?" - -"It is true that I have been generally successful." - -"Then you may be so with me." - -"I beg that you will draw your chair up to the fire and favour me -with some details as to your case." - -"It is no ordinary one." - -"None of those which come to me are. I am the last court of -appeal." - -"And yet I question, sir, whether, in all your experience, you -have ever listened to a more mysterious and inexplicable chain of -events than those which have happened in my own family." - -"You fill me with interest," said Holmes. "Pray give us the -essential facts from the commencement, and I can afterwards -question you as to those details which seem to me to be most -important." - -The young man pulled his chair up and pushed his wet feet out -towards the blaze. - -"My name," said he, "is John Openshaw, but my own affairs have, -as far as I can understand, little to do with this awful -business. It is a hereditary matter; so in order to give you an -idea of the facts, I must go back to the commencement of the -affair. - -"You must know that my grandfather had two sons--my uncle Elias -and my father Joseph. My father had a small factory at Coventry, -which he enlarged at the time of the invention of bicycling. He -was a patentee of the Openshaw unbreakable tire, and his business -met with such success that he was able to sell it and to retire -upon a handsome competence. - -"My uncle Elias emigrated to America when he was a young man and -became a planter in Florida, where he was reported to have done -very well. At the time of the war he fought in Jackson's army, -and afterwards under Hood, where he rose to be a colonel. When -Lee laid down his arms my uncle returned to his plantation, where -he remained for three or four years. 
About 1869 or 1870 he came -back to Europe and took a small estate in Sussex, near Horsham. -He had made a very considerable fortune in the States, and his -reason for leaving them was his aversion to the negroes, and his -dislike of the Republican policy in extending the franchise to -them. He was a singular man, fierce and quick-tempered, very -foul-mouthed when he was angry, and of a most retiring -disposition. During all the years that he lived at Horsham, I -doubt if ever he set foot in the town. He had a garden and two or -three fields round his house, and there he would take his -exercise, though very often for weeks on end he would never leave -his room. He drank a great deal of brandy and smoked very -heavily, but he would see no society and did not want any -friends, not even his own brother. - -"He didn't mind me; in fact, he took a fancy to me, for at the -time when he saw me first I was a youngster of twelve or so. This -would be in the year 1878, after he had been eight or nine years -in England. He begged my father to let me live with him and he -was very kind to me in his way. When he was sober he used to be -fond of playing backgammon and draughts with me, and he would -make me his representative both with the servants and with the -tradespeople, so that by the time that I was sixteen I was quite -master of the house. I kept all the keys and could go where I -liked and do what I liked, so long as I did not disturb him in -his privacy. There was one singular exception, however, for he -had a single room, a lumber-room up among the attics, which was -invariably locked, and which he would never permit either me or -anyone else to enter. With a boy's curiosity I have peeped -through the keyhole, but I was never able to see more than such a -collection of old trunks and bundles as would be expected in such -a room. - -"One day--it was in March, 1883--a letter with a foreign stamp -lay upon the table in front of the colonel's plate. 
It was not a -common thing for him to receive letters, for his bills were all -paid in ready money, and he had no friends of any sort. 'From -India!' said he as he took it up, 'Pondicherry postmark! What can -this be?' Opening it hurriedly, out there jumped five little -dried orange pips, which pattered down upon his plate. I began to -laugh at this, but the laugh was struck from my lips at the sight -of his face. His lip had fallen, his eyes were protruding, his -skin the colour of putty, and he glared at the envelope which he -still held in his trembling hand, 'K. K. K.!' he shrieked, and -then, 'My God, my God, my sins have overtaken me!' - -"'What is it, uncle?' I cried. - -"'Death,' said he, and rising from the table he retired to his -room, leaving me palpitating with horror. I took up the envelope -and saw scrawled in red ink upon the inner flap, just above the -gum, the letter K three times repeated. There was nothing else -save the five dried pips. What could be the reason of his -overpowering terror? I left the breakfast-table, and as I -ascended the stair I met him coming down with an old rusty key, -which must have belonged to the attic, in one hand, and a small -brass box, like a cashbox, in the other. - -"'They may do what they like, but I'll checkmate them still,' -said he with an oath. 'Tell Mary that I shall want a fire in my -room to-day, and send down to Fordham, the Horsham lawyer.' - -"I did as he ordered, and when the lawyer arrived I was asked to -step up to the room. The fire was burning brightly, and in the -grate there was a mass of black, fluffy ashes, as of burned -paper, while the brass box stood open and empty beside it. As I -glanced at the box I noticed, with a start, that upon the lid was -printed the treble K which I had read in the morning upon the -envelope. - -"'I wish you, John,' said my uncle, 'to witness my will. 
I leave -my estate, with all its advantages and all its disadvantages, to -my brother, your father, whence it will, no doubt, descend to -you. If you can enjoy it in peace, well and good! If you find you -cannot, take my advice, my boy, and leave it to your deadliest -enemy. I am sorry to give you such a two-edged thing, but I can't -say what turn things are going to take. Kindly sign the paper -where Mr. Fordham shows you.' - -"I signed the paper as directed, and the lawyer took it away with -him. The singular incident made, as you may think, the deepest -impression upon me, and I pondered over it and turned it every -way in my mind without being able to make anything of it. Yet I -could not shake off the vague feeling of dread which it left -behind, though the sensation grew less keen as the weeks passed -and nothing happened to disturb the usual routine of our lives. I -could see a change in my uncle, however. He drank more than ever, -and he was less inclined for any sort of society. Most of his -time he would spend in his room, with the door locked upon the -inside, but sometimes he would emerge in a sort of drunken frenzy -and would burst out of the house and tear about the garden with a -revolver in his hand, screaming out that he was afraid of no man, -and that he was not to be cooped up, like a sheep in a pen, by -man or devil. When these hot fits were over, however, he would -rush tumultuously in at the door and lock and bar it behind him, -like a man who can brazen it out no longer against the terror -which lies at the roots of his soul. At such times I have seen -his face, even on a cold day, glisten with moisture, as though it -were new raised from a basin. - -"Well, to come to an end of the matter, Mr. Holmes, and not to -abuse your patience, there came a night when he made one of those -drunken sallies from which he never came back. 
We found him, when -we went to search for him, face downward in a little -green-scummed pool, which lay at the foot of the garden. There -was no sign of any violence, and the water was but two feet deep, -so that the jury, having regard to his known eccentricity, -brought in a verdict of 'suicide.' But I, who knew how he winced -from the very thought of death, had much ado to persuade myself -that he had gone out of his way to meet it. The matter passed, -however, and my father entered into possession of the estate, and -of some 14,000 pounds, which lay to his credit at the bank." - -"One moment," Holmes interposed, "your statement is, I foresee, -one of the most remarkable to which I have ever listened. Let me -have the date of the reception by your uncle of the letter, and -the date of his supposed suicide." - -"The letter arrived on March 10, 1883. His death was seven weeks -later, upon the night of May 2nd." - -"Thank you. Pray proceed." - -"When my father took over the Horsham property, he, at my -request, made a careful examination of the attic, which had been -always locked up. We found the brass box there, although its -contents had been destroyed. On the inside of the cover was a -paper label, with the initials of K. K. K. repeated upon it, and -'Letters, memoranda, receipts, and a register' written beneath. -These, we presume, indicated the nature of the papers which had -been destroyed by Colonel Openshaw. For the rest, there was -nothing of much importance in the attic save a great many -scattered papers and note-books bearing upon my uncle's life in -America. Some of them were of the war time and showed that he had -done his duty well and had borne the repute of a brave soldier. -Others were of a date during the reconstruction of the Southern -states, and were mostly concerned with politics, for he had -evidently taken a strong part in opposing the carpet-bag -politicians who had been sent down from the North. 
- -"Well, it was the beginning of '84 when my father came to live at -Horsham, and all went as well as possible with us until the -January of '85. On the fourth day after the new year I heard my -father give a sharp cry of surprise as we sat together at the -breakfast-table. There he was, sitting with a newly opened -envelope in one hand and five dried orange pips in the -outstretched palm of the other one. He had always laughed at what -he called my cock-and-bull story about the colonel, but he looked -very scared and puzzled now that the same thing had come upon -himself. - -"'Why, what on earth does this mean, John?' he stammered. - -"My heart had turned to lead. 'It is K. K. K.,' said I. - -"He looked inside the envelope. 'So it is,' he cried. 'Here are -the very letters. But what is this written above them?' - -"'Put the papers on the sundial,' I read, peeping over his -shoulder. - -"'What papers? What sundial?' he asked. - -"'The sundial in the garden. There is no other,' said I; 'but the -papers must be those that are destroyed.' - -"'Pooh!' said he, gripping hard at his courage. 'We are in a -civilised land here, and we can't have tomfoolery of this kind. -Where does the thing come from?' - -"'From Dundee,' I answered, glancing at the postmark. - -"'Some preposterous practical joke,' said he. 'What have I to do -with sundials and papers? I shall take no notice of such -nonsense.' - -"'I should certainly speak to the police,' I said. - -"'And be laughed at for my pains. Nothing of the sort.' - -"'Then let me do so?' - -"'No, I forbid you. I won't have a fuss made about such -nonsense.' - -"It was in vain to argue with him, for he was a very obstinate -man. I went about, however, with a heart which was full of -forebodings. - -"On the third day after the coming of the letter my father went -from home to visit an old friend of his, Major Freebody, who is -in command of one of the forts upon Portsdown Hill. 
I was glad -that he should go, for it seemed to me that he was farther from -danger when he was away from home. In that, however, I was in -error. Upon the second day of his absence I received a telegram -from the major, imploring me to come at once. My father had -fallen over one of the deep chalk-pits which abound in the -neighbourhood, and was lying senseless, with a shattered skull. I -hurried to him, but he passed away without having ever recovered -his consciousness. He had, as it appears, been returning from -Fareham in the twilight, and as the country was unknown to him, -and the chalk-pit unfenced, the jury had no hesitation in -bringing in a verdict of 'death from accidental causes.' -Carefully as I examined every fact connected with his death, I -was unable to find anything which could suggest the idea of -murder. There were no signs of violence, no footmarks, no -robbery, no record of strangers having been seen upon the roads. -And yet I need not tell you that my mind was far from at ease, -and that I was well-nigh certain that some foul plot had been -woven round him. - -"In this sinister way I came into my inheritance. You will ask me -why I did not dispose of it? I answer, because I was well -convinced that our troubles were in some way dependent upon an -incident in my uncle's life, and that the danger would be as -pressing in one house as in another. - -"It was in January, '85, that my poor father met his end, and two -years and eight months have elapsed since then. During that time -I have lived happily at Horsham, and I had begun to hope that -this curse had passed away from the family, and that it had ended -with the last generation. I had begun to take comfort too soon, -however; yesterday morning the blow fell in the very shape in -which it had come upon my father." - -The young man took from his waistcoat a crumpled envelope, and -turning to the table he shook out upon it five little dried -orange pips. 
- -"This is the envelope," he continued. "The postmark is -London--eastern division. Within are the very words which were -upon my father's last message: 'K. K. K.'; and then 'Put the -papers on the sundial.'" - -"What have you done?" asked Holmes. - -"Nothing." - -"Nothing?" - -"To tell the truth"--he sank his face into his thin, white -hands--"I have felt helpless. I have felt like one of those poor -rabbits when the snake is writhing towards it. I seem to be in -the grasp of some resistless, inexorable evil, which no foresight -and no precautions can guard against." - -"Tut! tut!" cried Sherlock Holmes. "You must act, man, or you are -lost. Nothing but energy can save you. This is no time for -despair." - -"I have seen the police." - -"Ah!" - -"But they listened to my story with a smile. I am convinced that -the inspector has formed the opinion that the letters are all -practical jokes, and that the deaths of my relations were really -accidents, as the jury stated, and were not to be connected with -the warnings." - -Holmes shook his clenched hands in the air. "Incredible -imbecility!" he cried. - -"They have, however, allowed me a policeman, who may remain in -the house with me." - -"Has he come with you to-night?" - -"No. His orders were to stay in the house." - -Again Holmes raved in the air. - -"Why did you come to me," he cried, "and, above all, why did you -not come at once?" - -"I did not know. It was only to-day that I spoke to Major -Prendergast about my troubles and was advised by him to come to -you." - -"It is really two days since you had the letter. We should have -acted before this. You have no further evidence, I suppose, than -that which you have placed before us--no suggestive detail which -might help us?" - -"There is one thing," said John Openshaw. He rummaged in his coat -pocket, and, drawing out a piece of discoloured, blue-tinted -paper, he laid it out upon the table. 
"I have some remembrance," -said he, "that on the day when my uncle burned the papers I -observed that the small, unburned margins which lay amid the -ashes were of this particular colour. I found this single sheet -upon the floor of his room, and I am inclined to think that it -may be one of the papers which has, perhaps, fluttered out from -among the others, and in that way has escaped destruction. Beyond -the mention of pips, I do not see that it helps us much. I think -myself that it is a page from some private diary. The writing is -undoubtedly my uncle's." - -Holmes moved the lamp, and we both bent over the sheet of paper, -which showed by its ragged edge that it had indeed been torn from -a book. It was headed, "March, 1869," and beneath were the -following enigmatical notices: - -"4th. Hudson came. Same old platform. - -"7th. Set the pips on McCauley, Paramore, and - John Swain, of St. Augustine. - -"9th. McCauley cleared. - -"10th. John Swain cleared. - -"12th. Visited Paramore. All well." - -"Thank you!" said Holmes, folding up the paper and returning it -to our visitor. "And now you must on no account lose another -instant. We cannot spare time even to discuss what you have told -me. You must get home instantly and act." - -"What shall I do?" - -"There is but one thing to do. It must be done at once. You must -put this piece of paper which you have shown us into the brass -box which you have described. You must also put in a note to say -that all the other papers were burned by your uncle, and that -this is the only one which remains. You must assert that in such -words as will carry conviction with them. Having done this, you -must at once put the box out upon the sundial, as directed. Do -you understand?" - -"Entirely." - -"Do not think of revenge, or anything of the sort, at present. I -think that we may gain that by means of the law; but we have our -web to weave, while theirs is already woven. 
The first -consideration is to remove the pressing danger which threatens -you. The second is to clear up the mystery and to punish the -guilty parties." - -"I thank you," said the young man, rising and pulling on his -overcoat. "You have given me fresh life and hope. I shall -certainly do as you advise." - -"Do not lose an instant. And, above all, take care of yourself in -the meanwhile, for I do not think that there can be a doubt that -you are threatened by a very real and imminent danger. How do you -go back?" - -"By train from Waterloo." - -"It is not yet nine. The streets will be crowded, so I trust that -you may be in safety. And yet you cannot guard yourself too -closely." - -"I am armed." - -"That is well. To-morrow I shall set to work upon your case." - -"I shall see you at Horsham, then?" - -"No, your secret lies in London. It is there that I shall seek -it." - -"Then I shall call upon you in a day, or in two days, with news -as to the box and the papers. I shall take your advice in every -particular." He shook hands with us and took his leave. Outside -the wind still screamed and the rain splashed and pattered -against the windows. This strange, wild story seemed to have come -to us from amid the mad elements--blown in upon us like a sheet -of sea-weed in a gale--and now to have been reabsorbed by them -once more. - -Sherlock Holmes sat for some time in silence, with his head sunk -forward and his eyes bent upon the red glow of the fire. Then he -lit his pipe, and leaning back in his chair he watched the blue -smoke-rings as they chased each other up to the ceiling. - -"I think, Watson," he remarked at last, "that of all our cases we -have had none more fantastic than this." - -"Save, perhaps, the Sign of Four." - -"Well, yes. Save, perhaps, that. And yet this John Openshaw seems -to me to be walking amid even greater perils than did the -Sholtos." - -"But have you," I asked, "formed any definite conception as to -what these perils are?" 
- -"There can be no question as to their nature," he answered. - -"Then what are they? Who is this K. K. K., and why does he pursue -this unhappy family?" - -Sherlock Holmes closed his eyes and placed his elbows upon the -arms of his chair, with his finger-tips together. "The ideal -reasoner," he remarked, "would, when he had once been shown a -single fact in all its bearings, deduce from it not only all the -chain of events which led up to it but also all the results which -would follow from it. As Cuvier could correctly describe a whole -animal by the contemplation of a single bone, so the observer who -has thoroughly understood one link in a series of incidents -should be able to accurately state all the other ones, both -before and after. We have not yet grasped the results which the -reason alone can attain to. Problems may be solved in the study -which have baffled all those who have sought a solution by the -aid of their senses. To carry the art, however, to its highest -pitch, it is necessary that the reasoner should be able to -utilise all the facts which have come to his knowledge; and this -in itself implies, as you will readily see, a possession of all -knowledge, which, even in these days of free education and -encyclopaedias, is a somewhat rare accomplishment. It is not so -impossible, however, that a man should possess all knowledge -which is likely to be useful to him in his work, and this I have -endeavoured in my case to do. If I remember rightly, you on one -occasion, in the early days of our friendship, defined my limits -in a very precise fashion." - -"Yes," I answered, laughing. "It was a singular document. -Philosophy, astronomy, and politics were marked at zero, I -remember. 
Botany variable, geology profound as regards the -mud-stains from any region within fifty miles of town, chemistry -eccentric, anatomy unsystematic, sensational literature and crime -records unique, violin-player, boxer, swordsman, lawyer, and -self-poisoner by cocaine and tobacco. Those, I think, were the -main points of my analysis." - -Holmes grinned at the last item. "Well," he said, "I say now, as -I said then, that a man should keep his little brain-attic -stocked with all the furniture that he is likely to use, and the -rest he can put away in the lumber-room of his library, where he -can get it if he wants it. Now, for such a case as the one which -has been submitted to us to-night, we need certainly to muster -all our resources. Kindly hand me down the letter K of the -'American Encyclopaedia' which stands upon the shelf beside you. -Thank you. Now let us consider the situation and see what may be -deduced from it. In the first place, we may start with a strong -presumption that Colonel Openshaw had some very strong reason for -leaving America. Men at his time of life do not change all their -habits and exchange willingly the charming climate of Florida for -the lonely life of an English provincial town. His extreme love -of solitude in England suggests the idea that he was in fear of -someone or something, so we may assume as a working hypothesis -that it was fear of someone or something which drove him from -America. As to what it was he feared, we can only deduce that by -considering the formidable letters which were received by himself -and his successors. Did you remark the postmarks of those -letters?" - -"The first was from Pondicherry, the second from Dundee, and the -third from London." - -"From East London. What do you deduce from that?" - -"They are all seaports. That the writer was on board of a ship." - -"Excellent. We have already a clue. 
There can be no doubt that -the probability--the strong probability--is that the writer was -on board of a ship. And now let us consider another point. In the -case of Pondicherry, seven weeks elapsed between the threat and -its fulfilment, in Dundee it was only some three or four days. -Does that suggest anything?" - -"A greater distance to travel." - -"But the letter had also a greater distance to come." - -"Then I do not see the point." - -"There is at least a presumption that the vessel in which the man -or men are is a sailing-ship. It looks as if they always send -their singular warning or token before them when starting upon -their mission. You see how quickly the deed followed the sign -when it came from Dundee. If they had come from Pondicherry in a -steamer they would have arrived almost as soon as their letter. -But, as a matter of fact, seven weeks elapsed. I think that those -seven weeks represented the difference between the mail-boat which -brought the letter and the sailing vessel which brought the -writer." - -"It is possible." - -"More than that. It is probable. And now you see the deadly -urgency of this new case, and why I urged young Openshaw to -caution. The blow has always fallen at the end of the time which -it would take the senders to travel the distance. But this one -comes from London, and therefore we cannot count upon delay." - -"Good God!" I cried. "What can it mean, this relentless -persecution?" - -"The papers which Openshaw carried are obviously of vital -importance to the person or persons in the sailing-ship. I think -that it is quite clear that there must be more than one of them. -A single man could not have carried out two deaths in such a way -as to deceive a coroner's jury. There must have been several in -it, and they must have been men of resource and determination. -Their papers they mean to have, be the holder of them who it may. -In this way you see K. K. K. 
ceases to be the initials of an -individual and becomes the badge of a society." - -"But of what society?" - -"Have you never--" said Sherlock Holmes, bending forward and -sinking his voice--"have you never heard of the Ku Klux Klan?" - -"I never have." - -Holmes turned over the leaves of the book upon his knee. "Here it -is," said he presently: - -"'Ku Klux Klan. A name derived from the fanciful resemblance to -the sound produced by cocking a rifle. This terrible secret -society was formed by some ex-Confederate soldiers in the -Southern states after the Civil War, and it rapidly formed local -branches in different parts of the country, notably in Tennessee, -Louisiana, the Carolinas, Georgia, and Florida. Its power was -used for political purposes, principally for the terrorising of -the negro voters and the murdering and driving from the country -of those who were opposed to its views. Its outrages were usually -preceded by a warning sent to the marked man in some fantastic -but generally recognised shape--a sprig of oak-leaves in some -parts, melon seeds or orange pips in others. On receiving this -the victim might either openly abjure his former ways, or might -fly from the country. If he braved the matter out, death would -unfailingly come upon him, and usually in some strange and -unforeseen manner. So perfect was the organisation of the -society, and so systematic its methods, that there is hardly a -case upon record where any man succeeded in braving it with -impunity, or in which any of its outrages were traced home to the -perpetrators. For some years the organisation flourished in spite -of the efforts of the United States government and of the better -classes of the community in the South. Eventually, in the year -1869, the movement rather suddenly collapsed, although there have -been sporadic outbreaks of the same sort since that date.' 
- -"You will observe," said Holmes, laying down the volume, "that -the sudden breaking up of the society was coincident with the -disappearance of Openshaw from America with their papers. It may -well have been cause and effect. It is no wonder that he and his -family have some of the more implacable spirits upon their track. -You can understand that this register and diary may implicate -some of the first men in the South, and that there may be many -who will not sleep easy at night until it is recovered." - -"Then the page we have seen--" - -"Is such as we might expect. It ran, if I remember right, 'sent -the pips to A, B, and C'--that is, sent the society's warning to -them. Then there are successive entries that A and B cleared, or -left the country, and finally that C was visited, with, I fear, a -sinister result for C. Well, I think, Doctor, that we may let -some light into this dark place, and I believe that the only -chance young Openshaw has in the meantime is to do what I have -told him. There is nothing more to be said or to be done -to-night, so hand me over my violin and let us try to forget for -half an hour the miserable weather and the still more miserable -ways of our fellow-men." - - -It had cleared in the morning, and the sun was shining with a -subdued brightness through the dim veil which hangs over the -great city. Sherlock Holmes was already at breakfast when I came -down. - -"You will excuse me for not waiting for you," said he; "I have, I -foresee, a very busy day before me in looking into this case of -young Openshaw's." - -"What steps will you take?" I asked. - -"It will very much depend upon the results of my first inquiries. -I may have to go down to Horsham, after all." - -"You will not go there first?" - -"No, I shall commence with the City. Just ring the bell and the -maid will bring up your coffee." - -As I waited, I lifted the unopened newspaper from the table and -glanced my eye over it. 
It rested upon a heading which sent a -chill to my heart. - -"Holmes," I cried, "you are too late." - -"Ah!" said he, laying down his cup, "I feared as much. How was it -done?" He spoke calmly, but I could see that he was deeply moved. - -"My eye caught the name of Openshaw, and the heading 'Tragedy -Near Waterloo Bridge.' Here is the account: - -"Between nine and ten last night Police-Constable Cook, of the H -Division, on duty near Waterloo Bridge, heard a cry for help and -a splash in the water. The night, however, was extremely dark and -stormy, so that, in spite of the help of several passers-by, it -was quite impossible to effect a rescue. The alarm, however, was -given, and, by the aid of the water-police, the body was -eventually recovered. It proved to be that of a young gentleman -whose name, as it appears from an envelope which was found in his -pocket, was John Openshaw, and whose residence is near Horsham. -It is conjectured that he may have been hurrying down to catch -the last train from Waterloo Station, and that in his haste and -the extreme darkness he missed his path and walked over the edge -of one of the small landing-places for river steamboats. The body -exhibited no traces of violence, and there can be no doubt that -the deceased had been the victim of an unfortunate accident, -which should have the effect of calling the attention of the -authorities to the condition of the riverside landing-stages." - -We sat in silence for some minutes, Holmes more depressed and -shaken than I had ever seen him. - -"That hurts my pride, Watson," he said at last. "It is a petty -feeling, no doubt, but it hurts my pride. It becomes a personal -matter with me now, and, if God sends me health, I shall set my -hand upon this gang. That he should come to me for help, and that -I should send him away to his death--!" 
He sprang from his chair -and paced about the room in uncontrollable agitation, with a -flush upon his sallow cheeks and a nervous clasping and -unclasping of his long thin hands. - -"They must be cunning devils," he exclaimed at last. "How could -they have decoyed him down there? The Embankment is not on the -direct line to the station. The bridge, no doubt, was too -crowded, even on such a night, for their purpose. Well, Watson, -we shall see who will win in the long run. I am going out now!" - -"To the police?" - -"No; I shall be my own police. When I have spun the web they may -take the flies, but not before." - -All day I was engaged in my professional work, and it was late in -the evening before I returned to Baker Street. Sherlock Holmes -had not come back yet. It was nearly ten o'clock before he -entered, looking pale and worn. He walked up to the sideboard, -and tearing a piece from the loaf he devoured it voraciously, -washing it down with a long draught of water. - -"You are hungry," I remarked. - -"Starving. It had escaped my memory. I have had nothing since -breakfast." - -"Nothing?" - -"Not a bite. I had no time to think of it." - -"And how have you succeeded?" - -"Well." - -"You have a clue?" - -"I have them in the hollow of my hand. Young Openshaw shall not -long remain unavenged. Why, Watson, let us put their own devilish -trade-mark upon them. It is well thought of!" - -"What do you mean?" - -He took an orange from the cupboard, and tearing it to pieces he -squeezed out the pips upon the table. Of these he took five and -thrust them into an envelope. On the inside of the flap he wrote -"S. H. for J. O." Then he sealed it and addressed it to "Captain -James Calhoun, Barque 'Lone Star,' Savannah, Georgia." - -"That will await him when he enters port," said he, chuckling. -"It may give him a sleepless night. He will find it as sure a -precursor of his fate as Openshaw did before him." - -"And who is this Captain Calhoun?" - -"The leader of the gang. 
I shall have the others, but he first." - -"How did you trace it, then?" - -He took a large sheet of paper from his pocket, all covered with -dates and names. - -"I have spent the whole day," said he, "over Lloyd's registers -and files of the old papers, following the future career of every -vessel which touched at Pondicherry in January and February in -'83. There were thirty-six ships of fair tonnage which were -reported there during those months. Of these, one, the 'Lone Star,' -instantly attracted my attention, since, although it was reported -as having cleared from London, the name is that which is given to -one of the states of the Union." - -"Texas, I think." - -"I was not and am not sure which; but I knew that the ship must -have an American origin." - -"What then?" - -"I searched the Dundee records, and when I found that the barque -'Lone Star' was there in January, '85, my suspicion became a -certainty. I then inquired as to the vessels which lay at present -in the port of London." - -"Yes?" - -"The 'Lone Star' had arrived here last week. I went down to the -Albert Dock and found that she had been taken down the river by -the early tide this morning, homeward bound to Savannah. I wired -to Gravesend and learned that she had passed some time ago, and -as the wind is easterly I have no doubt that she is now past the -Goodwins and not very far from the Isle of Wight." - -"What will you do, then?" - -"Oh, I have my hand upon him. He and the two mates, are as I -learn, the only native-born Americans in the ship. The others are -Finns and Germans. I know, also, that they were all three away -from the ship last night. I had it from the stevedore who has -been loading their cargo. By the time that their sailing-ship -reaches Savannah the mail-boat will have carried this letter, and -the cable will have informed the police of Savannah that these -three gentlemen are badly wanted here upon a charge of murder." 
- -There is ever a flaw, however, in the best laid of human plans, -and the murderers of John Openshaw were never to receive the -orange pips which would show them that another, as cunning and as -resolute as themselves, was upon their track. Very long and very -severe were the equinoctial gales that year. We waited long for -news of the "Lone Star" of Savannah, but none ever reached us. We -did at last hear that somewhere far out in the Atlantic a -shattered stern-post of a boat was seen swinging in the trough -of a wave, with the letters "L. S." carved upon it, and that is -all which we shall ever know of the fate of the "Lone Star." - - - -ADVENTURE VI. THE MAN WITH THE TWISTED LIP - -Isa Whitney, brother of the late Elias Whitney, D.D., Principal -of the Theological College of St. George's, was much addicted to -opium. The habit grew upon him, as I understand, from some -foolish freak when he was at college; for having read De -Quincey's description of his dreams and sensations, he had -drenched his tobacco with laudanum in an attempt to produce the -same effects. He found, as so many more have done, that the -practice is easier to attain than to get rid of, and for many -years he continued to be a slave to the drug, an object of -mingled horror and pity to his friends and relatives. I can see -him now, with yellow, pasty face, drooping lids, and pin-point -pupils, all huddled in a chair, the wreck and ruin of a noble -man. - -One night--it was in June, '89--there came a ring to my bell, -about the hour when a man gives his first yawn and glances at the -clock. I sat up in my chair, and my wife laid her needle-work -down in her lap and made a little face of disappointment. - -"A patient!" said she. "You'll have to go out." - -I groaned, for I was newly come back from a weary day. - -We heard the door open, a few hurried words, and then quick steps -upon the linoleum. 
Our own door flew open, and a lady, clad in -some dark-coloured stuff, with a black veil, entered the room. - -"You will excuse my calling so late," she began, and then, -suddenly losing her self-control, she ran forward, threw her arms -about my wife's neck, and sobbed upon her shoulder. "Oh, I'm in -such trouble!" she cried; "I do so want a little help." - -"Why," said my wife, pulling up her veil, "it is Kate Whitney. -How you startled me, Kate! I had not an idea who you were when -you came in." - -"I didn't know what to do, so I came straight to you." That was -always the way. Folk who were in grief came to my wife like birds -to a light-house. - -"It was very sweet of you to come. Now, you must have some wine -and water, and sit here comfortably and tell us all about it. Or -should you rather that I sent James off to bed?" - -"Oh, no, no! I want the doctor's advice and help, too. It's about -Isa. He has not been home for two days. I am so frightened about -him!" - -It was not the first time that she had spoken to us of her -husband's trouble, to me as a doctor, to my wife as an old friend -and school companion. We soothed and comforted her by such words -as we could find. Did she know where her husband was? Was it -possible that we could bring him back to her? - -It seems that it was. She had the surest information that of late -he had, when the fit was on him, made use of an opium den in the -farthest east of the City. Hitherto his orgies had always been -confined to one day, and he had come back, twitching and -shattered, in the evening. But now the spell had been upon him -eight-and-forty hours, and he lay there, doubtless among the -dregs of the docks, breathing in the poison or sleeping off the -effects. There he was to be found, she was sure of it, at the Bar -of Gold, in Upper Swandam Lane. But what was she to do? 
How could -she, a young and timid woman, make her way into such a place and -pluck her husband out from among the ruffians who surrounded him? - -There was the case, and of course there was but one way out of -it. Might I not escort her to this place? And then, as a second -thought, why should she come at all? I was Isa Whitney's medical -adviser, and as such I had influence over him. I could manage it -better if I were alone. I promised her on my word that I would -send him home in a cab within two hours if he were indeed at the -address which she had given me. And so in ten minutes I had left -my armchair and cheery sitting-room behind me, and was speeding -eastward in a hansom on a strange errand, as it seemed to me at -the time, though the future only could show how strange it was to -be. - -But there was no great difficulty in the first stage of my -adventure. Upper Swandam Lane is a vile alley lurking behind the -high wharves which line the north side of the river to the east -of London Bridge. Between a slop-shop and a gin-shop, approached -by a steep flight of steps leading down to a black gap like the -mouth of a cave, I found the den of which I was in search. -Ordering my cab to wait, I passed down the steps, worn hollow in -the centre by the ceaseless tread of drunken feet; and by the -light of a flickering oil-lamp above the door I found the latch -and made my way into a long, low room, thick and heavy with the -brown opium smoke, and terraced with wooden berths, like the -forecastle of an emigrant ship. - -Through the gloom one could dimly catch a glimpse of bodies lying -in strange fantastic poses, bowed shoulders, bent knees, heads -thrown back, and chins pointing upward, with here and there a -dark, lack-lustre eye turned upon the newcomer. Out of the black -shadows there glimmered little red circles of light, now bright, -now faint, as the burning poison waxed or waned in the bowls of -the metal pipes. 
The most lay silent, but some muttered to -themselves, and others talked together in a strange, low, -monotonous voice, their conversation coming in gushes, and then -suddenly tailing off into silence, each mumbling out his own -thoughts and paying little heed to the words of his neighbour. At -the farther end was a small brazier of burning charcoal, beside -which on a three-legged wooden stool there sat a tall, thin old -man, with his jaw resting upon his two fists, and his elbows upon -his knees, staring into the fire. - -As I entered, a sallow Malay attendant had hurried up with a pipe -for me and a supply of the drug, beckoning me to an empty berth. - -"Thank you. I have not come to stay," said I. "There is a friend -of mine here, Mr. Isa Whitney, and I wish to speak with him." - -There was a movement and an exclamation from my right, and -peering through the gloom, I saw Whitney, pale, haggard, and -unkempt, staring out at me. - -"My God! It's Watson," said he. He was in a pitiable state of -reaction, with every nerve in a twitter. "I say, Watson, what -o'clock is it?" - -"Nearly eleven." - -"Of what day?" - -"Of Friday, June 19th." - -"Good heavens! I thought it was Wednesday. It is Wednesday. What -d'you want to frighten a chap for?" He sank his face onto his -arms and began to sob in a high treble key. - -"I tell you that it is Friday, man. Your wife has been waiting -this two days for you. You should be ashamed of yourself!" - -"So I am. But you've got mixed, Watson, for I have only been here -a few hours, three pipes, four pipes--I forget how many. But I'll -go home with you. I wouldn't frighten Kate--poor little Kate. -Give me your hand! Have you a cab?" - -"Yes, I have one waiting." - -"Then I shall go in it. But I must owe something. Find what I -owe, Watson. I am all off colour. I can do nothing for myself." 
- -I walked down the narrow passage between the double row of -sleepers, holding my breath to keep out the vile, stupefying -fumes of the drug, and looking about for the manager. As I passed -the tall man who sat by the brazier I felt a sudden pluck at my -skirt, and a low voice whispered, "Walk past me, and then look -back at me." The words fell quite distinctly upon my ear. I -glanced down. They could only have come from the old man at my -side, and yet he sat now as absorbed as ever, very thin, very -wrinkled, bent with age, an opium pipe dangling down from between -his knees, as though it had dropped in sheer lassitude from his -fingers. I took two steps forward and looked back. It took all my -self-control to prevent me from breaking out into a cry of -astonishment. He had turned his back so that none could see him -but I. His form had filled out, his wrinkles were gone, the dull -eyes had regained their fire, and there, sitting by the fire and -grinning at my surprise, was none other than Sherlock Holmes. He -made a slight motion to me to approach him, and instantly, as he -turned his face half round to the company once more, subsided -into a doddering, loose-lipped senility. - -"Holmes!" I whispered, "what on earth are you doing in this den?" - -"As low as you can," he answered; "I have excellent ears. If you -would have the great kindness to get rid of that sottish friend -of yours I should be exceedingly glad to have a little talk with -you." - -"I have a cab outside." - -"Then pray send him home in it. You may safely trust him, for he -appears to be too limp to get into any mischief. I should -recommend you also to send a note by the cabman to your wife to -say that you have thrown in your lot with me. If you will wait -outside, I shall be with you in five minutes." - -It was difficult to refuse any of Sherlock Holmes' requests, for -they were always so exceedingly definite, and put forward with -such a quiet air of mastery. 
I felt, however, that when Whitney -was once confined in the cab my mission was practically -accomplished; and for the rest, I could not wish anything better -than to be associated with my friend in one of those singular -adventures which were the normal condition of his existence. In a -few minutes I had written my note, paid Whitney's bill, led him -out to the cab, and seen him driven through the darkness. In a -very short time a decrepit figure had emerged from the opium den, -and I was walking down the street with Sherlock Holmes. For two -streets he shuffled along with a bent back and an uncertain foot. -Then, glancing quickly round, he straightened himself out and -burst into a hearty fit of laughter. - -"I suppose, Watson," said he, "that you imagine that I have added -opium-smoking to cocaine injections, and all the other little -weaknesses on which you have favoured me with your medical -views." - -"I was certainly surprised to find you there." - -"But not more so than I to find you." - -"I came to find a friend." - -"And I to find an enemy." - -"An enemy?" - -"Yes; one of my natural enemies, or, shall I say, my natural -prey. Briefly, Watson, I am in the midst of a very remarkable -inquiry, and I have hoped to find a clue in the incoherent -ramblings of these sots, as I have done before now. Had I been -recognised in that den my life would not have been worth an -hour's purchase; for I have used it before now for my own -purposes, and the rascally Lascar who runs it has sworn to have -vengeance upon me. There is a trap-door at the back of that -building, near the corner of Paul's Wharf, which could tell some -strange tales of what has passed through it upon the moonless -nights." - -"What! You do not mean bodies?" - -"Ay, bodies, Watson. We should be rich men if we had 1000 pounds -for every poor devil who has been done to death in that den. It -is the vilest murder-trap on the whole riverside, and I fear that -Neville St. 
Clair has entered it never to leave it more. But our -trap should be here." He put his two forefingers between his -teeth and whistled shrilly--a signal which was answered by a -similar whistle from the distance, followed shortly by the rattle -of wheels and the clink of horses' hoofs. - -"Now, Watson," said Holmes, as a tall dog-cart dashed up through -the gloom, throwing out two golden tunnels of yellow light from -its side lanterns. "You'll come with me, won't you?" - -"If I can be of use." - -"Oh, a trusty comrade is always of use; and a chronicler still -more so. My room at The Cedars is a double-bedded one." - -"The Cedars?" - -"Yes; that is Mr. St. Clair's house. I am staying there while I -conduct the inquiry." - -"Where is it, then?" - -"Near Lee, in Kent. We have a seven-mile drive before us." - -"But I am all in the dark." - -"Of course you are. You'll know all about it presently. Jump up -here. All right, John; we shall not need you. Here's half a -crown. Look out for me to-morrow, about eleven. Give her her -head. So long, then!" - -He flicked the horse with his whip, and we dashed away through -the endless succession of sombre and deserted streets, which -widened gradually, until we were flying across a broad -balustraded bridge, with the murky river flowing sluggishly -beneath us. Beyond lay another dull wilderness of bricks and -mortar, its silence broken only by the heavy, regular footfall of -the policeman, or the songs and shouts of some belated party of -revellers. A dull wrack was drifting slowly across the sky, and a -star or two twinkled dimly here and there through the rifts of -the clouds. Holmes drove in silence, with his head sunk upon his -breast, and the air of a man who is lost in thought, while I sat -beside him, curious to learn what this new quest might be which -seemed to tax his powers so sorely, and yet afraid to break in -upon the current of his thoughts. 
We had driven several miles, -and were beginning to get to the fringe of the belt of suburban -villas, when he shook himself, shrugged his shoulders, and lit up -his pipe with the air of a man who has satisfied himself that he -is acting for the best. - -"You have a grand gift of silence, Watson," said he. "It makes -you quite invaluable as a companion. 'Pon my word, it is a great -thing for me to have someone to talk to, for my own thoughts are -not over-pleasant. I was wondering what I should say to this dear -little woman to-night when she meets me at the door." - -"You forget that I know nothing about it." - -"I shall just have time to tell you the facts of the case before -we get to Lee. It seems absurdly simple, and yet, somehow I can -get nothing to go upon. There's plenty of thread, no doubt, but I -can't get the end of it into my hand. Now, I'll state the case -clearly and concisely to you, Watson, and maybe you can see a -spark where all is dark to me." - -"Proceed, then." - -"Some years ago--to be definite, in May, 1884--there came to Lee -a gentleman, Neville St. Clair by name, who appeared to have -plenty of money. He took a large villa, laid out the grounds very -nicely, and lived generally in good style. By degrees he made -friends in the neighbourhood, and in 1887 he married the daughter -of a local brewer, by whom he now has two children. He had no -occupation, but was interested in several companies and went into -town as a rule in the morning, returning by the 5:14 from Cannon -Street every night. Mr. St. Clair is now thirty-seven years of -age, is a man of temperate habits, a good husband, a very -affectionate father, and a man who is popular with all who know -him. I may add that his whole debts at the present moment, as far -as we have been able to ascertain, amount to 88 pounds 10s., while -he has 220 pounds standing to his credit in the Capital and -Counties Bank. 
There is no reason, therefore, to think that money -troubles have been weighing upon his mind. - -"Last Monday Mr. Neville St. Clair went into town rather earlier -than usual, remarking before he started that he had two important -commissions to perform, and that he would bring his little boy -home a box of bricks. Now, by the merest chance, his wife -received a telegram upon this same Monday, very shortly after his -departure, to the effect that a small parcel of considerable -value which she had been expecting was waiting for her at the -offices of the Aberdeen Shipping Company. Now, if you are well up -in your London, you will know that the office of the company is -in Fresno Street, which branches out of Upper Swandam Lane, where -you found me to-night. Mrs. St. Clair had her lunch, started for -the City, did some shopping, proceeded to the company's office, -got her packet, and found herself at exactly 4:35 walking through -Swandam Lane on her way back to the station. Have you followed me -so far?" - -"It is very clear." - -"If you remember, Monday was an exceedingly hot day, and Mrs. St. -Clair walked slowly, glancing about in the hope of seeing a cab, -as she did not like the neighbourhood in which she found herself. -While she was walking in this way down Swandam Lane, she suddenly -heard an ejaculation or cry, and was struck cold to see her -husband looking down at her and, as it seemed to her, beckoning -to her from a second-floor window. The window was open, and she -distinctly saw his face, which she describes as being terribly -agitated. He waved his hands frantically to her, and then -vanished from the window so suddenly that it seemed to her that -he had been plucked back by some irresistible force from behind. -One singular point which struck her quick feminine eye was that -although he wore some dark coat, such as he had started to town -in, he had on neither collar nor necktie. 
- -"Convinced that something was amiss with him, she rushed down the -steps--for the house was none other than the opium den in which -you found me to-night--and running through the front room she -attempted to ascend the stairs which led to the first floor. At -the foot of the stairs, however, she met this Lascar scoundrel of -whom I have spoken, who thrust her back and, aided by a Dane, who -acts as assistant there, pushed her out into the street. Filled -with the most maddening doubts and fears, she rushed down the -lane and, by rare good-fortune, met in Fresno Street a number of -constables with an inspector, all on their way to their beat. The -inspector and two men accompanied her back, and in spite of the -continued resistance of the proprietor, they made their way to -the room in which Mr. St. Clair had last been seen. There was no -sign of him there. In fact, in the whole of that floor there was -no one to be found save a crippled wretch of hideous aspect, who, -it seems, made his home there. Both he and the Lascar stoutly -swore that no one else had been in the front room during the -afternoon. So determined was their denial that the inspector was -staggered, and had almost come to believe that Mrs. St. Clair had -been deluded when, with a cry, she sprang at a small deal box -which lay upon the table and tore the lid from it. Out there fell -a cascade of children's bricks. It was the toy which he had -promised to bring home. - -"This discovery, and the evident confusion which the cripple -showed, made the inspector realise that the matter was serious. -The rooms were carefully examined, and results all pointed to an -abominable crime. The front room was plainly furnished as a -sitting-room and led into a small bedroom, which looked out upon -the back of one of the wharves. Between the wharf and the bedroom -window is a narrow strip, which is dry at low tide but is covered -at high tide with at least four and a half feet of water. 
The -bedroom window was a broad one and opened from below. On -examination traces of blood were to be seen upon the windowsill, -and several scattered drops were visible upon the wooden floor of -the bedroom. Thrust away behind a curtain in the front room were -all the clothes of Mr. Neville St. Clair, with the exception of -his coat. His boots, his socks, his hat, and his watch--all were -there. There were no signs of violence upon any of these -garments, and there were no other traces of Mr. Neville St. -Clair. Out of the window he must apparently have gone for no -other exit could be discovered, and the ominous bloodstains upon -the sill gave little promise that he could save himself by -swimming, for the tide was at its very highest at the moment of -the tragedy. - -"And now as to the villains who seemed to be immediately -implicated in the matter. The Lascar was known to be a man of the -vilest antecedents, but as, by Mrs. St. Clair's story, he was -known to have been at the foot of the stair within a very few -seconds of her husband's appearance at the window, he could -hardly have been more than an accessory to the crime. His defence -was one of absolute ignorance, and he protested that he had no -knowledge as to the doings of Hugh Boone, his lodger, and that he -could not account in any way for the presence of the missing -gentleman's clothes. - -"So much for the Lascar manager. Now for the sinister cripple who -lives upon the second floor of the opium den, and who was -certainly the last human being whose eyes rested upon Neville St. -Clair. His name is Hugh Boone, and his hideous face is one which -is familiar to every man who goes much to the City. He is a -professional beggar, though in order to avoid the police -regulations he pretends to a small trade in wax vestas. Some -little distance down Threadneedle Street, upon the left-hand -side, there is, as you may have remarked, a small angle in the -wall. 
Here it is that this creature takes his daily seat, -cross-legged with his tiny stock of matches on his lap, and as he -is a piteous spectacle a small rain of charity descends into the -greasy leather cap which lies upon the pavement beside him. I -have watched the fellow more than once before ever I thought of -making his professional acquaintance, and I have been surprised -at the harvest which he has reaped in a short time. His -appearance, you see, is so remarkable that no one can pass him -without observing him. A shock of orange hair, a pale face -disfigured by a horrible scar, which, by its contraction, has -turned up the outer edge of his upper lip, a bulldog chin, and a -pair of very penetrating dark eyes, which present a singular -contrast to the colour of his hair, all mark him out from amid -the common crowd of mendicants and so, too, does his wit, for he -is ever ready with a reply to any piece of chaff which may be -thrown at him by the passers-by. This is the man whom we now -learn to have been the lodger at the opium den, and to have been -the last man to see the gentleman of whom we are in quest." - -"But a cripple!" said I. "What could he have done single-handed -against a man in the prime of life?" - -"He is a cripple in the sense that he walks with a limp; but in -other respects he appears to be a powerful and well-nurtured man. -Surely your medical experience would tell you, Watson, that -weakness in one limb is often compensated for by exceptional -strength in the others." - -"Pray continue your narrative." - -"Mrs. St. Clair had fainted at the sight of the blood upon the -window, and she was escorted home in a cab by the police, as her -presence could be of no help to them in their investigations. -Inspector Barton, who had charge of the case, made a very careful -examination of the premises, but without finding anything which -threw any light upon the matter. 
One mistake had been made in not -arresting Boone instantly, as he was allowed some few minutes -during which he might have communicated with his friend the -Lascar, but this fault was soon remedied, and he was seized and -searched, without anything being found which could incriminate -him. There were, it is true, some blood-stains upon his right -shirt-sleeve, but he pointed to his ring-finger, which had been -cut near the nail, and explained that the bleeding came from -there, adding that he had been to the window not long before, and -that the stains which had been observed there came doubtless from -the same source. He denied strenuously having ever seen Mr. -Neville St. Clair and swore that the presence of the clothes in -his room was as much a mystery to him as to the police. As to -Mrs. St. Clair's assertion that she had actually seen her husband -at the window, he declared that she must have been either mad or -dreaming. He was removed, loudly protesting, to the -police-station, while the inspector remained upon the premises in -the hope that the ebbing tide might afford some fresh clue. - -"And it did, though they hardly found upon the mud-bank what they -had feared to find. It was Neville St. Clair's coat, and not -Neville St. Clair, which lay uncovered as the tide receded. And -what do you think they found in the pockets?" - -"I cannot imagine." - -"No, I don't think you would guess. Every pocket stuffed with -pennies and half-pennies--421 pennies and 270 half-pennies. It -was no wonder that it had not been swept away by the tide. But a -human body is a different matter. There is a fierce eddy between -the wharf and the house. It seemed likely enough that the -weighted coat had remained when the stripped body had been sucked -away into the river." - -"But I understand that all the other clothes were found in the -room. Would the body be dressed in a coat alone?" - -"No, sir, but the facts might be met speciously enough. 
Suppose -that this man Boone had thrust Neville St. Clair through the -window, there is no human eye which could have seen the deed. -What would he do then? It would of course instantly strike him -that he must get rid of the tell-tale garments. He would seize -the coat, then, and be in the act of throwing it out, when it -would occur to him that it would swim and not sink. He has little -time, for he has heard the scuffle downstairs when the wife tried -to force her way up, and perhaps he has already heard from his -Lascar confederate that the police are hurrying up the street. -There is not an instant to be lost. He rushes to some secret -hoard, where he has accumulated the fruits of his beggary, and he -stuffs all the coins upon which he can lay his hands into the -pockets to make sure of the coat's sinking. He throws it out, and -would have done the same with the other garments had not he heard -the rush of steps below, and only just had time to close the -window when the police appeared." - -"It certainly sounds feasible." - -"Well, we will take it as a working hypothesis for want of a -better. Boone, as I have told you, was arrested and taken to the -station, but it could not be shown that there had ever before -been anything against him. He had for years been known as a -professional beggar, but his life appeared to have been a very -quiet and innocent one. There the matter stands at present, and -the questions which have to be solved--what Neville St. Clair was -doing in the opium den, what happened to him when there, where is -he now, and what Hugh Boone had to do with his disappearance--are -all as far from a solution as ever. I confess that I cannot -recall any case within my experience which looked at the first -glance so simple and yet which presented such difficulties." 
- -While Sherlock Holmes had been detailing this singular series of -events, we had been whirling through the outskirts of the great -town until the last straggling houses had been left behind, and -we rattled along with a country hedge upon either side of us. -Just as he finished, however, we drove through two scattered -villages, where a few lights still glimmered in the windows. - -"We are on the outskirts of Lee," said my companion. "We have -touched on three English counties in our short drive, starting in -Middlesex, passing over an angle of Surrey, and ending in Kent. -See that light among the trees? That is The Cedars, and beside -that lamp sits a woman whose anxious ears have already, I have -little doubt, caught the clink of our horse's feet." - -"But why are you not conducting the case from Baker Street?" I -asked. - -"Because there are many inquiries which must be made out here. -Mrs. St. Clair has most kindly put two rooms at my disposal, and -you may rest assured that she will have nothing but a welcome for -my friend and colleague. I hate to meet her, Watson, when I have -no news of her husband. Here we are. Whoa, there, whoa!" - -We had pulled up in front of a large villa which stood within its -own grounds. A stable-boy had run out to the horse's head, and -springing down, I followed Holmes up the small, winding -gravel-drive which led to the house. As we approached, the door -flew open, and a little blonde woman stood in the opening, clad -in some sort of light mousseline de soie, with a touch of fluffy -pink chiffon at her neck and wrists. She stood with her figure -outlined against the flood of light, one hand upon the door, one -half-raised in her eagerness, her body slightly bent, her head -and face protruded, with eager eyes and parted lips, a standing -question. - -"Well?" she cried, "well?" 
And then, seeing that there were two -of us, she gave a cry of hope which sank into a groan as she saw -that my companion shook his head and shrugged his shoulders. - -"No good news?" - -"None." - -"No bad?" - -"No." - -"Thank God for that. But come in. You must be weary, for you have -had a long day." - -"This is my friend, Dr. Watson. He has been of most vital use to -me in several of my cases, and a lucky chance has made it -possible for me to bring him out and associate him with this -investigation." - -"I am delighted to see you," said she, pressing my hand warmly. -"You will, I am sure, forgive anything that may be wanting in our -arrangements, when you consider the blow which has come so -suddenly upon us." - -"My dear madam," said I, "I am an old campaigner, and if I were -not I can very well see that no apology is needed. If I can be of -any assistance, either to you or to my friend here, I shall be -indeed happy." - -"Now, Mr. Sherlock Holmes," said the lady as we entered a -well-lit dining-room, upon the table of which a cold supper had -been laid out, "I should very much like to ask you one or two -plain questions, to which I beg that you will give a plain -answer." - -"Certainly, madam." - -"Do not trouble about my feelings. I am not hysterical, nor given -to fainting. I simply wish to hear your real, real opinion." - -"Upon what point?" - -"In your heart of hearts, do you think that Neville is alive?" - -Sherlock Holmes seemed to be embarrassed by the question. -"Frankly, now!" she repeated, standing upon the rug and looking -keenly down at him as he leaned back in a basket-chair. - -"Frankly, then, madam, I do not." - -"You think that he is dead?" - -"I do." - -"Murdered?" - -"I don't say that. Perhaps." - -"And on what day did he meet his death?" - -"On Monday." - -"Then perhaps, Mr. Holmes, you will be good enough to explain how -it is that I have received a letter from him to-day." 
- -Sherlock Holmes sprang out of his chair as if he had been -galvanised. - -"What!" he roared. - -"Yes, to-day." She stood smiling, holding up a little slip of -paper in the air. - -"May I see it?" - -"Certainly." - -He snatched it from her in his eagerness, and smoothing it out -upon the table he drew over the lamp and examined it intently. I -had left my chair and was gazing at it over his shoulder. The -envelope was a very coarse one and was stamped with the Gravesend -postmark and with the date of that very day, or rather of the day -before, for it was considerably after midnight. - -"Coarse writing," murmured Holmes. "Surely this is not your -husband's writing, madam." - -"No, but the enclosure is." - -"I perceive also that whoever addressed the envelope had to go -and inquire as to the address." - -"How can you tell that?" - -"The name, you see, is in perfectly black ink, which has dried -itself. The rest is of the greyish colour, which shows that -blotting-paper has been used. If it had been written straight -off, and then blotted, none would be of a deep black shade. This -man has written the name, and there has then been a pause before -he wrote the address, which can only mean that he was not -familiar with it. It is, of course, a trifle, but there is -nothing so important as trifles. Let us now see the letter. Ha! -there has been an enclosure here!" - -"Yes, there was a ring. His signet-ring." - -"And you are sure that this is your husband's hand?" - -"One of his hands." - -"One?" - -"His hand when he wrote hurriedly. It is very unlike his usual -writing, and yet I know it well." - -"'Dearest do not be frightened. All will come well. There is a -huge error which it may take some little time to rectify. -Wait in patience.--NEVILLE.' Written in pencil upon the fly-leaf -of a book, octavo size, no water-mark. Hum! Posted to-day in -Gravesend by a man with a dirty thumb. Ha! 
And the flap has been -gummed, if I am not very much in error, by a person who had been -chewing tobacco. And you have no doubt that it is your husband's -hand, madam?" - -"None. Neville wrote those words." - -"And they were posted to-day at Gravesend. Well, Mrs. St. Clair, -the clouds lighten, though I should not venture to say that the -danger is over." - -"But he must be alive, Mr. Holmes." - -"Unless this is a clever forgery to put us on the wrong scent. -The ring, after all, proves nothing. It may have been taken from -him." - -"No, no; it is, it is his very own writing!" - -"Very well. It may, however, have been written on Monday and only -posted to-day." - -"That is possible." - -"If so, much may have happened between." - -"Oh, you must not discourage me, Mr. Holmes. I know that all is -well with him. There is so keen a sympathy between us that I -should know if evil came upon him. On the very day that I saw him -last he cut himself in the bedroom, and yet I in the dining-room -rushed upstairs instantly with the utmost certainty that -something had happened. Do you think that I would respond to such -a trifle and yet be ignorant of his death?" - -"I have seen too much not to know that the impression of a woman -may be more valuable than the conclusion of an analytical -reasoner. And in this letter you certainly have a very strong -piece of evidence to corroborate your view. But if your husband -is alive and able to write letters, why should he remain away -from you?" - -"I cannot imagine. It is unthinkable." - -"And on Monday he made no remarks before leaving you?" - -"No." - -"And you were surprised to see him in Swandam Lane?" - -"Very much so." - -"Was the window open?" - -"Yes." - -"Then he might have called to you?" - -"He might." - -"He only, as I understand, gave an inarticulate cry?" - -"Yes." - -"A call for help, you thought?" - -"Yes. He waved his hands." - -"But it might have been a cry of surprise. 
Astonishment at the -unexpected sight of you might cause him to throw up his hands?" - -"It is possible." - -"And you thought he was pulled back?" - -"He disappeared so suddenly." - -"He might have leaped back. You did not see anyone else in the -room?" - -"No, but this horrible man confessed to having been there, and -the Lascar was at the foot of the stairs." - -"Quite so. Your husband, as far as you could see, had his -ordinary clothes on?" - -"But without his collar or tie. I distinctly saw his bare -throat." - -"Had he ever spoken of Swandam Lane?" - -"Never." - -"Had he ever showed any signs of having taken opium?" - -"Never." - -"Thank you, Mrs. St. Clair. Those are the principal points about -which I wished to be absolutely clear. We shall now have a little -supper and then retire, for we may have a very busy day -to-morrow." - -A large and comfortable double-bedded room had been placed at our -disposal, and I was quickly between the sheets, for I was weary -after my night of adventure. Sherlock Holmes was a man, however, -who, when he had an unsolved problem upon his mind, would go for -days, and even for a week, without rest, turning it over, -rearranging his facts, looking at it from every point of view -until he had either fathomed it or convinced himself that his -data were insufficient. It was soon evident to me that he was now -preparing for an all-night sitting. He took off his coat and -waistcoat, put on a large blue dressing-gown, and then wandered -about the room collecting pillows from his bed and cushions from -the sofa and armchairs. With these he constructed a sort of -Eastern divan, upon which he perched himself cross-legged, with -an ounce of shag tobacco and a box of matches laid out in front -of him. 
In the dim light of the lamp I saw him sitting there, an -old briar pipe between his lips, his eyes fixed vacantly upon the -corner of the ceiling, the blue smoke curling up from him, -silent, motionless, with the light shining upon his strong-set -aquiline features. So he sat as I dropped off to sleep, and so he -sat when a sudden ejaculation caused me to wake up, and I found -the summer sun shining into the apartment. The pipe was still -between his lips, the smoke still curled upward, and the room was -full of a dense tobacco haze, but nothing remained of the heap of -shag which I had seen upon the previous night. - -"Awake, Watson?" he asked. - -"Yes." - -"Game for a morning drive?" - -"Certainly." - -"Then dress. No one is stirring yet, but I know where the -stable-boy sleeps, and we shall soon have the trap out." He -chuckled to himself as he spoke, his eyes twinkled, and he seemed -a different man to the sombre thinker of the previous night. - -As I dressed I glanced at my watch. It was no wonder that no one -was stirring. It was twenty-five minutes past four. I had hardly -finished when Holmes returned with the news that the boy was -putting in the horse. - -"I want to test a little theory of mine," said he, pulling on his -boots. "I think, Watson, that you are now standing in the -presence of one of the most absolute fools in Europe. I deserve -to be kicked from here to Charing Cross. But I think I have the -key of the affair now." - -"And where is it?" I asked, smiling. - -"In the bathroom," he answered. "Oh, yes, I am not joking," he -continued, seeing my look of incredulity. "I have just been -there, and I have taken it out, and I have got it in this -Gladstone bag. Come on, my boy, and we shall see whether it will -not fit the lock." - -We made our way downstairs as quietly as possible, and out into -the bright morning sunshine. In the road stood our horse and -trap, with the half-clad stable-boy waiting at the head. 
We both -sprang in, and away we dashed down the London Road. A few country -carts were stirring, bearing in vegetables to the metropolis, but -the lines of villas on either side were as silent and lifeless as -some city in a dream. - -"It has been in some points a singular case," said Holmes, -flicking the horse on into a gallop. "I confess that I have been -as blind as a mole, but it is better to learn wisdom late than -never to learn it at all." - -In town the earliest risers were just beginning to look sleepily -from their windows as we drove through the streets of the Surrey -side. Passing down the Waterloo Bridge Road we crossed over the -river, and dashing up Wellington Street wheeled sharply to the -right and found ourselves in Bow Street. Sherlock Holmes was well -known to the force, and the two constables at the door saluted -him. One of them held the horse's head while the other led us in. - -"Who is on duty?" asked Holmes. - -"Inspector Bradstreet, sir." - -"Ah, Bradstreet, how are you?" A tall, stout official had come -down the stone-flagged passage, in a peaked cap and frogged -jacket. "I wish to have a quiet word with you, Bradstreet." -"Certainly, Mr. Holmes. Step into my room here." It was a small, -office-like room, with a huge ledger upon the table, and a -telephone projecting from the wall. The inspector sat down at his -desk. - -"What can I do for you, Mr. Holmes?" - -"I called about that beggarman, Boone--the one who was charged -with being concerned in the disappearance of Mr. Neville St. -Clair, of Lee." - -"Yes. He was brought up and remanded for further inquiries." - -"So I heard. You have him here?" - -"In the cells." - -"Is he quiet?" - -"Oh, he gives no trouble. But he is a dirty scoundrel." - -"Dirty?" - -"Yes, it is all we can do to make him wash his hands, and his -face is as black as a tinker's. 
Well, when once his case has been -settled, he will have a regular prison bath; and I think, if you -saw him, you would agree with me that he needed it." - -"I should like to see him very much." - -"Would you? That is easily done. Come this way. You can leave -your bag." - -"No, I think that I'll take it." - -"Very good. Come this way, if you please." He led us down a -passage, opened a barred door, passed down a winding stair, and -brought us to a whitewashed corridor with a line of doors on each -side. - -"The third on the right is his," said the inspector. "Here it -is!" He quietly shot back a panel in the upper part of the door -and glanced through. - -"He is asleep," said he. "You can see him very well." - -We both put our eyes to the grating. The prisoner lay with his -face towards us, in a very deep sleep, breathing slowly and -heavily. He was a middle-sized man, coarsely clad as became his -calling, with a coloured shirt protruding through the rent in his -tattered coat. He was, as the inspector had said, extremely -dirty, but the grime which covered his face could not conceal its -repulsive ugliness. A broad wheal from an old scar ran right -across it from eye to chin, and by its contraction had turned up -one side of the upper lip, so that three teeth were exposed in a -perpetual snarl. A shock of very bright red hair grew low over -his eyes and forehead. - -"He's a beauty, isn't he?" said the inspector. - -"He certainly needs a wash," remarked Holmes. "I had an idea that -he might, and I took the liberty of bringing the tools with me." -He opened the Gladstone bag as he spoke, and took out, to my -astonishment, a very large bath-sponge. - -"He! he! You are a funny one," chuckled the inspector. - -"Now, if you will have the great goodness to open that door very -quietly, we will soon make him cut a much more respectable -figure." - -"Well, I don't know why not," said the inspector. "He doesn't -look a credit to the Bow Street cells, does he?" 
He slipped his -key into the lock, and we all very quietly entered the cell. The -sleeper half turned, and then settled down once more into a deep -slumber. Holmes stooped to the water-jug, moistened his sponge, -and then rubbed it twice vigorously across and down the -prisoner's face. - -"Let me introduce you," he shouted, "to Mr. Neville St. Clair, of -Lee, in the county of Kent." - -Never in my life have I seen such a sight. The man's face peeled -off under the sponge like the bark from a tree. Gone was the -coarse brown tint! Gone, too, was the horrid scar which had -seamed it across, and the twisted lip which had given the -repulsive sneer to the face! A twitch brought away the tangled -red hair, and there, sitting up in his bed, was a pale, -sad-faced, refined-looking man, black-haired and smooth-skinned, -rubbing his eyes and staring about him with sleepy bewilderment. -Then suddenly realising the exposure, he broke into a scream and -threw himself down with his face to the pillow. - -"Great heavens!" cried the inspector, "it is, indeed, the missing -man. I know him from the photograph." - -The prisoner turned with the reckless air of a man who abandons -himself to his destiny. "Be it so," said he. "And pray what am I -charged with?" - -"With making away with Mr. Neville St.-- Oh, come, you can't be -charged with that unless they make a case of attempted suicide of -it," said the inspector with a grin. "Well, I have been -twenty-seven years in the force, but this really takes the cake." - -"If I am Mr. Neville St. Clair, then it is obvious that no crime -has been committed, and that, therefore, I am illegally -detained." - -"No crime, but a very great error has been committed," said -Holmes. "You would have done better to have trusted your wife." - -"It was not the wife; it was the children," groaned the prisoner. -"God help me, I would not have them ashamed of their father. My -God! What an exposure! What can I do?" 
- -Sherlock Holmes sat down beside him on the couch and patted him -kindly on the shoulder. - -"If you leave it to a court of law to clear the matter up," said -he, "of course you can hardly avoid publicity. On the other hand, -if you convince the police authorities that there is no possible -case against you, I do not know that there is any reason that the -details should find their way into the papers. Inspector -Bradstreet would, I am sure, make notes upon anything which you -might tell us and submit it to the proper authorities. The case -would then never go into court at all." - -"God bless you!" cried the prisoner passionately. "I would have -endured imprisonment, ay, even execution, rather than have left -my miserable secret as a family blot to my children. - -"You are the first who have ever heard my story. My father was a -schoolmaster in Chesterfield, where I received an excellent -education. I travelled in my youth, took to the stage, and -finally became a reporter on an evening paper in London. One day -my editor wished to have a series of articles upon begging in the -metropolis, and I volunteered to supply them. There was the point -from which all my adventures started. It was only by trying -begging as an amateur that I could get the facts upon which to -base my articles. When an actor I had, of course, learned all the -secrets of making up, and had been famous in the green-room for -my skill. I took advantage now of my attainments. I painted my -face, and to make myself as pitiable as possible I made a good -scar and fixed one side of my lip in a twist by the aid of a -small slip of flesh-coloured plaster. Then with a red head of -hair, and an appropriate dress, I took my station in the business -part of the city, ostensibly as a match-seller but really as a -beggar. For seven hours I plied my trade, and when I returned -home in the evening I found to my surprise that I had received no -less than 26s. 4d. 
- -"I wrote my articles and thought little more of the matter until, -some time later, I backed a bill for a friend and had a writ -served upon me for 25 pounds. I was at my wit's end where to get -the money, but a sudden idea came to me. I begged a fortnight's -grace from the creditor, asked for a holiday from my employers, -and spent the time in begging in the City under my disguise. In -ten days I had the money and had paid the debt. - -"Well, you can imagine how hard it was to settle down to arduous -work at 2 pounds a week when I knew that I could earn as much in -a day by smearing my face with a little paint, laying my cap on -the ground, and sitting still. It was a long fight between my -pride and the money, but the dollars won at last, and I threw up -reporting and sat day after day in the corner which I had first -chosen, inspiring pity by my ghastly face and filling my pockets -with coppers. Only one man knew my secret. He was the keeper of a -low den in which I used to lodge in Swandam Lane, where I could -every morning emerge as a squalid beggar and in the evenings -transform myself into a well-dressed man about town. This fellow, -a Lascar, was well paid by me for his rooms, so that I knew that -my secret was safe in his possession. - -"Well, very soon I found that I was saving considerable sums of -money. I do not mean that any beggar in the streets of London -could earn 700 pounds a year--which is less than my average -takings--but I had exceptional advantages in my power of making -up, and also in a facility of repartee, which improved by -practice and made me quite a recognised character in the City. -All day a stream of pennies, varied by silver, poured in upon me, -and it was a very bad day in which I failed to take 2 pounds. - -"As I grew richer I grew more ambitious, took a house in the -country, and eventually married, without anyone having a -suspicion as to my real occupation. My dear wife knew that I had -business in the City. 
She little knew what. - -"Last Monday I had finished for the day and was dressing in my -room above the opium den when I looked out of my window and saw, -to my horror and astonishment, that my wife was standing in the -street, with her eyes fixed full upon me. I gave a cry of -surprise, threw up my arms to cover my face, and, rushing to my -confidant, the Lascar, entreated him to prevent anyone from -coming up to me. I heard her voice downstairs, but I knew that -she could not ascend. Swiftly I threw off my clothes, pulled on -those of a beggar, and put on my pigments and wig. Even a wife's -eyes could not pierce so complete a disguise. But then it -occurred to me that there might be a search in the room, and that -the clothes might betray me. I threw open the window, reopening -by my violence a small cut which I had inflicted upon myself in -the bedroom that morning. Then I seized my coat, which was -weighted by the coppers which I had just transferred to it from -the leather bag in which I carried my takings. I hurled it out of -the window, and it disappeared into the Thames. The other clothes -would have followed, but at that moment there was a rush of -constables up the stair, and a few minutes after I found, rather, -I confess, to my relief, that instead of being identified as Mr. -Neville St. Clair, I was arrested as his murderer. - -"I do not know that there is anything else for me to explain. I -was determined to preserve my disguise as long as possible, and -hence my preference for a dirty face. Knowing that my wife would -be terribly anxious, I slipped off my ring and confided it to the -Lascar at a moment when no constable was watching me, together -with a hurried scrawl, telling her that she had no cause to -fear." - -"That note only reached her yesterday," said Holmes. - -"Good God! What a week she must have spent!" 
- -"The police have watched this Lascar," said Inspector Bradstreet, -"and I can quite understand that he might find it difficult to -post a letter unobserved. Probably he handed it to some sailor -customer of his, who forgot all about it for some days." - -"That was it," said Holmes, nodding approvingly; "I have no doubt -of it. But have you never been prosecuted for begging?" - -"Many times; but what was a fine to me?" - -"It must stop here, however," said Bradstreet. "If the police are -to hush this thing up, there must be no more of Hugh Boone." - -"I have sworn it by the most solemn oaths which a man can take." - -"In that case I think that it is probable that no further steps -may be taken. But if you are found again, then all must come out. -I am sure, Mr. Holmes, that we are very much indebted to you for -having cleared the matter up. I wish I knew how you reach your -results." - -"I reached this one," said my friend, "by sitting upon five -pillows and consuming an ounce of shag. I think, Watson, that if -we drive to Baker Street we shall just be in time for breakfast." - - - -VII. THE ADVENTURE OF THE BLUE CARBUNCLE - -I had called upon my friend Sherlock Holmes upon the second -morning after Christmas, with the intention of wishing him the -compliments of the season. He was lounging upon the sofa in a -purple dressing-gown, a pipe-rack within his reach upon the -right, and a pile of crumpled morning papers, evidently newly -studied, near at hand. Beside the couch was a wooden chair, and -on the angle of the back hung a very seedy and disreputable -hard-felt hat, much the worse for wear, and cracked in several -places. A lens and a forceps lying upon the seat of the chair -suggested that the hat had been suspended in this manner for the -purpose of examination. - -"You are engaged," said I; "perhaps I interrupt you." - -"Not at all. I am glad to have a friend with whom I can discuss -my results. 
The matter is a perfectly trivial one"--he jerked his -thumb in the direction of the old hat--"but there are points in -connection with it which are not entirely devoid of interest and -even of instruction." - -I seated myself in his armchair and warmed my hands before his -crackling fire, for a sharp frost had set in, and the windows -were thick with the ice crystals. "I suppose," I remarked, "that, -homely as it looks, this thing has some deadly story linked on to -it--that it is the clue which will guide you in the solution of -some mystery and the punishment of some crime." - -"No, no. No crime," said Sherlock Holmes, laughing. "Only one of -those whimsical little incidents which will happen when you have -four million human beings all jostling each other within the -space of a few square miles. Amid the action and reaction of so -dense a swarm of humanity, every possible combination of events -may be expected to take place, and many a little problem will be -presented which may be striking and bizarre without being -criminal. We have already had experience of such." - -"So much so," I remarked, "that of the last six cases which I -have added to my notes, three have been entirely free of any -legal crime." - -"Precisely. You allude to my attempt to recover the Irene Adler -papers, to the singular case of Miss Mary Sutherland, and to the -adventure of the man with the twisted lip. Well, I have no doubt -that this small matter will fall into the same innocent category. -You know Peterson, the commissionaire?" - -"Yes." - -"It is to him that this trophy belongs." - -"It is his hat." - -"No, no, he found it. Its owner is unknown. I beg that you will -look upon it not as a battered billycock but as an intellectual -problem. And, first, as to how it came here. It arrived upon -Christmas morning, in company with a good fat goose, which is, I -have no doubt, roasting at this moment in front of Peterson's -fire. 
The facts are these: about four o'clock on Christmas -morning, Peterson, who, as you know, is a very honest fellow, was -returning from some small jollification and was making his way -homeward down Tottenham Court Road. In front of him he saw, in -the gaslight, a tallish man, walking with a slight stagger, and -carrying a white goose slung over his shoulder. As he reached the -corner of Goodge Street, a row broke out between this stranger -and a little knot of roughs. One of the latter knocked off the -man's hat, on which he raised his stick to defend himself and, -swinging it over his head, smashed the shop window behind him. -Peterson had rushed forward to protect the stranger from his -assailants; but the man, shocked at having broken the window, and -seeing an official-looking person in uniform rushing towards him, -dropped his goose, took to his heels, and vanished amid the -labyrinth of small streets which lie at the back of Tottenham -Court Road. The roughs had also fled at the appearance of -Peterson, so that he was left in possession of the field of -battle, and also of the spoils of victory in the shape of this -battered hat and a most unimpeachable Christmas goose." - -"Which surely he restored to their owner?" - -"My dear fellow, there lies the problem. It is true that 'For -Mrs. Henry Baker' was printed upon a small card which was tied to -the bird's left leg, and it is also true that the initials 'H. -B.' are legible upon the lining of this hat, but as there are -some thousands of Bakers, and some hundreds of Henry Bakers in -this city of ours, it is not easy to restore lost property to any -one of them." - -"What, then, did Peterson do?" - -"He brought round both hat and goose to me on Christmas morning, -knowing that even the smallest problems are of interest to me. -The goose we retained until this morning, when there were signs -that, in spite of the slight frost, it would be well that it -should be eaten without unnecessary delay. 
Its finder has carried -it off, therefore, to fulfil the ultimate destiny of a goose, -while I continue to retain the hat of the unknown gentleman who -lost his Christmas dinner." - -"Did he not advertise?" - -"No." - -"Then, what clue could you have as to his identity?" - -"Only as much as we can deduce." - -"From his hat?" - -"Precisely." - -"But you are joking. What can you gather from this old battered -felt?" - -"Here is my lens. You know my methods. What can you gather -yourself as to the individuality of the man who has worn this -article?" - -I took the tattered object in my hands and turned it over rather -ruefully. It was a very ordinary black hat of the usual round -shape, hard and much the worse for wear. The lining had been of -red silk, but was a good deal discoloured. There was no maker's -name; but, as Holmes had remarked, the initials "H. B." were -scrawled upon one side. It was pierced in the brim for a -hat-securer, but the elastic was missing. For the rest, it was -cracked, exceedingly dusty, and spotted in several places, -although there seemed to have been some attempt to hide the -discoloured patches by smearing them with ink. - -"I can see nothing," said I, handing it back to my friend. - -"On the contrary, Watson, you can see everything. You fail, -however, to reason from what you see. You are too timid in -drawing your inferences." - -"Then, pray tell me what it is that you can infer from this hat?" - -He picked it up and gazed at it in the peculiar introspective -fashion which was characteristic of him. "It is perhaps less -suggestive than it might have been," he remarked, "and yet there -are a few inferences which are very distinct, and a few others -which represent at least a strong balance of probability. That -the man was highly intellectual is of course obvious upon the -face of it, and also that he was fairly well-to-do within the -last three years, although he has now fallen upon evil days. 
He -had foresight, but has less now than formerly, pointing to a -moral retrogression, which, when taken with the decline of his -fortunes, seems to indicate some evil influence, probably drink, -at work upon him. This may account also for the obvious fact that -his wife has ceased to love him." - -"My dear Holmes!" - -"He has, however, retained some degree of self-respect," he -continued, disregarding my remonstrance. "He is a man who leads a -sedentary life, goes out little, is out of training entirely, is -middle-aged, has grizzled hair which he has had cut within the -last few days, and which he anoints with lime-cream. These are -the more patent facts which are to be deduced from his hat. Also, -by the way, that it is extremely improbable that he has gas laid -on in his house." - -"You are certainly joking, Holmes." - -"Not in the least. Is it possible that even now, when I give you -these results, you are unable to see how they are attained?" - -"I have no doubt that I am very stupid, but I must confess that I -am unable to follow you. For example, how did you deduce that -this man was intellectual?" - -For answer Holmes clapped the hat upon his head. It came right -over the forehead and settled upon the bridge of his nose. "It is -a question of cubic capacity," said he; "a man with so large a -brain must have something in it." - -"The decline of his fortunes, then?" - -"This hat is three years old. These flat brims curled at the edge -came in then. It is a hat of the very best quality. Look at the -band of ribbed silk and the excellent lining. If this man could -afford to buy so expensive a hat three years ago, and has had no -hat since, then he has assuredly gone down in the world." - -"Well, that is clear enough, certainly. But how about the -foresight and the moral retrogression?" - -Sherlock Holmes laughed. "Here is the foresight," said he putting -his finger upon the little disc and loop of the hat-securer. -"They are never sold upon hats. 
If this man ordered one, it is a -sign of a certain amount of foresight, since he went out of his -way to take this precaution against the wind. But since we see -that he has broken the elastic and has not troubled to replace -it, it is obvious that he has less foresight now than formerly, -which is a distinct proof of a weakening nature. On the other -hand, he has endeavoured to conceal some of these stains upon the -felt by daubing them with ink, which is a sign that he has not -entirely lost his self-respect." - -"Your reasoning is certainly plausible." - -"The further points, that he is middle-aged, that his hair is -grizzled, that it has been recently cut, and that he uses -lime-cream, are all to be gathered from a close examination of the -lower part of the lining. The lens discloses a large number of -hair-ends, clean cut by the scissors of the barber. They all -appear to be adhesive, and there is a distinct odour of -lime-cream. This dust, you will observe, is not the gritty, grey -dust of the street but the fluffy brown dust of the house, -showing that it has been hung up indoors most of the time, while -the marks of moisture upon the inside are proof positive that the -wearer perspired very freely, and could therefore, hardly be in -the best of training." - -"But his wife--you said that she had ceased to love him." - -"This hat has not been brushed for weeks. When I see you, my dear -Watson, with a week's accumulation of dust upon your hat, and -when your wife allows you to go out in such a state, I shall fear -that you also have been unfortunate enough to lose your wife's -affection." - -"But he might be a bachelor." - -"Nay, he was bringing home the goose as a peace-offering to his -wife. Remember the card upon the bird's leg." - -"You have an answer to everything. But how on earth do you deduce -that the gas is not laid on in his house?" 
- -"One tallow stain, or even two, might come by chance; but when I -see no less than five, I think that there can be little doubt -that the individual must be brought into frequent contact with -burning tallow--walks upstairs at night probably with his hat in -one hand and a guttering candle in the other. Anyhow, he never -got tallow-stains from a gas-jet. Are you satisfied?" - -"Well, it is very ingenious," said I, laughing; "but since, as -you said just now, there has been no crime committed, and no harm -done save the loss of a goose, all this seems to be rather a -waste of energy." - -Sherlock Holmes had opened his mouth to reply, when the door flew -open, and Peterson, the commissionaire, rushed into the apartment -with flushed cheeks and the face of a man who is dazed with -astonishment. - -"The goose, Mr. Holmes! The goose, sir!" he gasped. - -"Eh? What of it, then? Has it returned to life and flapped off -through the kitchen window?" Holmes twisted himself round upon -the sofa to get a fairer view of the man's excited face. - -"See here, sir! See what my wife found in its crop!" He held out -his hand and displayed upon the centre of the palm a brilliantly -scintillating blue stone, rather smaller than a bean in size, but -of such purity and radiance that it twinkled like an electric -point in the dark hollow of his hand. - -Sherlock Holmes sat up with a whistle. "By Jove, Peterson!" said -he, "this is treasure trove indeed. I suppose you know what you -have got?" - -"A diamond, sir? A precious stone. It cuts into glass as though -it were putty." - -"It's more than a precious stone. It is the precious stone." - -"Not the Countess of Morcar's blue carbuncle!" I ejaculated. - -"Precisely so. I ought to know its size and shape, seeing that I -have read the advertisement about it in The Times every day -lately. 
It is absolutely unique, and its value can only be -conjectured, but the reward offered of 1000 pounds is certainly -not within a twentieth part of the market price." - -"A thousand pounds! Great Lord of mercy!" The commissionaire -plumped down into a chair and stared from one to the other of us. - -"That is the reward, and I have reason to know that there are -sentimental considerations in the background which would induce -the Countess to part with half her fortune if she could but -recover the gem." - -"It was lost, if I remember aright, at the Hotel Cosmopolitan," I -remarked. - -"Precisely so, on December 22nd, just five days ago. John Horner, -a plumber, was accused of having abstracted it from the lady's -jewel-case. The evidence against him was so strong that the case -has been referred to the Assizes. I have some account of the -matter here, I believe." He rummaged amid his newspapers, -glancing over the dates, until at last he smoothed one out, -doubled it over, and read the following paragraph: - -"Hotel Cosmopolitan Jewel Robbery. John Horner, 26, plumber, was -brought up upon the charge of having upon the 22nd inst., -abstracted from the jewel-case of the Countess of Morcar the -valuable gem known as the blue carbuncle. James Ryder, -upper-attendant at the hotel, gave his evidence to the effect -that he had shown Horner up to the dressing-room of the Countess -of Morcar upon the day of the robbery in order that he might -solder the second bar of the grate, which was loose. He had -remained with Horner some little time, but had finally been -called away. On returning, he found that Horner had disappeared, -that the bureau had been forced open, and that the small morocco -casket in which, as it afterwards transpired, the Countess was -accustomed to keep her jewel, was lying empty upon the -dressing-table. Ryder instantly gave the alarm, and Horner was -arrested the same evening; but the stone could not be found -either upon his person or in his rooms. 
Catherine Cusack, maid to -the Countess, deposed to having heard Ryder's cry of dismay on -discovering the robbery, and to having rushed into the room, -where she found matters as described by the last witness. -Inspector Bradstreet, B division, gave evidence as to the arrest -of Horner, who struggled frantically, and protested his innocence -in the strongest terms. Evidence of a previous conviction for -robbery having been given against the prisoner, the magistrate -refused to deal summarily with the offence, but referred it to -the Assizes. Horner, who had shown signs of intense emotion -during the proceedings, fainted away at the conclusion and was -carried out of court." - -"Hum! So much for the police-court," said Holmes thoughtfully, -tossing aside the paper. "The question for us now to solve is the -sequence of events leading from a rifled jewel-case at one end to -the crop of a goose in Tottenham Court Road at the other. You -see, Watson, our little deductions have suddenly assumed a much -more important and less innocent aspect. Here is the stone; the -stone came from the goose, and the goose came from Mr. Henry -Baker, the gentleman with the bad hat and all the other -characteristics with which I have bored you. So now we must set -ourselves very seriously to finding this gentleman and -ascertaining what part he has played in this little mystery. To -do this, we must try the simplest means first, and these lie -undoubtedly in an advertisement in all the evening papers. If -this fail, I shall have recourse to other methods." - -"What will you say?" - -"Give me a pencil and that slip of paper. Now, then: 'Found at -the corner of Goodge Street, a goose and a black felt hat. Mr. -Henry Baker can have the same by applying at 6:30 this evening at -221B, Baker Street.' That is clear and concise." - -"Very. But will he see it?" - -"Well, he is sure to keep an eye on the papers, since, to a poor -man, the loss was a heavy one. 
He was clearly so scared by his -mischance in breaking the window and by the approach of Peterson -that he thought of nothing but flight, but since then he must -have bitterly regretted the impulse which caused him to drop his -bird. Then, again, the introduction of his name will cause him to -see it, for everyone who knows him will direct his attention to -it. Here you are, Peterson, run down to the advertising agency -and have this put in the evening papers." - -"In which, sir?" - -"Oh, in the Globe, Star, Pall Mall, St. James's, Evening News, -Standard, Echo, and any others that occur to you." - -"Very well, sir. And this stone?" - -"Ah, yes, I shall keep the stone. Thank you. And, I say, -Peterson, just buy a goose on your way back and leave it here -with me, for we must have one to give to this gentleman in place -of the one which your family is now devouring." - -When the commissionaire had gone, Holmes took up the stone and -held it against the light. "It's a bonny thing," said he. "Just -see how it glints and sparkles. Of course it is a nucleus and -focus of crime. Every good stone is. They are the devil's pet -baits. In the larger and older jewels every facet may stand for a -bloody deed. This stone is not yet twenty years old. It was found -in the banks of the Amoy River in southern China and is remarkable -in having every characteristic of the carbuncle, save that it is -blue in shade instead of ruby red. In spite of its youth, it has -already a sinister history. There have been two murders, a -vitriol-throwing, a suicide, and several robberies brought about -for the sake of this forty-grain weight of crystallised charcoal. -Who would think that so pretty a toy would be a purveyor to the -gallows and the prison? I'll lock it up in my strong box now and -drop a line to the Countess to say that we have it." - -"Do you think that this man Horner is innocent?" - -"I cannot tell." 
- -"Well, then, do you imagine that this other one, Henry Baker, had -anything to do with the matter?" - -"It is, I think, much more likely that Henry Baker is an -absolutely innocent man, who had no idea that the bird which he -was carrying was of considerably more value than if it were made -of solid gold. That, however, I shall determine by a very simple -test if we have an answer to our advertisement." - -"And you can do nothing until then?" - -"Nothing." - -"In that case I shall continue my professional round. But I shall -come back in the evening at the hour you have mentioned, for I -should like to see the solution of so tangled a business." - -"Very glad to see you. I dine at seven. There is a woodcock, I -believe. By the way, in view of recent occurrences, perhaps I -ought to ask Mrs. Hudson to examine its crop." - -I had been delayed at a case, and it was a little after half-past -six when I found myself in Baker Street once more. As I -approached the house I saw a tall man in a Scotch bonnet with a -coat which was buttoned up to his chin waiting outside in the -bright semicircle which was thrown from the fanlight. Just as I -arrived the door was opened, and we were shown up together to -Holmes' room. - -"Mr. Henry Baker, I believe," said he, rising from his armchair -and greeting his visitor with the easy air of geniality which he -could so readily assume. "Pray take this chair by the fire, Mr. -Baker. It is a cold night, and I observe that your circulation is -more adapted for summer than for winter. Ah, Watson, you have -just come at the right time. Is that your hat, Mr. Baker?" - -"Yes, sir, that is undoubtedly my hat." - -He was a large man with rounded shoulders, a massive head, and a -broad, intelligent face, sloping down to a pointed beard of -grizzled brown. A touch of red in nose and cheeks, with a slight -tremor of his extended hand, recalled Holmes' surmise as to his -habits. 
His rusty black frock-coat was buttoned right up in -front, with the collar turned up, and his lank wrists protruded -from his sleeves without a sign of cuff or shirt. He spoke in a -slow staccato fashion, choosing his words with care, and gave the -impression generally of a man of learning and letters who had had -ill-usage at the hands of fortune. - -"We have retained these things for some days," said Holmes, -"because we expected to see an advertisement from you giving your -address. I am at a loss to know now why you did not advertise." - -Our visitor gave a rather shamefaced laugh. "Shillings have not -been so plentiful with me as they once were," he remarked. "I had -no doubt that the gang of roughs who assaulted me had carried off -both my hat and the bird. I did not care to spend more money in a -hopeless attempt at recovering them." - -"Very naturally. By the way, about the bird, we were compelled to -eat it." - -"To eat it!" Our visitor half rose from his chair in his -excitement. - -"Yes, it would have been of no use to anyone had we not done so. -But I presume that this other goose upon the sideboard, which is -about the same weight and perfectly fresh, will answer your -purpose equally well?" - -"Oh, certainly, certainly," answered Mr. Baker with a sigh of -relief. - -"Of course, we still have the feathers, legs, crop, and so on of -your own bird, so if you wish--" - -The man burst into a hearty laugh. "They might be useful to me as -relics of my adventure," said he, "but beyond that I can hardly -see what use the disjecta membra of my late acquaintance are -going to be to me. No, sir, I think that, with your permission, I -will confine my attentions to the excellent bird which I perceive -upon the sideboard." - -Sherlock Holmes glanced sharply across at me with a slight shrug -of his shoulders. - -"There is your hat, then, and there your bird," said he. "By the -way, would it bore you to tell me where you got the other one -from? 
I am somewhat of a fowl fancier, and I have seldom seen a -better grown goose." - -"Certainly, sir," said Baker, who had risen and tucked his newly -gained property under his arm. "There are a few of us who -frequent the Alpha Inn, near the Museum--we are to be found in -the Museum itself during the day, you understand. This year our -good host, Windigate by name, instituted a goose club, by which, -on consideration of some few pence every week, we were each to -receive a bird at Christmas. My pence were duly paid, and the -rest is familiar to you. I am much indebted to you, sir, for a -Scotch bonnet is fitted neither to my years nor my gravity." With -a comical pomposity of manner he bowed solemnly to both of us and -strode off upon his way. - -"So much for Mr. Henry Baker," said Holmes when he had closed the -door behind him. "It is quite certain that he knows nothing -whatever about the matter. Are you hungry, Watson?" - -"Not particularly." - -"Then I suggest that we turn our dinner into a supper and follow -up this clue while it is still hot." - -"By all means." - -It was a bitter night, so we drew on our ulsters and wrapped -cravats about our throats. Outside, the stars were shining coldly -in a cloudless sky, and the breath of the passers-by blew out -into smoke like so many pistol shots. Our footfalls rang out -crisply and loudly as we swung through the doctors' quarter, -Wimpole Street, Harley Street, and so through Wigmore Street into -Oxford Street. In a quarter of an hour we were in Bloomsbury at -the Alpha Inn, which is a small public-house at the corner of one -of the streets which runs down into Holborn. Holmes pushed open -the door of the private bar and ordered two glasses of beer from -the ruddy-faced, white-aproned landlord. - -"Your beer should be excellent if it is as good as your geese," -said he. - -"My geese!" The man seemed surprised. - -"Yes. I was speaking only half an hour ago to Mr. Henry Baker, -who was a member of your goose club." 
- -"Ah! yes, I see. But you see, sir, them's not our geese." - -"Indeed! Whose, then?" - -"Well, I got the two dozen from a salesman in Covent Garden." - -"Indeed? I know some of them. Which was it?" - -"Breckinridge is his name." - -"Ah! I don't know him. Well, here's your good health landlord, -and prosperity to your house. Good-night." - -"Now for Mr. Breckinridge," he continued, buttoning up his coat -as we came out into the frosty air. "Remember, Watson that though -we have so homely a thing as a goose at one end of this chain, we -have at the other a man who will certainly get seven years' penal -servitude unless we can establish his innocence. It is possible -that our inquiry may but confirm his guilt; but, in any case, we -have a line of investigation which has been missed by the police, -and which a singular chance has placed in our hands. Let us -follow it out to the bitter end. Faces to the south, then, and -quick march!" - -We passed across Holborn, down Endell Street, and so through a -zigzag of slums to Covent Garden Market. One of the largest -stalls bore the name of Breckinridge upon it, and the proprietor -a horsey-looking man, with a sharp face and trim side-whiskers was -helping a boy to put up the shutters. - -"Good-evening. It's a cold night," said Holmes. - -The salesman nodded and shot a questioning glance at my -companion. - -"Sold out of geese, I see," continued Holmes, pointing at the -bare slabs of marble. - -"Let you have five hundred to-morrow morning." - -"That's no good." - -"Well, there are some on the stall with the gas-flare." - -"Ah, but I was recommended to you." - -"Who by?" - -"The landlord of the Alpha." - -"Oh, yes; I sent him a couple of dozen." - -"Fine birds they were, too. Now where did you get them from?" - -To my surprise the question provoked a burst of anger from the -salesman. - -"Now, then, mister," said he, with his head cocked and his arms -akimbo, "what are you driving at? Let's have it straight, now." 
- -"It is straight enough. I should like to know who sold you the -geese which you supplied to the Alpha." - -"Well then, I shan't tell you. So now!" - -"Oh, it is a matter of no importance; but I don't know why you -should be so warm over such a trifle." - -"Warm! You'd be as warm, maybe, if you were as pestered as I am. -When I pay good money for a good article there should be an end -of the business; but it's 'Where are the geese?' and 'Who did you -sell the geese to?' and 'What will you take for the geese?' One -would think they were the only geese in the world, to hear the -fuss that is made over them." - -"Well, I have no connection with any other people who have been -making inquiries," said Holmes carelessly. "If you won't tell us -the bet is off, that is all. But I'm always ready to back my -opinion on a matter of fowls, and I have a fiver on it that the -bird I ate is country bred." - -"Well, then, you've lost your fiver, for it's town bred," snapped -the salesman. - -"It's nothing of the kind." - -"I say it is." - -"I don't believe it." - -"D'you think you know more about fowls than I, who have handled -them ever since I was a nipper? I tell you, all those birds that -went to the Alpha were town bred." - -"You'll never persuade me to believe that." - -"Will you bet, then?" - -"It's merely taking your money, for I know that I am right. But -I'll have a sovereign on with you, just to teach you not to be -obstinate." - -The salesman chuckled grimly. "Bring me the books, Bill," said -he. - -The small boy brought round a small thin volume and a great -greasy-backed one, laying them out together beneath the hanging -lamp. - -"Now then, Mr. Cocksure," said the salesman, "I thought that I -was out of geese, but before I finish you'll find that there is -still one left in my shop. You see this little book?" - -"Well?" - -"That's the list of the folk from whom I buy. D'you see? 
Well, -then, here on this page are the country folk, and the numbers -after their names are where their accounts are in the big ledger. -Now, then! You see this other page in red ink? Well, that is a -list of my town suppliers. Now, look at that third name. Just -read it out to me." - -"Mrs. Oakshott, 117, Brixton Road--249," read Holmes. - -"Quite so. Now turn that up in the ledger." - -Holmes turned to the page indicated. "Here you are, 'Mrs. -Oakshott, 117, Brixton Road, egg and poultry supplier.'" - -"Now, then, what's the last entry?" - -"'December 22nd. Twenty-four geese at 7s. 6d.'" - -"Quite so. There you are. And underneath?" - -"'Sold to Mr. Windigate of the Alpha, at 12s.'" - -"What have you to say now?" - -Sherlock Holmes looked deeply chagrined. He drew a sovereign from -his pocket and threw it down upon the slab, turning away with the -air of a man whose disgust is too deep for words. A few yards off -he stopped under a lamp-post and laughed in the hearty, noiseless -fashion which was peculiar to him. - -"When you see a man with whiskers of that cut and the 'Pink 'un' -protruding out of his pocket, you can always draw him by a bet," -said he. "I daresay that if I had put 100 pounds down in front of -him, that man would not have given me such complete information -as was drawn from him by the idea that he was doing me on a -wager. Well, Watson, we are, I fancy, nearing the end of our -quest, and the only point which remains to be determined is -whether we should go on to this Mrs. Oakshott to-night, or -whether we should reserve it for to-morrow. It is clear from what -that surly fellow said that there are others besides ourselves -who are anxious about the matter, and I should--" - -His remarks were suddenly cut short by a loud hubbub which broke -out from the stall which we had just left. 
Turning round we saw a -little rat-faced fellow standing in the centre of the circle of -yellow light which was thrown by the swinging lamp, while -Breckinridge, the salesman, framed in the door of his stall, was -shaking his fists fiercely at the cringing figure. - -"I've had enough of you and your geese," he shouted. "I wish you -were all at the devil together. If you come pestering me any more -with your silly talk I'll set the dog at you. You bring Mrs. -Oakshott here and I'll answer her, but what have you to do with -it? Did I buy the geese off you?" - -"No; but one of them was mine all the same," whined the little -man. - -"Well, then, ask Mrs. Oakshott for it." - -"She told me to ask you." - -"Well, you can ask the King of Proosia, for all I care. I've had -enough of it. Get out of this!" He rushed fiercely forward, and -the inquirer flitted away into the darkness. - -"Ha! this may save us a visit to Brixton Road," whispered Holmes. -"Come with me, and we will see what is to be made of this -fellow." Striding through the scattered knots of people who -lounged round the flaring stalls, my companion speedily overtook -the little man and touched him upon the shoulder. He sprang -round, and I could see in the gas-light that every vestige of -colour had been driven from his face. - -"Who are you, then? What do you want?" he asked in a quavering -voice. - -"You will excuse me," said Holmes blandly, "but I could not help -overhearing the questions which you put to the salesman just now. -I think that I could be of assistance to you." - -"You? Who are you? How could you know anything of the matter?" - -"My name is Sherlock Holmes. It is my business to know what other -people don't know." - -"But you can know nothing of this?" - -"Excuse me, I know everything of it. You are endeavouring to -trace some geese which were sold by Mrs. Oakshott, of Brixton -Road, to a salesman named Breckinridge, by him in turn to Mr. 
-Windigate, of the Alpha, and by him to his club, of which Mr. -Henry Baker is a member." - -"Oh, sir, you are the very man whom I have longed to meet," cried -the little fellow with outstretched hands and quivering fingers. -"I can hardly explain to you how interested I am in this matter." - -Sherlock Holmes hailed a four-wheeler which was passing. "In that -case we had better discuss it in a cosy room rather than in this -wind-swept market-place," said he. "But pray tell me, before we -go farther, who it is that I have the pleasure of assisting." - -The man hesitated for an instant. "My name is John Robinson," he -answered with a sidelong glance. - -"No, no; the real name," said Holmes sweetly. "It is always -awkward doing business with an alias." - -A flush sprang to the white cheeks of the stranger. "Well then," -said he, "my real name is James Ryder." - -"Precisely so. Head attendant at the Hotel Cosmopolitan. Pray -step into the cab, and I shall soon be able to tell you -everything which you would wish to know." - -The little man stood glancing from one to the other of us with -half-frightened, half-hopeful eyes, as one who is not sure -whether he is on the verge of a windfall or of a catastrophe. -Then he stepped into the cab, and in half an hour we were back in -the sitting-room at Baker Street. Nothing had been said during -our drive, but the high, thin breathing of our new companion, and -the claspings and unclaspings of his hands, spoke of the nervous -tension within him. - -"Here we are!" said Holmes cheerily as we filed into the room. -"The fire looks very seasonable in this weather. You look cold, -Mr. Ryder. Pray take the basket-chair. I will just put on my -slippers before we settle this little matter of yours. Now, then! -You want to know what became of those geese?" - -"Yes, sir." - -"Or rather, I fancy, of that goose. It was one bird, I imagine in -which you were interested--white, with a black bar across the -tail." 
- -Ryder quivered with emotion. "Oh, sir," he cried, "can you tell -me where it went to?" - -"It came here." - -"Here?" - -"Yes, and a most remarkable bird it proved. I don't wonder that -you should take an interest in it. It laid an egg after it was -dead--the bonniest, brightest little blue egg that ever was seen. -I have it here in my museum." - -Our visitor staggered to his feet and clutched the mantelpiece -with his right hand. Holmes unlocked his strong-box and held up -the blue carbuncle, which shone out like a star, with a cold, -brilliant, many-pointed radiance. Ryder stood glaring with a -drawn face, uncertain whether to claim or to disown it. - -"The game's up, Ryder," said Holmes quietly. "Hold up, man, or -you'll be into the fire! Give him an arm back into his chair, -Watson. He's not got blood enough to go in for felony with -impunity. Give him a dash of brandy. So! Now he looks a little -more human. What a shrimp it is, to be sure!" - -For a moment he had staggered and nearly fallen, but the brandy -brought a tinge of colour into his cheeks, and he sat staring -with frightened eyes at his accuser. - -"I have almost every link in my hands, and all the proofs which I -could possibly need, so there is little which you need tell me. -Still, that little may as well be cleared up to make the case -complete. You had heard, Ryder, of this blue stone of the -Countess of Morcar's?" - -"It was Catherine Cusack who told me of it," said he in a -crackling voice. - -"I see--her ladyship's waiting-maid. Well, the temptation of -sudden wealth so easily acquired was too much for you, as it has -been for better men before you; but you were not very scrupulous -in the means you used. It seems to me, Ryder, that there is the -making of a very pretty villain in you. You knew that this man -Horner, the plumber, had been concerned in some such matter -before, and that suspicion would rest the more readily upon him. -What did you do, then? 
You made some small job in my lady's -room--you and your confederate Cusack--and you managed that he -should be the man sent for. Then, when he had left, you rifled -the jewel-case, raised the alarm, and had this unfortunate man -arrested. You then--" - -Ryder threw himself down suddenly upon the rug and clutched at my -companion's knees. "For God's sake, have mercy!" he shrieked. -"Think of my father! Of my mother! It would break their hearts. I -never went wrong before! I never will again. I swear it. I'll -swear it on a Bible. Oh, don't bring it into court! For Christ's -sake, don't!" - -"Get back into your chair!" said Holmes sternly. "It is very well -to cringe and crawl now, but you thought little enough of this -poor Horner in the dock for a crime of which he knew nothing." - -"I will fly, Mr. Holmes. I will leave the country, sir. Then the -charge against him will break down." - -"Hum! We will talk about that. And now let us hear a true account -of the next act. How came the stone into the goose, and how came -the goose into the open market? Tell us the truth, for there lies -your only hope of safety." - -Ryder passed his tongue over his parched lips. "I will tell you -it just as it happened, sir," said he. "When Horner had been -arrested, it seemed to me that it would be best for me to get -away with the stone at once, for I did not know at what moment -the police might not take it into their heads to search me and my -room. There was no place about the hotel where it would be safe. -I went out, as if on some commission, and I made for my sister's -house. She had married a man named Oakshott, and lived in Brixton -Road, where she fattened fowls for the market. All the way there -every man I met seemed to me to be a policeman or a detective; -and, for all that it was a cold night, the sweat was pouring down -my face before I came to the Brixton Road. 
My sister asked me -what was the matter, and why I was so pale; but I told her that I -had been upset by the jewel robbery at the hotel. Then I went -into the back yard and smoked a pipe and wondered what it would -be best to do. - -"I had a friend once called Maudsley, who went to the bad, and -has just been serving his time in Pentonville. One day he had met -me, and fell into talk about the ways of thieves, and how they -could get rid of what they stole. I knew that he would be true to -me, for I knew one or two things about him; so I made up my mind -to go right on to Kilburn, where he lived, and take him into my -confidence. He would show me how to turn the stone into money. -But how to get to him in safety? I thought of the agonies I had -gone through in coming from the hotel. I might at any moment be -seized and searched, and there would be the stone in my waistcoat -pocket. I was leaning against the wall at the time and looking at -the geese which were waddling about round my feet, and suddenly -an idea came into my head which showed me how I could beat the -best detective that ever lived. - -"My sister had told me some weeks before that I might have the -pick of her geese for a Christmas present, and I knew that she -was always as good as her word. I would take my goose now, and in -it I would carry my stone to Kilburn. There was a little shed in -the yard, and behind this I drove one of the birds--a fine big -one, white, with a barred tail. I caught it, and prying its bill -open, I thrust the stone down its throat as far as my finger -could reach. The bird gave a gulp, and I felt the stone pass -along its gullet and down into its crop. But the creature flapped -and struggled, and out came my sister to know what was the -matter. As I turned to speak to her the brute broke loose and -fluttered off among the others. - -"'Whatever were you doing with that bird, Jem?' says she. 
- -"'Well,' said I, 'you said you'd give me one for Christmas, and I -was feeling which was the fattest.' - -"'Oh,' says she, 'we've set yours aside for you--Jem's bird, we -call it. It's the big white one over yonder. There's twenty-six -of them, which makes one for you, and one for us, and two dozen -for the market.' - -"'Thank you, Maggie,' says I; 'but if it is all the same to you, -I'd rather have that one I was handling just now.' - -"'The other is a good three pound heavier,' said she, 'and we -fattened it expressly for you.' - -"'Never mind. I'll have the other, and I'll take it now,' said I. - -"'Oh, just as you like,' said she, a little huffed. 'Which is it -you want, then?' - -"'That white one with the barred tail, right in the middle of the -flock.' - -"'Oh, very well. Kill it and take it with you.' - -"Well, I did what she said, Mr. Holmes, and I carried the bird -all the way to Kilburn. I told my pal what I had done, for he was -a man that it was easy to tell a thing like that to. He laughed -until he choked, and we got a knife and opened the goose. My -heart turned to water, for there was no sign of the stone, and I -knew that some terrible mistake had occurred. I left the bird, -rushed back to my sister's, and hurried into the back yard. There -was not a bird to be seen there. - -"'Where are they all, Maggie?' I cried. - -"'Gone to the dealer's, Jem.' - -"'Which dealer's?' - -"'Breckinridge, of Covent Garden.' - -"'But was there another with a barred tail?' I asked, 'the same -as the one I chose?' - -"'Yes, Jem; there were two barred-tailed ones, and I could never -tell them apart.' - -"Well, then, of course I saw it all, and I ran off as hard as my -feet would carry me to this man Breckinridge; but he had sold the -lot at once, and not one word would he tell me as to where they -had gone. You heard him yourselves to-night. Well, he has always -answered me like that. My sister thinks that I am going mad. -Sometimes I think that I am myself. 
And now--and now I am myself -a branded thief, without ever having touched the wealth for which -I sold my character. God help me! God help me!" He burst into -convulsive sobbing, with his face buried in his hands. - -There was a long silence, broken only by his heavy breathing and -by the measured tapping of Sherlock Holmes' finger-tips upon the -edge of the table. Then my friend rose and threw open the door. - -"Get out!" said he. - -"What, sir! Oh, Heaven bless you!" - -"No more words. Get out!" - -And no more words were needed. There was a rush, a clatter upon -the stairs, the bang of a door, and the crisp rattle of running -footfalls from the street. - -"After all, Watson," said Holmes, reaching up his hand for his -clay pipe, "I am not retained by the police to supply their -deficiencies. If Horner were in danger it would be another thing; -but this fellow will not appear against him, and the case must -collapse. I suppose that I am commuting a felony, but it is just -possible that I am saving a soul. This fellow will not go wrong -again; he is too terribly frightened. Send him to gaol now, and -you make him a gaol-bird for life. Besides, it is the season of -forgiveness. Chance has put in our way a most singular and -whimsical problem, and its solution is its own reward. If you -will have the goodness to touch the bell, Doctor, we will begin -another investigation, in which, also a bird will be the chief -feature." - - - -VIII. THE ADVENTURE OF THE SPECKLED BAND - -On glancing over my notes of the seventy odd cases in which I -have during the last eight years studied the methods of my friend -Sherlock Holmes, I find many tragic, some comic, a large number -merely strange, but none commonplace; for, working as he did -rather for the love of his art than for the acquirement of -wealth, he refused to associate himself with any investigation -which did not tend towards the unusual, and even the fantastic. 
-Of all these varied cases, however, I cannot recall any which -presented more singular features than that which was associated -with the well-known Surrey family of the Roylotts of Stoke Moran. -The events in question occurred in the early days of my -association with Holmes, when we were sharing rooms as bachelors -in Baker Street. It is possible that I might have placed them -upon record before, but a promise of secrecy was made at the -time, from which I have only been freed during the last month by -the untimely death of the lady to whom the pledge was given. It -is perhaps as well that the facts should now come to light, for I -have reasons to know that there are widespread rumours as to the -death of Dr. Grimesby Roylott which tend to make the matter even -more terrible than the truth. - -It was early in April in the year '83 that I woke one morning to -find Sherlock Holmes standing, fully dressed, by the side of my -bed. He was a late riser, as a rule, and as the clock on the -mantelpiece showed me that it was only a quarter-past seven, I -blinked up at him in some surprise, and perhaps just a little -resentment, for I was myself regular in my habits. - -"Very sorry to knock you up, Watson," said he, "but it's the -common lot this morning. Mrs. Hudson has been knocked up, she -retorted upon me, and I on you." - -"What is it, then--a fire?" - -"No; a client. It seems that a young lady has arrived in a -considerable state of excitement, who insists upon seeing me. She -is waiting now in the sitting-room. Now, when young ladies wander -about the metropolis at this hour of the morning, and knock -sleepy people up out of their beds, I presume that it is -something very pressing which they have to communicate. Should it -prove to be an interesting case, you would, I am sure, wish to -follow it from the outset. I thought, at any rate, that I should -call you and give you the chance." - -"My dear fellow, I would not miss it for anything." 
- -I had no keener pleasure than in following Holmes in his -professional investigations, and in admiring the rapid -deductions, as swift as intuitions, and yet always founded on a -logical basis with which he unravelled the problems which were -submitted to him. I rapidly threw on my clothes and was ready in -a few minutes to accompany my friend down to the sitting-room. A -lady dressed in black and heavily veiled, who had been sitting in -the window, rose as we entered. - -"Good-morning, madam," said Holmes cheerily. "My name is Sherlock -Holmes. This is my intimate friend and associate, Dr. Watson, -before whom you can speak as freely as before myself. Ha! I am -glad to see that Mrs. Hudson has had the good sense to light the -fire. Pray draw up to it, and I shall order you a cup of hot -coffee, for I observe that you are shivering." - -"It is not cold which makes me shiver," said the woman in a low -voice, changing her seat as requested. - -"What, then?" - -"It is fear, Mr. Holmes. It is terror." She raised her veil as -she spoke, and we could see that she was indeed in a pitiable -state of agitation, her face all drawn and grey, with restless -frightened eyes, like those of some hunted animal. Her features -and figure were those of a woman of thirty, but her hair was shot -with premature grey, and her expression was weary and haggard. -Sherlock Holmes ran her over with one of his quick, -all-comprehensive glances. - -"You must not fear," said he soothingly, bending forward and -patting her forearm. "We shall soon set matters right, I have no -doubt. You have come in by train this morning, I see." - -"You know me, then?" - -"No, but I observe the second half of a return ticket in the palm -of your left glove. You must have started early, and yet you had -a good drive in a dog-cart, along heavy roads, before you reached -the station." - -The lady gave a violent start and stared in bewilderment at my -companion. 
- -"There is no mystery, my dear madam," said he, smiling. "The left -arm of your jacket is spattered with mud in no less than seven -places. The marks are perfectly fresh. There is no vehicle save a -dog-cart which throws up mud in that way, and then only when you -sit on the left-hand side of the driver." - -"Whatever your reasons may be, you are perfectly correct," said -she. "I started from home before six, reached Leatherhead at -twenty past, and came in by the first train to Waterloo. Sir, I -can stand this strain no longer; I shall go mad if it continues. -I have no one to turn to--none, save only one, who cares for me, -and he, poor fellow, can be of little aid. I have heard of you, -Mr. Holmes; I have heard of you from Mrs. Farintosh, whom you -helped in the hour of her sore need. It was from her that I had -your address. Oh, sir, do you not think that you could help me, -too, and at least throw a little light through the dense darkness -which surrounds me? At present it is out of my power to reward -you for your services, but in a month or six weeks I shall be -married, with the control of my own income, and then at least you -shall not find me ungrateful." - -Holmes turned to his desk and, unlocking it, drew out a small -case-book, which he consulted. - -"Farintosh," said he. "Ah yes, I recall the case; it was -concerned with an opal tiara. I think it was before your time, -Watson. I can only say, madam, that I shall be happy to devote -the same care to your case as I did to that of your friend. As to -reward, my profession is its own reward; but you are at liberty -to defray whatever expenses I may be put to, at the time which -suits you best. And now I beg that you will lay before us -everything that may help us in forming an opinion upon the -matter." - -"Alas!" 
replied our visitor, "the very horror of my situation -lies in the fact that my fears are so vague, and my suspicions -depend so entirely upon small points, which might seem trivial to -another, that even he to whom of all others I have a right to -look for help and advice looks upon all that I tell him about it -as the fancies of a nervous woman. He does not say so, but I can -read it from his soothing answers and averted eyes. But I have -heard, Mr. Holmes, that you can see deeply into the manifold -wickedness of the human heart. You may advise me how to walk amid -the dangers which encompass me." - -"I am all attention, madam." - -"My name is Helen Stoner, and I am living with my stepfather, who -is the last survivor of one of the oldest Saxon families in -England, the Roylotts of Stoke Moran, on the western border of -Surrey." - -Holmes nodded his head. "The name is familiar to me," said he. - -"The family was at one time among the richest in England, and the -estates extended over the borders into Berkshire in the north, -and Hampshire in the west. In the last century, however, four -successive heirs were of a dissolute and wasteful disposition, -and the family ruin was eventually completed by a gambler in the -days of the Regency. Nothing was left save a few acres of ground, -and the two-hundred-year-old house, which is itself crushed under -a heavy mortgage. The last squire dragged out his existence -there, living the horrible life of an aristocratic pauper; but -his only son, my stepfather, seeing that he must adapt himself to -the new conditions, obtained an advance from a relative, which -enabled him to take a medical degree and went out to Calcutta, -where, by his professional skill and his force of character, he -established a large practice. In a fit of anger, however, caused -by some robberies which had been perpetrated in the house, he -beat his native butler to death and narrowly escaped a capital -sentence. 
As it was, he suffered a long term of imprisonment and -afterwards returned to England a morose and disappointed man. - -"When Dr. Roylott was in India he married my mother, Mrs. Stoner, -the young widow of Major-General Stoner, of the Bengal Artillery. -My sister Julia and I were twins, and we were only two years old -at the time of my mother's re-marriage. She had a considerable -sum of money--not less than 1000 pounds a year--and this she -bequeathed to Dr. Roylott entirely while we resided with him, -with a provision that a certain annual sum should be allowed to -each of us in the event of our marriage. Shortly after our return -to England my mother died--she was killed eight years ago in a -railway accident near Crewe. Dr. Roylott then abandoned his -attempts to establish himself in practice in London and took us -to live with him in the old ancestral house at Stoke Moran. The -money which my mother had left was enough for all our wants, and -there seemed to be no obstacle to our happiness. - -"But a terrible change came over our stepfather about this time. -Instead of making friends and exchanging visits with our -neighbours, who had at first been overjoyed to see a Roylott of -Stoke Moran back in the old family seat, he shut himself up in -his house and seldom came out save to indulge in ferocious -quarrels with whoever might cross his path. Violence of temper -approaching to mania has been hereditary in the men of the -family, and in my stepfather's case it had, I believe, been -intensified by his long residence in the tropics. A series of -disgraceful brawls took place, two of which ended in the -police-court, until at last he became the terror of the village, -and the folks would fly at his approach, for he is a man of -immense strength, and absolutely uncontrollable in his anger. 
- -"Last week he hurled the local blacksmith over a parapet into a -stream, and it was only by paying over all the money which I -could gather together that I was able to avert another public -exposure. He had no friends at all save the wandering gipsies, -and he would give these vagabonds leave to encamp upon the few -acres of bramble-covered land which represent the family estate, -and would accept in return the hospitality of their tents, -wandering away with them sometimes for weeks on end. He has a -passion also for Indian animals, which are sent over to him by a -correspondent, and he has at this moment a cheetah and a baboon, -which wander freely over his grounds and are feared by the -villagers almost as much as their master. - -"You can imagine from what I say that my poor sister Julia and I -had no great pleasure in our lives. No servant would stay with -us, and for a long time we did all the work of the house. She was -but thirty at the time of her death, and yet her hair had already -begun to whiten, even as mine has." - -"Your sister is dead, then?" - -"She died just two years ago, and it is of her death that I wish -to speak to you. You can understand that, living the life which I -have described, we were little likely to see anyone of our own -age and position. We had, however, an aunt, my mother's maiden -sister, Miss Honoria Westphail, who lives near Harrow, and we -were occasionally allowed to pay short visits at this lady's -house. Julia went there at Christmas two years ago, and met there -a half-pay major of marines, to whom she became engaged. My -stepfather learned of the engagement when my sister returned and -offered no objection to the marriage; but within a fortnight of -the day which had been fixed for the wedding, the terrible event -occurred which has deprived me of my only companion." 
- -Sherlock Holmes had been leaning back in his chair with his eyes -closed and his head sunk in a cushion, but he half opened his -lids now and glanced across at his visitor. - -"Pray be precise as to details," said he. - -"It is easy for me to be so, for every event of that dreadful -time is seared into my memory. The manor-house is, as I have -already said, very old, and only one wing is now inhabited. The -bedrooms in this wing are on the ground floor, the sitting-rooms -being in the central block of the buildings. Of these bedrooms -the first is Dr. Roylott's, the second my sister's, and the third -my own. There is no communication between them, but they all open -out into the same corridor. Do I make myself plain?" - -"Perfectly so." - -"The windows of the three rooms open out upon the lawn. That -fatal night Dr. Roylott had gone to his room early, though we -knew that he had not retired to rest, for my sister was troubled -by the smell of the strong Indian cigars which it was his custom -to smoke. She left her room, therefore, and came into mine, where -she sat for some time, chatting about her approaching wedding. At -eleven o'clock she rose to leave me, but she paused at the door -and looked back. - -"'Tell me, Helen,' said she, 'have you ever heard anyone whistle -in the dead of the night?' - -"'Never,' said I. - -"'I suppose that you could not possibly whistle, yourself, in -your sleep?' - -"'Certainly not. But why?' - -"'Because during the last few nights I have always, about three -in the morning, heard a low, clear whistle. I am a light sleeper, -and it has awakened me. I cannot tell where it came from--perhaps -from the next room, perhaps from the lawn. I thought that I would -just ask you whether you had heard it.' - -"'No, I have not. It must be those wretched gipsies in the -plantation.' - -"'Very likely. And yet if it were on the lawn, I wonder that you -did not hear it also.' - -"'Ah, but I sleep more heavily than you.' 
- -"'Well, it is of no great consequence, at any rate.' She smiled -back at me, closed my door, and a few moments later I heard her -key turn in the lock." - -"Indeed," said Holmes. "Was it your custom always to lock -yourselves in at night?" - -"Always." - -"And why?" - -"I think that I mentioned to you that the doctor kept a cheetah -and a baboon. We had no feeling of security unless our doors were -locked." - -"Quite so. Pray proceed with your statement." - -"I could not sleep that night. A vague feeling of impending -misfortune impressed me. My sister and I, you will recollect, -were twins, and you know how subtle are the links which bind two -souls which are so closely allied. It was a wild night. The wind -was howling outside, and the rain was beating and splashing -against the windows. Suddenly, amid all the hubbub of the gale, -there burst forth the wild scream of a terrified woman. I knew -that it was my sister's voice. I sprang from my bed, wrapped a -shawl round me, and rushed into the corridor. As I opened my door -I seemed to hear a low whistle, such as my sister described, and -a few moments later a clanging sound, as if a mass of metal had -fallen. As I ran down the passage, my sister's door was unlocked, -and revolved slowly upon its hinges. I stared at it -horror-stricken, not knowing what was about to issue from it. By -the light of the corridor-lamp I saw my sister appear at the -opening, her face blanched with terror, her hands groping for -help, her whole figure swaying to and fro like that of a -drunkard. I ran to her and threw my arms round her, but at that -moment her knees seemed to give way and she fell to the ground. -She writhed as one who is in terrible pain, and her limbs were -dreadfully convulsed. At first I thought that she had not -recognised me, but as I bent over her she suddenly shrieked out -in a voice which I shall never forget, 'Oh, my God! Helen! It was -the band! The speckled band!' 
There was something else which she -would fain have said, and she stabbed with her finger into the -air in the direction of the doctor's room, but a fresh convulsion -seized her and choked her words. I rushed out, calling loudly for -my stepfather, and I met him hastening from his room in his -dressing-gown. When he reached my sister's side she was -unconscious, and though he poured brandy down her throat and sent -for medical aid from the village, all efforts were in vain, for -she slowly sank and died without having recovered her -consciousness. Such was the dreadful end of my beloved sister." - -"One moment," said Holmes, "are you sure about this whistle and -metallic sound? Could you swear to it?" - -"That was what the county coroner asked me at the inquiry. It is -my strong impression that I heard it, and yet, among the crash of -the gale and the creaking of an old house, I may possibly have -been deceived." - -"Was your sister dressed?" - -"No, she was in her night-dress. In her right hand was found the -charred stump of a match, and in her left a match-box." - -"Showing that she had struck a light and looked about her when -the alarm took place. That is important. And what conclusions did -the coroner come to?" - -"He investigated the case with great care, for Dr. Roylott's -conduct had long been notorious in the county, but he was unable -to find any satisfactory cause of death. My evidence showed that -the door had been fastened upon the inner side, and the windows -were blocked by old-fashioned shutters with broad iron bars, -which were secured every night. The walls were carefully sounded, -and were shown to be quite solid all round, and the flooring was -also thoroughly examined, with the same result. The chimney is -wide, but is barred up by four large staples. It is certain, -therefore, that my sister was quite alone when she met her end. -Besides, there were no marks of any violence upon her." - -"How about poison?" 
- -"The doctors examined her for it, but without success." - -"What do you think that this unfortunate lady died of, then?" - -"It is my belief that she died of pure fear and nervous shock, -though what it was that frightened her I cannot imagine." - -"Were there gipsies in the plantation at the time?" - -"Yes, there are nearly always some there." - -"Ah, and what did you gather from this allusion to a band--a -speckled band?" - -"Sometimes I have thought that it was merely the wild talk of -delirium, sometimes that it may have referred to some band of -people, perhaps to these very gipsies in the plantation. I do not -know whether the spotted handkerchiefs which so many of them wear -over their heads might have suggested the strange adjective which -she used." - -Holmes shook his head like a man who is far from being satisfied. - -"These are very deep waters," said he; "pray go on with your -narrative." - -"Two years have passed since then, and my life has been until -lately lonelier than ever. A month ago, however, a dear friend, -whom I have known for many years, has done me the honour to ask -my hand in marriage. His name is Armitage--Percy Armitage--the -second son of Mr. Armitage, of Crane Water, near Reading. My -stepfather has offered no opposition to the match, and we are to -be married in the course of the spring. Two days ago some repairs -were started in the west wing of the building, and my bedroom -wall has been pierced, so that I have had to move into the -chamber in which my sister died, and to sleep in the very bed in -which she slept. Imagine, then, my thrill of terror when last -night, as I lay awake, thinking over her terrible fate, I -suddenly heard in the silence of the night the low whistle which -had been the herald of her own death. I sprang up and lit the -lamp, but nothing was to be seen in the room. 
I was too shaken to -go to bed again, however, so I dressed, and as soon as it was -daylight I slipped down, got a dog-cart at the Crown Inn, which -is opposite, and drove to Leatherhead, from whence I have come on -this morning with the one object of seeing you and asking your -advice." - -"You have done wisely," said my friend. "But have you told me -all?" - -"Yes, all." - -"Miss Roylott, you have not. You are screening your stepfather." - -"Why, what do you mean?" - -For answer Holmes pushed back the frill of black lace which -fringed the hand that lay upon our visitor's knee. Five little -livid spots, the marks of four fingers and a thumb, were printed -upon the white wrist. - -"You have been cruelly used," said Holmes. - -The lady coloured deeply and covered over her injured wrist. "He -is a hard man," she said, "and perhaps he hardly knows his own -strength." - -There was a long silence, during which Holmes leaned his chin -upon his hands and stared into the crackling fire. - -"This is a very deep business," he said at last. "There are a -thousand details which I should desire to know before I decide -upon our course of action. Yet we have not a moment to lose. If -we were to come to Stoke Moran to-day, would it be possible for -us to see over these rooms without the knowledge of your -stepfather?" - -"As it happens, he spoke of coming into town to-day upon some -most important business. It is probable that he will be away all -day, and that there would be nothing to disturb you. We have a -housekeeper now, but she is old and foolish, and I could easily -get her out of the way." - -"Excellent. You are not averse to this trip, Watson?" - -"By no means." - -"Then we shall both come. What are you going to do yourself?" - -"I have one or two things which I would wish to do now that I am -in town. But I shall return by the twelve o'clock train, so as to -be there in time for your coming." - -"And you may expect us early in the afternoon. 
I have myself some -small business matters to attend to. Will you not wait and -breakfast?" - -"No, I must go. My heart is lightened already since I have -confided my trouble to you. I shall look forward to seeing you -again this afternoon." She dropped her thick black veil over her -face and glided from the room. - -"And what do you think of it all, Watson?" asked Sherlock Holmes, -leaning back in his chair. - -"It seems to me to be a most dark and sinister business." - -"Dark enough and sinister enough." - -"Yet if the lady is correct in saying that the flooring and walls -are sound, and that the door, window, and chimney are impassable, -then her sister must have been undoubtedly alone when she met her -mysterious end." - -"What becomes, then, of these nocturnal whistles, and what of the -very peculiar words of the dying woman?" - -"I cannot think." - -"When you combine the ideas of whistles at night, the presence of -a band of gipsies who are on intimate terms with this old doctor, -the fact that we have every reason to believe that the doctor has -an interest in preventing his stepdaughter's marriage, the dying -allusion to a band, and, finally, the fact that Miss Helen Stoner -heard a metallic clang, which might have been caused by one of -those metal bars that secured the shutters falling back into its -place, I think that there is good ground to think that the -mystery may be cleared along those lines." - -"But what, then, did the gipsies do?" - -"I cannot imagine." - -"I see many objections to any such theory." - -"And so do I. It is precisely for that reason that we are going -to Stoke Moran this day. I want to see whether the objections are -fatal, or if they may be explained away. But what in the name of -the devil!" - -The ejaculation had been drawn from my companion by the fact that -our door had been suddenly dashed open, and that a huge man had -framed himself in the aperture. 
His costume was a peculiar -mixture of the professional and of the agricultural, having a -black top-hat, a long frock-coat, and a pair of high gaiters, -with a hunting-crop swinging in his hand. So tall was he that his -hat actually brushed the cross bar of the doorway, and his -breadth seemed to span it across from side to side. A large face, -seared with a thousand wrinkles, burned yellow with the sun, and -marked with every evil passion, was turned from one to the other -of us, while his deep-set, bile-shot eyes, and his high, thin, -fleshless nose, gave him somewhat the resemblance to a fierce old -bird of prey. - -"Which of you is Holmes?" asked this apparition. - -"My name, sir; but you have the advantage of me," said my -companion quietly. - -"I am Dr. Grimesby Roylott, of Stoke Moran." - -"Indeed, Doctor," said Holmes blandly. "Pray take a seat." - -"I will do nothing of the kind. My stepdaughter has been here. I -have traced her. What has she been saying to you?" - -"It is a little cold for the time of the year," said Holmes. - -"What has she been saying to you?" screamed the old man -furiously. - -"But I have heard that the crocuses promise well," continued my -companion imperturbably. - -"Ha! You put me off, do you?" said our new visitor, taking a step -forward and shaking his hunting-crop. "I know you, you scoundrel! -I have heard of you before. You are Holmes, the meddler." - -My friend smiled. - -"Holmes, the busybody!" - -His smile broadened. - -"Holmes, the Scotland Yard Jack-in-office!" - -Holmes chuckled heartily. "Your conversation is most -entertaining," said he. "When you go out close the door, for -there is a decided draught." - -"I will go when I have said my say. Don't you dare to meddle with -my affairs. I know that Miss Stoner has been here. I traced her! -I am a dangerous man to fall foul of! See here." He stepped -swiftly forward, seized the poker, and bent it into a curve with -his huge brown hands. 
- -"See that you keep yourself out of my grip," he snarled, and -hurling the twisted poker into the fireplace he strode out of the -room. - -"He seems a very amiable person," said Holmes, laughing. "I am -not quite so bulky, but if he had remained I might have shown him -that my grip was not much more feeble than his own." As he spoke -he picked up the steel poker and, with a sudden effort, -straightened it out again. - -"Fancy his having the insolence to confound me with the official -detective force! This incident gives zest to our investigation, -however, and I only trust that our little friend will not suffer -from her imprudence in allowing this brute to trace her. And now, -Watson, we shall order breakfast, and afterwards I shall walk -down to Doctors' Commons, where I hope to get some data which may -help us in this matter." - - -It was nearly one o'clock when Sherlock Holmes returned from his -excursion. He held in his hand a sheet of blue paper, scrawled -over with notes and figures. - -"I have seen the will of the deceased wife," said he. "To -determine its exact meaning I have been obliged to work out the -present prices of the investments with which it is concerned. The -total income, which at the time of the wife's death was little -short of 1100 pounds, is now, through the fall in agricultural -prices, not more than 750 pounds. Each daughter can claim an -income of 250 pounds, in case of marriage. It is evident, -therefore, that if both girls had married, this beauty would have -had a mere pittance, while even one of them would cripple him to -a very serious extent. My morning's work has not been wasted, -since it has proved that he has the very strongest motives for -standing in the way of anything of the sort. And now, Watson, -this is too serious for dawdling, especially as the old man is -aware that we are interesting ourselves in his affairs; so if you -are ready, we shall call a cab and drive to Waterloo. 
I should be -very much obliged if you would slip your revolver into your -pocket. An Eley's No. 2 is an excellent argument with gentlemen -who can twist steel pokers into knots. That and a tooth-brush -are, I think, all that we need." - -At Waterloo we were fortunate in catching a train for -Leatherhead, where we hired a trap at the station inn and drove -for four or five miles through the lovely Surrey lanes. It was a -perfect day, with a bright sun and a few fleecy clouds in the -heavens. The trees and wayside hedges were just throwing out -their first green shoots, and the air was full of the pleasant -smell of the moist earth. To me at least there was a strange -contrast between the sweet promise of the spring and this -sinister quest upon which we were engaged. My companion sat in -the front of the trap, his arms folded, his hat pulled down over -his eyes, and his chin sunk upon his breast, buried in the -deepest thought. Suddenly, however, he started, tapped me on the -shoulder, and pointed over the meadows. - -"Look there!" said he. - -A heavily timbered park stretched up in a gentle slope, -thickening into a grove at the highest point. From amid the -branches there jutted out the grey gables and high roof-tree of a -very old mansion. - -"Stoke Moran?" said he. - -"Yes, sir, that be the house of Dr. Grimesby Roylott," remarked -the driver. - -"There is some building going on there," said Holmes; "that is -where we are going." - -"There's the village," said the driver, pointing to a cluster of -roofs some distance to the left; "but if you want to get to the -house, you'll find it shorter to get over this stile, and so by -the foot-path over the fields. There it is, where the lady is -walking." - -"And the lady, I fancy, is Miss Stoner," observed Holmes, shading -his eyes. "Yes, I think we had better do as you suggest." - -We got off, paid our fare, and the trap rattled back on its way -to Leatherhead. 
- -"I thought it as well," said Holmes as we climbed the stile, -"that this fellow should think we had come here as architects, or -on some definite business. It may stop his gossip. -Good-afternoon, Miss Stoner. You see that we have been as good as -our word." - -Our client of the morning had hurried forward to meet us with a -face which spoke her joy. "I have been waiting so eagerly for -you," she cried, shaking hands with us warmly. "All has turned -out splendidly. Dr. Roylott has gone to town, and it is unlikely -that he will be back before evening." - -"We have had the pleasure of making the doctor's acquaintance," -said Holmes, and in a few words he sketched out what had -occurred. Miss Stoner turned white to the lips as she listened. - -"Good heavens!" she cried, "he has followed me, then." - -"So it appears." - -"He is so cunning that I never know when I am safe from him. What -will he say when he returns?" - -"He must guard himself, for he may find that there is someone -more cunning than himself upon his track. You must lock yourself -up from him to-night. If he is violent, we shall take you away to -your aunt's at Harrow. Now, we must make the best use of our -time, so kindly take us at once to the rooms which we are to -examine." - -The building was of grey, lichen-blotched stone, with a high -central portion and two curving wings, like the claws of a crab, -thrown out on each side. In one of these wings the windows were -broken and blocked with wooden boards, while the roof was partly -caved in, a picture of ruin. The central portion was in little -better repair, but the right-hand block was comparatively modern, -and the blinds in the windows, with the blue smoke curling up -from the chimneys, showed that this was where the family resided. -Some scaffolding had been erected against the end wall, and the -stone-work had been broken into, but there were no signs of any -workmen at the moment of our visit. 
Holmes walked slowly up and -down the ill-trimmed lawn and examined with deep attention the -outsides of the windows. - -"This, I take it, belongs to the room in which you used to sleep, -the centre one to your sister's, and the one next to the main -building to Dr. Roylott's chamber?" - -"Exactly so. But I am now sleeping in the middle one." - -"Pending the alterations, as I understand. By the way, there does -not seem to be any very pressing need for repairs at that end -wall." - -"There were none. I believe that it was an excuse to move me from -my room." - -"Ah! that is suggestive. Now, on the other side of this narrow -wing runs the corridor from which these three rooms open. There -are windows in it, of course?" - -"Yes, but very small ones. Too narrow for anyone to pass -through." - -"As you both locked your doors at night, your rooms were -unapproachable from that side. Now, would you have the kindness -to go into your room and bar your shutters?" - -Miss Stoner did so, and Holmes, after a careful examination -through the open window, endeavoured in every way to force the -shutter open, but without success. There was no slit through -which a knife could be passed to raise the bar. Then with his -lens he tested the hinges, but they were of solid iron, built -firmly into the massive masonry. "Hum!" said he, scratching his -chin in some perplexity, "my theory certainly presents some -difficulties. No one could pass these shutters if they were -bolted. Well, we shall see if the inside throws any light upon -the matter." - -A small side door led into the whitewashed corridor from which -the three bedrooms opened. Holmes refused to examine the third -chamber, so we passed at once to the second, that in which Miss -Stoner was now sleeping, and in which her sister had met with her -fate. It was a homely little room, with a low ceiling and a -gaping fireplace, after the fashion of old country-houses. 
A -brown chest of drawers stood in one corner, a narrow -white-counterpaned bed in another, and a dressing-table on the -left-hand side of the window. These articles, with two small -wicker-work chairs, made up all the furniture in the room save -for a square of Wilton carpet in the centre. The boards round and -the panelling of the walls were of brown, worm-eaten oak, so old -and discoloured that it may have dated from the original building -of the house. Holmes drew one of the chairs into a corner and sat -silent, while his eyes travelled round and round and up and down, -taking in every detail of the apartment. - -"Where does that bell communicate with?" he asked at last -pointing to a thick bell-rope which hung down beside the bed, the -tassel actually lying upon the pillow. - -"It goes to the housekeeper's room." - -"It looks newer than the other things?" - -"Yes, it was only put there a couple of years ago." - -"Your sister asked for it, I suppose?" - -"No, I never heard of her using it. We used always to get what we -wanted for ourselves." - -"Indeed, it seemed unnecessary to put so nice a bell-pull there. -You will excuse me for a few minutes while I satisfy myself as to -this floor." He threw himself down upon his face with his lens in -his hand and crawled swiftly backward and forward, examining -minutely the cracks between the boards. Then he did the same with -the wood-work with which the chamber was panelled. Finally he -walked over to the bed and spent some time in staring at it and -in running his eye up and down the wall. Finally he took the -bell-rope in his hand and gave it a brisk tug. - -"Why, it's a dummy," said he. - -"Won't it ring?" - -"No, it is not even attached to a wire. This is very interesting. -You can see now that it is fastened to a hook just above where -the little opening for the ventilator is." - -"How very absurd! I never noticed that before." - -"Very strange!" muttered Holmes, pulling at the rope. 
"There are -one or two very singular points about this room. For example, -what a fool a builder must be to open a ventilator into another -room, when, with the same trouble, he might have communicated -with the outside air!" - -"That is also quite modern," said the lady. - -"Done about the same time as the bell-rope?" remarked Holmes. - -"Yes, there were several little changes carried out about that -time." - -"They seem to have been of a most interesting character--dummy -bell-ropes, and ventilators which do not ventilate. With your -permission, Miss Stoner, we shall now carry our researches into -the inner apartment." - -Dr. Grimesby Roylott's chamber was larger than that of his -step-daughter, but was as plainly furnished. A camp-bed, a small -wooden shelf full of books, mostly of a technical character, an -armchair beside the bed, a plain wooden chair against the wall, a -round table, and a large iron safe were the principal things -which met the eye. Holmes walked slowly round and examined each -and all of them with the keenest interest. - -"What's in here?" he asked, tapping the safe. - -"My stepfather's business papers." - -"Oh! you have seen inside, then?" - -"Only once, some years ago. I remember that it was full of -papers." - -"There isn't a cat in it, for example?" - -"No. What a strange idea!" - -"Well, look at this!" He took up a small saucer of milk which -stood on the top of it. - -"No; we don't keep a cat. But there is a cheetah and a baboon." - -"Ah, yes, of course! Well, a cheetah is just a big cat, and yet a -saucer of milk does not go very far in satisfying its wants, I -daresay. There is one point which I should wish to determine." He -squatted down in front of the wooden chair and examined the seat -of it with the greatest attention. - -"Thank you. That is quite settled," said he, rising and putting -his lens in his pocket. "Hullo! Here is something interesting!" 
- -The object which had caught his eye was a small dog lash hung on -one corner of the bed. The lash, however, was curled upon itself -and tied so as to make a loop of whipcord. - -"What do you make of that, Watson?" - -"It's a common enough lash. But I don't know why it should be -tied." - -"That is not quite so common, is it? Ah, me! it's a wicked world, -and when a clever man turns his brains to crime it is the worst -of all. I think that I have seen enough now, Miss Stoner, and -with your permission we shall walk out upon the lawn." - -I had never seen my friend's face so grim or his brow so dark as -it was when we turned from the scene of this investigation. We -had walked several times up and down the lawn, neither Miss -Stoner nor myself liking to break in upon his thoughts before he -roused himself from his reverie. - -"It is very essential, Miss Stoner," said he, "that you should -absolutely follow my advice in every respect." - -"I shall most certainly do so." - -"The matter is too serious for any hesitation. Your life may -depend upon your compliance." - -"I assure you that I am in your hands." - -"In the first place, both my friend and I must spend the night in -your room." - -Both Miss Stoner and I gazed at him in astonishment. - -"Yes, it must be so. Let me explain. I believe that that is the -village inn over there?" - -"Yes, that is the Crown." - -"Very good. Your windows would be visible from there?" - -"Certainly." - -"You must confine yourself to your room, on pretence of a -headache, when your stepfather comes back. Then when you hear him -retire for the night, you must open the shutters of your window, -undo the hasp, put your lamp there as a signal to us, and then -withdraw quietly with everything which you are likely to want -into the room which you used to occupy. I have no doubt that, in -spite of the repairs, you could manage there for one night." - -"Oh, yes, easily." - -"The rest you will leave in our hands." - -"But what will you do?" 
- -"We shall spend the night in your room, and we shall investigate -the cause of this noise which has disturbed you." - -"I believe, Mr. Holmes, that you have already made up your mind," -said Miss Stoner, laying her hand upon my companion's sleeve. - -"Perhaps I have." - -"Then, for pity's sake, tell me what was the cause of my sister's -death." - -"I should prefer to have clearer proofs before I speak." - -"You can at least tell me whether my own thought is correct, and -if she died from some sudden fright." - -"No, I do not think so. I think that there was probably some more -tangible cause. And now, Miss Stoner, we must leave you for if -Dr. Roylott returned and saw us our journey would be in vain. -Good-bye, and be brave, for if you will do what I have told you, -you may rest assured that we shall soon drive away the dangers -that threaten you." - -Sherlock Holmes and I had no difficulty in engaging a bedroom and -sitting-room at the Crown Inn. They were on the upper floor, and -from our window we could command a view of the avenue gate, and -of the inhabited wing of Stoke Moran Manor House. At dusk we saw -Dr. Grimesby Roylott drive past, his huge form looming up beside -the little figure of the lad who drove him. The boy had some -slight difficulty in undoing the heavy iron gates, and we heard -the hoarse roar of the doctor's voice and saw the fury with which -he shook his clinched fists at him. The trap drove on, and a few -minutes later we saw a sudden light spring up among the trees as -the lamp was lit in one of the sitting-rooms. - -"Do you know, Watson," said Holmes as we sat together in the -gathering darkness, "I have really some scruples as to taking you -to-night. There is a distinct element of danger." - -"Can I be of assistance?" - -"Your presence might be invaluable." - -"Then I shall certainly come." - -"It is very kind of you." - -"You speak of danger. You have evidently seen more in these rooms -than was visible to me." 
- -"No, but I fancy that I may have deduced a little more. I imagine -that you saw all that I did." - -"I saw nothing remarkable save the bell-rope, and what purpose -that could answer I confess is more than I can imagine." - -"You saw the ventilator, too?" - -"Yes, but I do not think that it is such a very unusual thing to -have a small opening between two rooms. It was so small that a -rat could hardly pass through." - -"I knew that we should find a ventilator before ever we came to -Stoke Moran." - -"My dear Holmes!" - -"Oh, yes, I did. You remember in her statement she said that her -sister could smell Dr. Roylott's cigar. Now, of course that -suggested at once that there must be a communication between the -two rooms. It could only be a small one, or it would have been -remarked upon at the coroner's inquiry. I deduced a ventilator." - -"But what harm can there be in that?" - -"Well, there is at least a curious coincidence of dates. A -ventilator is made, a cord is hung, and a lady who sleeps in the -bed dies. Does not that strike you?" - -"I cannot as yet see any connection." - -"Did you observe anything very peculiar about that bed?" - -"No." - -"It was clamped to the floor. Did you ever see a bed fastened -like that before?" - -"I cannot say that I have." - -"The lady could not move her bed. It must always be in the same -relative position to the ventilator and to the rope--or so we may -call it, since it was clearly never meant for a bell-pull." - -"Holmes," I cried, "I seem to see dimly what you are hinting at. -We are only just in time to prevent some subtle and horrible -crime." - -"Subtle enough and horrible enough. When a doctor does go wrong -he is the first of criminals. He has nerve and he has knowledge. -Palmer and Pritchard were among the heads of their profession. -This man strikes even deeper, but I think, Watson, that we shall -be able to strike deeper still. 
But we shall have horrors enough -before the night is over; for goodness' sake let us have a quiet -pipe and turn our minds for a few hours to something more -cheerful." - - -About nine o'clock the light among the trees was extinguished, -and all was dark in the direction of the Manor House. Two hours -passed slowly away, and then, suddenly, just at the stroke of -eleven, a single bright light shone out right in front of us. - -"That is our signal," said Holmes, springing to his feet; "it -comes from the middle window." - -As we passed out he exchanged a few words with the landlord, -explaining that we were going on a late visit to an acquaintance, -and that it was possible that we might spend the night there. A -moment later we were out on the dark road, a chill wind blowing -in our faces, and one yellow light twinkling in front of us -through the gloom to guide us on our sombre errand. - -There was little difficulty in entering the grounds, for -unrepaired breaches gaped in the old park wall. Making our way -among the trees, we reached the lawn, crossed it, and were about -to enter through the window when out from a clump of laurel -bushes there darted what seemed to be a hideous and distorted -child, who threw itself upon the grass with writhing limbs and -then ran swiftly across the lawn into the darkness. - -"My God!" I whispered; "did you see it?" - -Holmes was for the moment as startled as I. His hand closed like -a vice upon my wrist in his agitation. Then he broke into a low -laugh and put his lips to my ear. - -"It is a nice household," he murmured. "That is the baboon." - -I had forgotten the strange pets which the doctor affected. There -was a cheetah, too; perhaps we might find it upon our shoulders -at any moment. I confess that I felt easier in my mind when, -after following Holmes' example and slipping off my shoes, I -found myself inside the bedroom. 
My companion noiselessly closed -the shutters, moved the lamp onto the table, and cast his eyes -round the room. All was as we had seen it in the daytime. Then -creeping up to me and making a trumpet of his hand, he whispered -into my ear again so gently that it was all that I could do to -distinguish the words: - -"The least sound would be fatal to our plans." - -I nodded to show that I had heard. - -"We must sit without light. He would see it through the -ventilator." - -I nodded again. - -"Do not go asleep; your very life may depend upon it. Have your -pistol ready in case we should need it. I will sit on the side of -the bed, and you in that chair." - -I took out my revolver and laid it on the corner of the table. - -Holmes had brought up a long thin cane, and this he placed upon -the bed beside him. By it he laid the box of matches and the -stump of a candle. Then he turned down the lamp, and we were left -in darkness. - -How shall I ever forget that dreadful vigil? I could not hear a -sound, not even the drawing of a breath, and yet I knew that my -companion sat open-eyed, within a few feet of me, in the same -state of nervous tension in which I was myself. The shutters cut -off the least ray of light, and we waited in absolute darkness. - -From outside came the occasional cry of a night-bird, and once at -our very window a long drawn catlike whine, which told us that -the cheetah was indeed at liberty. Far away we could hear the -deep tones of the parish clock, which boomed out every quarter of -an hour. How long they seemed, those quarters! Twelve struck, and -one and two and three, and still we sat waiting silently for -whatever might befall. - -Suddenly there was the momentary gleam of a light up in the -direction of the ventilator, which vanished immediately, but was -succeeded by a strong smell of burning oil and heated metal. -Someone in the next room had lit a dark-lantern. 
I heard a gentle -sound of movement, and then all was silent once more, though the -smell grew stronger. For half an hour I sat with straining ears. -Then suddenly another sound became audible--a very gentle, -soothing sound, like that of a small jet of steam escaping -continually from a kettle. The instant that we heard it, Holmes -sprang from the bed, struck a match, and lashed furiously with -his cane at the bell-pull. - -"You see it, Watson?" he yelled. "You see it?" - -But I saw nothing. At the moment when Holmes struck the light I -heard a low, clear whistle, but the sudden glare flashing into my -weary eyes made it impossible for me to tell what it was at which -my friend lashed so savagely. I could, however, see that his face -was deadly pale and filled with horror and loathing. He had -ceased to strike and was gazing up at the ventilator when -suddenly there broke from the silence of the night the most -horrible cry to which I have ever listened. It swelled up louder -and louder, a hoarse yell of pain and fear and anger all mingled -in the one dreadful shriek. They say that away down in the -village, and even in the distant parsonage, that cry raised the -sleepers from their beds. It struck cold to our hearts, and I -stood gazing at Holmes, and he at me, until the last echoes of it -had died away into the silence from which it rose. - -"What can it mean?" I gasped. - -"It means that it is all over," Holmes answered. "And perhaps, -after all, it is for the best. Take your pistol, and we will -enter Dr. Roylott's room." - -With a grave face he lit the lamp and led the way down the -corridor. Twice he struck at the chamber door without any reply -from within. Then he turned the handle and entered, I at his -heels, with the cocked pistol in my hand. - -It was a singular sight which met our eyes. On the table stood a -dark-lantern with the shutter half open, throwing a brilliant -beam of light upon the iron safe, the door of which was ajar. 
-Beside this table, on the wooden chair, sat Dr. Grimesby Roylott -clad in a long grey dressing-gown, his bare ankles protruding -beneath, and his feet thrust into red heelless Turkish slippers. -Across his lap lay the short stock with the long lash which we -had noticed during the day. His chin was cocked upward and his -eyes were fixed in a dreadful, rigid stare at the corner of the -ceiling. Round his brow he had a peculiar yellow band, with -brownish speckles, which seemed to be bound tightly round his -head. As we entered he made neither sound nor motion. - -"The band! the speckled band!" whispered Holmes. - -I took a step forward. In an instant his strange headgear began -to move, and there reared itself from among his hair the squat -diamond-shaped head and puffed neck of a loathsome serpent. - -"It is a swamp adder!" cried Holmes; "the deadliest snake in -India. He has died within ten seconds of being bitten. Violence -does, in truth, recoil upon the violent, and the schemer falls -into the pit which he digs for another. Let us thrust this -creature back into its den, and we can then remove Miss Stoner to -some place of shelter and let the county police know what has -happened." - -As he spoke he drew the dog-whip swiftly from the dead man's lap, -and throwing the noose round the reptile's neck he drew it from -its horrid perch and, carrying it at arm's length, threw it into -the iron safe, which he closed upon it. - -Such are the true facts of the death of Dr. Grimesby Roylott, of -Stoke Moran. It is not necessary that I should prolong a -narrative which has already run to too great a length by telling -how we broke the sad news to the terrified girl, how we conveyed -her by the morning train to the care of her good aunt at Harrow, -of how the slow process of official inquiry came to the -conclusion that the doctor met his fate while indiscreetly -playing with a dangerous pet. 
The little which I had yet to learn -of the case was told me by Sherlock Holmes as we travelled back -next day. - -"I had," said he, "come to an entirely erroneous conclusion which -shows, my dear Watson, how dangerous it always is to reason from -insufficient data. The presence of the gipsies, and the use of -the word 'band,' which was used by the poor girl, no doubt, to -explain the appearance which she had caught a hurried glimpse of -by the light of her match, were sufficient to put me upon an -entirely wrong scent. I can only claim the merit that I instantly -reconsidered my position when, however, it became clear to me -that whatever danger threatened an occupant of the room could not -come either from the window or the door. My attention was -speedily drawn, as I have already remarked to you, to this -ventilator, and to the bell-rope which hung down to the bed. The -discovery that this was a dummy, and that the bed was clamped to -the floor, instantly gave rise to the suspicion that the rope was -there as a bridge for something passing through the hole and -coming to the bed. The idea of a snake instantly occurred to me, -and when I coupled it with my knowledge that the doctor was -furnished with a supply of creatures from India, I felt that I -was probably on the right track. The idea of using a form of -poison which could not possibly be discovered by any chemical -test was just such a one as would occur to a clever and ruthless -man who had had an Eastern training. The rapidity with which such -a poison would take effect would also, from his point of view, be -an advantage. It would be a sharp-eyed coroner, indeed, who could -distinguish the two little dark punctures which would show where -the poison fangs had done their work. Then I thought of the -whistle. Of course he must recall the snake before the morning -light revealed it to the victim. He had trained it, probably by -the use of the milk which we saw, to return to him when summoned. 
-He would put it through this ventilator at the hour that he -thought best, with the certainty that it would crawl down the -rope and land on the bed. It might or might not bite the -occupant, perhaps she might escape every night for a week, but -sooner or later she must fall a victim. - -"I had come to these conclusions before ever I had entered his -room. An inspection of his chair showed me that he had been in -the habit of standing on it, which of course would be necessary -in order that he should reach the ventilator. The sight of the -safe, the saucer of milk, and the loop of whipcord were enough to -finally dispel any doubts which may have remained. The metallic -clang heard by Miss Stoner was obviously caused by her stepfather -hastily closing the door of his safe upon its terrible occupant. -Having once made up my mind, you know the steps which I took in -order to put the matter to the proof. I heard the creature hiss -as I have no doubt that you did also, and I instantly lit the -light and attacked it." - -"With the result of driving it through the ventilator." - -"And also with the result of causing it to turn upon its master -at the other side. Some of the blows of my cane came home and -roused its snakish temper, so that it flew upon the first person -it saw. In this way I am no doubt indirectly responsible for Dr. -Grimesby Roylott's death, and I cannot say that it is likely to -weigh very heavily upon my conscience." - - - -IX. THE ADVENTURE OF THE ENGINEER'S THUMB - -Of all the problems which have been submitted to my friend, Mr. -Sherlock Holmes, for solution during the years of our intimacy, -there were only two which I was the means of introducing to his -notice--that of Mr. Hatherley's thumb, and that of Colonel -Warburton's madness. 
Of these the latter may have afforded a -finer field for an acute and original observer, but the other was -so strange in its inception and so dramatic in its details that -it may be the more worthy of being placed upon record, even if it -gave my friend fewer openings for those deductive methods of -reasoning by which he achieved such remarkable results. The story -has, I believe, been told more than once in the newspapers, but, -like all such narratives, its effect is much less striking when -set forth en bloc in a single half-column of print than when the -facts slowly evolve before your own eyes, and the mystery clears -gradually away as each new discovery furnishes a step which leads -on to the complete truth. At the time the circumstances made a -deep impression upon me, and the lapse of two years has hardly -served to weaken the effect. - -It was in the summer of '89, not long after my marriage, that the -events occurred which I am now about to summarise. I had returned -to civil practice and had finally abandoned Holmes in his Baker -Street rooms, although I continually visited him and occasionally -even persuaded him to forgo his Bohemian habits so far as to come -and visit us. My practice had steadily increased, and as I -happened to live at no very great distance from Paddington -Station, I got a few patients from among the officials. One of -these, whom I had cured of a painful and lingering disease, was -never weary of advertising my virtues and of endeavouring to send -me on every sufferer over whom he might have any influence. - -One morning, at a little before seven o'clock, I was awakened by -the maid tapping at the door to announce that two men had come -from Paddington and were waiting in the consulting-room. I -dressed hurriedly, for I knew by experience that railway cases -were seldom trivial, and hastened downstairs. As I descended, my -old ally, the guard, came out of the room and closed the door -tightly behind him. 
- -"I've got him here," he whispered, jerking his thumb over his -shoulder; "he's all right." - -"What is it, then?" I asked, for his manner suggested that it was -some strange creature which he had caged up in my room. - -"It's a new patient," he whispered. "I thought I'd bring him -round myself; then he couldn't slip away. There he is, all safe -and sound. I must go now, Doctor; I have my dooties, just the -same as you." And off he went, this trusty tout, without even -giving me time to thank him. - -I entered my consulting-room and found a gentleman seated by the -table. He was quietly dressed in a suit of heather tweed with a -soft cloth cap which he had laid down upon my books. Round one of -his hands he had a handkerchief wrapped, which was mottled all -over with bloodstains. He was young, not more than -five-and-twenty, I should say, with a strong, masculine face; but -he was exceedingly pale and gave me the impression of a man who -was suffering from some strong agitation, which it took all his -strength of mind to control. - -"I am sorry to knock you up so early, Doctor," said he, "but I -have had a very serious accident during the night. I came in by -train this morning, and on inquiring at Paddington as to where I -might find a doctor, a worthy fellow very kindly escorted me -here. I gave the maid a card, but I see that she has left it upon -the side-table." - -I took it up and glanced at it. "Mr. Victor Hatherley, hydraulic -engineer, 16A, Victoria Street (3rd floor)." That was the name, -style, and abode of my morning visitor. "I regret that I have -kept you waiting," said I, sitting down in my library-chair. "You -are fresh from a night journey, I understand, which is in itself -a monotonous occupation." - -"Oh, my night could not be called monotonous," said he, and -laughed. He laughed very heartily, with a high, ringing note, -leaning back in his chair and shaking his sides. All my medical -instincts rose up against that laugh. - -"Stop it!" 
I cried; "pull yourself together!" and I poured out -some water from a caraffe. - -It was useless, however. He was off in one of those hysterical -outbursts which come upon a strong nature when some great crisis -is over and gone. Presently he came to himself once more, very -weary and pale-looking. - -"I have been making a fool of myself," he gasped. - -"Not at all. Drink this." I dashed some brandy into the water, -and the colour began to come back to his bloodless cheeks. - -"That's better!" said he. "And now, Doctor, perhaps you would -kindly attend to my thumb, or rather to the place where my thumb -used to be." - -He unwound the handkerchief and held out his hand. It gave even -my hardened nerves a shudder to look at it. There were four -protruding fingers and a horrid red, spongy surface where the -thumb should have been. It had been hacked or torn right out from -the roots. - -"Good heavens!" I cried, "this is a terrible injury. It must have -bled considerably." - -"Yes, it did. I fainted when it was done, and I think that I must -have been senseless for a long time. When I came to I found that -it was still bleeding, so I tied one end of my handkerchief very -tightly round the wrist and braced it up with a twig." - -"Excellent! You should have been a surgeon." - -"It is a question of hydraulics, you see, and came within my own -province." - -"This has been done," said I, examining the wound, "by a very -heavy and sharp instrument." - -"A thing like a cleaver," said he. - -"An accident, I presume?" - -"By no means." - -"What! a murderous attack?" - -"Very murderous indeed." - -"You horrify me." - -I sponged the wound, cleaned it, dressed it, and finally covered -it over with cotton wadding and carbolised bandages. He lay back -without wincing, though he bit his lip from time to time. - -"How is that?" I asked when I had finished. - -"Capital! Between your brandy and your bandage, I feel a new man. -I was very weak, but I have had a good deal to go through." 
- -"Perhaps you had better not speak of the matter. It is evidently -trying to your nerves." - -"Oh, no, not now. I shall have to tell my tale to the police; -but, between ourselves, if it were not for the convincing -evidence of this wound of mine, I should be surprised if they -believed my statement, for it is a very extraordinary one, and I -have not much in the way of proof with which to back it up; and, -even if they believe me, the clues which I can give them are so -vague that it is a question whether justice will be done." - -"Ha!" cried I, "if it is anything in the nature of a problem -which you desire to see solved, I should strongly recommend you -to come to my friend, Mr. Sherlock Holmes, before you go to the -official police." - -"Oh, I have heard of that fellow," answered my visitor, "and I -should be very glad if he would take the matter up, though of -course I must use the official police as well. Would you give me -an introduction to him?" - -"I'll do better. I'll take you round to him myself." - -"I should be immensely obliged to you." - -"We'll call a cab and go together. We shall just be in time to -have a little breakfast with him. Do you feel equal to it?" - -"Yes; I shall not feel easy until I have told my story." - -"Then my servant will call a cab, and I shall be with you in an -instant." I rushed upstairs, explained the matter shortly to my -wife, and in five minutes was inside a hansom, driving with my -new acquaintance to Baker Street. - -Sherlock Holmes was, as I expected, lounging about his -sitting-room in his dressing-gown, reading the agony column of The -Times and smoking his before-breakfast pipe, which was composed -of all the plugs and dottles left from his smokes of the day -before, all carefully dried and collected on the corner of the -mantelpiece. He received us in his quietly genial fashion, -ordered fresh rashers and eggs, and joined us in a hearty meal. 
-When it was concluded he settled our new acquaintance upon the -sofa, placed a pillow beneath his head, and laid a glass of -brandy and water within his reach. - -"It is easy to see that your experience has been no common one, -Mr. Hatherley," said he. "Pray, lie down there and make yourself -absolutely at home. Tell us what you can, but stop when you are -tired and keep up your strength with a little stimulant." - -"Thank you," said my patient, "but I have felt another man since -the doctor bandaged me, and I think that your breakfast has -completed the cure. I shall take up as little of your valuable -time as possible, so I shall start at once upon my peculiar -experiences." - -Holmes sat in his big armchair with the weary, heavy-lidded -expression which veiled his keen and eager nature, while I sat -opposite to him, and we listened in silence to the strange story -which our visitor detailed to us. - -"You must know," said he, "that I am an orphan and a bachelor, -residing alone in lodgings in London. By profession I am a -hydraulic engineer, and I have had considerable experience of my -work during the seven years that I was apprenticed to Venner & -Matheson, the well-known firm, of Greenwich. Two years ago, -having served my time, and having also come into a fair sum of -money through my poor father's death, I determined to start in -business for myself and took professional chambers in Victoria -Street. - -"I suppose that everyone finds his first independent start in -business a dreary experience. To me it has been exceptionally so. -During two years I have had three consultations and one small -job, and that is absolutely all that my profession has brought -me. My gross takings amount to 27 pounds 10s. Every day, from -nine in the morning until four in the afternoon, I waited in my -little den, until at last my heart began to sink, and I came to -believe that I should never have any practice at all. 
- -"Yesterday, however, just as I was thinking of leaving the -office, my clerk entered to say there was a gentleman waiting who -wished to see me upon business. He brought up a card, too, with -the name of 'Colonel Lysander Stark' engraved upon it. Close at -his heels came the colonel himself, a man rather over the middle -size, but of an exceeding thinness. I do not think that I have -ever seen so thin a man. His whole face sharpened away into nose -and chin, and the skin of his cheeks was drawn quite tense over -his outstanding bones. Yet this emaciation seemed to be his -natural habit, and due to no disease, for his eye was bright, his -step brisk, and his bearing assured. He was plainly but neatly -dressed, and his age, I should judge, would be nearer forty than -thirty. - -"'Mr. Hatherley?' said he, with something of a German accent. -'You have been recommended to me, Mr. Hatherley, as being a man -who is not only proficient in his profession but is also discreet -and capable of preserving a secret.' - -"I bowed, feeling as flattered as any young man would at such an -address. 'May I ask who it was who gave me so good a character?' - -"'Well, perhaps it is better that I should not tell you that just -at this moment. I have it from the same source that you are both -an orphan and a bachelor and are residing alone in London.' - -"'That is quite correct,' I answered; 'but you will excuse me if -I say that I cannot see how all this bears upon my professional -qualifications. I understand that it was on a professional matter -that you wished to speak to me?' - -"'Undoubtedly so. But you will find that all I say is really to -the point. I have a professional commission for you, but absolute -secrecy is quite essential--absolute secrecy, you understand, and -of course we may expect that more from a man who is alone than -from one who lives in the bosom of his family.' - -"'If I promise to keep a secret,' said I, 'you may absolutely -depend upon my doing so.' 
- -"He looked very hard at me as I spoke, and it seemed to me that I -had never seen so suspicious and questioning an eye. - -"'Do you promise, then?' said he at last. - -"'Yes, I promise.' - -"'Absolute and complete silence before, during, and after? No -reference to the matter at all, either in word or writing?' - -"'I have already given you my word.' - -"'Very good.' He suddenly sprang up, and darting like lightning -across the room he flung open the door. The passage outside was -empty. - -"'That's all right,' said he, coming back. 'I know that clerks are -sometimes curious as to their master's affairs. Now we can talk -in safety.' He drew up his chair very close to mine and began to -stare at me again with the same questioning and thoughtful look. - -"A feeling of repulsion, and of something akin to fear had begun -to rise within me at the strange antics of this fleshless man. -Even my dread of losing a client could not restrain me from -showing my impatience. - -"'I beg that you will state your business, sir,' said I; 'my time -is of value.' Heaven forgive me for that last sentence, but the -words came to my lips. - -"'How would fifty guineas for a night's work suit you?' he asked. - -"'Most admirably.' - -"'I say a night's work, but an hour's would be nearer the mark. I -simply want your opinion about a hydraulic stamping machine which -has got out of gear. If you show us what is wrong we shall soon -set it right ourselves. What do you think of such a commission as -that?' - -"'The work appears to be light and the pay munificent.' - -"'Precisely so. We shall want you to come to-night by the last -train.' - -"'Where to?' - -"'To Eyford, in Berkshire. It is a little place near the borders -of Oxfordshire, and within seven miles of Reading. There is a -train from Paddington which would bring you there at about -11:15.' - -"'Very good.' - -"'I shall come down in a carriage to meet you.' - -"'There is a drive, then?' 
- -"'Yes, our little place is quite out in the country. It is a good -seven miles from Eyford Station.' - -"'Then we can hardly get there before midnight. I suppose there -would be no chance of a train back. I should be compelled to stop -the night.' - -"'Yes, we could easily give you a shake-down.' - -"'That is very awkward. Could I not come at some more convenient -hour?' - -"'We have judged it best that you should come late. It is to -recompense you for any inconvenience that we are paying to you, a -young and unknown man, a fee which would buy an opinion from the -very heads of your profession. Still, of course, if you would -like to draw out of the business, there is plenty of time to do -so.' - -"I thought of the fifty guineas, and of how very useful they -would be to me. 'Not at all,' said I, 'I shall be very happy to -accommodate myself to your wishes. I should like, however, to -understand a little more clearly what it is that you wish me to -do.' - -"'Quite so. It is very natural that the pledge of secrecy which -we have exacted from you should have aroused your curiosity. I -have no wish to commit you to anything without your having it all -laid before you. I suppose that we are absolutely safe from -eavesdroppers?' - -"'Entirely.' - -"'Then the matter stands thus. You are probably aware that -fuller's-earth is a valuable product, and that it is only found -in one or two places in England?' - -"'I have heard so.' - -"'Some little time ago I bought a small place--a very small -place--within ten miles of Reading. I was fortunate enough to -discover that there was a deposit of fuller's-earth in one of my -fields. On examining it, however, I found that this deposit was a -comparatively small one, and that it formed a link between two -very much larger ones upon the right and left--both of them, -however, in the grounds of my neighbours. These good people were -absolutely ignorant that their land contained that which was -quite as valuable as a gold-mine. 
Naturally, it was to my -interest to buy their land before they discovered its true value, -but unfortunately I had no capital by which I could do this. I -took a few of my friends into the secret, however, and they -suggested that we should quietly and secretly work our own little -deposit and that in this way we should earn the money which would -enable us to buy the neighbouring fields. This we have now been -doing for some time, and in order to help us in our operations we -erected a hydraulic press. This press, as I have already -explained, has got out of order, and we wish your advice upon the -subject. We guard our secret very jealously, however, and if it -once became known that we had hydraulic engineers coming to our -little house, it would soon rouse inquiry, and then, if the facts -came out, it would be good-bye to any chance of getting these -fields and carrying out our plans. That is why I have made you -promise me that you will not tell a human being that you are -going to Eyford to-night. I hope that I make it all plain?' - -"'I quite follow you,' said I. 'The only point which I could not -quite understand was what use you could make of a hydraulic press -in excavating fuller's-earth, which, as I understand, is dug out -like gravel from a pit.' - -"'Ah!' said he carelessly, 'we have our own process. We compress -the earth into bricks, so as to remove them without revealing -what they are. But that is a mere detail. I have taken you fully -into my confidence now, Mr. Hatherley, and I have shown you how I -trust you.' He rose as he spoke. 'I shall expect you, then, at -Eyford at 11:15.' - -"'I shall certainly be there.' - -"'And not a word to a soul.' He looked at me with a last long, -questioning gaze, and then, pressing my hand in a cold, dank -grasp, he hurried from the room. - -"Well, when I came to think it all over in cool blood I was very -much astonished, as you may both think, at this sudden commission -which had been intrusted to me. 
On the one hand, of course, I was -glad, for the fee was at least tenfold what I should have asked -had I set a price upon my own services, and it was possible that -this order might lead to other ones. On the other hand, the face -and manner of my patron had made an unpleasant impression upon -me, and I could not think that his explanation of the -fuller's-earth was sufficient to explain the necessity for my -coming at midnight, and his extreme anxiety lest I should tell -anyone of my errand. However, I threw all fears to the winds, ate -a hearty supper, drove to Paddington, and started off, having -obeyed to the letter the injunction as to holding my tongue. - -"At Reading I had to change not only my carriage but my station. -However, I was in time for the last train to Eyford, and I -reached the little dim-lit station after eleven o'clock. I was the -only passenger who got out there, and there was no one upon the -platform save a single sleepy porter with a lantern. As I passed -out through the wicket gate, however, I found my acquaintance of -the morning waiting in the shadow upon the other side. Without a -word he grasped my arm and hurried me into a carriage, the door -of which was standing open. He drew up the windows on either -side, tapped on the wood-work, and away we went as fast as the -horse could go." - -"One horse?" interjected Holmes. - -"Yes, only one." - -"Did you observe the colour?" - -"Yes, I saw it by the side-lights when I was stepping into the -carriage. It was a chestnut." - -"Tired-looking or fresh?" - -"Oh, fresh and glossy." - -"Thank you. I am sorry to have interrupted you. Pray continue -your most interesting statement." - -"Away we went then, and we drove for at least an hour. Colonel -Lysander Stark had said that it was only seven miles, but I -should think, from the rate that we seemed to go, and from the -time that we took, that it must have been nearer twelve. 
He sat -at my side in silence all the time, and I was aware, more than -once when I glanced in his direction, that he was looking at me -with great intensity. The country roads seem to be not very good -in that part of the world, for we lurched and jolted terribly. I -tried to look out of the windows to see something of where we -were, but they were made of frosted glass, and I could make out -nothing save the occasional bright blur of a passing light. Now -and then I hazarded some remark to break the monotony of the -journey, but the colonel answered only in monosyllables, and the -conversation soon flagged. At last, however, the bumping of the -road was exchanged for the crisp smoothness of a gravel-drive, -and the carriage came to a stand. Colonel Lysander Stark sprang -out, and, as I followed after him, pulled me swiftly into a porch -which gaped in front of us. We stepped, as it were, right out of -the carriage and into the hall, so that I failed to catch the -most fleeting glance of the front of the house. The instant that -I had crossed the threshold the door slammed heavily behind us, -and I heard faintly the rattle of the wheels as the carriage -drove away. - -"It was pitch dark inside the house, and the colonel fumbled -about looking for matches and muttering under his breath. -Suddenly a door opened at the other end of the passage, and a -long, golden bar of light shot out in our direction. It grew -broader, and a woman appeared with a lamp in her hand, which she -held above her head, pushing her face forward and peering at us. -I could see that she was pretty, and from the gloss with which -the light shone upon her dark dress I knew that it was a rich -material. She spoke a few words in a foreign tongue in a tone as -though asking a question, and when my companion answered in a -gruff monosyllable she gave such a start that the lamp nearly -fell from her hand. 
Colonel Stark went up to her, whispered -something in her ear, and then, pushing her back into the room -from whence she had come, he walked towards me again with the -lamp in his hand. - -"'Perhaps you will have the kindness to wait in this room for a -few minutes,' said he, throwing open another door. It was a -quiet, little, plainly furnished room, with a round table in the -centre, on which several German books were scattered. Colonel -Stark laid down the lamp on the top of a harmonium beside the -door. 'I shall not keep you waiting an instant,' said he, and -vanished into the darkness. - -"I glanced at the books upon the table, and in spite of my -ignorance of German I could see that two of them were treatises -on science, the others being volumes of poetry. Then I walked -across to the window, hoping that I might catch some glimpse of -the country-side, but an oak shutter, heavily barred, was folded -across it. It was a wonderfully silent house. There was an old -clock ticking loudly somewhere in the passage, but otherwise -everything was deadly still. A vague feeling of uneasiness began -to steal over me. Who were these German people, and what were -they doing living in this strange, out-of-the-way place? And -where was the place? I was ten miles or so from Eyford, that was -all I knew, but whether north, south, east, or west I had no -idea. For that matter, Reading, and possibly other large towns, -were within that radius, so the place might not be so secluded, -after all. Yet it was quite certain, from the absolute stillness, -that we were in the country. I paced up and down the room, -humming a tune under my breath to keep up my spirits and feeling -that I was thoroughly earning my fifty-guinea fee. - -"Suddenly, without any preliminary sound in the midst of the -utter stillness, the door of my room swung slowly open. 
The woman -was standing in the aperture, the darkness of the hall behind -her, the yellow light from my lamp beating upon her eager and -beautiful face. I could see at a glance that she was sick with -fear, and the sight sent a chill to my own heart. She held up one -shaking finger to warn me to be silent, and she shot a few -whispered words of broken English at me, her eyes glancing back, -like those of a frightened horse, into the gloom behind her. - -"'I would go,' said she, trying hard, as it seemed to me, to -speak calmly; 'I would go. I should not stay here. There is no -good for you to do.' - -"'But, madam,' said I, 'I have not yet done what I came for. I -cannot possibly leave until I have seen the machine.' - -"'It is not worth your while to wait,' she went on. 'You can pass -through the door; no one hinders.' And then, seeing that I smiled -and shook my head, she suddenly threw aside her constraint and -made a step forward, with her hands wrung together. 'For the love -of Heaven!' she whispered, 'get away from here before it is too -late!' - -"But I am somewhat headstrong by nature, and the more ready to -engage in an affair when there is some obstacle in the way. I -thought of my fifty-guinea fee, of my wearisome journey, and of -the unpleasant night which seemed to be before me. Was it all to -go for nothing? Why should I slink away without having carried -out my commission, and without the payment which was my due? This -woman might, for all I knew, be a monomaniac. With a stout -bearing, therefore, though her manner had shaken me more than I -cared to confess, I still shook my head and declared my intention -of remaining where I was. She was about to renew her entreaties -when a door slammed overhead, and the sound of several footsteps -was heard upon the stairs. She listened for an instant, threw up -her hands with a despairing gesture, and vanished as suddenly and -as noiselessly as she had come. 
- -"The newcomers were Colonel Lysander Stark and a short thick man -with a chinchilla beard growing out of the creases of his double -chin, who was introduced to me as Mr. Ferguson. - -"'This is my secretary and manager,' said the colonel. 'By the -way, I was under the impression that I left this door shut just -now. I fear that you have felt the draught.' - -"'On the contrary,' said I, 'I opened the door myself because I -felt the room to be a little close.' - -"He shot one of his suspicious looks at me. 'Perhaps we had -better proceed to business, then,' said he. 'Mr. Ferguson and I -will take you up to see the machine.' - -"'I had better put my hat on, I suppose.' - -"'Oh, no, it is in the house.' - -"'What, you dig fuller's-earth in the house?' - -"'No, no. This is only where we compress it. But never mind that. -All we wish you to do is to examine the machine and to let us -know what is wrong with it.' - -"We went upstairs together, the colonel first with the lamp, the -fat manager and I behind him. It was a labyrinth of an old house, -with corridors, passages, narrow winding staircases, and little -low doors, the thresholds of which were hollowed out by the -generations who had crossed them. There were no carpets and no -signs of any furniture above the ground floor, while the plaster -was peeling off the walls, and the damp was breaking through in -green, unhealthy blotches. I tried to put on as unconcerned an -air as possible, but I had not forgotten the warnings of the -lady, even though I disregarded them, and I kept a keen eye upon -my two companions. Ferguson appeared to be a morose and silent -man, but I could see from the little that he said that he was at -least a fellow-countryman. - -"Colonel Lysander Stark stopped at last before a low door, which -he unlocked. Within was a small, square room, in which the three -of us could hardly get at one time. Ferguson remained outside, -and the colonel ushered me in. 
- -"'We are now,' said he, 'actually within the hydraulic press, and -it would be a particularly unpleasant thing for us if anyone were -to turn it on. The ceiling of this small chamber is really the -end of the descending piston, and it comes down with the force of -many tons upon this metal floor. There are small lateral columns -of water outside which receive the force, and which transmit and -multiply it in the manner which is familiar to you. The machine -goes readily enough, but there is some stiffness in the working -of it, and it has lost a little of its force. Perhaps you will -have the goodness to look it over and to show us how we can set -it right.' - -"I took the lamp from him, and I examined the machine very -thoroughly. It was indeed a gigantic one, and capable of -exercising enormous pressure. When I passed outside, however, and -pressed down the levers which controlled it, I knew at once by -the whishing sound that there was a slight leakage, which allowed -a regurgitation of water through one of the side cylinders. An -examination showed that one of the india-rubber bands which was -round the head of a driving-rod had shrunk so as not quite to -fill the socket along which it worked. This was clearly the cause -of the loss of power, and I pointed it out to my companions, who -followed my remarks very carefully and asked several practical -questions as to how they should proceed to set it right. When I -had made it clear to them, I returned to the main chamber of the -machine and took a good look at it to satisfy my own curiosity. -It was obvious at a glance that the story of the fuller's-earth -was the merest fabrication, for it would be absurd to suppose -that so powerful an engine could be designed for so inadequate a -purpose. The walls were of wood, but the floor consisted of a -large iron trough, and when I came to examine it I could see a -crust of metallic deposit all over it. 
I had stooped and was -scraping at this to see exactly what it was when I heard a -muttered exclamation in German and saw the cadaverous face of the -colonel looking down at me. - -"'What are you doing there?' he asked. - -"I felt angry at having been tricked by so elaborate a story as -that which he had told me. 'I was admiring your fuller's-earth,' -said I; 'I think that I should be better able to advise you as to -your machine if I knew what the exact purpose was for which it -was used.' - -"The instant that I uttered the words I regretted the rashness of -my speech. His face set hard, and a baleful light sprang up in -his grey eyes. - -"'Very well,' said he, 'you shall know all about the machine.' He -took a step backward, slammed the little door, and turned the key -in the lock. I rushed towards it and pulled at the handle, but it -was quite secure, and did not give in the least to my kicks and -shoves. 'Hullo!' I yelled. 'Hullo! Colonel! Let me out!' - -"And then suddenly in the silence I heard a sound which sent my -heart into my mouth. It was the clank of the levers and the swish -of the leaking cylinder. He had set the engine at work. The lamp -still stood upon the floor where I had placed it when examining -the trough. By its light I saw that the black ceiling was coming -down upon me, slowly, jerkily, but, as none knew better than -myself, with a force which must within a minute grind me to a -shapeless pulp. I threw myself, screaming, against the door, and -dragged with my nails at the lock. I implored the colonel to let -me out, but the remorseless clanking of the levers drowned my -cries. The ceiling was only a foot or two above my head, and with -my hand upraised I could feel its hard, rough surface. Then it -flashed through my mind that the pain of my death would depend -very much upon the position in which I met it. If I lay on my -face the weight would come upon my spine, and I shuddered to -think of that dreadful snap. 
Easier the other way, perhaps; and -yet, had I the nerve to lie and look up at that deadly black -shadow wavering down upon me? Already I was unable to stand -erect, when my eye caught something which brought a gush of hope -back to my heart. - -"I have said that though the floor and ceiling were of iron, the -walls were of wood. As I gave a last hurried glance around, I saw -a thin line of yellow light between two of the boards, which -broadened and broadened as a small panel was pushed backward. For -an instant I could hardly believe that here was indeed a door -which led away from death. The next instant I threw myself -through, and lay half-fainting upon the other side. The panel had -closed again behind me, but the crash of the lamp, and a few -moments afterwards the clang of the two slabs of metal, told me -how narrow had been my escape. - -"I was recalled to myself by a frantic plucking at my wrist, and -I found myself lying upon the stone floor of a narrow corridor, -while a woman bent over me and tugged at me with her left hand, -while she held a candle in her right. It was the same good friend -whose warning I had so foolishly rejected. - -"'Come! come!' she cried breathlessly. 'They will be here in a -moment. They will see that you are not there. Oh, do not waste -the so-precious time, but come!' - -"This time, at least, I did not scorn her advice. I staggered to -my feet and ran with her along the corridor and down a winding -stair. The latter led to another broad passage, and just as we -reached it we heard the sound of running feet and the shouting of -two voices, one answering the other from the floor on which we -were and from the one beneath. My guide stopped and looked about -her like one who is at her wit's end. Then she threw open a door -which led into a bedroom, through the window of which the moon -was shining brightly. - -"'It is your only chance,' said she. 'It is high, but it may be -that you can jump it.' 
- -"As she spoke a light sprang into view at the further end of the -passage, and I saw the lean figure of Colonel Lysander Stark -rushing forward with a lantern in one hand and a weapon like a -butcher's cleaver in the other. I rushed across the bedroom, -flung open the window, and looked out. How quiet and sweet and -wholesome the garden looked in the moonlight, and it could not be -more than thirty feet down. I clambered out upon the sill, but I -hesitated to jump until I should have heard what passed between -my saviour and the ruffian who pursued me. If she were ill-used, -then at any risks I was determined to go back to her assistance. -The thought had hardly flashed through my mind before he was at -the door, pushing his way past her; but she threw her arms round -him and tried to hold him back. - -"'Fritz! Fritz!' she cried in English, 'remember your promise -after the last time. You said it should not be again. He will be -silent! Oh, he will be silent!' - -"'You are mad, Elise!' he shouted, struggling to break away from -her. 'You will be the ruin of us. He has seen too much. Let me -pass, I say!' He dashed her to one side, and, rushing to the -window, cut at me with his heavy weapon. I had let myself go, and -was hanging by the hands to the sill, when his blow fell. I was -conscious of a dull pain, my grip loosened, and I fell into the -garden below. - -"I was shaken but not hurt by the fall; so I picked myself up and -rushed off among the bushes as hard as I could run, for I -understood that I was far from being out of danger yet. Suddenly, -however, as I ran, a deadly dizziness and sickness came over me. -I glanced down at my hand, which was throbbing painfully, and -then, for the first time, saw that my thumb had been cut off and -that the blood was pouring from my wound. I endeavoured to tie my -handkerchief round it, but there came a sudden buzzing in my -ears, and next moment I fell in a dead faint among the -rose-bushes. 
- -"How long I remained unconscious I cannot tell. It must have been -a very long time, for the moon had sunk, and a bright morning was -breaking when I came to myself. My clothes were all sodden with -dew, and my coat-sleeve was drenched with blood from my wounded -thumb. The smarting of it recalled in an instant all the -particulars of my night's adventure, and I sprang to my feet with -the feeling that I might hardly yet be safe from my pursuers. But -to my astonishment, when I came to look round me, neither house -nor garden were to be seen. I had been lying in an angle of the -hedge close by the highroad, and just a little lower down was a -long building, which proved, upon my approaching it, to be the -very station at which I had arrived upon the previous night. Were -it not for the ugly wound upon my hand, all that had passed -during those dreadful hours might have been an evil dream. - -"Half dazed, I went into the station and asked about the morning -train. There would be one to Reading in less than an hour. The -same porter was on duty, I found, as had been there when I -arrived. I inquired of him whether he had ever heard of Colonel -Lysander Stark. The name was strange to him. Had he observed a -carriage the night before waiting for me? No, he had not. Was -there a police-station anywhere near? There was one about three -miles off. - -"It was too far for me to go, weak and ill as I was. I determined -to wait until I got back to town before telling my story to the -police. It was a little past six when I arrived, so I went first -to have my wound dressed, and then the doctor was kind enough to -bring me along here. I put the case into your hands and shall do -exactly what you advise." - -We both sat in silence for some little time after listening to -this extraordinary narrative. Then Sherlock Holmes pulled down -from the shelf one of the ponderous commonplace books in which he -placed his cuttings. 
- -"Here is an advertisement which will interest you," said he. "It -appeared in all the papers about a year ago. Listen to this: -'Lost, on the 9th inst., Mr. Jeremiah Hayling, aged -twenty-six, a hydraulic engineer. Left his lodgings at ten -o'clock at night, and has not been heard of since. Was -dressed in,' etc., etc. Ha! That represents the last time that -the colonel needed to have his machine overhauled, I fancy." - -"Good heavens!" cried my patient. "Then that explains what the -girl said." - -"Undoubtedly. It is quite clear that the colonel was a cool and -desperate man, who was absolutely determined that nothing should -stand in the way of his little game, like those out-and-out -pirates who will leave no survivor from a captured ship. Well, -every moment now is precious, so if you feel equal to it we shall -go down to Scotland Yard at once as a preliminary to starting for -Eyford." - -Some three hours or so afterwards we were all in the train -together, bound from Reading to the little Berkshire village. -There were Sherlock Holmes, the hydraulic engineer, Inspector -Bradstreet, of Scotland Yard, a plain-clothes man, and myself. -Bradstreet had spread an ordnance map of the county out upon the -seat and was busy with his compasses drawing a circle with Eyford -for its centre. - -"There you are," said he. "That circle is drawn at a radius of -ten miles from the village. The place we want must be somewhere -near that line. You said ten miles, I think, sir." - -"It was an hour's good drive." - -"And you think that they brought you back all that way when you -were unconscious?" - -"They must have done so. I have a confused memory, too, of having -been lifted and conveyed somewhere." - -"What I cannot understand," said I, "is why they should have -spared you when they found you lying fainting in the garden. -Perhaps the villain was softened by the woman's entreaties." - -"I hardly think that likely. I never saw a more inexorable face -in my life." 
- -"Oh, we shall soon clear up all that," said Bradstreet. "Well, I -have drawn my circle, and I only wish I knew at what point upon -it the folk that we are in search of are to be found." - -"I think I could lay my finger on it," said Holmes quietly. - -"Really, now!" cried the inspector, "you have formed your -opinion! Come, now, we shall see who agrees with you. I say it is -south, for the country is more deserted there." - -"And I say east," said my patient. - -"I am for west," remarked the plain-clothes man. "There are -several quiet little villages up there." - -"And I am for north," said I, "because there are no hills there, -and our friend says that he did not notice the carriage go up -any." - -"Come," cried the inspector, laughing; "it's a very pretty -diversity of opinion. We have boxed the compass among us. Who do -you give your casting vote to?" - -"You are all wrong." - -"But we can't all be." - -"Oh, yes, you can. This is my point." He placed his finger in the -centre of the circle. "This is where we shall find them." - -"But the twelve-mile drive?" gasped Hatherley. - -"Six out and six back. Nothing simpler. You say yourself that the -horse was fresh and glossy when you got in. How could it be that -if it had gone twelve miles over heavy roads?" - -"Indeed, it is a likely ruse enough," observed Bradstreet -thoughtfully. "Of course there can be no doubt as to the nature -of this gang." - -"None at all," said Holmes. "They are coiners on a large scale, -and have used the machine to form the amalgam which has taken the -place of silver." - -"We have known for some time that a clever gang was at work," -said the inspector. "They have been turning out half-crowns by -the thousand. We even traced them as far as Reading, but could -get no farther, for they had covered their traces in a way that -showed that they were very old hands. But now, thanks to this -lucky chance, I think that we have got them right enough." 
- -But the inspector was mistaken, for those criminals were not -destined to fall into the hands of justice. As we rolled into -Eyford Station we saw a gigantic column of smoke which streamed -up from behind a small clump of trees in the neighbourhood and -hung like an immense ostrich feather over the landscape. - -"A house on fire?" asked Bradstreet as the train steamed off -again on its way. - -"Yes, sir!" said the station-master. - -"When did it break out?" - -"I hear that it was during the night, sir, but it has got worse, -and the whole place is in a blaze." - -"Whose house is it?" - -"Dr. Becher's." - -"Tell me," broke in the engineer, "is Dr. Becher a German, very -thin, with a long, sharp nose?" - -The station-master laughed heartily. "No, sir, Dr. Becher is an -Englishman, and there isn't a man in the parish who has a -better-lined waistcoat. But he has a gentleman staying with him, -a patient, as I understand, who is a foreigner, and he looks as -if a little good Berkshire beef would do him no harm." - -The station-master had not finished his speech before we were all -hastening in the direction of the fire. The road topped a low -hill, and there was a great widespread whitewashed building in -front of us, spouting fire at every chink and window, while in -the garden in front three fire-engines were vainly striving to -keep the flames under. - -"That's it!" cried Hatherley, in intense excitement. "There is -the gravel-drive, and there are the rose-bushes where I lay. That -second window is the one that I jumped from." - -"Well, at least," said Holmes, "you have had your revenge upon -them. There can be no question that it was your oil-lamp which, -when it was crushed in the press, set fire to the wooden walls, -though no doubt they were too excited in the chase after you to -observe it at the time. Now keep your eyes open in this crowd for -your friends of last night, though I very much fear that they are -a good hundred miles off by now." 
- -And Holmes' fears came to be realised, for from that day to this -no word has ever been heard either of the beautiful woman, the -sinister German, or the morose Englishman. Early that morning a -peasant had met a cart containing several people and some very -bulky boxes driving rapidly in the direction of Reading, but -there all traces of the fugitives disappeared, and even Holmes' -ingenuity failed ever to discover the least clue as to their -whereabouts. - -The firemen had been much perturbed at the strange arrangements -which they had found within, and still more so by discovering a -newly severed human thumb upon a window-sill of the second floor. -About sunset, however, their efforts were at last successful, and -they subdued the flames, but not before the roof had fallen in, -and the whole place been reduced to such absolute ruin that, save -some twisted cylinders and iron piping, not a trace remained of -the machinery which had cost our unfortunate acquaintance so -dearly. Large masses of nickel and of tin were discovered stored -in an out-house, but no coins were to be found, which may have -explained the presence of those bulky boxes which have been -already referred to. - -How our hydraulic engineer had been conveyed from the garden to -the spot where he recovered his senses might have remained -forever a mystery were it not for the soft mould, which told us a -very plain tale. He had evidently been carried down by two -persons, one of whom had remarkably small feet and the other -unusually large ones. On the whole, it was most probable that the -silent Englishman, being less bold or less murderous than his -companion, had assisted the woman to bear the unconscious man out -of the way of danger. - -"Well," said our engineer ruefully as we took our seats to return -once more to London, "it has been a pretty business for me! I -have lost my thumb and I have lost a fifty-guinea fee, and what -have I gained?" - -"Experience," said Holmes, laughing. 
"Indirectly it may be of -value, you know; you have only to put it into words to gain the -reputation of being excellent company for the remainder of your -existence." - - - -X. THE ADVENTURE OF THE NOBLE BACHELOR - -The Lord St. Simon marriage, and its curious termination, have -long ceased to be a subject of interest in those exalted circles -in which the unfortunate bridegroom moves. Fresh scandals have -eclipsed it, and their more piquant details have drawn the -gossips away from this four-year-old drama. As I have reason to -believe, however, that the full facts have never been revealed to -the general public, and as my friend Sherlock Holmes had a -considerable share in clearing the matter up, I feel that no -memoir of him would be complete without some little sketch of -this remarkable episode. - -It was a few weeks before my own marriage, during the days when I -was still sharing rooms with Holmes in Baker Street, that he came -home from an afternoon stroll to find a letter on the table -waiting for him. I had remained indoors all day, for the weather -had taken a sudden turn to rain, with high autumnal winds, and -the Jezail bullet which I had brought back in one of my limbs as -a relic of my Afghan campaign throbbed with dull persistence. -With my body in one easy-chair and my legs upon another, I had -surrounded myself with a cloud of newspapers until at last, -saturated with the news of the day, I tossed them all aside and -lay listless, watching the huge crest and monogram upon the -envelope upon the table and wondering lazily who my friend's -noble correspondent could be. - -"Here is a very fashionable epistle," I remarked as he entered. -"Your morning letters, if I remember right, were from a -fish-monger and a tide-waiter." - -"Yes, my correspondence has certainly the charm of variety," he -answered, smiling, "and the humbler are usually the more -interesting. 
This looks like one of those unwelcome social -summonses which call upon a man either to be bored or to lie." - -He broke the seal and glanced over the contents. - -"Oh, come, it may prove to be something of interest, after all." - -"Not social, then?" - -"No, distinctly professional." - -"And from a noble client?" - -"One of the highest in England." - -"My dear fellow, I congratulate you." - -"I assure you, Watson, without affectation, that the status of my -client is a matter of less moment to me than the interest of his -case. It is just possible, however, that that also may not be -wanting in this new investigation. You have been reading the -papers diligently of late, have you not?" - -"It looks like it," said I ruefully, pointing to a huge bundle in -the corner. "I have had nothing else to do." - -"It is fortunate, for you will perhaps be able to post me up. I -read nothing except the criminal news and the agony column. The -latter is always instructive. But if you have followed recent -events so closely you must have read about Lord St. Simon and his -wedding?" - -"Oh, yes, with the deepest interest." - -"That is well. The letter which I hold in my hand is from Lord -St. Simon. I will read it to you, and in return you must turn -over these papers and let me have whatever bears upon the matter. -This is what he says: - -"'MY DEAR MR. SHERLOCK HOLMES:--Lord Backwater tells me that I -may place implicit reliance upon your judgment and discretion. I -have determined, therefore, to call upon you and to consult you -in reference to the very painful event which has occurred in -connection with my wedding. Mr. Lestrade, of Scotland Yard, is -acting already in the matter, but he assures me that he sees no -objection to your co-operation, and that he even thinks that -it might be of some assistance. 
I will call at four o'clock in -the afternoon, and, should you have any other engagement at that -time, I hope that you will postpone it, as this matter is of -paramount importance. Yours faithfully, ST. SIMON.' - -"It is dated from Grosvenor Mansions, written with a quill pen, -and the noble lord has had the misfortune to get a smear of ink -upon the outer side of his right little finger," remarked Holmes -as he folded up the epistle. - -"He says four o'clock. It is three now. He will be here in an -hour." - -"Then I have just time, with your assistance, to get clear upon -the subject. Turn over those papers and arrange the extracts in -their order of time, while I take a glance as to who our client -is." He picked a red-covered volume from a line of books of -reference beside the mantelpiece. "Here he is," said he, sitting -down and flattening it out upon his knee. "'Lord Robert Walsingham -de Vere St. Simon, second son of the Duke of Balmoral.' Hum! 'Arms: -Azure, three caltrops in chief over a fess sable. Born in 1846.' -He's forty-one years of age, which is mature for marriage. Was -Under-Secretary for the colonies in a late administration. The -Duke, his father, was at one time Secretary for Foreign Affairs. -They inherit Plantagenet blood by direct descent, and Tudor on -the distaff side. Ha! Well, there is nothing very instructive in -all this. I think that I must turn to you, Watson, for something -more solid." - -"I have very little difficulty in finding what I want," said I, -"for the facts are quite recent, and the matter struck me as -remarkable. I feared to refer them to you, however, as I knew -that you had an inquiry on hand and that you disliked the -intrusion of other matters." - -"Oh, you mean the little problem of the Grosvenor Square -furniture van. That is quite cleared up now--though, indeed, it -was obvious from the first. Pray give me the results of your -newspaper selections." - -"Here is the first notice which I can find. 
It is in the personal -column of the Morning Post, and dates, as you see, some weeks -back: 'A marriage has been arranged,' it says, 'and will, if -rumour is correct, very shortly take place, between Lord Robert -St. Simon, second son of the Duke of Balmoral, and Miss Hatty -Doran, the only daughter of Aloysius Doran, Esq., of San -Francisco, Cal., U.S.A.' That is all." - -"Terse and to the point," remarked Holmes, stretching his long, -thin legs towards the fire. - -"There was a paragraph amplifying this in one of the society -papers of the same week. Ah, here it is: 'There will soon be a -call for protection in the marriage market, for the present -free-trade principle appears to tell heavily against our home -product. One by one the management of the noble houses of Great -Britain is passing into the hands of our fair cousins from across -the Atlantic. An important addition has been made during the last -week to the list of the prizes which have been borne away by -these charming invaders. Lord St. Simon, who has shown himself -for over twenty years proof against the little god's arrows, has -now definitely announced his approaching marriage with Miss Hatty -Doran, the fascinating daughter of a California millionaire. Miss -Doran, whose graceful figure and striking face attracted much -attention at the Westbury House festivities, is an only child, -and it is currently reported that her dowry will run to -considerably over the six figures, with expectancies for the -future. As it is an open secret that the Duke of Balmoral has -been compelled to sell his pictures within the last few years, -and as Lord St. Simon has no property of his own save the small -estate of Birchmoor, it is obvious that the Californian heiress -is not the only gainer by an alliance which will enable her to -make the easy and common transition from a Republican lady to a -British peeress.'" - -"Anything else?" asked Holmes, yawning. - -"Oh, yes; plenty. 
Then there is another note in the Morning Post -to say that the marriage would be an absolutely quiet one, that it -would be at St. George's, Hanover Square, that only half a dozen -intimate friends would be invited, and that the party would -return to the furnished house at Lancaster Gate which has been -taken by Mr. Aloysius Doran. Two days later--that is, on -Wednesday last--there is a curt announcement that the wedding had -taken place, and that the honeymoon would be passed at Lord -Backwater's place, near Petersfield. Those are all the notices -which appeared before the disappearance of the bride." - -"Before the what?" asked Holmes with a start. - -"The vanishing of the lady." - -"When did she vanish, then?" - -"At the wedding breakfast." - -"Indeed. This is more interesting than it promised to be; quite -dramatic, in fact." - -"Yes; it struck me as being a little out of the common." - -"They often vanish before the ceremony, and occasionally during -the honeymoon; but I cannot call to mind anything quite so prompt -as this. Pray let me have the details." - -"I warn you that they are very incomplete." - -"Perhaps we may make them less so." - -"Such as they are, they are set forth in a single article of a -morning paper of yesterday, which I will read to you. It is -headed, 'Singular Occurrence at a Fashionable Wedding': - -"'The family of Lord Robert St. Simon has been thrown into the -greatest consternation by the strange and painful episodes which -have taken place in connection with his wedding. The ceremony, as -shortly announced in the papers of yesterday, occurred on the -previous morning; but it is only now that it has been possible to -confirm the strange rumours which have been so persistently -floating about. In spite of the attempts of the friends to hush -the matter up, so much public attention has now been drawn to it -that no good purpose can be served by affecting to disregard what -is a common subject for conversation. 
- -"'The ceremony, which was performed at St. George's, Hanover -Square, was a very quiet one, no one being present save the -father of the bride, Mr. Aloysius Doran, the Duchess of Balmoral, -Lord Backwater, Lord Eustace and Lady Clara St. Simon (the -younger brother and sister of the bridegroom), and Lady Alicia -Whittington. The whole party proceeded afterwards to the house of -Mr. Aloysius Doran, at Lancaster Gate, where breakfast had been -prepared. It appears that some little trouble was caused by a -woman, whose name has not been ascertained, who endeavoured to -force her way into the house after the bridal party, alleging -that she had some claim upon Lord St. Simon. It was only after a -painful and prolonged scene that she was ejected by the butler -and the footman. The bride, who had fortunately entered the house -before this unpleasant interruption, had sat down to breakfast -with the rest, when she complained of a sudden indisposition and -retired to her room. Her prolonged absence having caused some -comment, her father followed her, but learned from her maid that -she had only come up to her chamber for an instant, caught up an -ulster and bonnet, and hurried down to the passage. One of the -footmen declared that he had seen a lady leave the house thus -apparelled, but had refused to credit that it was his mistress, -believing her to be with the company. On ascertaining that his -daughter had disappeared, Mr. Aloysius Doran, in conjunction with -the bridegroom, instantly put themselves in communication with -the police, and very energetic inquiries are being made, which -will probably result in a speedy clearing up of this very -singular business. Up to a late hour last night, however, nothing -had transpired as to the whereabouts of the missing lady. 
There -are rumours of foul play in the matter, and it is said that the -police have caused the arrest of the woman who had caused the -original disturbance, in the belief that, from jealousy or some -other motive, she may have been concerned in the strange -disappearance of the bride.'" - -"And is that all?" - -"Only one little item in another of the morning papers, but it is -a suggestive one." - -"And it is--" - -"That Miss Flora Millar, the lady who had caused the disturbance, -has actually been arrested. It appears that she was formerly a -danseuse at the Allegro, and that she has known the bridegroom -for some years. There are no further particulars, and the whole -case is in your hands now--so far as it has been set forth in the -public press." - -"And an exceedingly interesting case it appears to be. I would -not have missed it for worlds. But there is a ring at the bell, -Watson, and as the clock makes it a few minutes after four, I -have no doubt that this will prove to be our noble client. Do not -dream of going, Watson, for I very much prefer having a witness, -if only as a check to my own memory." - -"Lord Robert St. Simon," announced our page-boy, throwing open -the door. A gentleman entered, with a pleasant, cultured face, -high-nosed and pale, with something perhaps of petulance about -the mouth, and with the steady, well-opened eye of a man whose -pleasant lot it had ever been to command and to be obeyed. His -manner was brisk, and yet his general appearance gave an undue -impression of age, for he had a slight forward stoop and a little -bend of the knees as he walked. His hair, too, as he swept off -his very curly-brimmed hat, was grizzled round the edges and thin -upon the top. As to his dress, it was careful to the verge of -foppishness, with high collar, black frock-coat, white waistcoat, -yellow gloves, patent-leather shoes, and light-coloured gaiters. 
-He advanced slowly into the room, turning his head from left to -right, and swinging in his right hand the cord which held his -golden eyeglasses. - -"Good-day, Lord St. Simon," said Holmes, rising and bowing. "Pray -take the basket-chair. This is my friend and colleague, Dr. -Watson. Draw up a little to the fire, and we will talk this -matter over." - -"A most painful matter to me, as you can most readily imagine, -Mr. Holmes. I have been cut to the quick. I understand that you -have already managed several delicate cases of this sort, sir, -though I presume that they were hardly from the same class of -society." - -"No, I am descending." - -"I beg pardon." - -"My last client of the sort was a king." - -"Oh, really! I had no idea. And which king?" - -"The King of Scandinavia." - -"What! Had he lost his wife?" - -"You can understand," said Holmes suavely, "that I extend to the -affairs of my other clients the same secrecy which I promise to -you in yours." - -"Of course! Very right! very right! I'm sure I beg pardon. As to -my own case, I am ready to give you any information which may -assist you in forming an opinion." - -"Thank you. I have already learned all that is in the public -prints, nothing more. I presume that I may take it as correct--this -article, for example, as to the disappearance of the bride." - -Lord St. Simon glanced over it. "Yes, it is correct, as far as it -goes." - -"But it needs a great deal of supplementing before anyone could -offer an opinion. I think that I may arrive at my facts most -directly by questioning you." - -"Pray do so." - -"When did you first meet Miss Hatty Doran?" - -"In San Francisco, a year ago." - -"You were travelling in the States?" - -"Yes." - -"Did you become engaged then?" - -"No." - -"But you were on a friendly footing?" - -"I was amused by her society, and she could see that I was -amused." - -"Her father is very rich?" - -"He is said to be the richest man on the Pacific slope." 
- -"And how did he make his money?" - -"In mining. He had nothing a few years ago. Then he struck gold, -invested it, and came up by leaps and bounds." - -"Now, what is your own impression as to the young lady's--your -wife's character?" - -The nobleman swung his glasses a little faster and stared down -into the fire. "You see, Mr. Holmes," said he, "my wife was -twenty before her father became a rich man. During that time she -ran free in a mining camp and wandered through woods or -mountains, so that her education has come from Nature rather than -from the schoolmaster. She is what we call in England a tomboy, -with a strong nature, wild and free, unfettered by any sort of -traditions. She is impetuous--volcanic, I was about to say. She -is swift in making up her mind and fearless in carrying out her -resolutions. On the other hand, I would not have given her the -name which I have the honour to bear"--he gave a little stately -cough--"had not I thought her to be at bottom a noble woman. I -believe that she is capable of heroic self-sacrifice and that -anything dishonourable would be repugnant to her." - -"Have you her photograph?" - -"I brought this with me." He opened a locket and showed us the -full face of a very lovely woman. It was not a photograph but an -ivory miniature, and the artist had brought out the full effect -of the lustrous black hair, the large dark eyes, and the -exquisite mouth. Holmes gazed long and earnestly at it. Then he -closed the locket and handed it back to Lord St. Simon. - -"The young lady came to London, then, and you renewed your -acquaintance?" - -"Yes, her father brought her over for this last London season. I -met her several times, became engaged to her, and have now -married her." - -"She brought, I understand, a considerable dowry?" - -"A fair dowry. Not more than is usual in my family." - -"And this, of course, remains to you, since the marriage is a -fait accompli?" - -"I really have made no inquiries on the subject." 
- -"Very naturally not. Did you see Miss Doran on the day before the -wedding?" - -"Yes." - -"Was she in good spirits?" - -"Never better. She kept talking of what we should do in our -future lives." - -"Indeed! That is very interesting. And on the morning of the -wedding?" - -"She was as bright as possible--at least until after the -ceremony." - -"And did you observe any change in her then?" - -"Well, to tell the truth, I saw then the first signs that I had -ever seen that her temper was just a little sharp. The incident -however, was too trivial to relate and can have no possible -bearing upon the case." - -"Pray let us have it, for all that." - -"Oh, it is childish. She dropped her bouquet as we went towards -the vestry. She was passing the front pew at the time, and it -fell over into the pew. There was a moment's delay, but the -gentleman in the pew handed it up to her again, and it did not -appear to be the worse for the fall. Yet when I spoke to her of -the matter, she answered me abruptly; and in the carriage, on our -way home, she seemed absurdly agitated over this trifling cause." - -"Indeed! You say that there was a gentleman in the pew. Some of -the general public were present, then?" - -"Oh, yes. It is impossible to exclude them when the church is -open." - -"This gentleman was not one of your wife's friends?" - -"No, no; I call him a gentleman by courtesy, but he was quite a -common-looking person. I hardly noticed his appearance. But -really I think that we are wandering rather far from the point." - -"Lady St. Simon, then, returned from the wedding in a less -cheerful frame of mind than she had gone to it. What did she do -on re-entering her father's house?" - -"I saw her in conversation with her maid." - -"And who is her maid?" - -"Alice is her name. She is an American and came from California -with her." - -"A confidential servant?" - -"A little too much so. It seemed to me that her mistress allowed -her to take great liberties. 
Still, of course, in America they -look upon these things in a different way." - -"How long did she speak to this Alice?" - -"Oh, a few minutes. I had something else to think of." - -"You did not overhear what they said?" - -"Lady St. Simon said something about 'jumping a claim.' She was -accustomed to use slang of the kind. I have no idea what she -meant." - -"American slang is very expressive sometimes. And what did your -wife do when she finished speaking to her maid?" - -"She walked into the breakfast-room." - -"On your arm?" - -"No, alone. She was very independent in little matters like that. -Then, after we had sat down for ten minutes or so, she rose -hurriedly, muttered some words of apology, and left the room. She -never came back." - -"But this maid, Alice, as I understand, deposes that she went to -her room, covered her bride's dress with a long ulster, put on a -bonnet, and went out." - -"Quite so. And she was afterwards seen walking into Hyde Park in -company with Flora Millar, a woman who is now in custody, and who -had already made a disturbance at Mr. Doran's house that -morning." - -"Ah, yes. I should like a few particulars as to this young lady, -and your relations to her." - -Lord St. Simon shrugged his shoulders and raised his eyebrows. -"We have been on a friendly footing for some years--I may say on -a very friendly footing. She used to be at the Allegro. I have -not treated her ungenerously, and she had no just cause of -complaint against me, but you know what women are, Mr. Holmes. -Flora was a dear little thing, but exceedingly hot-headed and -devotedly attached to me. She wrote me dreadful letters when she -heard that I was about to be married, and, to tell the truth, the -reason why I had the marriage celebrated so quietly was that I -feared lest there might be a scandal in the church. She came to -Mr. 
Doran's door just after we returned, and she endeavoured to -push her way in, uttering very abusive expressions towards my -wife, and even threatening her, but I had foreseen the -possibility of something of the sort, and I had two police -fellows there in private clothes, who soon pushed her out again. -She was quiet when she saw that there was no good in making a -row." - -"Did your wife hear all this?" - -"No, thank goodness, she did not." - -"And she was seen walking with this very woman afterwards?" - -"Yes. That is what Mr. Lestrade, of Scotland Yard, looks upon as -so serious. It is thought that Flora decoyed my wife out and laid -some terrible trap for her." - -"Well, it is a possible supposition." - -"You think so, too?" - -"I did not say a probable one. But you do not yourself look upon -this as likely?" - -"I do not think Flora would hurt a fly." - -"Still, jealousy is a strange transformer of characters. Pray -what is your own theory as to what took place?" - -"Well, really, I came to seek a theory, not to propound one. I -have given you all the facts. Since you ask me, however, I may -say that it has occurred to me as possible that the excitement of -this affair, the consciousness that she had made so immense a -social stride, had the effect of causing some little nervous -disturbance in my wife." - -"In short, that she had become suddenly deranged?" - -"Well, really, when I consider that she has turned her back--I -will not say upon me, but upon so much that many have aspired to -without success--I can hardly explain it in any other fashion." - -"Well, certainly that is also a conceivable hypothesis," said -Holmes, smiling. "And now, Lord St. Simon, I think that I have -nearly all my data. May I ask whether you were seated at the -breakfast-table so that you could see out of the window?" - -"We could see the other side of the road and the Park." - -"Quite so. Then I do not think that I need to detain you longer. -I shall communicate with you." 
- -"Should you be fortunate enough to solve this problem," said our -client, rising. - -"I have solved it." - -"Eh? What was that?" - -"I say that I have solved it." - -"Where, then, is my wife?" - -"That is a detail which I shall speedily supply." - -Lord St. Simon shook his head. "I am afraid that it will take -wiser heads than yours or mine," he remarked, and bowing in a -stately, old-fashioned manner he departed. - -"It is very good of Lord St. Simon to honour my head by putting -it on a level with his own," said Sherlock Holmes, laughing. "I -think that I shall have a whisky and soda and a cigar after all -this cross-questioning. I had formed my conclusions as to the -case before our client came into the room." - -"My dear Holmes!" - -"I have notes of several similar cases, though none, as I -remarked before, which were quite as prompt. My whole examination -served to turn my conjecture into a certainty. Circumstantial -evidence is occasionally very convincing, as when you find a -trout in the milk, to quote Thoreau's example." - -"But I have heard all that you have heard." - -"Without, however, the knowledge of pre-existing cases which -serves me so well. There was a parallel instance in Aberdeen some -years back, and something on very much the same lines at Munich -the year after the Franco-Prussian War. It is one of these -cases--but, hullo, here is Lestrade! Good-afternoon, Lestrade! -You will find an extra tumbler upon the sideboard, and there are -cigars in the box." - -The official detective was attired in a pea-jacket and cravat, -which gave him a decidedly nautical appearance, and he carried a -black canvas bag in his hand. With a short greeting he seated -himself and lit the cigar which had been offered to him. - -"What's up, then?" asked Holmes with a twinkle in his eye. "You -look dissatisfied." - -"And I feel dissatisfied. It is this infernal St. Simon marriage -case. I can make neither head nor tail of the business." - -"Really! You surprise me." 
- -"Who ever heard of such a mixed affair? Every clue seems to slip -through my fingers. I have been at work upon it all day." - -"And very wet it seems to have made you," said Holmes laying his -hand upon the arm of the pea-jacket. - -"Yes, I have been dragging the Serpentine." - -"In heaven's name, what for?" - -"In search of the body of Lady St. Simon." - -Sherlock Holmes leaned back in his chair and laughed heartily. - -"Have you dragged the basin of Trafalgar Square fountain?" he -asked. - -"Why? What do you mean?" - -"Because you have just as good a chance of finding this lady in -the one as in the other." - -Lestrade shot an angry glance at my companion. "I suppose you -know all about it," he snarled. - -"Well, I have only just heard the facts, but my mind is made up." - -"Oh, indeed! Then you think that the Serpentine plays no part in -the matter?" - -"I think it very unlikely." - -"Then perhaps you will kindly explain how it is that we found -this in it?" He opened his bag as he spoke, and tumbled onto the -floor a wedding-dress of watered silk, a pair of white satin -shoes and a bride's wreath and veil, all discoloured and soaked -in water. "There," said he, putting a new wedding-ring upon the -top of the pile. "There is a little nut for you to crack, Master -Holmes." - -"Oh, indeed!" said my friend, blowing blue rings into the air. -"You dragged them from the Serpentine?" - -"No. They were found floating near the margin by a park-keeper. -They have been identified as her clothes, and it seemed to me -that if the clothes were there the body would not be far off." - -"By the same brilliant reasoning, every man's body is to be found -in the neighbourhood of his wardrobe. And pray what did you hope -to arrive at through this?" - -"At some evidence implicating Flora Millar in the disappearance." - -"I am afraid that you will find it difficult." - -"Are you, indeed, now?" cried Lestrade with some bitterness. 
"I -am afraid, Holmes, that you are not very practical with your -deductions and your inferences. You have made two blunders in as -many minutes. This dress does implicate Miss Flora Millar." - -"And how?" - -"In the dress is a pocket. In the pocket is a card-case. In the -card-case is a note. And here is the very note." He slapped it -down upon the table in front of him. "Listen to this: 'You will -see me when all is ready. Come at once. F.H.M.' Now my theory all -along has been that Lady St. Simon was decoyed away by Flora -Millar, and that she, with confederates, no doubt, was -responsible for her disappearance. Here, signed with her -initials, is the very note which was no doubt quietly slipped -into her hand at the door and which lured her within their -reach." - -"Very good, Lestrade," said Holmes, laughing. "You really are -very fine indeed. Let me see it." He took up the paper in a -listless way, but his attention instantly became riveted, and he -gave a little cry of satisfaction. "This is indeed important," -said he. - -"Ha! you find it so?" - -"Extremely so. I congratulate you warmly." - -Lestrade rose in his triumph and bent his head to look. "Why," he -shrieked, "you're looking at the wrong side!" - -"On the contrary, this is the right side." - -"The right side? You're mad! Here is the note written in pencil -over here." - -"And over here is what appears to be the fragment of a hotel -bill, which interests me deeply." - -"There's nothing in it. I looked at it before," said Lestrade. -"'Oct. 4th, rooms 8s., breakfast 2s. 6d., cocktail 1s., lunch 2s. -6d., glass sherry, 8d.' I see nothing in that." - -"Very likely not. It is most important, all the same. As to the -note, it is important also, or at least the initials are, so I -congratulate you again." - -"I've wasted time enough," said Lestrade, rising. "I believe in -hard work and not in sitting by the fire spinning fine theories. -Good-day, Mr. 
Holmes, and we shall see which gets to the bottom -of the matter first." He gathered up the garments, thrust them -into the bag, and made for the door. - -"Just one hint to you, Lestrade," drawled Holmes before his rival -vanished; "I will tell you the true solution of the matter. Lady -St. Simon is a myth. There is not, and there never has been, any -such person." - -Lestrade looked sadly at my companion. Then he turned to me, -tapped his forehead three times, shook his head solemnly, and -hurried away. - -He had hardly shut the door behind him when Holmes rose to put on -his overcoat. "There is something in what the fellow says about -outdoor work," he remarked, "so I think, Watson, that I must -leave you to your papers for a little." - -It was after five o'clock when Sherlock Holmes left me, but I had -no time to be lonely, for within an hour there arrived a -confectioner's man with a very large flat box. This he unpacked -with the help of a youth whom he had brought with him, and -presently, to my very great astonishment, a quite epicurean -little cold supper began to be laid out upon our humble -lodging-house mahogany. There were a couple of brace of cold -woodcock, a pheasant, a pâté de foie gras pie with a group of -ancient and cobwebby bottles. Having laid out all these luxuries, -my two visitors vanished away, like the genii of the Arabian -Nights, with no explanation save that the things had been paid -for and were ordered to this address. - -Just before nine o'clock Sherlock Holmes stepped briskly into the -room. His features were gravely set, but there was a light in his -eye which made me think that he had not been disappointed in his -conclusions. - -"They have laid the supper, then," he said, rubbing his hands. - -"You seem to expect company. They have laid for five." - -"Yes, I fancy we may have some company dropping in," said he. "I -am surprised that Lord St. Simon has not already arrived. Ha! I -fancy that I hear his step now upon the stairs." 
- -It was indeed our visitor of the afternoon who came bustling in, -dangling his glasses more vigorously than ever, and with a very -perturbed expression upon his aristocratic features. - -"My messenger reached you, then?" asked Holmes. - -"Yes, and I confess that the contents startled me beyond measure. -Have you good authority for what you say?" - -"The best possible." - -Lord St. Simon sank into a chair and passed his hand over his -forehead. - -"What will the Duke say," he murmured, "when he hears that one of -the family has been subjected to such humiliation?" - -"It is the purest accident. I cannot allow that there is any -humiliation." - -"Ah, you look on these things from another standpoint." - -"I fail to see that anyone is to blame. I can hardly see how the -lady could have acted otherwise, though her abrupt method of -doing it was undoubtedly to be regretted. Having no mother, she -had no one to advise her at such a crisis." - -"It was a slight, sir, a public slight," said Lord St. Simon, -tapping his fingers upon the table. - -"You must make allowance for this poor girl, placed in so -unprecedented a position." - -"I will make no allowance. I am very angry indeed, and I have -been shamefully used." - -"I think that I heard a ring," said Holmes. "Yes, there are steps -on the landing. If I cannot persuade you to take a lenient view -of the matter, Lord St. Simon, I have brought an advocate here -who may be more successful." He opened the door and ushered in a -lady and gentleman. "Lord St. Simon," said he "allow me to -introduce you to Mr. and Mrs. Francis Hay Moulton. The lady, I -think, you have already met." - -At the sight of these newcomers our client had sprung from his -seat and stood very erect, with his eyes cast down and his hand -thrust into the breast of his frock-coat, a picture of offended -dignity. The lady had taken a quick step forward and had held out -her hand to him, but he still refused to raise his eyes. 
It was -as well for his resolution, perhaps, for her pleading face was -one which it was hard to resist. - -"You're angry, Robert," said she. "Well, I guess you have every -cause to be." - -"Pray make no apology to me," said Lord St. Simon bitterly. - -"Oh, yes, I know that I have treated you real bad and that I -should have spoken to you before I went; but I was kind of -rattled, and from the time when I saw Frank here again I just -didn't know what I was doing or saying. I only wonder I didn't -fall down and do a faint right there before the altar." - -"Perhaps, Mrs. Moulton, you would like my friend and me to leave -the room while you explain this matter?" - -"If I may give an opinion," remarked the strange gentleman, -"we've had just a little too much secrecy over this business -already. For my part, I should like all Europe and America to -hear the rights of it." He was a small, wiry, sunburnt man, -clean-shaven, with a sharp face and alert manner. - -"Then I'll tell our story right away," said the lady. "Frank here -and I met in '84, in McQuire's camp, near the Rockies, where pa -was working a claim. We were engaged to each other, Frank and I; -but then one day father struck a rich pocket and made a pile, -while poor Frank here had a claim that petered out and came to -nothing. The richer pa grew the poorer was Frank; so at last pa -wouldn't hear of our engagement lasting any longer, and he took -me away to 'Frisco. Frank wouldn't throw up his hand, though; so -he followed me there, and he saw me without pa knowing anything -about it. It would only have made him mad to know, so we just -fixed it all up for ourselves. Frank said that he would go and -make his pile, too, and never come back to claim me until he had -as much as pa. So then I promised to wait for him to the end of -time and pledged myself not to marry anyone else while he lived. 
-'Why shouldn't we be married right away, then,' said he, 'and -then I will feel sure of you; and I won't claim to be your -husband until I come back?' Well, we talked it over, and he had -fixed it all up so nicely, with a clergyman all ready in waiting, -that we just did it right there; and then Frank went off to seek -his fortune, and I went back to pa. - -"The next I heard of Frank was that he was in Montana, and then -he went prospecting in Arizona, and then I heard of him from New -Mexico. After that came a long newspaper story about how a -miners' camp had been attacked by Apache Indians, and there was -my Frank's name among the killed. I fainted dead away, and I was -very sick for months after. Pa thought I had a decline and took -me to half the doctors in 'Frisco. Not a word of news came for a -year and more, so that I never doubted that Frank was really -dead. Then Lord St. Simon came to 'Frisco, and we came to London, -and a marriage was arranged, and pa was very pleased, but I felt -all the time that no man on this earth would ever take the place -in my heart that had been given to my poor Frank. - -"Still, if I had married Lord St. Simon, of course I'd have done -my duty by him. We can't command our love, but we can our -actions. I went to the altar with him with the intention to make -him just as good a wife as it was in me to be. But you may -imagine what I felt when, just as I came to the altar rails, I -glanced back and saw Frank standing and looking at me out of the -first pew. I thought it was his ghost at first; but when I looked -again there he was still, with a kind of question in his eyes, as -if to ask me whether I were glad or sorry to see him. I wonder I -didn't drop. I know that everything was turning round, and the -words of the clergyman were just like the buzz of a bee in my -ear. I didn't know what to do. Should I stop the service and make -a scene in the church? 
I glanced at him again, and he seemed to -know what I was thinking, for he raised his finger to his lips to -tell me to be still. Then I saw him scribble on a piece of paper, -and I knew that he was writing me a note. As I passed his pew on -the way out I dropped my bouquet over to him, and he slipped the -note into my hand when he returned me the flowers. It was only a -line asking me to join him when he made the sign to me to do so. -Of course I never doubted for a moment that my first duty was now -to him, and I determined to do just whatever he might direct. - -"When I got back I told my maid, who had known him in California, -and had always been his friend. I ordered her to say nothing, but -to get a few things packed and my ulster ready. I know I ought to -have spoken to Lord St. Simon, but it was dreadful hard before -his mother and all those great people. I just made up my mind to -run away and explain afterwards. I hadn't been at the table ten -minutes before I saw Frank out of the window at the other side of -the road. He beckoned to me and then began walking into the Park. -I slipped out, put on my things, and followed him. Some woman -came talking something or other about Lord St. Simon to -me--seemed to me from the little I heard as if he had a little -secret of his own before marriage also--but I managed to get away -from her and soon overtook Frank. We got into a cab together, and -away we drove to some lodgings he had taken in Gordon Square, and -that was my true wedding after all those years of waiting. Frank -had been a prisoner among the Apaches, had escaped, came on to -'Frisco, found that I had given him up for dead and had gone to -England, followed me there, and had come upon me at last on the -very morning of my second wedding." - -"I saw it in a paper," explained the American. "It gave the name -and the church but not where the lady lived." 
- -"Then we had a talk as to what we should do, and Frank was all -for openness, but I was so ashamed of it all that I felt as if I -should like to vanish away and never see any of them again--just -sending a line to pa, perhaps, to show him that I was alive. It -was awful to me to think of all those lords and ladies sitting -round that breakfast-table and waiting for me to come back. So -Frank took my wedding-clothes and things and made a bundle of -them, so that I should not be traced, and dropped them away -somewhere where no one could find them. It is likely that we -should have gone on to Paris to-morrow, only that this good -gentleman, Mr. Holmes, came round to us this evening, though how -he found us is more than I can think, and he showed us very -clearly and kindly that I was wrong and that Frank was right, and -that we should be putting ourselves in the wrong if we were so -secret. Then he offered to give us a chance of talking to Lord -St. Simon alone, and so we came right away round to his rooms at -once. Now, Robert, you have heard it all, and I am very sorry if -I have given you pain, and I hope that you do not think very -meanly of me." - -Lord St. Simon had by no means relaxed his rigid attitude, but -had listened with a frowning brow and a compressed lip to this -long narrative. - -"Excuse me," he said, "but it is not my custom to discuss my most -intimate personal affairs in this public manner." - -"Then you won't forgive me? You won't shake hands before I go?" - -"Oh, certainly, if it would give you any pleasure." He put out -his hand and coldly grasped that which she extended to him. - -"I had hoped," suggested Holmes, "that you would have joined us -in a friendly supper." - -"I think that there you ask a little too much," responded his -Lordship. "I may be forced to acquiesce in these recent -developments, but I can hardly be expected to make merry over -them. I think that with your permission I will now wish you all a -very good-night." 
He included us all in a sweeping bow and -stalked out of the room. - -"Then I trust that you at least will honour me with your -company," said Sherlock Holmes. "It is always a joy to meet an -American, Mr. Moulton, for I am one of those who believe that the -folly of a monarch and the blundering of a minister in far-gone -years will not prevent our children from being some day citizens -of the same world-wide country under a flag which shall be a -quartering of the Union Jack with the Stars and Stripes." - -"The case has been an interesting one," remarked Holmes when our -visitors had left us, "because it serves to show very clearly how -simple the explanation may be of an affair which at first sight -seems to be almost inexplicable. Nothing could be more natural -than the sequence of events as narrated by this lady, and nothing -stranger than the result when viewed, for instance, by Mr. -Lestrade of Scotland Yard." - -"You were not yourself at fault at all, then?" - -"From the first, two facts were very obvious to me, the one that -the lady had been quite willing to undergo the wedding ceremony, -the other that she had repented of it within a few minutes of -returning home. Obviously something had occurred during the -morning, then, to cause her to change her mind. What could that -something be? She could not have spoken to anyone when she was -out, for she had been in the company of the bridegroom. Had she -seen someone, then? If she had, it must be someone from America -because she had spent so short a time in this country that she -could hardly have allowed anyone to acquire so deep an influence -over her that the mere sight of him would induce her to change -her plans so completely. You see we have already arrived, by a -process of exclusion, at the idea that she might have seen an -American. Then who could this American be, and why should he -possess so much influence over her? It might be a lover; it might -be a husband. 
Her young womanhood had, I knew, been spent in -rough scenes and under strange conditions. So far I had got -before I ever heard Lord St. Simon's narrative. When he told us -of a man in a pew, of the change in the bride's manner, of so -transparent a device for obtaining a note as the dropping of a -bouquet, of her resort to her confidential maid, and of her very -significant allusion to claim-jumping--which in miners' parlance -means taking possession of that which another person has a prior -claim to--the whole situation became absolutely clear. She had -gone off with a man, and the man was either a lover or was a -previous husband--the chances being in favour of the latter." - -"And how in the world did you find them?" - -"It might have been difficult, but friend Lestrade held -information in his hands the value of which he did not himself -know. The initials were, of course, of the highest importance, -but more valuable still was it to know that within a week he had -settled his bill at one of the most select London hotels." - -"How did you deduce the select?" - -"By the select prices. Eight shillings for a bed and eightpence -for a glass of sherry pointed to one of the most expensive -hotels. There are not many in London which charge at that rate. -In the second one which I visited in Northumberland Avenue, I -learned by an inspection of the book that Francis H. Moulton, an -American gentleman, had left only the day before, and on looking -over the entries against him, I came upon the very items which I -had seen in the duplicate bill. His letters were to be forwarded -to 226 Gordon Square; so thither I travelled, and being fortunate -enough to find the loving couple at home, I ventured to give them -some paternal advice and to point out to them that it would be -better in every way that they should make their position a little -clearer both to the general public and to Lord St. Simon in -particular. 
I invited them to meet him here, and, as you see, I -made him keep the appointment." - -"But with no very good result," I remarked. "His conduct was -certainly not very gracious." - -"Ah, Watson," said Holmes, smiling, "perhaps you would not be -very gracious either, if, after all the trouble of wooing and -wedding, you found yourself deprived in an instant of wife and of -fortune. I think that we may judge Lord St. Simon very mercifully -and thank our stars that we are never likely to find ourselves in -the same position. Draw your chair up and hand me my violin, for -the only problem we have still to solve is how to while away -these bleak autumnal evenings." - - - -XI. THE ADVENTURE OF THE BERYL CORONET - -"Holmes," said I as I stood one morning in our bow-window looking -down the street, "here is a madman coming along. It seems rather -sad that his relatives should allow him to come out alone." - -My friend rose lazily from his armchair and stood with his hands -in the pockets of his dressing-gown, looking over my shoulder. It -was a bright, crisp February morning, and the snow of the day -before still lay deep upon the ground, shimmering brightly in the -wintry sun. Down the centre of Baker Street it had been ploughed -into a brown crumbly band by the traffic, but at either side and -on the heaped-up edges of the foot-paths it still lay as white as -when it fell. The grey pavement had been cleaned and scraped, but -was still dangerously slippery, so that there were fewer -passengers than usual. Indeed, from the direction of the -Metropolitan Station no one was coming save the single gentleman -whose eccentric conduct had drawn my attention. - -He was a man of about fifty, tall, portly, and imposing, with a -massive, strongly marked face and a commanding figure. He was -dressed in a sombre yet rich style, in black frock-coat, shining -hat, neat brown gaiters, and well-cut pearl-grey trousers. 
Yet -his actions were in absurd contrast to the dignity of his dress -and features, for he was running hard, with occasional little -springs, such as a weary man gives who is little accustomed to -set any tax upon his legs. As he ran he jerked his hands up and -down, waggled his head, and writhed his face into the most -extraordinary contortions. - -"What on earth can be the matter with him?" I asked. "He is -looking up at the numbers of the houses." - -"I believe that he is coming here," said Holmes, rubbing his -hands. - -"Here?" - -"Yes; I rather think he is coming to consult me professionally. I -think that I recognise the symptoms. Ha! did I not tell you?" As -he spoke, the man, puffing and blowing, rushed at our door and -pulled at our bell until the whole house resounded with the -clanging. - -A few moments later he was in our room, still puffing, still -gesticulating, but with so fixed a look of grief and despair in -his eyes that our smiles were turned in an instant to horror and -pity. For a while he could not get his words out, but swayed his -body and plucked at his hair like one who has been driven to the -extreme limits of his reason. Then, suddenly springing to his -feet, he beat his head against the wall with such force that we -both rushed upon him and tore him away to the centre of the room. -Sherlock Holmes pushed him down into the easy-chair and, sitting -beside him, patted his hand and chatted with him in the easy, -soothing tones which he knew so well how to employ. - -"You have come to me to tell your story, have you not?" said he. -"You are fatigued with your haste. Pray wait until you have -recovered yourself, and then I shall be most happy to look into -any little problem which you may submit to me." - -The man sat for a minute or more with a heaving chest, fighting -against his emotion. Then he passed his handkerchief over his -brow, set his lips tight, and turned his face towards us. - -"No doubt you think me mad?" said he. 
- -"I see that you have had some great trouble," responded Holmes. - -"God knows I have!--a trouble which is enough to unseat my -reason, so sudden and so terrible is it. Public disgrace I might -have faced, although I am a man whose character has never yet -borne a stain. Private affliction also is the lot of every man; -but the two coming together, and in so frightful a form, have -been enough to shake my very soul. Besides, it is not I alone. -The very noblest in the land may suffer unless some way be found -out of this horrible affair." - -"Pray compose yourself, sir," said Holmes, "and let me have a -clear account of who you are and what it is that has befallen -you." - -"My name," answered our visitor, "is probably familiar to your -ears. I am Alexander Holder, of the banking firm of Holder & -Stevenson, of Threadneedle Street." - -The name was indeed well known to us as belonging to the senior -partner in the second largest private banking concern in the City -of London. What could have happened, then, to bring one of the -foremost citizens of London to this most pitiable pass? We -waited, all curiosity, until with another effort he braced -himself to tell his story. - -"I feel that time is of value," said he; "that is why I hastened -here when the police inspector suggested that I should secure -your co-operation. I came to Baker Street by the Underground and -hurried from there on foot, for the cabs go slowly through this -snow. That is why I was so out of breath, for I am a man who -takes very little exercise. I feel better now, and I will put the -facts before you as shortly and yet as clearly as I can. - -"It is, of course, well known to you that in a successful banking -business as much depends upon our being able to find remunerative -investments for our funds as upon our increasing our connection -and the number of our depositors. One of our most lucrative means -of laying out money is in the shape of loans, where the security -is unimpeachable. 
We have done a good deal in this direction -during the last few years, and there are many noble families to -whom we have advanced large sums upon the security of their -pictures, libraries, or plate. - -"Yesterday morning I was seated in my office at the bank when a -card was brought in to me by one of the clerks. I started when I -saw the name, for it was that of none other than--well, perhaps -even to you I had better say no more than that it was a name -which is a household word all over the earth--one of the highest, -noblest, most exalted names in England. I was overwhelmed by the -honour and attempted, when he entered, to say so, but he plunged -at once into business with the air of a man who wishes to hurry -quickly through a disagreeable task. - -"'Mr. Holder,' said he, 'I have been informed that you are in the -habit of advancing money.' - -"'The firm does so when the security is good.' I answered. - -"'It is absolutely essential to me,' said he, 'that I should have -50,000 pounds at once. I could, of course, borrow so trifling a -sum ten times over from my friends, but I much prefer to make it -a matter of business and to carry out that business myself. In my -position you can readily understand that it is unwise to place -one's self under obligations.' - -"'For how long, may I ask, do you want this sum?' I asked. - -"'Next Monday I have a large sum due to me, and I shall then most -certainly repay what you advance, with whatever interest you -think it right to charge. But it is very essential to me that the -money should be paid at once.' - -"'I should be happy to advance it without further parley from my -own private purse,' said I, 'were it not that the strain would be -rather more than it could bear. If, on the other hand, I am to do -it in the name of the firm, then in justice to my partner I must -insist that, even in your case, every businesslike precaution -should be taken.' 
- -"'I should much prefer to have it so,' said he, raising up a -square, black morocco case which he had laid beside his chair. -'You have doubtless heard of the Beryl Coronet?' - -"'One of the most precious public possessions of the empire,' -said I. - -"'Precisely.' He opened the case, and there, imbedded in soft, -flesh-coloured velvet, lay the magnificent piece of jewellery -which he had named. 'There are thirty-nine enormous beryls,' said -he, 'and the price of the gold chasing is incalculable. The -lowest estimate would put the worth of the coronet at double the -sum which I have asked. I am prepared to leave it with you as my -security.' - -"I took the precious case into my hands and looked in some -perplexity from it to my illustrious client. - -"'You doubt its value?' he asked. - -"'Not at all. I only doubt--' - -"'The propriety of my leaving it. You may set your mind at rest -about that. I should not dream of doing so were it not absolutely -certain that I should be able in four days to reclaim it. It is a -pure matter of form. Is the security sufficient?' - -"'Ample.' - -"'You understand, Mr. Holder, that I am giving you a strong proof -of the confidence which I have in you, founded upon all that I -have heard of you. I rely upon you not only to be discreet and to -refrain from all gossip upon the matter but, above all, to -preserve this coronet with every possible precaution because I -need not say that a great public scandal would be caused if any -harm were to befall it. Any injury to it would be almost as -serious as its complete loss, for there are no beryls in the -world to match these, and it would be impossible to replace them. -I leave it with you, however, with every confidence, and I shall -call for it in person on Monday morning.' - -"Seeing that my client was anxious to leave, I said no more but, -calling for my cashier, I ordered him to pay over fifty 1000 -pound notes. 
When I was alone once more, however, with the -precious case lying upon the table in front of me, I could not -but think with some misgivings of the immense responsibility -which it entailed upon me. There could be no doubt that, as it -was a national possession, a horrible scandal would ensue if any -misfortune should occur to it. I already regretted having ever -consented to take charge of it. However, it was too late to alter -the matter now, so I locked it up in my private safe and turned -once more to my work. - -"When evening came I felt that it would be an imprudence to leave -so precious a thing in the office behind me. Bankers' safes had -been forced before now, and why should not mine be? If so, how -terrible would be the position in which I should find myself! I -determined, therefore, that for the next few days I would always -carry the case backward and forward with me, so that it might -never be really out of my reach. With this intention, I called a -cab and drove out to my house at Streatham, carrying the jewel -with me. I did not breathe freely until I had taken it upstairs -and locked it in the bureau of my dressing-room. - -"And now a word as to my household, Mr. Holmes, for I wish you to -thoroughly understand the situation. My groom and my page sleep -out of the house, and may be set aside altogether. I have three -maid-servants who have been with me a number of years and whose -absolute reliability is quite above suspicion. Another, Lucy -Parr, the second waiting-maid, has only been in my service a few -months. She came with an excellent character, however, and has -always given me satisfaction. She is a very pretty girl and has -attracted admirers who have occasionally hung about the place. -That is the only drawback which we have found to her, but we -believe her to be a thoroughly good girl in every way. - -"So much for the servants. My family itself is so small that it -will not take me long to describe it. 
I am a widower and have an -only son, Arthur. He has been a disappointment to me, Mr. -Holmes--a grievous disappointment. I have no doubt that I am -myself to blame. People tell me that I have spoiled him. Very -likely I have. When my dear wife died I felt that he was all I -had to love. I could not bear to see the smile fade even for a -moment from his face. I have never denied him a wish. Perhaps it -would have been better for both of us had I been sterner, but I -meant it for the best. - -"It was naturally my intention that he should succeed me in my -business, but he was not of a business turn. He was wild, -wayward, and, to speak the truth, I could not trust him in the -handling of large sums of money. When he was young he became a -member of an aristocratic club, and there, having charming -manners, he was soon the intimate of a number of men with long -purses and expensive habits. He learned to play heavily at cards -and to squander money on the turf, until he had again and again -to come to me and implore me to give him an advance upon his -allowance, that he might settle his debts of honour. He tried -more than once to break away from the dangerous company which he -was keeping, but each time the influence of his friend, Sir -George Burnwell, was enough to draw him back again. - -"And, indeed, I could not wonder that such a man as Sir George -Burnwell should gain an influence over him, for he has frequently -brought him to my house, and I have found myself that I could -hardly resist the fascination of his manner. He is older than -Arthur, a man of the world to his finger-tips, one who had been -everywhere, seen everything, a brilliant talker, and a man of -great personal beauty. Yet when I think of him in cold blood, far -away from the glamour of his presence, I am convinced from his -cynical speech and the look which I have caught in his eyes that -he is one who should be deeply distrusted. 
So I think, and so, -too, thinks my little Mary, who has a woman's quick insight into -character. - -"And now there is only she to be described. She is my niece; but -when my brother died five years ago and left her alone in the -world I adopted her, and have looked upon her ever since as my -daughter. She is a sunbeam in my house--sweet, loving, beautiful, -a wonderful manager and housekeeper, yet as tender and quiet and -gentle as a woman could be. She is my right hand. I do not know -what I could do without her. In only one matter has she ever gone -against my wishes. Twice my boy has asked her to marry him, for -he loves her devotedly, but each time she has refused him. I -think that if anyone could have drawn him into the right path it -would have been she, and that his marriage might have changed his -whole life; but now, alas! it is too late--forever too late! - -"Now, Mr. Holmes, you know the people who live under my roof, and -I shall continue with my miserable story. - -"When we were taking coffee in the drawing-room that night after -dinner, I told Arthur and Mary my experience, and of the precious -treasure which we had under our roof, suppressing only the name -of my client. Lucy Parr, who had brought in the coffee, had, I am -sure, left the room; but I cannot swear that the door was closed. -Mary and Arthur were much interested and wished to see the famous -coronet, but I thought it better not to disturb it. - -"'Where have you put it?' asked Arthur. - -"'In my own bureau.' - -"'Well, I hope to goodness the house won't be burgled during the -night.' said he. - -"'It is locked up,' I answered. - -"'Oh, any old key will fit that bureau. When I was a youngster I -have opened it myself with the key of the box-room cupboard.' - -"He often had a wild way of talking, so that I thought little of -what he said. He followed me to my room, however, that night with -a very grave face. 
- -"'Look here, dad,' said he with his eyes cast down, 'can you let -me have 200 pounds?' - -"'No, I cannot!' I answered sharply. 'I have been far too -generous with you in money matters.' - -"'You have been very kind,' said he, 'but I must have this money, -or else I can never show my face inside the club again.' - -"'And a very good thing, too!' I cried. - -"'Yes, but you would not have me leave it a dishonoured man,' -said he. 'I could not bear the disgrace. I must raise the money -in some way, and if you will not let me have it, then I must try -other means.' - -"I was very angry, for this was the third demand during the -month. 'You shall not have a farthing from me,' I cried, on which -he bowed and left the room without another word. - -"When he was gone I unlocked my bureau, made sure that my -treasure was safe, and locked it again. Then I started to go -round the house to see that all was secure--a duty which I -usually leave to Mary but which I thought it well to perform -myself that night. As I came down the stairs I saw Mary herself -at the side window of the hall, which she closed and fastened as -I approached. - -"'Tell me, dad,' said she, looking, I thought, a little -disturbed, 'did you give Lucy, the maid, leave to go out -to-night?' - -"'Certainly not.' - -"'She came in just now by the back door. I have no doubt that she -has only been to the side gate to see someone, but I think that -it is hardly safe and should be stopped.' - -"'You must speak to her in the morning, or I will if you prefer -it. Are you sure that everything is fastened?' - -"'Quite sure, dad.' - -"'Then, good-night.' I kissed her and went up to my bedroom -again, where I was soon asleep. - -"I am endeavouring to tell you everything, Mr. Holmes, which may -have any bearing upon the case, but I beg that you will question -me upon any point which I do not make clear." - -"On the contrary, your statement is singularly lucid." 
- -"I come to a part of my story now in which I should wish to be -particularly so. I am not a very heavy sleeper, and the anxiety -in my mind tended, no doubt, to make me even less so than usual. -About two in the morning, then, I was awakened by some sound in -the house. It had ceased ere I was wide awake, but it had left an -impression behind it as though a window had gently closed -somewhere. I lay listening with all my ears. Suddenly, to my -horror, there was a distinct sound of footsteps moving softly in -the next room. I slipped out of bed, all palpitating with fear, -and peeped round the corner of my dressing-room door. - -"'Arthur!' I screamed, 'you villain! you thief! How dare you -touch that coronet?' - -"The gas was half up, as I had left it, and my unhappy boy, -dressed only in his shirt and trousers, was standing beside the -light, holding the coronet in his hands. He appeared to be -wrenching at it, or bending it with all his strength. At my cry -he dropped it from his grasp and turned as pale as death. I -snatched it up and examined it. One of the gold corners, with -three of the beryls in it, was missing. - -"'You blackguard!' I shouted, beside myself with rage. 'You have -destroyed it! You have dishonoured me forever! Where are the -jewels which you have stolen?' - -"'Stolen!' he cried. - -"'Yes, thief!' I roared, shaking him by the shoulder. - -"'There are none missing. There cannot be any missing,' said he. - -"'There are three missing. And you know where they are. Must I -call you a liar as well as a thief? Did I not see you trying to -tear off another piece?' - -"'You have called me names enough,' said he, 'I will not stand it -any longer. I shall not say another word about this business, -since you have chosen to insult me. I will leave your house in -the morning and make my own way in the world.' - -"'You shall leave it in the hands of the police!' I cried -half-mad with grief and rage. 'I shall have this matter probed to -the bottom.' 
- -"'You shall learn nothing from me,' said he with a passion such -as I should not have thought was in his nature. 'If you choose to -call the police, let the police find what they can.' - -"By this time the whole house was astir, for I had raised my -voice in my anger. Mary was the first to rush into my room, and, -at the sight of the coronet and of Arthur's face, she read the -whole story and, with a scream, fell down senseless on the -ground. I sent the house-maid for the police and put the -investigation into their hands at once. When the inspector and a -constable entered the house, Arthur, who had stood sullenly with -his arms folded, asked me whether it was my intention to charge -him with theft. I answered that it had ceased to be a private -matter, but had become a public one, since the ruined coronet was -national property. I was determined that the law should have its -way in everything. - -"'At least,' said he, 'you will not have me arrested at once. It -would be to your advantage as well as mine if I might leave the -house for five minutes.' - -"'That you may get away, or perhaps that you may conceal what you -have stolen,' said I. And then, realising the dreadful position -in which I was placed, I implored him to remember that not only -my honour but that of one who was far greater than I was at -stake; and that he threatened to raise a scandal which would -convulse the nation. He might avert it all if he would but tell -me what he had done with the three missing stones. - -"'You may as well face the matter,' said I; 'you have been caught -in the act, and no confession could make your guilt more heinous. -If you but make such reparation as is in your power, by telling -us where the beryls are, all shall be forgiven and forgotten.' - -"'Keep your forgiveness for those who ask for it,' he answered, -turning away from me with a sneer. I saw that he was too hardened -for any words of mine to influence him. There was but one way for -it. 
I called in the inspector and gave him into custody. A search -was made at once not only of his person but of his room and of -every portion of the house where he could possibly have concealed -the gems; but no trace of them could be found, nor would the -wretched boy open his mouth for all our persuasions and our -threats. This morning he was removed to a cell, and I, after -going through all the police formalities, have hurried round to -you to implore you to use your skill in unravelling the matter. -The police have openly confessed that they can at present make -nothing of it. You may go to any expense which you think -necessary. I have already offered a reward of 1000 pounds. My -God, what shall I do! I have lost my honour, my gems, and my son -in one night. Oh, what shall I do!" - -He put a hand on either side of his head and rocked himself to -and fro, droning to himself like a child whose grief has got -beyond words. - -Sherlock Holmes sat silent for some few minutes, with his brows -knitted and his eyes fixed upon the fire. - -"Do you receive much company?" he asked. - -"None save my partner with his family and an occasional friend of -Arthur's. Sir George Burnwell has been several times lately. No -one else, I think." - -"Do you go out much in society?" - -"Arthur does. Mary and I stay at home. We neither of us care for -it." - -"That is unusual in a young girl." - -"She is of a quiet nature. Besides, she is not so very young. She -is four-and-twenty." - -"This matter, from what you say, seems to have been a shock to -her also." - -"Terrible! She is even more affected than I." - -"You have neither of you any doubt as to your son's guilt?" - -"How can we have when I saw him with my own eyes with the coronet -in his hands." - -"I hardly consider that a conclusive proof. Was the remainder of -the coronet at all injured?" - -"Yes, it was twisted." - -"Do you not think, then, that he might have been trying to -straighten it?" - -"God bless you! 
You are doing what you can for him and for me. -But it is too heavy a task. What was he doing there at all? If -his purpose were innocent, why did he not say so?" - -"Precisely. And if it were guilty, why did he not invent a lie? -His silence appears to me to cut both ways. There are several -singular points about the case. What did the police think of the -noise which awoke you from your sleep?" - -"They considered that it might be caused by Arthur's closing his -bedroom door." - -"A likely story! As if a man bent on felony would slam his door -so as to wake a household. What did they say, then, of the -disappearance of these gems?" - -"They are still sounding the planking and probing the furniture -in the hope of finding them." - -"Have they thought of looking outside the house?" - -"Yes, they have shown extraordinary energy. The whole garden has -already been minutely examined." - -"Now, my dear sir," said Holmes, "is it not obvious to you now -that this matter really strikes very much deeper than either you -or the police were at first inclined to think? It appeared to you -to be a simple case; to me it seems exceedingly complex. Consider -what is involved by your theory. You suppose that your son came -down from his bed, went, at great risk, to your dressing-room, -opened your bureau, took out your coronet, broke off by main -force a small portion of it, went off to some other place, -concealed three gems out of the thirty-nine, with such skill that -nobody can find them, and then returned with the other thirty-six -into the room in which he exposed himself to the greatest danger -of being discovered. I ask you now, is such a theory tenable?" - -"But what other is there?" cried the banker with a gesture of -despair. "If his motives were innocent, why does he not explain -them?" - -"It is our task to find that out," replied Holmes; "so now, if -you please, Mr. 
Holder, we will set off for Streatham together, -and devote an hour to glancing a little more closely into -details." - -My friend insisted upon my accompanying them in their expedition, -which I was eager enough to do, for my curiosity and sympathy -were deeply stirred by the story to which we had listened. I -confess that the guilt of the banker's son appeared to me to be -as obvious as it did to his unhappy father, but still I had such -faith in Holmes' judgment that I felt that there must be some -grounds for hope as long as he was dissatisfied with the accepted -explanation. He hardly spoke a word the whole way out to the -southern suburb, but sat with his chin upon his breast and his -hat drawn over his eyes, sunk in the deepest thought. Our client -appeared to have taken fresh heart at the little glimpse of hope -which had been presented to him, and he even broke into a -desultory chat with me over his business affairs. A short railway -journey and a shorter walk brought us to Fairbank, the modest -residence of the great financier. - -Fairbank was a good-sized square house of white stone, standing -back a little from the road. A double carriage-sweep, with a -snow-clad lawn, stretched down in front to two large iron gates -which closed the entrance. On the right side was a small wooden -thicket, which led into a narrow path between two neat hedges -stretching from the road to the kitchen door, and forming the -tradesmen's entrance. On the left ran a lane which led to the -stables, and was not itself within the grounds at all, being a -public, though little used, thoroughfare. Holmes left us standing -at the door and walked slowly all round the house, across the -front, down the tradesmen's path, and so round by the garden -behind into the stable lane. So long was he that Mr. Holder and I -went into the dining-room and waited by the fire until he should -return. We were sitting there in silence when the door opened and -a young lady came in. 
She was rather above the middle height, -slim, with dark hair and eyes, which seemed the darker against -the absolute pallor of her skin. I do not think that I have ever -seen such deadly paleness in a woman's face. Her lips, too, were -bloodless, but her eyes were flushed with crying. As she swept -silently into the room she impressed me with a greater sense of -grief than the banker had done in the morning, and it was the -more striking in her as she was evidently a woman of strong -character, with immense capacity for self-restraint. Disregarding -my presence, she went straight to her uncle and passed her hand -over his head with a sweet womanly caress. - -"You have given orders that Arthur should be liberated, have you -not, dad?" she asked. - -"No, no, my girl, the matter must be probed to the bottom." - -"But I am so sure that he is innocent. You know what woman's -instincts are. I know that he has done no harm and that you will -be sorry for having acted so harshly." - -"Why is he silent, then, if he is innocent?" - -"Who knows? Perhaps because he was so angry that you should -suspect him." - -"How could I help suspecting him, when I actually saw him with -the coronet in his hand?" - -"Oh, but he had only picked it up to look at it. Oh, do, do take -my word for it that he is innocent. Let the matter drop and say -no more. It is so dreadful to think of our dear Arthur in -prison!" - -"I shall never let it drop until the gems are found--never, Mary! -Your affection for Arthur blinds you as to the awful consequences -to me. Far from hushing the thing up, I have brought a gentleman -down from London to inquire more deeply into it." - -"This gentleman?" she asked, facing round to me. - -"No, his friend. He wished us to leave him alone. He is round in -the stable lane now." - -"The stable lane?" She raised her dark eyebrows. "What can he -hope to find there? Ah! this, I suppose, is he. 
I trust, sir, -that you will succeed in proving, what I feel sure is the truth, -that my cousin Arthur is innocent of this crime." - -"I fully share your opinion, and I trust, with you, that we may -prove it," returned Holmes, going back to the mat to knock the -snow from his shoes. "I believe I have the honour of addressing -Miss Mary Holder. Might I ask you a question or two?" - -"Pray do, sir, if it may help to clear this horrible affair up." - -"You heard nothing yourself last night?" - -"Nothing, until my uncle here began to speak loudly. I heard -that, and I came down." - -"You shut up the windows and doors the night before. Did you -fasten all the windows?" - -"Yes." - -"Were they all fastened this morning?" - -"Yes." - -"You have a maid who has a sweetheart? I think that you remarked -to your uncle last night that she had been out to see him?" - -"Yes, and she was the girl who waited in the drawing-room, and -who may have heard uncle's remarks about the coronet." - -"I see. You infer that she may have gone out to tell her -sweetheart, and that the two may have planned the robbery." - -"But what is the good of all these vague theories," cried the -banker impatiently, "when I have told you that I saw Arthur with -the coronet in his hands?" - -"Wait a little, Mr. Holder. We must come back to that. About this -girl, Miss Holder. You saw her return by the kitchen door, I -presume?" - -"Yes; when I went to see if the door was fastened for the night I -met her slipping in. I saw the man, too, in the gloom." - -"Do you know him?" - -"Oh, yes! he is the green-grocer who brings our vegetables round. -His name is Francis Prosper." - -"He stood," said Holmes, "to the left of the door--that is to -say, farther up the path than is necessary to reach the door?" - -"Yes, he did." - -"And he is a man with a wooden leg?" - -Something like fear sprang up in the young lady's expressive -black eyes. "Why, you are like a magician," said she. "How do you -know that?" 
She smiled, but there was no answering smile in -Holmes' thin, eager face. - -"I should be very glad now to go upstairs," said he. "I shall -probably wish to go over the outside of the house again. Perhaps -I had better take a look at the lower windows before I go up." - -He walked swiftly round from one to the other, pausing only at -the large one which looked from the hall onto the stable lane. -This he opened and made a very careful examination of the sill -with his powerful magnifying lens. "Now we shall go upstairs," -said he at last. - -The banker's dressing-room was a plainly furnished little -chamber, with a grey carpet, a large bureau, and a long mirror. -Holmes went to the bureau first and looked hard at the lock. - -"Which key was used to open it?" he asked. - -"That which my son himself indicated--that of the cupboard of the -lumber-room." - -"Have you it here?" - -"That is it on the dressing-table." - -Sherlock Holmes took it up and opened the bureau. - -"It is a noiseless lock," said he. "It is no wonder that it did -not wake you. This case, I presume, contains the coronet. We must -have a look at it." He opened the case, and taking out the diadem -he laid it upon the table. It was a magnificent specimen of the -jeweller's art, and the thirty-six stones were the finest that I -have ever seen. At one side of the coronet was a cracked edge, -where a corner holding three gems had been torn away. - -"Now, Mr. Holder," said Holmes, "here is the corner which -corresponds to that which has been so unfortunately lost. Might I -beg that you will break it off." - -The banker recoiled in horror. "I should not dream of trying," -said he. - -"Then I will." Holmes suddenly bent his strength upon it, but -without result. "I feel it give a little," said he; "but, though -I am exceptionally strong in the fingers, it would take me all my -time to break it. An ordinary man could not do it. Now, what do -you think would happen if I did break it, Mr. Holder? 
There would -be a noise like a pistol shot. Do you tell me that all this -happened within a few yards of your bed and that you heard -nothing of it?" - -"I do not know what to think. It is all dark to me." - -"But perhaps it may grow lighter as we go. What do you think, -Miss Holder?" - -"I confess that I still share my uncle's perplexity." - -"Your son had no shoes or slippers on when you saw him?" - -"He had nothing on save only his trousers and shirt." - -"Thank you. We have certainly been favoured with extraordinary -luck during this inquiry, and it will be entirely our own fault -if we do not succeed in clearing the matter up. With your -permission, Mr. Holder, I shall now continue my investigations -outside." - -He went alone, at his own request, for he explained that any -unnecessary footmarks might make his task more difficult. For an -hour or more he was at work, returning at last with his feet -heavy with snow and his features as inscrutable as ever. - -"I think that I have seen now all that there is to see, Mr. -Holder," said he; "I can serve you best by returning to my -rooms." - -"But the gems, Mr. Holmes. Where are they?" - -"I cannot tell." - -The banker wrung his hands. "I shall never see them again!" he -cried. "And my son? You give me hopes?" - -"My opinion is in no way altered." - -"Then, for God's sake, what was this dark business which was -acted in my house last night?" - -"If you can call upon me at my Baker Street rooms to-morrow -morning between nine and ten I shall be happy to do what I can to -make it clearer. I understand that you give me carte blanche to -act for you, provided only that I get back the gems, and that you -place no limit on the sum I may draw." - -"I would give my fortune to have them back." - -"Very good. I shall look into the matter between this and then. -Good-bye; it is just possible that I may have to come over here -again before evening." 
- -It was obvious to me that my companion's mind was now made up -about the case, although what his conclusions were was more than -I could even dimly imagine. Several times during our homeward -journey I endeavoured to sound him upon the point, but he always -glided away to some other topic, until at last I gave it over in -despair. It was not yet three when we found ourselves in our -rooms once more. He hurried to his chamber and was down again in -a few minutes dressed as a common loafer. With his collar turned -up, his shiny, seedy coat, his red cravat, and his worn boots, he -was a perfect sample of the class. - -"I think that this should do," said he, glancing into the glass -above the fireplace. "I only wish that you could come with me, -Watson, but I fear that it won't do. I may be on the trail in -this matter, or I may be following a will-o'-the-wisp, but I -shall soon know which it is. I hope that I may be back in a few -hours." He cut a slice of beef from the joint upon the sideboard, -sandwiched it between two rounds of bread, and thrusting this -rude meal into his pocket he started off upon his expedition. - -I had just finished my tea when he returned, evidently in -excellent spirits, swinging an old elastic-sided boot in his -hand. He chucked it down into a corner and helped himself to a -cup of tea. - -"I only looked in as I passed," said he. "I am going right on." - -"Where to?" - -"Oh, to the other side of the West End. It may be some time -before I get back. Don't wait up for me in case I should be -late." - -"How are you getting on?" - -"Oh, so so. Nothing to complain of. I have been out to Streatham -since I saw you last, but I did not call at the house. It is a -very sweet little problem, and I would not have missed it for a -good deal. However, I must not sit gossiping here, but must get -these disreputable clothes off and return to my highly -respectable self." 
- -I could see by his manner that he had stronger reasons for -satisfaction than his words alone would imply. His eyes twinkled, -and there was even a touch of colour upon his sallow cheeks. He -hastened upstairs, and a few minutes later I heard the slam of -the hall door, which told me that he was off once more upon his -congenial hunt. - -I waited until midnight, but there was no sign of his return, so -I retired to my room. It was no uncommon thing for him to be away -for days and nights on end when he was hot upon a scent, so that -his lateness caused me no surprise. I do not know at what hour he -came in, but when I came down to breakfast in the morning there -he was with a cup of coffee in one hand and the paper in the -other, as fresh and trim as possible. - -"You will excuse my beginning without you, Watson," said he, "but -you remember that our client has rather an early appointment this -morning." - -"Why, it is after nine now," I answered. "I should not be -surprised if that were he. I thought I heard a ring." - -It was, indeed, our friend the financier. I was shocked by the -change which had come over him, for his face, which was naturally -of a broad and massive mould, was now pinched and fallen in, -while his hair seemed to me at least a shade whiter. He entered -with a weariness and lethargy which was even more painful than -his violence of the morning before, and he dropped heavily into -the armchair which I pushed forward for him. - -"I do not know what I have done to be so severely tried," said -he. "Only two days ago I was a happy and prosperous man, without -a care in the world. Now I am left to a lonely and dishonoured -age. One sorrow comes close upon the heels of another. My niece, -Mary, has deserted me." - -"Deserted you?" - -"Yes. Her bed this morning had not been slept in, her room was -empty, and a note for me lay upon the hall table.
I had said to -her last night, in sorrow and not in anger, that if she had -married my boy all might have been well with him. Perhaps it was -thoughtless of me to say so. It is to that remark that she refers -in this note: - -"'MY DEAREST UNCLE:--I feel that I have brought trouble upon you, -and that if I had acted differently this terrible misfortune -might never have occurred. I cannot, with this thought in my -mind, ever again be happy under your roof, and I feel that I must -leave you forever. Do not worry about my future, for that is -provided for; and, above all, do not search for me, for it will -be fruitless labour and an ill-service to me. In life or in -death, I am ever your loving,--MARY.' - -"What could she mean by that note, Mr. Holmes? Do you think it -points to suicide?" - -"No, no, nothing of the kind. It is perhaps the best possible -solution. I trust, Mr. Holder, that you are nearing the end of -your troubles." - -"Ha! You say so! You have heard something, Mr. Holmes; you have -learned something! Where are the gems?" - -"You would not think 1000 pounds apiece an excessive sum for -them?" - -"I would pay ten." - -"That would be unnecessary. Three thousand will cover the matter. -And there is a little reward, I fancy. Have you your check-book? -Here is a pen. Better make it out for 4000 pounds." - -With a dazed face the banker made out the required check. Holmes -walked over to his desk, took out a little triangular piece of -gold with three gems in it, and threw it down upon the table. - -With a shriek of joy our client clutched it up. - -"You have it!" he gasped. "I am saved! I am saved!" - -The reaction of joy was as passionate as his grief had been, and -he hugged his recovered gems to his bosom. - -"There is one other thing you owe, Mr. Holder," said Sherlock -Holmes rather sternly. - -"Owe!" He caught up a pen. "Name the sum, and I will pay it." - -"No, the debt is not to me. 
You owe a very humble apology to that -noble lad, your son, who has carried himself in this matter as I -should be proud to see my own son do, should I ever chance to -have one." - -"Then it was not Arthur who took them?" - -"I told you yesterday, and I repeat to-day, that it was not." - -"You are sure of it! Then let us hurry to him at once to let him -know that the truth is known." - -"He knows it already. When I had cleared it all up I had an -interview with him, and finding that he would not tell me the -story, I told it to him, on which he had to confess that I was -right and to add the very few details which were not yet quite -clear to me. Your news of this morning, however, may open his -lips." - -"For heaven's sake, tell me, then, what is this extraordinary -mystery!" - -"I will do so, and I will show you the steps by which I reached -it. And let me say to you, first, that which it is hardest for me -to say and for you to hear: there has been an understanding -between Sir George Burnwell and your niece Mary. They have now -fled together." - -"My Mary? Impossible!" - -"It is unfortunately more than possible; it is certain. Neither -you nor your son knew the true character of this man when you -admitted him into your family circle. He is one of the most -dangerous men in England--a ruined gambler, an absolutely -desperate villain, a man without heart or conscience. Your niece -knew nothing of such men. When he breathed his vows to her, as he -had done to a hundred before her, she flattered herself that she -alone had touched his heart. The devil knows best what he said, -but at least she became his tool and was in the habit of seeing -him nearly every evening." - -"I cannot, and I will not, believe it!" cried the banker with an -ashen face. - -"I will tell you, then, what occurred in your house last night. -Your niece, when you had, as she thought, gone to your room, -slipped down and talked to her lover through the window which -leads into the stable lane. 
His footmarks had pressed right -through the snow, so long had he stood there. She told him of the -coronet. His wicked lust for gold kindled at the news, and he -bent her to his will. I have no doubt that she loved you, but -there are women in whom the love of a lover extinguishes all -other loves, and I think that she must have been one. She had -hardly listened to his instructions when she saw you coming -downstairs, on which she closed the window rapidly and told you -about one of the servants' escapade with her wooden-legged lover, -which was all perfectly true. - -"Your boy, Arthur, went to bed after his interview with you but -he slept badly on account of his uneasiness about his club debts. -In the middle of the night he heard a soft tread pass his door, -so he rose and, looking out, was surprised to see his cousin -walking very stealthily along the passage until she disappeared -into your dressing-room. Petrified with astonishment, the lad -slipped on some clothes and waited there in the dark to see what -would come of this strange affair. Presently she emerged from the -room again, and in the light of the passage-lamp your son saw -that she carried the precious coronet in her hands. She passed -down the stairs, and he, thrilling with horror, ran along and -slipped behind the curtain near your door, whence he could see -what passed in the hall beneath. He saw her stealthily open the -window, hand out the coronet to someone in the gloom, and then -closing it once more hurry back to her room, passing quite close -to where he stood hid behind the curtain. - -"As long as she was on the scene he could not take any action -without a horrible exposure of the woman whom he loved. But the -instant that she was gone he realised how crushing a misfortune -this would be for you, and how all-important it was to set it -right. 
He rushed down, just as he was, in his bare feet, opened -the window, sprang out into the snow, and ran down the lane, -where he could see a dark figure in the moonlight. Sir George -Burnwell tried to get away, but Arthur caught him, and there was -a struggle between them, your lad tugging at one side of the -coronet, and his opponent at the other. In the scuffle, your son -struck Sir George and cut him over the eye. Then something -suddenly snapped, and your son, finding that he had the coronet -in his hands, rushed back, closed the window, ascended to your -room, and had just observed that the coronet had been twisted in -the struggle and was endeavouring to straighten it when you -appeared upon the scene." - -"Is it possible?" gasped the banker. - -"You then roused his anger by calling him names at a moment when -he felt that he had deserved your warmest thanks. He could not -explain the true state of affairs without betraying one who -certainly deserved little enough consideration at his hands. He -took the more chivalrous view, however, and preserved her -secret." - -"And that was why she shrieked and fainted when she saw the -coronet," cried Mr. Holder. "Oh, my God! what a blind fool I have -been! And his asking to be allowed to go out for five minutes! -The dear fellow wanted to see if the missing piece were at the -scene of the struggle. How cruelly I have misjudged him!" - -"When I arrived at the house," continued Holmes, "I at once went -very carefully round it to observe if there were any traces in -the snow which might help me. I knew that none had fallen since -the evening before, and also that there had been a strong frost -to preserve impressions. I passed along the tradesmen's path, but -found it all trampled down and indistinguishable. Just beyond it, -however, at the far side of the kitchen door, a woman had stood -and talked with a man, whose round impressions on one side showed -that he had a wooden leg. 
I could even tell that they had been -disturbed, for the woman had run back swiftly to the door, as was -shown by the deep toe and light heel marks, while Wooden-leg had -waited a little, and then had gone away. I thought at the time -that this might be the maid and her sweetheart, of whom you had -already spoken to me, and inquiry showed it was so. I passed -round the garden without seeing anything more than random tracks, -which I took to be the police; but when I got into the stable -lane a very long and complex story was written in the snow in -front of me. - -"There was a double line of tracks of a booted man, and a second -double line which I saw with delight belonged to a man with naked -feet. I was at once convinced from what you had told me that the -latter was your son. The first had walked both ways, but the -other had run swiftly, and as his tread was marked in places over -the depression of the boot, it was obvious that he had passed -after the other. I followed them up and found they led to the -hall window, where Boots had worn all the snow away while -waiting. Then I walked to the other end, which was a hundred -yards or more down the lane. I saw where Boots had faced round, -where the snow was cut up as though there had been a struggle, -and, finally, where a few drops of blood had fallen, to show me -that I was not mistaken. Boots had then run down the lane, and -another little smudge of blood showed that it was he who had been -hurt. When he came to the highroad at the other end, I found that -the pavement had been cleared, so there was an end to that clue. - -"On entering the house, however, I examined, as you remember, the -sill and framework of the hall window with my lens, and I could -at once see that someone had passed out. I could distinguish the -outline of an instep where the wet foot had been placed in coming -in. I was then beginning to be able to form an opinion as to what -had occurred. 
A man had waited outside the window; someone had -brought the gems; the deed had been overseen by your son; he had -pursued the thief; had struggled with him; they had each tugged -at the coronet, their united strength causing injuries which -neither alone could have effected. He had returned with the -prize, but had left a fragment in the grasp of his opponent. So -far I was clear. The question now was, who was the man and who -was it brought him the coronet? - -"It is an old maxim of mine that when you have excluded the -impossible, whatever remains, however improbable, must be the -truth. Now, I knew that it was not you who had brought it down, -so there only remained your niece and the maids. But if it were -the maids, why should your son allow himself to be accused in -their place? There could be no possible reason. As he loved his -cousin, however, there was an excellent explanation why he should -retain her secret--the more so as the secret was a disgraceful -one. When I remembered that you had seen her at that window, and -how she had fainted on seeing the coronet again, my conjecture -became a certainty. - -"And who could it be who was her confederate? A lover evidently, -for who else could outweigh the love and gratitude which she must -feel to you? I knew that you went out little, and that your -circle of friends was a very limited one. But among them was Sir -George Burnwell. I had heard of him before as being a man of evil -reputation among women. It must have been he who wore those boots -and retained the missing gems. Even though he knew that Arthur -had discovered him, he might still flatter himself that he was -safe, for the lad could not say a word without compromising his -own family. - -"Well, your own good sense will suggest what measures I took -next. 
I went in the shape of a loafer to Sir George's house, -managed to pick up an acquaintance with his valet, learned that -his master had cut his head the night before, and, finally, at -the expense of six shillings, made all sure by buying a pair of -his cast-off shoes. With these I journeyed down to Streatham and -saw that they exactly fitted the tracks." - -"I saw an ill-dressed vagabond in the lane yesterday evening," -said Mr. Holder. - -"Precisely. It was I. I found that I had my man, so I came home -and changed my clothes. It was a delicate part which I had to -play then, for I saw that a prosecution must be avoided to avert -scandal, and I knew that so astute a villain would see that our -hands were tied in the matter. I went and saw him. At first, of -course, he denied everything. But when I gave him every -particular that had occurred, he tried to bluster and took down a -life-preserver from the wall. I knew my man, however, and I -clapped a pistol to his head before he could strike. Then he -became a little more reasonable. I told him that we would give -him a price for the stones he held--1000 pounds apiece. That -brought out the first signs of grief that he had shown. 'Why, -dash it all!' said he, 'I've let them go at six hundred for the -three!' I soon managed to get the address of the receiver who had -them, on promising him that there would be no prosecution. Off I -set to him, and after much chaffering I got our stones at 1000 -pounds apiece. Then I looked in upon your son, told him that all -was right, and eventually got to my bed about two o'clock, after -what I may call a really hard day's work." - -"A day which has saved England from a great public scandal," said -the banker, rising. "Sir, I cannot find words to thank you, but -you shall not find me ungrateful for what you have done. Your -skill has indeed exceeded all that I have heard of it. And now I -must fly to my dear boy to apologise to him for the wrong which I -have done him. 
As to what you tell me of poor Mary, it goes to my -very heart. Not even your skill can inform me where she is now." - -"I think that we may safely say," returned Holmes, "that she is -wherever Sir George Burnwell is. It is equally certain, too, that -whatever her sins are, they will soon receive a more than -sufficient punishment." - - - -XII. THE ADVENTURE OF THE COPPER BEECHES - -"To the man who loves art for its own sake," remarked Sherlock -Holmes, tossing aside the advertisement sheet of the Daily -Telegraph, "it is frequently in its least important and lowliest -manifestations that the keenest pleasure is to be derived. It is -pleasant to me to observe, Watson, that you have so far grasped -this truth that in these little records of our cases which you -have been good enough to draw up, and, I am bound to say, -occasionally to embellish, you have given prominence not so much -to the many causes célèbres and sensational trials in which I -have figured but rather to those incidents which may have been -trivial in themselves, but which have given room for those -faculties of deduction and of logical synthesis which I have made -my special province." - -"And yet," said I, smiling, "I cannot quite hold myself absolved -from the charge of sensationalism which has been urged against my -records." - -"You have erred, perhaps," he observed, taking up a glowing -cinder with the tongs and lighting with it the long cherry-wood -pipe which was wont to replace his clay when he was in a -disputatious rather than a meditative mood--"you have erred -perhaps in attempting to put colour and life into each of your -statements instead of confining yourself to the task of placing -upon record that severe reasoning from cause to effect which is -really the only notable feature about the thing." 
- -"It seems to me that I have done you full justice in the matter," -I remarked with some coldness, for I was repelled by the egotism -which I had more than once observed to be a strong factor in my -friend's singular character. - -"No, it is not selfishness or conceit," said he, answering, as -was his wont, my thoughts rather than my words. "If I claim full -justice for my art, it is because it is an impersonal thing--a -thing beyond myself. Crime is common. Logic is rare. Therefore it -is upon the logic rather than upon the crime that you should -dwell. You have degraded what should have been a course of -lectures into a series of tales." - -It was a cold morning of the early spring, and we sat after -breakfast on either side of a cheery fire in the old room at -Baker Street. A thick fog rolled down between the lines of -dun-coloured houses, and the opposing windows loomed like dark, -shapeless blurs through the heavy yellow wreaths. Our gas was lit -and shone on the white cloth and glimmer of china and metal, for -the table had not been cleared yet. Sherlock Holmes had been -silent all the morning, dipping continuously into the -advertisement columns of a succession of papers until at last, -having apparently given up his search, he had emerged in no very -sweet temper to lecture me upon my literary shortcomings. - -"At the same time," he remarked after a pause, during which he -had sat puffing at his long pipe and gazing down into the fire, -"you can hardly be open to a charge of sensationalism, for out of -these cases which you have been so kind as to interest yourself -in, a fair proportion do not treat of crime, in its legal sense, -at all. The small matter in which I endeavoured to help the King -of Bohemia, the singular experience of Miss Mary Sutherland, the -problem connected with the man with the twisted lip, and the -incident of the noble bachelor, were all matters which are -outside the pale of the law. 
But in avoiding the sensational, I -fear that you may have bordered on the trivial." - -"The end may have been so," I answered, "but the methods I hold -to have been novel and of interest." - -"Pshaw, my dear fellow, what do the public, the great unobservant -public, who could hardly tell a weaver by his tooth or a -compositor by his left thumb, care about the finer shades of -analysis and deduction! But, indeed, if you are trivial, I cannot -blame you, for the days of the great cases are past. Man, or at -least criminal man, has lost all enterprise and originality. As -to my own little practice, it seems to be degenerating into an -agency for recovering lost lead pencils and giving advice to -young ladies from boarding-schools. I think that I have touched -bottom at last, however. This note I had this morning marks my -zero-point, I fancy. Read it!" He tossed a crumpled letter across -to me. - -It was dated from Montague Place upon the preceding evening, and -ran thus: - -"DEAR MR. HOLMES:--I am very anxious to consult you as to whether -I should or should not accept a situation which has been offered -to me as governess. I shall call at half-past ten to-morrow if I -do not inconvenience you. Yours faithfully, - "VIOLET HUNTER." - -"Do you know the young lady?" I asked. - -"Not I." - -"It is half-past ten now." - -"Yes, and I have no doubt that is her ring." - -"It may turn out to be of more interest than you think. You -remember that the affair of the blue carbuncle, which appeared to -be a mere whim at first, developed into a serious investigation. -It may be so in this case, also." - -"Well, let us hope so. But our doubts will very soon be solved, -for here, unless I am much mistaken, is the person in question." - -As he spoke the door opened and a young lady entered the room. -She was plainly but neatly dressed, with a bright, quick face, -freckled like a plover's egg, and with the brisk manner of a -woman who has had her own way to make in the world. 
- -"You will excuse my troubling you, I am sure," said she, as my -companion rose to greet her, "but I have had a very strange -experience, and as I have no parents or relations of any sort -from whom I could ask advice, I thought that perhaps you would be -kind enough to tell me what I should do." - -"Pray take a seat, Miss Hunter. I shall be happy to do anything -that I can to serve you." - -I could see that Holmes was favourably impressed by the manner -and speech of his new client. He looked her over in his searching -fashion, and then composed himself, with his lids drooping and -his finger-tips together, to listen to her story. - -"I have been a governess for five years," said she, "in the -family of Colonel Spence Munro, but two months ago the colonel -received an appointment at Halifax, in Nova Scotia, and took his -children over to America with him, so that I found myself without -a situation. I advertised, and I answered advertisements, but -without success. At last the little money which I had saved began -to run short, and I was at my wit's end as to what I should do. - -"There is a well-known agency for governesses in the West End -called Westaway's, and there I used to call about once a week in -order to see whether anything had turned up which might suit me. -Westaway was the name of the founder of the business, but it is -really managed by Miss Stoper. She sits in her own little office, -and the ladies who are seeking employment wait in an anteroom, -and are then shown in one by one, when she consults her ledgers -and sees whether she has anything which would suit them. - -"Well, when I called last week I was shown into the little office -as usual, but I found that Miss Stoper was not alone. A -prodigiously stout man with a very smiling face and a great heavy -chin which rolled down in fold upon fold over his throat sat at -her elbow with a pair of glasses on his nose, looking very -earnestly at the ladies who entered. 
As I came in he gave quite a -jump in his chair and turned quickly to Miss Stoper. - -"'That will do,' said he; 'I could not ask for anything better. -Capital! capital!' He seemed quite enthusiastic and rubbed his -hands together in the most genial fashion. He was such a -comfortable-looking man that it was quite a pleasure to look at -him. - -"'You are looking for a situation, miss?' he asked. - -"'Yes, sir.' - -"'As governess?' - -"'Yes, sir.' - -"'And what salary do you ask?' - -"'I had 4 pounds a month in my last place with Colonel Spence -Munro.' - -"'Oh, tut, tut! sweating--rank sweating!' he cried, throwing his -fat hands out into the air like a man who is in a boiling -passion. 'How could anyone offer so pitiful a sum to a lady with -such attractions and accomplishments?' - -"'My accomplishments, sir, may be less than you imagine,' said I. -'A little French, a little German, music, and drawing--' - -"'Tut, tut!' he cried. 'This is all quite beside the question. -The point is, have you or have you not the bearing and deportment -of a lady? There it is in a nutshell. If you have not, you are -not fitted for the rearing of a child who may some day play a -considerable part in the history of the country. But if you have-- -why, then, how could any gentleman ask you to condescend to -accept anything under the three figures? Your salary with me, -madam, would commence at 100 pounds a year.' - -"You may imagine, Mr. Holmes, that to me, destitute as I was, -such an offer seemed almost too good to be true. The gentleman, -however, seeing perhaps the look of incredulity upon my face, -opened a pocket-book and took out a note. - -"'It is also my custom,' said he, smiling in the most pleasant -fashion until his eyes were just two little shining slits amid -the white creases of his face, 'to advance to my young ladies -half their salary beforehand, so that they may meet any little -expenses of their journey and their wardrobe.'
- -"It seemed to me that I had never met so fascinating and so -thoughtful a man. As I was already in debt to my tradesmen, the -advance was a great convenience, and yet there was something -unnatural about the whole transaction which made me wish to know -a little more before I quite committed myself. - -"'May I ask where you live, sir?' said I. - -"'Hampshire. Charming rural place. The Copper Beeches, five miles -on the far side of Winchester. It is the most lovely country, my -dear young lady, and the dearest old country-house.' - -"'And my duties, sir? I should be glad to know what they would -be.' - -"'One child--one dear little romper just six years old. Oh, if -you could see him killing cockroaches with a slipper! Smack! -smack! smack! Three gone before you could wink!' He leaned back -in his chair and laughed his eyes into his head again. - -"I was a little startled at the nature of the child's amusement, -but the father's laughter made me think that perhaps he was -joking. - -"'My sole duties, then,' I asked, 'are to take charge of a single -child?' - -"'No, no, not the sole, not the sole, my dear young lady,' he -cried. 'Your duty would be, as I am sure your good sense would -suggest, to obey any little commands my wife might give, provided -always that they were such commands as a lady might with -propriety obey. You see no difficulty, heh?' - -"'I should be happy to make myself useful.' - -"'Quite so. In dress now, for example. We are faddy people, you -know--faddy but kind-hearted. If you were asked to wear any dress -which we might give you, you would not object to our little whim. -Heh?' - -"'No,' said I, considerably astonished at his words. - -"'Or to sit here, or sit there, that would not be offensive to -you?' - -"'Oh, no.' - -"'Or to cut your hair quite short before you come to us?' - -"I could hardly believe my ears. As you may observe, Mr. Holmes, -my hair is somewhat luxuriant, and of a rather peculiar tint of -chestnut. 
It has been considered artistic. I could not dream of -sacrificing it in this offhand fashion. - -"'I am afraid that that is quite impossible,' said I. He had been -watching me eagerly out of his small eyes, and I could see a -shadow pass over his face as I spoke. - -"'I am afraid that it is quite essential,' said he. 'It is a -little fancy of my wife's, and ladies' fancies, you know, madam, -ladies' fancies must be consulted. And so you won't cut your -hair?' - -"'No, sir, I really could not,' I answered firmly. - -"'Ah, very well; then that quite settles the matter. It is a -pity, because in other respects you would really have done very -nicely. In that case, Miss Stoper, I had best inspect a few more -of your young ladies.' - -"The manageress had sat all this while busy with her papers -without a word to either of us, but she glanced at me now with so -much annoyance upon her face that I could not help suspecting -that she had lost a handsome commission through my refusal. - -"'Do you desire your name to be kept upon the books?' she asked. - -"'If you please, Miss Stoper.' - -"'Well, really, it seems rather useless, since you refuse the -most excellent offers in this fashion,' said she sharply. 'You -can hardly expect us to exert ourselves to find another such -opening for you. Good-day to you, Miss Hunter.' She struck a gong -upon the table, and I was shown out by the page. - -"Well, Mr. Holmes, when I got back to my lodgings and found -little enough in the cupboard, and two or three bills upon the -table, I began to ask myself whether I had not done a very -foolish thing. After all, if these people had strange fads and -expected obedience on the most extraordinary matters, they were -at least ready to pay for their eccentricity. Very few -governesses in England are getting 100 pounds a year. Besides, -what use was my hair to me? Many people are improved by wearing -it short and perhaps I should be among the number. 
Next day I was -inclined to think that I had made a mistake, and by the day after -I was sure of it. I had almost overcome my pride so far as to go -back to the agency and inquire whether the place was still open -when I received this letter from the gentleman himself. I have it -here and I will read it to you: - - "'The Copper Beeches, near Winchester. -"'DEAR MISS HUNTER:--Miss Stoper has very kindly given me your -address, and I write from here to ask you whether you have -reconsidered your decision. My wife is very anxious that you -should come, for she has been much attracted by my description of -you. We are willing to give 30 pounds a quarter, or 120 pounds a -year, so as to recompense you for any little inconvenience which -our fads may cause you. They are not very exacting, after all. My -wife is fond of a particular shade of electric blue and would -like you to wear such a dress indoors in the morning. You need -not, however, go to the expense of purchasing one, as we have one -belonging to my dear daughter Alice (now in Philadelphia), which -would, I should think, fit you very well. Then, as to sitting -here or there, or amusing yourself in any manner indicated, that -need cause you no inconvenience. As regards your hair, it is no -doubt a pity, especially as I could not help remarking its beauty -during our short interview, but I am afraid that I must remain -firm upon this point, and I only hope that the increased salary -may recompense you for the loss. Your duties, as far as the child -is concerned, are very light. Now do try to come, and I shall -meet you with the dog-cart at Winchester. Let me know your train. -Yours faithfully, JEPHRO RUCASTLE.' - -"That is the letter which I have just received, Mr. Holmes, and -my mind is made up that I will accept it. I thought, however, -that before taking the final step I should like to submit the -whole matter to your consideration." 
- -"Well, Miss Hunter, if your mind is made up, that settles the -question," said Holmes, smiling. - -"But you would not advise me to refuse?" - -"I confess that it is not the situation which I should like to -see a sister of mine apply for." - -"What is the meaning of it all, Mr. Holmes?" - -"Ah, I have no data. I cannot tell. Perhaps you have yourself -formed some opinion?" - -"Well, there seems to me to be only one possible solution. Mr. -Rucastle seemed to be a very kind, good-natured man. Is it not -possible that his wife is a lunatic, that he desires to keep the -matter quiet for fear she should be taken to an asylum, and that -he humours her fancies in every way in order to prevent an -outbreak?" - -"That is a possible solution--in fact, as matters stand, it is -the most probable one. But in any case it does not seem to be a -nice household for a young lady." - -"But the money, Mr. Holmes, the money!" - -"Well, yes, of course the pay is good--too good. That is what -makes me uneasy. Why should they give you 120 pounds a year, when -they could have their pick for 40 pounds? There must be some -strong reason behind." - -"I thought that if I told you the circumstances you would -understand afterwards if I wanted your help. I should feel so -much stronger if I felt that you were at the back of me." - -"Oh, you may carry that feeling away with you. I assure you that -your little problem promises to be the most interesting which has -come my way for some months. There is something distinctly novel -about some of the features. If you should find yourself in doubt -or in danger--" - -"Danger! What danger do you foresee?" - -Holmes shook his head gravely. "It would cease to be a danger if -we could define it," said he. "But at any time, day or night, a -telegram would bring me down to your help." - -"That is enough." She rose briskly from her chair with the -anxiety all swept from her face. "I shall go down to Hampshire -quite easy in my mind now. 
I shall write to Mr. Rucastle at once, -sacrifice my poor hair to-night, and start for Winchester -to-morrow." With a few grateful words to Holmes she bade us both -good-night and bustled off upon her way. - -"At least," said I as we heard her quick, firm steps descending -the stairs, "she seems to be a young lady who is very well able -to take care of herself." - -"And she would need to be," said Holmes gravely. "I am much -mistaken if we do not hear from her before many days are past." - -It was not very long before my friend's prediction was fulfilled. -A fortnight went by, during which I frequently found my thoughts -turning in her direction and wondering what strange side-alley of -human experience this lonely woman had strayed into. The unusual -salary, the curious conditions, the light duties, all pointed to -something abnormal, though whether a fad or a plot, or whether -the man were a philanthropist or a villain, it was quite beyond -my powers to determine. As to Holmes, I observed that he sat -frequently for half an hour on end, with knitted brows and an -abstracted air, but he swept the matter away with a wave of his -hand when I mentioned it. "Data! data! data!" he cried -impatiently. "I can't make bricks without clay." And yet he would -always wind up by muttering that no sister of his should ever -have accepted such a situation. - -The telegram which we eventually received came late one night -just as I was thinking of turning in and Holmes was settling down -to one of those all-night chemical researches which he frequently -indulged in, when I would leave him stooping over a retort and a -test-tube at night and find him in the same position when I came -down to breakfast in the morning. He opened the yellow envelope, -and then, glancing at the message, threw it across to me. - -"Just look up the trains in Bradshaw," said he, and turned back -to his chemical studies. - -The summons was a brief and urgent one. 
- -"Please be at the Black Swan Hotel at Winchester at midday -to-morrow," it said. "Do come! I am at my wit's end. HUNTER." - -"Will you come with me?" asked Holmes, glancing up. - -"I should wish to." - -"Just look it up, then." - -"There is a train at half-past nine," said I, glancing over my -Bradshaw. "It is due at Winchester at 11:30." - -"That will do very nicely. Then perhaps I had better postpone my -analysis of the acetones, as we may need to be at our best in the -morning." - -By eleven o'clock the next day we were well upon our way to the -old English capital. Holmes had been buried in the morning papers -all the way down, but after we had passed the Hampshire border he -threw them down and began to admire the scenery. It was an ideal -spring day, a light blue sky, flecked with little fleecy white -clouds drifting across from west to east. The sun was shining -very brightly, and yet there was an exhilarating nip in the air, -which set an edge to a man's energy. All over the countryside, -away to the rolling hills around Aldershot, the little red and -grey roofs of the farm-steadings peeped out from amid the light -green of the new foliage. - -"Are they not fresh and beautiful?" I cried with all the -enthusiasm of a man fresh from the fogs of Baker Street. - -But Holmes shook his head gravely. - -"Do you know, Watson," said he, "that it is one of the curses of -a mind with a turn like mine that I must look at everything with -reference to my own special subject. You look at these scattered -houses, and you are impressed by their beauty. I look at them, -and the only thought which comes to me is a feeling of their -isolation and of the impunity with which crime may be committed -there." - -"Good heavens!" I cried. "Who would associate crime with these -dear old homesteads?" - -"They always fill me with a certain horror. 
It is my belief, -Watson, founded upon my experience, that the lowest and vilest -alleys in London do not present a more dreadful record of sin -than does the smiling and beautiful countryside." - -"You horrify me!" - -"But the reason is very obvious. The pressure of public opinion -can do in the town what the law cannot accomplish. There is no -lane so vile that the scream of a tortured child, or the thud of -a drunkard's blow, does not beget sympathy and indignation among -the neighbours, and then the whole machinery of justice is ever -so close that a word of complaint can set it going, and there is -but a step between the crime and the dock. But look at these -lonely houses, each in its own fields, filled for the most part -with poor ignorant folk who know little of the law. Think of the -deeds of hellish cruelty, the hidden wickedness which may go on, -year in, year out, in such places, and none the wiser. Had this -lady who appeals to us for help gone to live in Winchester, I -should never have had a fear for her. It is the five miles of -country which makes the danger. Still, it is clear that she is -not personally threatened." - -"No. If she can come to Winchester to meet us she can get away." - -"Quite so. She has her freedom." - -"What CAN be the matter, then? Can you suggest no explanation?" - -"I have devised seven separate explanations, each of which would -cover the facts as far as we know them. But which of these is -correct can only be determined by the fresh information which we -shall no doubt find waiting for us. Well, there is the tower of -the cathedral, and we shall soon learn all that Miss Hunter has -to tell." - -The Black Swan is an inn of repute in the High Street, at no -distance from the station, and there we found the young lady -waiting for us. She had engaged a sitting-room, and our lunch -awaited us upon the table. - -"I am so delighted that you have come," she said earnestly. 
"It -is so very kind of you both; but indeed I do not know what I -should do. Your advice will be altogether invaluable to me." - -"Pray tell us what has happened to you." - -"I will do so, and I must be quick, for I have promised Mr. -Rucastle to be back before three. I got his leave to come into -town this morning, though he little knew for what purpose." - -"Let us have everything in its due order." Holmes thrust his long -thin legs out towards the fire and composed himself to listen. - -"In the first place, I may say that I have met, on the whole, -with no actual ill-treatment from Mr. and Mrs. Rucastle. It is -only fair to them to say that. But I cannot understand them, and -I am not easy in my mind about them." - -"What can you not understand?" - -"Their reasons for their conduct. But you shall have it all just -as it occurred. When I came down, Mr. Rucastle met me here and -drove me in his dog-cart to the Copper Beeches. It is, as he -said, beautifully situated, but it is not beautiful in itself, -for it is a large square block of a house, whitewashed, but all -stained and streaked with damp and bad weather. There are grounds -round it, woods on three sides, and on the fourth a field which -slopes down to the Southampton highroad, which curves past about -a hundred yards from the front door. This ground in front belongs -to the house, but the woods all round are part of Lord -Southerton's preserves. A clump of copper beeches immediately in -front of the hall door has given its name to the place. - -"I was driven over by my employer, who was as amiable as ever, -and was introduced by him that evening to his wife and the child. -There was no truth, Mr. Holmes, in the conjecture which seemed to -us to be probable in your rooms at Baker Street. Mrs. Rucastle is -not mad. I found her to be a silent, pale-faced woman, much -younger than her husband, not more than thirty, I should think, -while he can hardly be less than forty-five. 
From their -conversation I have gathered that they have been married about -seven years, that he was a widower, and that his only child by -the first wife was the daughter who has gone to Philadelphia. Mr. -Rucastle told me in private that the reason why she had left them -was that she had an unreasoning aversion to her stepmother. As -the daughter could not have been less than twenty, I can quite -imagine that her position must have been uncomfortable with her -father's young wife. - -"Mrs. Rucastle seemed to me to be colourless in mind as well as -in feature. She impressed me neither favourably nor the reverse. -She was a nonentity. It was easy to see that she was passionately -devoted both to her husband and to her little son. Her light grey -eyes wandered continually from one to the other, noting every -little want and forestalling it if possible. He was kind to her -also in his bluff, boisterous fashion, and on the whole they -seemed to be a happy couple. And yet she had some secret sorrow, -this woman. She would often be lost in deep thought, with the -saddest look upon her face. More than once I have surprised her -in tears. I have thought sometimes that it was the disposition of -her child which weighed upon her mind, for I have never met so -utterly spoiled and so ill-natured a little creature. He is small -for his age, with a head which is quite disproportionately large. -His whole life appears to be spent in an alternation between -savage fits of passion and gloomy intervals of sulking. Giving -pain to any creature weaker than himself seems to be his one idea -of amusement, and he shows quite remarkable talent in planning -the capture of mice, little birds, and insects. But I would -rather not talk about the creature, Mr. Holmes, and, indeed, he -has little to do with my story." - -"I am glad of all details," remarked my friend, "whether they -seem to you to be relevant or not." - -"I shall try not to miss anything of importance. 
The one -unpleasant thing about the house, which struck me at once, was -the appearance and conduct of the servants. There are only two, a -man and his wife. Toller, for that is his name, is a rough, -uncouth man, with grizzled hair and whiskers, and a perpetual -smell of drink. Twice since I have been with them he has been -quite drunk, and yet Mr. Rucastle seemed to take no notice of it. -His wife is a very tall and strong woman with a sour face, as -silent as Mrs. Rucastle and much less amiable. They are a most -unpleasant couple, but fortunately I spend most of my time in the -nursery and my own room, which are next to each other in one -corner of the building. - -"For two days after my arrival at the Copper Beeches my life was -very quiet; on the third, Mrs. Rucastle came down just after -breakfast and whispered something to her husband. - -"'Oh, yes,' said he, turning to me, 'we are very much obliged to -you, Miss Hunter, for falling in with our whims so far as to cut -your hair. I assure you that it has not detracted in the tiniest -iota from your appearance. We shall now see how the electric-blue -dress will become you. You will find it laid out upon the bed in -your room, and if you would be so good as to put it on we should -both be extremely obliged.' - -"The dress which I found waiting for me was of a peculiar shade -of blue. It was of excellent material, a sort of beige, but it -bore unmistakable signs of having been worn before. It could not -have been a better fit if I had been measured for it. Both Mr. -and Mrs. Rucastle expressed a delight at the look of it, which -seemed quite exaggerated in its vehemence. They were waiting for -me in the drawing-room, which is a very large room, stretching -along the entire front of the house, with three long windows -reaching down to the floor. A chair had been placed close to the -central window, with its back turned towards it. In this I was -asked to sit, and then Mr. 
Rucastle, walking up and down on the -other side of the room, began to tell me a series of the funniest -stories that I have ever listened to. You cannot imagine how -comical he was, and I laughed until I was quite weary. Mrs. -Rucastle, however, who has evidently no sense of humour, never so -much as smiled, but sat with her hands in her lap, and a sad, -anxious look upon her face. After an hour or so, Mr. Rucastle -suddenly remarked that it was time to commence the duties of the -day, and that I might change my dress and go to little Edward in -the nursery. - -"Two days later this same performance was gone through under -exactly similar circumstances. Again I changed my dress, again I -sat in the window, and again I laughed very heartily at the funny -stories of which my employer had an immense répertoire, and which -he told inimitably. Then he handed me a yellow-backed novel, and -moving my chair a little sideways, that my own shadow might not -fall upon the page, he begged me to read aloud to him. I read for -about ten minutes, beginning in the heart of a chapter, and then -suddenly, in the middle of a sentence, he ordered me to cease and -to change my dress. - -"You can easily imagine, Mr. Holmes, how curious I became as to -what the meaning of this extraordinary performance could possibly -be. They were always very careful, I observed, to turn my face -away from the window, so that I became consumed with the desire -to see what was going on behind my back. At first it seemed to be -impossible, but I soon devised a means. My hand-mirror had been -broken, so a happy thought seized me, and I concealed a piece of -the glass in my handkerchief. On the next occasion, in the midst -of my laughter, I put my handkerchief up to my eyes, and was able -with a little management to see all that there was behind me. I -confess that I was disappointed. There was nothing. At least that -was my first impression. 
At the second glance, however, I -perceived that there was a man standing in the Southampton Road, -a small bearded man in a grey suit, who seemed to be looking in -my direction. The road is an important highway, and there are -usually people there. This man, however, was leaning against the -railings which bordered our field and was looking earnestly up. I -lowered my handkerchief and glanced at Mrs. Rucastle to find her -eyes fixed upon me with a most searching gaze. She said nothing, -but I am convinced that she had divined that I had a mirror in my -hand and had seen what was behind me. She rose at once. - -"'Jephro,' said she, 'there is an impertinent fellow upon the -road there who stares up at Miss Hunter.' - -"'No friend of yours, Miss Hunter?' he asked. - -"'No, I know no one in these parts.' - -"'Dear me! How very impertinent! Kindly turn round and motion to -him to go away.' - -"'Surely it would be better to take no notice.' - -"'No, no, we should have him loitering here always. Kindly turn -round and wave him away like that.' - -"I did as I was told, and at the same instant Mrs. Rucastle drew -down the blind. That was a week ago, and from that time I have -not sat again in the window, nor have I worn the blue dress, nor -seen the man in the road." - -"Pray continue," said Holmes. "Your narrative promises to be a -most interesting one." - -"You will find it rather disconnected, I fear, and there may -prove to be little relation between the different incidents of -which I speak. On the very first day that I was at the Copper -Beeches, Mr. Rucastle took me to a small outhouse which stands -near the kitchen door. As we approached it I heard the sharp -rattling of a chain, and the sound as of a large animal moving -about. - -"'Look in here!' said Mr. Rucastle, showing me a slit between two -planks. 'Is he not a beauty?' - -"I looked through and was conscious of two glowing eyes, and of a -vague figure huddled up in the darkness. 
- -"'Don't be frightened,' said my employer, laughing at the start -which I had given. 'It's only Carlo, my mastiff. I call him mine, -but really old Toller, my groom, is the only man who can do -anything with him. We feed him once a day, and not too much then, -so that he is always as keen as mustard. Toller lets him loose -every night, and God help the trespasser whom he lays his fangs -upon. For goodness' sake don't you ever on any pretext set your -foot over the threshold at night, for it's as much as your life -is worth.' - -"The warning was no idle one, for two nights later I happened to -look out of my bedroom window about two o'clock in the morning. -It was a beautiful moonlight night, and the lawn in front of the -house was silvered over and almost as bright as day. I was -standing, rapt in the peaceful beauty of the scene, when I was -aware that something was moving under the shadow of the copper -beeches. As it emerged into the moonshine I saw what it was. It -was a giant dog, as large as a calf, tawny tinted, with hanging -jowl, black muzzle, and huge projecting bones. It walked slowly -across the lawn and vanished into the shadow upon the other side. -That dreadful sentinel sent a chill to my heart which I do not -think that any burglar could have done. - -"And now I have a very strange experience to tell you. I had, as -you know, cut off my hair in London, and I had placed it in a -great coil at the bottom of my trunk. One evening, after the -child was in bed, I began to amuse myself by examining the -furniture of my room and by rearranging my own little things. -There was an old chest of drawers in the room, the two upper ones -empty and open, the lower one locked. I had filled the first two -with my linen, and as I had still much to pack away I was -naturally annoyed at not having the use of the third drawer. It -struck me that it might have been fastened by a mere oversight, -so I took out my bunch of keys and tried to open it. 
The very -first key fitted to perfection, and I drew the drawer open. There -was only one thing in it, but I am sure that you would never -guess what it was. It was my coil of hair. - -"I took it up and examined it. It was of the same peculiar tint, -and the same thickness. But then the impossibility of the thing -obtruded itself upon me. How could my hair have been locked in -the drawer? With trembling hands I undid my trunk, turned out the -contents, and drew from the bottom my own hair. I laid the two -tresses together, and I assure you that they were identical. Was -it not extraordinary? Puzzle as I would, I could make nothing at -all of what it meant. I returned the strange hair to the drawer, -and I said nothing of the matter to the Rucastles as I felt that -I had put myself in the wrong by opening a drawer which they had -locked. - -"I am naturally observant, as you may have remarked, Mr. Holmes, -and I soon had a pretty good plan of the whole house in my head. -There was one wing, however, which appeared not to be inhabited -at all. A door which faced that which led into the quarters of -the Tollers opened into this suite, but it was invariably locked. -One day, however, as I ascended the stair, I met Mr. Rucastle -coming out through this door, his keys in his hand, and a look on -his face which made him a very different person to the round, -jovial man to whom I was accustomed. His cheeks were red, his -brow was all crinkled with anger, and the veins stood out at his -temples with passion. He locked the door and hurried past me -without a word or a look. - -"This aroused my curiosity, so when I went out for a walk in the -grounds with my charge, I strolled round to the side from which I -could see the windows of this part of the house. There were four -of them in a row, three of which were simply dirty, while the -fourth was shuttered up. They were evidently all deserted. As I -strolled up and down, glancing at them occasionally, Mr. 
Rucastle -came out to me, looking as merry and jovial as ever. - -"'Ah!' said he, 'you must not think me rude if I passed you -without a word, my dear young lady. I was preoccupied with -business matters.' - -"I assured him that I was not offended. 'By the way,' said I, -'you seem to have quite a suite of spare rooms up there, and one -of them has the shutters up.' - -"He looked surprised and, as it seemed to me, a little startled -at my remark. - -"'Photography is one of my hobbies,' said he. 'I have made my -dark room up there. But, dear me! what an observant young lady we -have come upon. Who would have believed it? Who would have ever -believed it?' He spoke in a jesting tone, but there was no jest -in his eyes as he looked at me. I read suspicion there and -annoyance, but no jest. - -"Well, Mr. Holmes, from the moment that I understood that there -was something about that suite of rooms which I was not to know, -I was all on fire to go over them. It was not mere curiosity, -though I have my share of that. It was more a feeling of duty--a -feeling that some good might come from my penetrating to this -place. They talk of woman's instinct; perhaps it was woman's -instinct which gave me that feeling. At any rate, it was there, -and I was keenly on the lookout for any chance to pass the -forbidden door. - -"It was only yesterday that the chance came. I may tell you that, -besides Mr. Rucastle, both Toller and his wife find something to -do in these deserted rooms, and I once saw him carrying a large -black linen bag with him through the door. Recently he has been -drinking hard, and yesterday evening he was very drunk; and when -I came upstairs there was the key in the door. I have no doubt at -all that he had left it there. Mr. and Mrs. Rucastle were both -downstairs, and the child was with them, so that I had an -admirable opportunity. I turned the key gently in the lock, -opened the door, and slipped through. 
- -"There was a little passage in front of me, unpapered and -uncarpeted, which turned at a right angle at the farther end. -Round this corner were three doors in a line, the first and third -of which were open. They each led into an empty room, dusty and -cheerless, with two windows in the one and one in the other, so -thick with dirt that the evening light glimmered dimly through -them. The centre door was closed, and across the outside of it -had been fastened one of the broad bars of an iron bed, padlocked -at one end to a ring in the wall, and fastened at the other with -stout cord. The door itself was locked as well, and the key was -not there. This barricaded door corresponded clearly with the -shuttered window outside, and yet I could see by the glimmer from -beneath it that the room was not in darkness. Evidently there was -a skylight which let in light from above. As I stood in the -passage gazing at the sinister door and wondering what secret it -might veil, I suddenly heard the sound of steps within the room -and saw a shadow pass backward and forward against the little -slit of dim light which shone out from under the door. A mad, -unreasoning terror rose up in me at the sight, Mr. Holmes. My -overstrung nerves failed me suddenly, and I turned and ran--ran -as though some dreadful hand were behind me clutching at the -skirt of my dress. I rushed down the passage, through the door, -and straight into the arms of Mr. Rucastle, who was waiting -outside. - -"'So,' said he, smiling, 'it was you, then. I thought that it -must be when I saw the door open.' - -"'Oh, I am so frightened!' I panted. - -"'My dear young lady! my dear young lady!'--you cannot think how -caressing and soothing his manner was--'and what has frightened -you, my dear young lady?' - -"But his voice was just a little too coaxing. He overdid it. I -was keenly on my guard against him. - -"'I was foolish enough to go into the empty wing,' I answered. 
-'But it is so lonely and eerie in this dim light that I was -frightened and ran out again. Oh, it is so dreadfully still in -there!' - -"'Only that?' said he, looking at me keenly. - -"'Why, what did you think?' I asked. - -"'Why do you think that I lock this door?' - -"'I am sure that I do not know.' - -"'It is to keep people out who have no business there. Do you -see?' He was still smiling in the most amiable manner. - -"'I am sure if I had known--' - -"'Well, then, you know now. And if you ever put your foot over -that threshold again'--here in an instant the smile hardened into -a grin of rage, and he glared down at me with the face of a -demon--'I'll throw you to the mastiff.' - -"I was so terrified that I do not know what I did. I suppose that -I must have rushed past him into my room. I remember nothing -until I found myself lying on my bed trembling all over. Then I -thought of you, Mr. Holmes. I could not live there longer without -some advice. I was frightened of the house, of the man, of the -woman, of the servants, even of the child. They were all horrible -to me. If I could only bring you down all would be well. Of -course I might have fled from the house, but my curiosity was -almost as strong as my fears. My mind was soon made up. I would -send you a wire. I put on my hat and cloak, went down to the -office, which is about half a mile from the house, and then -returned, feeling very much easier. A horrible doubt came into my -mind as I approached the door lest the dog might be loose, but I -remembered that Toller had drunk himself into a state of -insensibility that evening, and I knew that he was the only one -in the household who had any influence with the savage creature, -or who would venture to set him free. I slipped in in safety and -lay awake half the night in my joy at the thought of seeing you. -I had no difficulty in getting leave to come into Winchester this -morning, but I must be back before three o'clock, for Mr. and -Mrs. 
Rucastle are going on a visit, and will be away all the -evening, so that I must look after the child. Now I have told you -all my adventures, Mr. Holmes, and I should be very glad if you -could tell me what it all means, and, above all, what I should -do." - -Holmes and I had listened spellbound to this extraordinary story. -My friend rose now and paced up and down the room, his hands in -his pockets, and an expression of the most profound gravity upon -his face. - -"Is Toller still drunk?" he asked. - -"Yes. I heard his wife tell Mrs. Rucastle that she could do -nothing with him." - -"That is well. And the Rucastles go out to-night?" - -"Yes." - -"Is there a cellar with a good strong lock?" - -"Yes, the wine-cellar." - -"You seem to me to have acted all through this matter like a very -brave and sensible girl, Miss Hunter. Do you think that you could -perform one more feat? I should not ask it of you if I did not -think you a quite exceptional woman." - -"I will try. What is it?" - -"We shall be at the Copper Beeches by seven o'clock, my friend -and I. The Rucastles will be gone by that time, and Toller will, -we hope, be incapable. There only remains Mrs. Toller, who might -give the alarm. If you could send her into the cellar on some -errand, and then turn the key upon her, you would facilitate -matters immensely." - -"I will do it." - -"Excellent! We shall then look thoroughly into the affair. Of -course there is only one feasible explanation. You have been -brought there to personate someone, and the real person is -imprisoned in this chamber. That is obvious. As to who this -prisoner is, I have no doubt that it is the daughter, Miss Alice -Rucastle, if I remember right, who was said to have gone to -America. You were chosen, doubtless, as resembling her in height, -figure, and the colour of your hair. Hers had been cut off, very -possibly in some illness through which she has passed, and so, of -course, yours had to be sacrificed also. 
By a curious chance you -came upon her tresses. The man in the road was undoubtedly some -friend of hers--possibly her fiancé--and no doubt, as you wore -the girl's dress and were so like her, he was convinced from your -laughter, whenever he saw you, and afterwards from your gesture, -that Miss Rucastle was perfectly happy, and that she no longer -desired his attentions. The dog is let loose at night to prevent -him from endeavouring to communicate with her. So much is fairly -clear. The most serious point in the case is the disposition of -the child." - -"What on earth has that to do with it?" I ejaculated. - -"My dear Watson, you as a medical man are continually gaining -light as to the tendencies of a child by the study of the -parents. Don't you see that the converse is equally valid. I have -frequently gained my first real insight into the character of -parents by studying their children. This child's disposition is -abnormally cruel, merely for cruelty's sake, and whether he -derives this from his smiling father, as I should suspect, or -from his mother, it bodes evil for the poor girl who is in their -power." - -"I am sure that you are right, Mr. Holmes," cried our client. "A -thousand things come back to me which make me certain that you -have hit it. Oh, let us lose not an instant in bringing help to -this poor creature." - -"We must be circumspect, for we are dealing with a very cunning -man. We can do nothing until seven o'clock. At that hour we shall -be with you, and it will not be long before we solve the -mystery." - -We were as good as our word, for it was just seven when we -reached the Copper Beeches, having put up our trap at a wayside -public-house. The group of trees, with their dark leaves shining -like burnished metal in the light of the setting sun, were -sufficient to mark the house even had Miss Hunter not been -standing smiling on the door-step. - -"Have you managed it?" asked Holmes. 
- -A loud thudding noise came from somewhere downstairs. "That is -Mrs. Toller in the cellar," said she. "Her husband lies snoring -on the kitchen rug. Here are his keys, which are the duplicates -of Mr. Rucastle's." - -"You have done well indeed!" cried Holmes with enthusiasm. "Now -lead the way, and we shall soon see the end of this black -business." - -We passed up the stair, unlocked the door, followed on down a -passage, and found ourselves in front of the barricade which Miss -Hunter had described. Holmes cut the cord and removed the -transverse bar. Then he tried the various keys in the lock, but -without success. No sound came from within, and at the silence -Holmes' face clouded over. - -"I trust that we are not too late," said he. "I think, Miss -Hunter, that we had better go in without you. Now, Watson, put -your shoulder to it, and we shall see whether we cannot make our -way in." - -It was an old rickety door and gave at once before our united -strength. Together we rushed into the room. It was empty. There -was no furniture save a little pallet bed, a small table, and a -basketful of linen. The skylight above was open, and the prisoner -gone. - -"There has been some villainy here," said Holmes; "this beauty -has guessed Miss Hunter's intentions and has carried his victim -off." - -"But how?" - -"Through the skylight. We shall soon see how he managed it." He -swung himself up onto the roof. "Ah, yes," he cried, "here's the -end of a long light ladder against the eaves. That is how he did -it." - -"But it is impossible," said Miss Hunter; "the ladder was not -there when the Rucastles went away." - -"He has come back and done it. I tell you that he is a clever and -dangerous man. I should not be very much surprised if this were -he whose step I hear now upon the stair. I think, Watson, that it -would be as well for you to have your pistol ready." 
- -The words were hardly out of his mouth before a man appeared at -the door of the room, a very fat and burly man, with a heavy -stick in his hand. Miss Hunter screamed and shrunk against the -wall at the sight of him, but Sherlock Holmes sprang forward and -confronted him. - -"You villain!" said he, "where's your daughter?" - -The fat man cast his eyes round, and then up at the open -skylight. - -"It is for me to ask you that," he shrieked, "you thieves! Spies -and thieves! I have caught you, have I? You are in my power. I'll -serve you!" He turned and clattered down the stairs as hard as he -could go. - -"He's gone for the dog!" cried Miss Hunter. - -"I have my revolver," said I. - -"Better close the front door," cried Holmes, and we all rushed -down the stairs together. We had hardly reached the hall when we -heard the baying of a hound, and then a scream of agony, with a -horrible worrying sound which it was dreadful to listen to. An -elderly man with a red face and shaking limbs came staggering out -at a side door. - -"My God!" he cried. "Someone has loosed the dog. It's not been -fed for two days. Quick, quick, or it'll be too late!" - -Holmes and I rushed out and round the angle of the house, with -Toller hurrying behind us. There was the huge famished brute, its -black muzzle buried in Rucastle's throat, while he writhed and -screamed upon the ground. Running up, I blew its brains out, and -it fell over with its keen white teeth still meeting in the great -creases of his neck. With much labour we separated them and -carried him, living but horribly mangled, into the house. We laid -him upon the drawing-room sofa, and having dispatched the sobered -Toller to bear the news to his wife, I did what I could to -relieve his pain. We were all assembled round him when the door -opened, and a tall, gaunt woman entered the room. - -"Mrs. Toller!" cried Miss Hunter. - -"Yes, miss. Mr. Rucastle let me out when he came back before he -went up to you. 
Ah, miss, it is a pity you didn't let me know -what you were planning, for I would have told you that your pains -were wasted." - -"Ha!" said Holmes, looking keenly at her. "It is clear that Mrs. -Toller knows more about this matter than anyone else." - -"Yes, sir, I do, and I am ready enough to tell what I know." - -"Then, pray, sit down, and let us hear it for there are several -points on which I must confess that I am still in the dark." - -"I will soon make it clear to you," said she; "and I'd have done -so before now if I could ha' got out from the cellar. If there's -police-court business over this, you'll remember that I was the -one that stood your friend, and that I was Miss Alice's friend -too. - -"She was never happy at home, Miss Alice wasn't, from the time -that her father married again. She was slighted like and had no -say in anything, but it never really became bad for her until -after she met Mr. Fowler at a friend's house. As well as I could -learn, Miss Alice had rights of her own by will, but she was so -quiet and patient, she was, that she never said a word about them -but just left everything in Mr. Rucastle's hands. He knew he was -safe with her; but when there was a chance of a husband coming -forward, who would ask for all that the law would give him, then -her father thought it time to put a stop on it. He wanted her to -sign a paper, so that whether she married or not, he could use -her money. When she wouldn't do it, he kept on worrying her until -she got brain-fever, and for six weeks was at death's door. Then -she got better at last, all worn to a shadow, and with her -beautiful hair cut off; but that didn't make no change in her -young man, and he stuck to her as true as man could be." - -"Ah," said Holmes, "I think that what you have been good enough -to tell us makes the matter fairly clear, and that I can deduce -all that remains. Mr. Rucastle then, I presume, took to this -system of imprisonment?" - -"Yes, sir." 
- -"And brought Miss Hunter down from London in order to get rid of -the disagreeable persistence of Mr. Fowler." - -"That was it, sir." - -"But Mr. Fowler being a persevering man, as a good seaman should -be, blockaded the house, and having met you succeeded by certain -arguments, metallic or otherwise, in convincing you that your -interests were the same as his." - -"Mr. Fowler was a very kind-spoken, free-handed gentleman," said -Mrs. Toller serenely. - -"And in this way he managed that your good man should have no -want of drink, and that a ladder should be ready at the moment -when your master had gone out." - -"You have it, sir, just as it happened." - -"I am sure we owe you an apology, Mrs. Toller," said Holmes, "for -you have certainly cleared up everything which puzzled us. And -here comes the country surgeon and Mrs. Rucastle, so I think, -Watson, that we had best escort Miss Hunter back to Winchester, -as it seems to me that our locus standi now is rather a -questionable one." - -And thus was solved the mystery of the sinister house with the -copper beeches in front of the door. Mr. Rucastle survived, but -was always a broken man, kept alive solely through the care of -his devoted wife. They still live with their old servants, who -probably know so much of Rucastle's past life that he finds it -difficult to part from them. Mr. Fowler and Miss Rucastle were -married, by special license, in Southampton the day after their -flight, and he is now the holder of a government appointment in -the island of Mauritius. As to Miss Violet Hunter, my friend -Holmes, rather to my disappointment, manifested no further -interest in her when once she had ceased to be the centre of one -of his problems, and she is now the head of a private school at -Walsall, where I believe that she has met with considerable success. 
- - - - - - - - - -End of the Project Gutenberg EBook of The Adventures of Sherlock Holmes, by -Arthur Conan Doyle - -*** END OF THIS PROJECT GUTENBERG EBOOK THE ADVENTURES OF SHERLOCK HOLMES *** - -***** This file should be named 1661-8.txt or 1661-8.zip ***** -This and all associated files of various formats will be found in: - http://www.gutenberg.org/1/6/6/1661/ - -Produced by an anonymous Project Gutenberg volunteer and Jose Menendez - -Updated editions will replace the previous one--the old editions -will be renamed. - -Creating the works from public domain print editions means that no -one owns a United States copyright in these works, so the Foundation -(and you!) can copy and distribute it in the United States without -permission and without paying copyright royalties. Special rules, -set forth in the General Terms of Use part of this license, apply to -copying and distributing Project Gutenberg-tm electronic works to -protect the PROJECT GUTENBERG-tm concept and trademark. Project -Gutenberg is a registered trademark, and may not be used if you -charge for the eBooks, unless you receive specific permission. If you -do not charge anything for copies of this eBook, complying with the -rules is very easy. You may use this eBook for nearly any purpose -such as creation of derivative works, reports, performances and -research. They may be modified and printed and given away--you may do -practically ANYTHING with public domain eBooks. Redistribution is -subject to the trademark license, especially commercial -redistribution. 
- - - -*** START: FULL LICENSE *** - -THE FULL PROJECT GUTENBERG LICENSE -PLEASE READ THIS BEFORE YOU DISTRIBUTE OR USE THIS WORK - -To protect the Project Gutenberg-tm mission of promoting the free -distribution of electronic works, by using or distributing this work -(or any other work associated in any way with the phrase "Project -Gutenberg"), you agree to comply with all the terms of the Full Project -Gutenberg-tm License (available with this file or online at -http://gutenberg.net/license). - - -Section 1. General Terms of Use and Redistributing Project Gutenberg-tm -electronic works - -1.A. By reading or using any part of this Project Gutenberg-tm -electronic work, you indicate that you have read, understand, agree to -and accept all the terms of this license and intellectual property -(trademark/copyright) agreement. If you do not agree to abide by all -the terms of this agreement, you must cease using and return or destroy -all copies of Project Gutenberg-tm electronic works in your possession. -If you paid a fee for obtaining a copy of or access to a Project -Gutenberg-tm electronic work and you do not agree to be bound by the -terms of this agreement, you may obtain a refund from the person or -entity to whom you paid the fee as set forth in paragraph 1.E.8. - -1.B. "Project Gutenberg" is a registered trademark. It may only be -used on or associated in any way with an electronic work by people who -agree to be bound by the terms of this agreement. There are a few -things that you can do with most Project Gutenberg-tm electronic works -even without complying with the full terms of this agreement. See -paragraph 1.C below. There are a lot of things you can do with Project -Gutenberg-tm electronic works if you follow the terms of this agreement -and help preserve free future access to Project Gutenberg-tm electronic -works. See paragraph 1.E below. - -1.C. 
The Project Gutenberg Literary Archive Foundation ("the Foundation" -or PGLAF), owns a compilation copyright in the collection of Project -Gutenberg-tm electronic works. Nearly all the individual works in the -collection are in the public domain in the United States. If an -individual work is in the public domain in the United States and you are -located in the United States, we do not claim a right to prevent you from -copying, distributing, performing, displaying or creating derivative -works based on the work as long as all references to Project Gutenberg -are removed. Of course, we hope that you will support the Project -Gutenberg-tm mission of promoting free access to electronic works by -freely sharing Project Gutenberg-tm works in compliance with the terms of -this agreement for keeping the Project Gutenberg-tm name associated with -the work. You can easily comply with the terms of this agreement by -keeping this work in the same format with its attached full Project -Gutenberg-tm License when you share it without charge with others. - -1.D. The copyright laws of the place where you are located also govern -what you can do with this work. Copyright laws in most countries are in -a constant state of change. If you are outside the United States, check -the laws of your country in addition to the terms of this agreement -before downloading, copying, displaying, performing, distributing or -creating derivative works based on this work or any other Project -Gutenberg-tm work. The Foundation makes no representations concerning -the copyright status of any work in any country outside the United -States. - -1.E. Unless you have removed all references to Project Gutenberg: - -1.E.1. 
The following sentence, with active links to, or other immediate -access to, the full Project Gutenberg-tm License must appear prominently -whenever any copy of a Project Gutenberg-tm work (any work on which the -phrase "Project Gutenberg" appears, or with which the phrase "Project -Gutenberg" is associated) is accessed, displayed, performed, viewed, -copied or distributed: - -This eBook is for the use of anyone anywhere at no cost and with -almost no restrictions whatsoever. You may copy it, give it away or -re-use it under the terms of the Project Gutenberg License included -with this eBook or online at www.gutenberg.net - -1.E.2. If an individual Project Gutenberg-tm electronic work is derived -from the public domain (does not contain a notice indicating that it is -posted with permission of the copyright holder), the work can be copied -and distributed to anyone in the United States without paying any fees -or charges. If you are redistributing or providing access to a work -with the phrase "Project Gutenberg" associated with or appearing on the -work, you must comply either with the requirements of paragraphs 1.E.1 -through 1.E.7 or obtain permission for the use of the work and the -Project Gutenberg-tm trademark as set forth in paragraphs 1.E.8 or -1.E.9. - -1.E.3. If an individual Project Gutenberg-tm electronic work is posted -with the permission of the copyright holder, your use and distribution -must comply with both paragraphs 1.E.1 through 1.E.7 and any additional -terms imposed by the copyright holder. Additional terms will be linked -to the Project Gutenberg-tm License for all works posted with the -permission of the copyright holder found at the beginning of this work. - -1.E.4. Do not unlink or detach or remove the full Project Gutenberg-tm -License terms from this work, or any files containing a part of this -work or any other work associated with Project Gutenberg-tm. - -1.E.5. 
Do not copy, display, perform, distribute or redistribute this -electronic work, or any part of this electronic work, without -prominently displaying the sentence set forth in paragraph 1.E.1 with -active links or immediate access to the full terms of the Project -Gutenberg-tm License. - -1.E.6. You may convert to and distribute this work in any binary, -compressed, marked up, nonproprietary or proprietary form, including any -word processing or hypertext form. However, if you provide access to or -distribute copies of a Project Gutenberg-tm work in a format other than -"Plain Vanilla ASCII" or other format used in the official version -posted on the official Project Gutenberg-tm web site (www.gutenberg.net), -you must, at no additional cost, fee or expense to the user, provide a -copy, a means of exporting a copy, or a means of obtaining a copy upon -request, of the work in its original "Plain Vanilla ASCII" or other -form. Any alternate format must include the full Project Gutenberg-tm -License as specified in paragraph 1.E.1. - -1.E.7. Do not charge a fee for access to, viewing, displaying, -performing, copying or distributing any Project Gutenberg-tm works -unless you comply with paragraph 1.E.8 or 1.E.9. - -1.E.8. You may charge a reasonable fee for copies of or providing -access to or distributing Project Gutenberg-tm electronic works provided -that - -- You pay a royalty fee of 20% of the gross profits you derive from - the use of Project Gutenberg-tm works calculated using the method - you already use to calculate your applicable taxes. The fee is - owed to the owner of the Project Gutenberg-tm trademark, but he - has agreed to donate royalties under this paragraph to the - Project Gutenberg Literary Archive Foundation. Royalty payments - must be paid within 60 days following each date on which you - prepare (or are legally required to prepare) your periodic tax - returns. 
Royalty payments should be clearly marked as such and - sent to the Project Gutenberg Literary Archive Foundation at the - address specified in Section 4, "Information about donations to - the Project Gutenberg Literary Archive Foundation." - -- You provide a full refund of any money paid by a user who notifies - you in writing (or by e-mail) within 30 days of receipt that s/he - does not agree to the terms of the full Project Gutenberg-tm - License. You must require such a user to return or - destroy all copies of the works possessed in a physical medium - and discontinue all use of and all access to other copies of - Project Gutenberg-tm works. - -- You provide, in accordance with paragraph 1.F.3, a full refund of any - money paid for a work or a replacement copy, if a defect in the - electronic work is discovered and reported to you within 90 days - of receipt of the work. - -- You comply with all other terms of this agreement for free - distribution of Project Gutenberg-tm works. - -1.E.9. If you wish to charge a fee or distribute a Project Gutenberg-tm -electronic work or group of works on different terms than are set -forth in this agreement, you must obtain permission in writing from -both the Project Gutenberg Literary Archive Foundation and Michael -Hart, the owner of the Project Gutenberg-tm trademark. Contact the -Foundation as set forth in Section 3 below. - -1.F. - -1.F.1. Project Gutenberg volunteers and employees expend considerable -effort to identify, do copyright research on, transcribe and proofread -public domain works in creating the Project Gutenberg-tm -collection. 
Despite these efforts, Project Gutenberg-tm electronic -works, and the medium on which they may be stored, may contain -"Defects," such as, but not limited to, incomplete, inaccurate or -corrupt data, transcription errors, a copyright or other intellectual -property infringement, a defective or damaged disk or other medium, a -computer virus, or computer codes that damage or cannot be read by -your equipment. - -1.F.2. LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the "Right -of Replacement or Refund" described in paragraph 1.F.3, the Project -Gutenberg Literary Archive Foundation, the owner of the Project -Gutenberg-tm trademark, and any other party distributing a Project -Gutenberg-tm electronic work under this agreement, disclaim all -liability to you for damages, costs and expenses, including legal -fees. YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT -LIABILITY, BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE -PROVIDED IN PARAGRAPH 1.F.3. YOU AGREE THAT THE FOUNDATION, THE -TRADEMARK OWNER, AND ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE -LIABLE TO YOU FOR ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR -INCIDENTAL DAMAGES EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH -DAMAGE. - -1.F.3. LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a -defect in this electronic work within 90 days of receiving it, you can -receive a refund of the money (if any) you paid for it by sending a -written explanation to the person you received the work from. If you -received the work on a physical medium, you must return the medium with -your written explanation. The person or entity that provided you with -the defective work may elect to provide a replacement copy in lieu of a -refund. If you received the work electronically, the person or entity -providing it to you may choose to give you a second opportunity to -receive the work electronically in lieu of a refund. 
If the second copy -is also defective, you may demand a refund in writing without further -opportunities to fix the problem. - -1.F.4. Except for the limited right of replacement or refund set forth -in paragraph 1.F.3, this work is provided to you 'AS-IS' WITH NO OTHER -WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -WARRANTIES OF MERCHANTIBILITY OR FITNESS FOR ANY PURPOSE. - -1.F.5. Some states do not allow disclaimers of certain implied -warranties or the exclusion or limitation of certain types of damages. -If any disclaimer or limitation set forth in this agreement violates the -law of the state applicable to this agreement, the agreement shall be -interpreted to make the maximum disclaimer or limitation permitted by -the applicable state law. The invalidity or unenforceability of any -provision of this agreement shall not void the remaining provisions. - -1.F.6. INDEMNITY - You agree to indemnify and hold the Foundation, the -trademark owner, any agent or employee of the Foundation, anyone -providing copies of Project Gutenberg-tm electronic works in accordance -with this agreement, and any volunteers associated with the production, -promotion and distribution of Project Gutenberg-tm electronic works, -harmless from all liability, costs and expenses, including legal fees, -that arise directly or indirectly from any of the following which you do -or cause to occur: (a) distribution of this or any Project Gutenberg-tm -work, (b) alteration, modification, or additions or deletions to any -Project Gutenberg-tm work, and (c) any Defect you cause. - - -Section 2. Information about the Mission of Project Gutenberg-tm - -Project Gutenberg-tm is synonymous with the free distribution of -electronic works in formats readable by the widest variety of computers -including obsolete, old, middle-aged and new computers. It exists -because of the efforts of hundreds of volunteers and donations from -people in all walks of life. 
- -Volunteers and financial support to provide volunteers with the -assistance they need are critical to reaching Project Gutenberg-tm's -goals and ensuring that the Project Gutenberg-tm collection will -remain freely available for generations to come. In 2001, the Project -Gutenberg Literary Archive Foundation was created to provide a secure -and permanent future for Project Gutenberg-tm and future generations. -To learn more about the Project Gutenberg Literary Archive Foundation -and how your efforts and donations can help, see Sections 3 and 4 -and the Foundation web page at http://www.pglaf.org. - - -Section 3. Information about the Project Gutenberg Literary Archive -Foundation - -The Project Gutenberg Literary Archive Foundation is a non profit -501(c)(3) educational corporation organized under the laws of the -state of Mississippi and granted tax exempt status by the Internal -Revenue Service. The Foundation's EIN or federal tax identification -number is 64-6221541. Its 501(c)(3) letter is posted at -http://pglaf.org/fundraising. Contributions to the Project Gutenberg -Literary Archive Foundation are tax deductible to the full extent -permitted by U.S. federal laws and your state's laws. - -The Foundation's principal office is located at 4557 Melan Dr. S. -Fairbanks, AK, 99712., but its volunteers and employees are scattered -throughout numerous locations. Its business office is located at -809 North 1500 West, Salt Lake City, UT 84116, (801) 596-1887, email -business@pglaf.org. Email contact links and up to date contact -information can be found at the Foundation's web site and official -page at http://pglaf.org - -For additional contact information: - Dr. Gregory B. Newby - Chief Executive and Director - gbnewby@pglaf.org - - -Section 4. 
Information about Donations to the Project Gutenberg -Literary Archive Foundation - -Project Gutenberg-tm depends upon and cannot survive without wide -spread public support and donations to carry out its mission of -increasing the number of public domain and licensed works that can be -freely distributed in machine readable form accessible by the widest -array of equipment including outdated equipment. Many small donations -($1 to $5,000) are particularly important to maintaining tax exempt -status with the IRS. - -The Foundation is committed to complying with the laws regulating -charities and charitable donations in all 50 states of the United -States. Compliance requirements are not uniform and it takes a -considerable effort, much paperwork and many fees to meet and keep up -with these requirements. We do not solicit donations in locations -where we have not received written confirmation of compliance. To -SEND DONATIONS or determine the status of compliance for any -particular state visit http://pglaf.org - -While we cannot and do not solicit contributions from states where we -have not met the solicitation requirements, we know of no prohibition -against accepting unsolicited donations from donors in such states who -approach us with offers to donate. - -International donations are gratefully accepted, but we cannot make -any statements concerning tax treatment of donations received from -outside the United States. U.S. laws alone swamp our small staff. - -Please check the Project Gutenberg Web pages for current donation -methods and addresses. Donations are accepted in a number of other -ways including including checks, online payments and credit card -donations. To donate, please visit: http://pglaf.org/donate - - -Section 5. General Information About Project Gutenberg-tm electronic -works. - -Professor Michael S. Hart is the originator of the Project Gutenberg-tm -concept of a library of electronic works that could be freely shared -with anyone. 
For thirty years, he produced and distributed Project -Gutenberg-tm eBooks with only a loose network of volunteer support. - - -Project Gutenberg-tm eBooks are often created from several printed -editions, all of which are confirmed as Public Domain in the U.S. -unless a copyright notice is included. Thus, we do not necessarily -keep eBooks in compliance with any particular paper edition. - - -Most people start at our Web site which has the main PG search facility: - - http://www.gutenberg.net - -This Web site includes information about Project Gutenberg-tm, -including how to make donations to the Project Gutenberg Literary -Archive Foundation, how to help produce our new eBooks, and how to -subscribe to our email newsletter to hear about new eBooks. diff -Nru ripgrep-0.6.0/grep/src/lib.rs ripgrep-0.10.0.3/grep/src/lib.rs --- ripgrep-0.6.0/grep/src/lib.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/grep/src/lib.rs 2018-09-10 21:10:55.000000000 +0000 @@ -1,83 +1,23 @@ -#![deny(missing_docs)] - /*! -A fast line oriented regex searcher. -*/ - -#[macro_use] -extern crate log; -extern crate memchr; -extern crate regex; -extern crate regex_syntax as syntax; - -use std::error; -use std::fmt; -use std::result; - -pub use search::{Grep, GrepBuilder, Iter, Match}; +ripgrep, as a library. -mod literals; -mod nonl; -mod search; -mod word_boundary; +This library is intended to provide a high level facade to the crates that +make up ripgrep's core searching routines. However, there is no high level +documentation available yet guiding users on how to fit all of the pieces +together. -/// Result is a convenient type alias that fixes the type of the error to -/// the `Error` type defined in this crate. -pub type Result = result::Result; +Every public API item in the constituent crates is documented, but examples +are sparse. -/// Error enumerates the list of possible error conditions when building or -/// using a `Grep` line searcher. 
-#[derive(Debug)] -pub enum Error { - /// An error from parsing or compiling a regex. - Regex(regex::Error), - /// This error occurs when an illegal literal was found in the regex - /// pattern. For example, if the line terminator is `\n` and the regex - /// pattern is `\w+\n\w+`, then the presence of `\n` will cause this error. - LiteralNotAllowed(char), - /// An unused enum variant that indicates this enum may be expanded in - /// the future and therefore should not be exhaustively matched. - #[doc(hidden)] - __Nonexhaustive, -} - -impl error::Error for Error { - fn description(&self) -> &str { - match *self { - Error::Regex(ref err) => err.description(), - Error::LiteralNotAllowed(_) => "use of forbidden literal", - Error::__Nonexhaustive => unreachable!(), - } - } - - fn cause(&self) -> Option<&error::Error> { - match *self { - Error::Regex(ref err) => err.cause(), - _ => None, - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Regex(ref err) => err.fmt(f), - Error::LiteralNotAllowed(chr) => { - write!(f, "Literal {:?} not allowed.", chr) - } - Error::__Nonexhaustive => unreachable!(), - } - } -} +A cookbook and a guide are planned. 
+*/ -impl From for Error { - fn from(err: regex::Error) -> Error { - Error::Regex(err) - } -} +#![deny(missing_docs)] -impl From for Error { - fn from(err: syntax::Error) -> Error { - Error::Regex(regex::Error::Syntax(err.to_string())) - } -} +pub extern crate grep_cli as cli; +pub extern crate grep_matcher as matcher; +#[cfg(feature = "pcre2")] +pub extern crate grep_pcre2 as pcre2; +pub extern crate grep_printer as printer; +pub extern crate grep_regex as regex; +pub extern crate grep_searcher as searcher; diff -Nru ripgrep-0.6.0/grep/src/literals.rs ripgrep-0.10.0.3/grep/src/literals.rs --- ripgrep-0.6.0/grep/src/literals.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/grep/src/literals.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,265 +0,0 @@ -/*! -The literals module is responsible for extracting *inner* literals out of the -AST of a regular expression. Normally this is the job of the regex engine -itself, but the regex engine doesn't look for inner literals. Since we're doing -line based searching, we can use them, so we need to do it ourselves. - -Note that this implementation is incredibly suspicious. We need something more -principled. -*/ -use std::cmp; - -use regex::bytes::RegexBuilder; -use syntax::{ - Expr, Literals, Lit, - ByteClass, ByteRange, CharClass, ClassRange, Repeater, -}; - -#[derive(Clone, Debug)] -pub struct LiteralSets { - prefixes: Literals, - suffixes: Literals, - required: Literals, -} - -impl LiteralSets { - pub fn create(expr: &Expr) -> Self { - let mut required = Literals::empty(); - union_required(expr, &mut required); - LiteralSets { - prefixes: expr.prefixes(), - suffixes: expr.suffixes(), - required: required, - } - } - - pub fn to_regex_builder(&self) -> Option { - if self.prefixes.all_complete() && !self.prefixes.is_empty() { - debug!("literal prefixes detected: {:?}", self.prefixes); - // When this is true, the regex engine will do a literal scan. 
- return None; - } - - // Out of inner required literals, prefixes and suffixes, which one - // is the longest? We pick the longest to do fast literal scan under - // the assumption that a longer literal will have a lower false - // positive rate. - let pre_lcp = self.prefixes.longest_common_prefix(); - let pre_lcs = self.prefixes.longest_common_suffix(); - let suf_lcp = self.suffixes.longest_common_prefix(); - let suf_lcs = self.suffixes.longest_common_suffix(); - - let req_lits = self.required.literals(); - let req = match req_lits.iter().max_by_key(|lit| lit.len()) { - None => &[], - Some(req) => &***req, - }; - - let mut lit = pre_lcp; - if pre_lcs.len() > lit.len() { - lit = pre_lcs; - } - if suf_lcp.len() > lit.len() { - lit = suf_lcp; - } - if suf_lcs.len() > lit.len() { - lit = suf_lcs; - } - if req_lits.len() == 1 && req.len() > lit.len() { - lit = req; - } - - // Special case: if we detected an alternation of inner required - // literals and its longest literal is bigger than the longest - // prefix/suffix, then choose the alternation. In practice, this - // helps with case insensitive matching, which can generate lots of - // inner required literals. 
- let any_empty = req_lits.iter().any(|lit| lit.is_empty()); - if req.len() > lit.len() && req_lits.len() > 1 && !any_empty { - debug!("required literals found: {:?}", req_lits); - let alts: Vec = - req_lits.into_iter().map(|x| bytes_to_regex(x)).collect(); - let mut builder = RegexBuilder::new(&alts.join("|")); - builder.unicode(false); - Some(builder) - } else if lit.is_empty() { - None - } else { - debug!("required literal found: {:?}", show(lit)); - let mut builder = RegexBuilder::new(&bytes_to_regex(&lit)); - builder.unicode(false); - Some(builder) - } - } -} - -fn union_required(expr: &Expr, lits: &mut Literals) { - use syntax::Expr::*; - match *expr { - Literal { ref chars, casei: false } => { - let s: String = chars.iter().cloned().collect(); - lits.cross_add(s.as_bytes()); - } - Literal { ref chars, casei: true } => { - for &c in chars { - let cls = CharClass::new(vec![ - ClassRange { start: c, end: c }, - ]).case_fold(); - if !lits.add_char_class(&cls) { - lits.cut(); - return; - } - } - } - LiteralBytes { ref bytes, casei: false } => { - lits.cross_add(bytes); - } - LiteralBytes { ref bytes, casei: true } => { - for &b in bytes { - let cls = ByteClass::new(vec![ - ByteRange { start: b, end: b }, - ]).case_fold(); - if !lits.add_byte_class(&cls) { - lits.cut(); - return; - } - } - } - Class(_) => { - lits.cut(); - } - ClassBytes(_) => { - lits.cut(); - } - Group { ref e, .. } => { - union_required(&**e, lits); - } - Repeat { r: Repeater::ZeroOrOne, .. } => lits.cut(), - Repeat { r: Repeater::ZeroOrMore, .. } => lits.cut(), - Repeat { ref e, r: Repeater::OneOrMore, .. 
} => { - union_required(&**e, lits); - lits.cut(); - } - Repeat { ref e, r: Repeater::Range { min, max }, greedy } => { - repeat_range_literals( - &**e, min, max, greedy, lits, union_required); - } - Concat(ref es) if es.is_empty() => {} - Concat(ref es) if es.len() == 1 => union_required(&es[0], lits), - Concat(ref es) => { - for e in es { - let mut lits2 = lits.to_empty(); - union_required(e, &mut lits2); - if lits2.is_empty() { - lits.cut(); - continue; - } - if lits2.contains_empty() { - lits.cut(); - } - // if !lits.union(lits2) { - if !lits.cross_product(&lits2) { - // If this expression couldn't yield any literal that - // could be extended, then we need to quit. Since we're - // short-circuiting, we also need to freeze every member. - lits.cut(); - break; - } - } - } - Alternate(ref es) => { - alternate_literals(es, lits, union_required); - } - _ => lits.cut(), - } -} - -fn repeat_range_literals( - e: &Expr, - min: u32, - max: Option, - _greedy: bool, - lits: &mut Literals, - mut f: F, -) { - if min == 0 { - // This is a bit conservative. If `max` is set, then we could - // treat this as a finite set of alternations. For now, we - // just treat it as `e*`. - lits.cut(); - } else { - let n = cmp::min(lits.limit_size(), min as usize); - // We only extract literals from a single repetition, even though - // we could do more. e.g., `a{3}` will have `a` extracted instead of - // `aaa`. The reason is that inner literal extraction can't be unioned - // across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}` - // is wrong. 
- f(e, lits); - if n < min as usize { - lits.cut(); - } - if max.map_or(true, |max| min < max) { - lits.cut(); - } - } -} - -fn alternate_literals( - es: &[Expr], - lits: &mut Literals, - mut f: F, -) { - let mut lits2 = lits.to_empty(); - for e in es { - let mut lits3 = lits.to_empty(); - lits3.set_limit_size(lits.limit_size() / 5); - f(e, &mut lits3); - if lits3.is_empty() || !lits2.union(lits3) { - // If we couldn't find suffixes for *any* of the - // alternates, then the entire alternation has to be thrown - // away and any existing members must be frozen. Similarly, - // if the union couldn't complete, stop and freeze. - lits.cut(); - return; - } - } - // All we do at the moment is look for prefixes and suffixes. If both - // are empty, then we report nothing. We should be able to do better than - // this, but we'll need something more expressive than just a "set of - // literals." - let lcp = lits2.longest_common_prefix(); - let lcs = lits2.longest_common_suffix(); - if !lcp.is_empty() { - lits.cross_add(lcp); - } - lits.cut(); - if !lcs.is_empty() { - lits.add(Lit::empty()); - lits.add(Lit::new(lcs.to_vec())); - } -} - -/// Converts an arbitrary sequence of bytes to a literal suitable for building -/// a regular expression. -fn bytes_to_regex(bs: &[u8]) -> String { - let mut s = String::with_capacity(bs.len()); - for &b in bs { - s.push_str(&format!("\\x{:02x}", b)); - } - s -} - -/// Converts arbitrary bytes to a nice string. -fn show(bs: &[u8]) -> String { - // Why aren't we using this to feed to the regex? Doesn't really matter - // I guess. 
---AG - use std::ascii::escape_default; - use std::str; - - let mut nice = String::new(); - for &b in bs { - let part: Vec = escape_default(b).collect(); - nice.push_str(str::from_utf8(&part).unwrap()); - } - nice -} diff -Nru ripgrep-0.6.0/grep/src/nonl.rs ripgrep-0.10.0.3/grep/src/nonl.rs --- ripgrep-0.6.0/grep/src/nonl.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/grep/src/nonl.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,69 +0,0 @@ -use syntax::Expr; - -use {Error, Result}; - -/// Returns a new expression that is guaranteed to never match the given -/// ASCII character. -/// -/// If the expression contains the literal byte, then an error is returned. -/// -/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this -/// function panics. -pub fn remove(expr: Expr, byte: u8) -> Result { - // TODO(burntsushi): There is a bug in this routine where only `\n` is - // handled correctly. Namely, `AnyChar` and `AnyByte` need to be translated - // to proper character classes instead of the special `AnyCharNoNL` and - // `AnyByteNoNL` classes. 
- use syntax::Expr::*; - assert!(byte <= 0x7F); - let chr = byte as char; - assert!(chr.len_utf8() == 1); - - Ok(match expr { - Literal { chars, casei } => { - if chars.iter().position(|&c| c == chr).is_some() { - return Err(Error::LiteralNotAllowed(chr)); - } - Literal { chars: chars, casei: casei } - } - LiteralBytes { bytes, casei } => { - if bytes.iter().position(|&b| b == byte).is_some() { - return Err(Error::LiteralNotAllowed(chr)); - } - LiteralBytes { bytes: bytes, casei: casei } - } - AnyChar => AnyCharNoNL, - AnyByte => AnyByteNoNL, - Class(mut cls) => { - cls.remove(chr); - Class(cls) - } - ClassBytes(mut cls) => { - cls.remove(byte); - ClassBytes(cls) - } - Group { e, i, name } => { - Group { - e: Box::new(try!(remove(*e, byte))), - i: i, - name: name, - } - } - Repeat { e, r, greedy } => { - Repeat { - e: Box::new(try!(remove(*e, byte))), - r: r, - greedy: greedy, - } - } - Concat(exprs) => { - Concat(try!( - exprs.into_iter().map(|e| remove(e, byte)).collect())) - } - Alternate(exprs) => { - Alternate(try!( - exprs.into_iter().map(|e| remove(e, byte)).collect())) - } - e => e, - }) -} diff -Nru ripgrep-0.6.0/grep/src/search.rs ripgrep-0.10.0.3/grep/src/search.rs --- ripgrep-0.6.0/grep/src/search.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/grep/src/search.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,398 +0,0 @@ -use memchr::{memchr, memrchr}; -use regex::bytes::{Regex, RegexBuilder}; -use syntax; - -use literals::LiteralSets; -use nonl; -use syntax::Expr; -use word_boundary::strip_unicode_word_boundaries; -use Result; - -/// A matched line. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct Match { - start: usize, - end: usize, -} - -impl Match { - /// Create a new empty match value. - pub fn new() -> Match { - Match::default() - } - - /// Return the starting byte offset of the line that matched. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Return the ending byte offset of the line that matched. 
- #[inline] - pub fn end(&self) -> usize { - self.end - } -} - -/// A fast line oriented regex searcher. -#[derive(Clone, Debug)] -pub struct Grep { - re: Regex, - required: Option, - opts: Options, -} - -/// A builder for a grep searcher. -#[derive(Clone, Debug)] -pub struct GrepBuilder { - pattern: String, - opts: Options, -} - -#[derive(Clone, Debug)] -struct Options { - case_insensitive: bool, - case_smart: bool, - line_terminator: u8, - size_limit: usize, - dfa_size_limit: usize, -} - -impl Default for Options { - fn default() -> Options { - Options { - case_insensitive: false, - case_smart: false, - line_terminator: b'\n', - size_limit: 10 * (1 << 20), - dfa_size_limit: 10 * (1 << 20), - } - } -} - -impl GrepBuilder { - /// Create a new builder for line searching. - /// - /// The pattern given should be a regular expression. The precise syntax - /// supported is documented on the regex crate. - pub fn new(pattern: &str) -> GrepBuilder { - GrepBuilder { - pattern: pattern.to_string(), - opts: Options::default(), - } - } - - /// Set the line terminator. - /// - /// The line terminator can be any ASCII character and serves to delineate - /// the match boundaries in the text searched. - /// - /// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII). - pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder { - assert!(ascii_byte <= 0x7F); - self.opts.line_terminator = ascii_byte; - self - } - - /// Set the case sensitive flag (`i`) on the regex. - pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder { - self.opts.case_insensitive = yes; - self - } - - /// Whether to enable smart case search or not (disabled by default). - /// - /// Smart case uses case insensitive search if the regex is contains all - /// lowercase literal characters. Otherwise, a case sensitive search is - /// used instead. - /// - /// Enabling the case_insensitive flag overrides this. 
- pub fn case_smart(mut self, yes: bool) -> GrepBuilder { - self.opts.case_smart = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a - /// single compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit(mut self, limit: usize) -> GrepBuilder { - self.opts.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will use - /// while searching. - /// - /// Note that this is a per thread limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simulanteously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder { - self.opts.dfa_size_limit = limit; - self - } - - /// Create a line searcher. - /// - /// If there was a problem parsing or compiling the regex with the given - /// options, then an error is returned. - pub fn build(self) -> Result { - let expr = try!(self.parse()); - let literals = LiteralSets::create(&expr); - let re = try!(self.regex(&expr)); - let required = match literals.to_regex_builder() { - Some(builder) => Some(try!(self.regex_build(builder))), - None => { - match strip_unicode_word_boundaries(&expr) { - None => None, - Some(expr) => { - debug!("Stripped Unicode word boundaries. \ - New AST:\n{:?}", expr); - self.regex(&expr).ok() - } - } - } - }; - Ok(Grep { - re: re, - required: required, - opts: self.opts, - }) - } - - /// Creates a new regex from the given expression with the current - /// configuration. 
- fn regex(&self, expr: &Expr) -> Result { - let mut builder = RegexBuilder::new(&expr.to_string()); - builder.unicode(true); - self.regex_build(builder) - } - - /// Builds a new regex from the given builder using the caller's settings. - fn regex_build(&self, mut builder: RegexBuilder) -> Result { - builder - .multi_line(true) - .size_limit(self.opts.size_limit) - .dfa_size_limit(self.opts.dfa_size_limit) - .build() - .map_err(From::from) - } - - /// Parses the underlying pattern and ensures the pattern can never match - /// the line terminator. - fn parse(&self) -> Result { - let expr = - try!(syntax::ExprBuilder::new() - .allow_bytes(true) - .unicode(true) - .case_insensitive(try!(self.is_case_insensitive())) - .parse(&self.pattern)); - let expr = try!(nonl::remove(expr, self.opts.line_terminator)); - debug!("regex ast:\n{:#?}", expr); - Ok(expr) - } - - /// Determines whether the case insensitive flag should be enabled or not. - /// - /// An error is returned if the regex could not be parsed. - fn is_case_insensitive(&self) -> Result { - if self.opts.case_insensitive { - return Ok(true); - } - if !self.opts.case_smart { - return Ok(false); - } - let expr = - try!(syntax::ExprBuilder::new() - .allow_bytes(true) - .unicode(true) - .parse(&self.pattern)); - Ok(!has_uppercase_literal(&expr)) - } -} - -impl Grep { - /// Returns a reference to the underlying regex used by the searcher. - pub fn regex(&self) -> &Regex { - &self.re - } - - /// Returns an iterator over all matches in the given buffer. - pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> { - Iter { - searcher: self, - buf: buf, - start: 0, - } - } - - /// Fills in the next line that matches in the given buffer starting at - /// the position given. - /// - /// If no match could be found, `false` is returned, otherwise, `true` is - /// returned. 
- pub fn read_match( - &self, - mat: &mut Match, - buf: &[u8], - mut start: usize, - ) -> bool { - if start >= buf.len() { - return false; - } - if let Some(ref req) = self.required { - while start < buf.len() { - let e = match req.shortest_match(&buf[start..]) { - None => return false, - Some(e) => start + e, - }; - let (prevnl, nextnl) = self.find_line(buf, e, e); - match self.re.shortest_match(&buf[prevnl..nextnl]) { - None => { - start = nextnl; - continue; - } - Some(_) => { - self.fill_match(mat, prevnl, nextnl); - return true; - } - } - } - false - } else { - let e = match self.re.shortest_match(&buf[start..]) { - None => return false, - Some(e) => start + e, - }; - let (s, e) = self.find_line(buf, e, e); - self.fill_match(mat, s, e); - true - } - } - - fn fill_match(&self, mat: &mut Match, start: usize, end: usize) { - mat.start = start; - mat.end = end; - } - - fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) { - (self.find_line_start(buf, s), self.find_line_end(buf, e)) - } - - fn find_line_start(&self, buf: &[u8], pos: usize) -> usize { - memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, |i| i + 1) - } - - fn find_line_end(&self, buf: &[u8], pos: usize) -> usize { - memchr(self.opts.line_terminator, &buf[pos..]) - .map_or(buf.len(), |i| pos + i + 1) - } -} - -/// An iterator over all matches in a particular buffer. -/// -/// `'b` refers to the lifetime of the buffer, and `'s` refers to the lifetime -/// of the searcher. 
-pub struct Iter<'b, 's> { - searcher: &'s Grep, - buf: &'b [u8], - start: usize, -} - -impl<'b, 's> Iterator for Iter<'b, 's> { - type Item = Match; - - fn next(&mut self) -> Option { - let mut mat = Match::default(); - if !self.searcher.read_match(&mut mat, self.buf, self.start) { - self.start = self.buf.len(); - return None; - } - self.start = mat.end; - Some(mat) - } -} - -fn has_uppercase_literal(expr: &Expr) -> bool { - use syntax::Expr::*; - fn byte_is_upper(b: u8) -> bool { b'A' <= b && b <= b'Z' } - match *expr { - Literal { ref chars, casei } => { - casei || chars.iter().any(|c| c.is_uppercase()) - } - LiteralBytes { ref bytes, casei } => { - casei || bytes.iter().any(|&b| byte_is_upper(b)) - } - Class(ref ranges) => { - for r in ranges { - if r.start.is_uppercase() || r.end.is_uppercase() { - return true; - } - } - false - } - ClassBytes(ref ranges) => { - for r in ranges { - if byte_is_upper(r.start) || byte_is_upper(r.end) { - return true; - } - } - false - } - Group { ref e, .. } => has_uppercase_literal(e), - Repeat { ref e, .. 
} => has_uppercase_literal(e), - Concat(ref es) => es.iter().any(has_uppercase_literal), - Alternate(ref es) => es.iter().any(has_uppercase_literal), - _ => false, - } -} - -#[cfg(test)] -mod tests { - #![allow(unused_imports)] - - use memchr::{memchr, memrchr}; - use regex::bytes::Regex; - - use super::{GrepBuilder, Match}; - - static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt"); - - #[allow(dead_code)] - fn s(bytes: &[u8]) -> String { - String::from_utf8(bytes.to_vec()).unwrap() - } - - fn find_lines(pat: &str, haystack: &[u8]) -> Vec { - let re = Regex::new(pat).unwrap(); - let mut lines = vec![]; - for m in re.find_iter(haystack) { - let start = memrchr(b'\n', &haystack[..m.start()]) - .map_or(0, |i| i + 1); - let end = memchr(b'\n', &haystack[m.end()..]) - .map_or(haystack.len(), |i| m.end() + i + 1); - lines.push(Match { - start: start, - end: end, - }); - } - lines - } - - fn grep_lines(pat: &str, haystack: &[u8]) -> Vec { - let g = GrepBuilder::new(pat).build().unwrap(); - g.iter(haystack).collect() - } - - #[test] - fn buffered_literal() { - let expected = find_lines("Sherlock Holmes", SHERLOCK); - let got = grep_lines("Sherlock Holmes", SHERLOCK); - assert_eq!(expected.len(), got.len()); - assert_eq!(expected, got); - } -} diff -Nru ripgrep-0.6.0/grep/src/word_boundary.rs ripgrep-0.10.0.3/grep/src/word_boundary.rs --- ripgrep-0.6.0/grep/src/word_boundary.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/grep/src/word_boundary.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,54 +0,0 @@ -use syntax::Expr; - -/// Strips Unicode word boundaries from the given expression. -/// -/// The key invariant this maintains is that the expression returned will match -/// *at least* every where the expression given will match. Namely, a match of -/// the returned expression can report false positives but it will never report -/// false negatives. -/// -/// If no word boundaries could be stripped, then None is returned. 
-pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option { - // The real reason we do this is because Unicode word boundaries are the - // one thing that Rust's regex DFA engine can't handle. When it sees a - // Unicode word boundary among non-ASCII text, it falls back to one of the - // slower engines. We work around this limitation by attempting to use - // a regex to find candidate matches without a Unicode word boundary. We'll - // only then use the full (and slower) regex to confirm a candidate as a - // match or not during search. - use syntax::Expr::*; - - match *expr { - Concat(ref es) if !es.is_empty() => { - let first = is_unicode_word_boundary(&es[0]); - let last = is_unicode_word_boundary(es.last().unwrap()); - // Be careful not to strip word boundaries if there are no other - // expressions to match. - match (first, last) { - (true, false) if es.len() > 1 => { - Some(Concat(es[1..].to_vec())) - } - (false, true) if es.len() > 1 => { - Some(Concat(es[..es.len() - 1].to_vec())) - } - (true, true) if es.len() > 2 => { - Some(Concat(es[1..es.len() - 1].to_vec())) - } - _ => None, - } - } - _ => None, - } -} - -/// Returns true if the given expression is a Unicode word boundary. -fn is_unicode_word_boundary(expr: &Expr) -> bool { - use syntax::Expr::*; - - match *expr { - WordBoundary => true, - NotWordBoundary => true, - Group { ref e, .. } => is_unicode_word_boundary(e), - _ => false, - } -} diff -Nru ripgrep-0.6.0/grep-cli/Cargo.toml ripgrep-0.10.0.3/grep-cli/Cargo.toml --- ripgrep-0.6.0/grep-cli/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,25 @@ +[package] +name = "grep-cli" +version = "0.1.1" #:version +authors = ["Andrew Gallant "] +description = """ +Utilities for search oriented command line applications. 
+""" +documentation = "https://docs.rs/grep-cli" +homepage = "https://github.com/BurntSushi/ripgrep" +repository = "https://github.com/BurntSushi/ripgrep" +readme = "README.md" +keywords = ["regex", "grep", "cli", "utility", "util"] +license = "Unlicense/MIT" + +[dependencies] +atty = "0.2.11" +globset = { version = "0.4.2", path = "../globset" } +lazy_static = "1.1.0" +log = "0.4.5" +regex = "1.0.5" +same-file = "1.0.3" +termcolor = "1.0.3" + +[target.'cfg(windows)'.dependencies.winapi-util] +version = "0.1.1" diff -Nru ripgrep-0.6.0/grep-cli/LICENSE-MIT ripgrep-0.10.0.3/grep-cli/LICENSE-MIT --- ripgrep-0.6.0/grep-cli/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/LICENSE-MIT 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff -Nru ripgrep-0.6.0/grep-cli/README.md ripgrep-0.10.0.3/grep-cli/README.md --- ripgrep-0.6.0/grep-cli/README.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,38 @@ +grep-cli +-------- +A utility library that provides common routines desired in search oriented +command line applications. This includes, but is not limited to, parsing hex +escapes, detecting whether stdin is readable and more. To the extent possible, +this crate strives for compatibility across Windows, macOS and Linux. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep-cli.svg)](https://crates.io/crates/grep-cli) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + + +### Documentation + +[https://docs.rs/grep-cli](https://docs.rs/grep-cli) + +**NOTE:** You probably don't want to use this crate directly. Instead, you +should prefer the facade defined in the +[`grep`](https://docs.rs/grep) +crate. 
+ + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep-cli = "0.1" +``` + +and this to your crate root: + +```rust +extern crate grep_cli; +``` diff -Nru ripgrep-0.6.0/grep-cli/src/decompress.rs ripgrep-0.10.0.3/grep-cli/src/decompress.rs --- ripgrep-0.6.0/grep-cli/src/decompress.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/src/decompress.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,381 @@ +use std::ffi::{OsStr, OsString}; +use std::fs::File; +use std::io; +use std::path::Path; +use std::process::Command; + +use globset::{Glob, GlobSet, GlobSetBuilder}; + +use process::{CommandError, CommandReader, CommandReaderBuilder}; + +/// A builder for a matcher that determines which files get decompressed. +#[derive(Clone, Debug)] +pub struct DecompressionMatcherBuilder { + /// The commands for each matching glob. + commands: Vec, + /// Whether to include the default matching rules. + defaults: bool, +} + +/// A representation of a single command for decompressing data +/// out-of-proccess. +#[derive(Clone, Debug)] +struct DecompressionCommand { + /// The glob that matches this command. + glob: String, + /// The command or binary name. + bin: OsString, + /// The arguments to invoke with the command. + args: Vec, +} + +impl Default for DecompressionMatcherBuilder { + fn default() -> DecompressionMatcherBuilder { + DecompressionMatcherBuilder::new() + } +} + +impl DecompressionMatcherBuilder { + /// Create a new builder for configuring a decompression matcher. + pub fn new() -> DecompressionMatcherBuilder { + DecompressionMatcherBuilder { + commands: vec![], + defaults: true, + } + } + + /// Build a matcher for determining how to decompress files. + /// + /// If there was a problem compiling the matcher, then an error is + /// returned. 
+ pub fn build(&self) -> Result { + let defaults = + if !self.defaults { + vec![] + } else { + default_decompression_commands() + }; + let mut glob_builder = GlobSetBuilder::new(); + let mut commands = vec![]; + for decomp_cmd in defaults.iter().chain(&self.commands) { + let glob = Glob::new(&decomp_cmd.glob).map_err(|err| { + CommandError::io(io::Error::new(io::ErrorKind::Other, err)) + })?; + glob_builder.add(glob); + commands.push(decomp_cmd.clone()); + } + let globs = glob_builder.build().map_err(|err| { + CommandError::io(io::Error::new(io::ErrorKind::Other, err)) + })?; + Ok(DecompressionMatcher { globs, commands }) + } + + /// When enabled, the default matching rules will be compiled into this + /// matcher before any other associations. When disabled, only the + /// rules explicitly given to this builder will be used. + /// + /// This is enabled by default. + pub fn defaults(&mut self, yes: bool) -> &mut DecompressionMatcherBuilder { + self.defaults = yes; + self + } + + /// Associates a glob with a command to decompress files matching the glob. + /// + /// If multiple globs match the same file, then the most recently added + /// glob takes precedence. + /// + /// The syntax for the glob is documented in the + /// [`globset` crate](https://docs.rs/globset/#syntax). + pub fn associate( + &mut self, + glob: &str, + program: P, + args: I, + ) -> &mut DecompressionMatcherBuilder + where P: AsRef, + I: IntoIterator, + A: AsRef, + { + + let glob = glob.to_string(); + let bin = program.as_ref().to_os_string(); + let args = args + .into_iter() + .map(|a| a.as_ref().to_os_string()) + .collect(); + self.commands.push(DecompressionCommand { glob, bin, args }); + self + } +} + +/// A matcher for determining how to decompress files. +#[derive(Clone, Debug)] +pub struct DecompressionMatcher { + /// The set of globs to match. Each glob has a corresponding entry in + /// `commands`. 
When a glob matches, the corresponding command should be + /// used to perform out-of-process decompression. + globs: GlobSet, + /// The commands for each matching glob. + commands: Vec, +} + +impl Default for DecompressionMatcher { + fn default() -> DecompressionMatcher { + DecompressionMatcher::new() + } +} + +impl DecompressionMatcher { + /// Create a new matcher with default rules. + /// + /// To add more matching rules, build a matcher with + /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html). + pub fn new() -> DecompressionMatcher { + DecompressionMatcherBuilder::new() + .build() + .expect("built-in matching rules should always compile") + } + + /// Return a pre-built command based on the given file path that can + /// decompress its contents. If no such decompressor is known, then this + /// returns `None`. + /// + /// If there are multiple possible commands matching the given path, then + /// the command added last takes precedence. + pub fn command>(&self, path: P) -> Option { + for i in self.globs.matches(path).into_iter().rev() { + let decomp_cmd = &self.commands[i]; + let mut cmd = Command::new(&decomp_cmd.bin); + cmd.args(&decomp_cmd.args); + return Some(cmd); + } + None + } + + /// Returns true if and only if the given file path has at least one + /// matching command to perform decompression on. + pub fn has_command>(&self, path: P) -> bool { + self.globs.is_match(path) + } +} + +/// Configures and builds a streaming reader for decompressing data. +#[derive(Clone, Debug, Default)] +pub struct DecompressionReaderBuilder { + matcher: DecompressionMatcher, + command_builder: CommandReaderBuilder, +} + +impl DecompressionReaderBuilder { + /// Create a new builder with the default configuration. + pub fn new() -> DecompressionReaderBuilder { + DecompressionReaderBuilder::default() + } + + /// Build a new streaming reader for decompressing data. 
+ /// + /// If decompression is done out-of-process and if there was a problem + /// spawning the process, then its error is logged at the debug level and a + /// passthru reader is returned that does no decompression. This behavior + /// typically occurs when the given file path matches a decompression + /// command, but is executing in an environment where the decompression + /// command is not available. + /// + /// If the given file path could not be matched with a decompression + /// strategy, then a passthru reader is returned that does no + /// decompression. + pub fn build>( + &self, + path: P, + ) -> Result { + let path = path.as_ref(); + let mut cmd = match self.matcher.command(path) { + None => return DecompressionReader::new_passthru(path), + Some(cmd) => cmd, + }; + cmd.arg(path); + + match self.command_builder.build(&mut cmd) { + Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }), + Err(err) => { + debug!( + "{}: error spawning command '{:?}': {} \ + (falling back to uncompressed reader)", + path.display(), + cmd, + err, + ); + DecompressionReader::new_passthru(path) + } + } + } + + /// Set the matcher to use to look up the decompression command for each + /// file path. + /// + /// A set of sensible rules is enabled by default. Setting this will + /// completely replace the current rules. + pub fn matcher( + &mut self, + matcher: DecompressionMatcher, + ) -> &mut DecompressionReaderBuilder { + self.matcher = matcher; + self + } + + /// Get the underlying matcher currently used by this builder. + pub fn get_matcher(&self) -> &DecompressionMatcher { + &self.matcher + } + + /// When enabled, the reader will asynchronously read the contents of the + /// command's stderr output. When disabled, stderr is only read after the + /// stdout stream has been exhausted (or if the process quits with an error + /// code). + /// + /// Note that when enabled, this may require launching an additional + /// thread in order to read stderr. 
This is done so that the process being + /// executed is never blocked from writing to stdout or stderr. If this is + /// disabled, then it is possible for the process to fill up the stderr + /// buffer and deadlock. + /// + /// This is enabled by default. + pub fn async_stderr( + &mut self, + yes: bool, + ) -> &mut DecompressionReaderBuilder { + self.command_builder.async_stderr(yes); + self + } +} + +/// A streaming reader for decompressing the contents of a file. +/// +/// The purpose of this reader is to provide a seamless way to decompress the +/// contents of file using existing tools in the current environment. This is +/// meant to be an alternative to using decompression libraries in favor of the +/// simplicity and portability of using external commands such as `gzip` and +/// `xz`. This does impose the overhead of spawning a process, so other means +/// for performing decompression should be sought if this overhead isn't +/// acceptable. +/// +/// A decompression reader comes with a default set of matching rules that are +/// meant to associate file paths with the corresponding command to use to +/// decompress them. For example, a glob like `*.gz` matches gzip compressed +/// files with the command `gzip -d -c`. If a file path does not match any +/// existing rules, or if it matches a rule whose command does not exist in the +/// current environment, then the decompression reader passes through the +/// contents of the underlying file without doing any decompression. +/// +/// The default matching rules are probably good enough for most cases, and if +/// they require revision, pull requests are welcome. In cases where they must +/// be changed or extended, they can be customized through the use of +/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html) +/// and +/// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html). +/// +/// By default, this reader will asynchronously read the processes' stderr. 
+/// This prevents subtle deadlocking bugs for noisy processes that write a lot +/// to stderr. Currently, the entire contents of stderr is read onto the heap. +/// +/// # Example +/// +/// This example shows how to read the decompressed contents of a file without +/// needing to explicitly choose the decompression command to run. +/// +/// Note that if you need to decompress multiple files, it is better to use +/// `DecompressionReaderBuilder`, which will amortize the cost of compiling the +/// matcher. +/// +/// ```no_run +/// use std::io::Read; +/// use std::process::Command; +/// use grep_cli::DecompressionReader; +/// +/// # fn example() -> Result<(), Box<::std::error::Error>> { +/// let mut rdr = DecompressionReader::new("/usr/share/man/man1/ls.1.gz")?; +/// let mut contents = vec![]; +/// rdr.read_to_end(&mut contents)?; +/// # Ok(()) } +/// ``` +#[derive(Debug)] +pub struct DecompressionReader { + rdr: Result, +} + +impl DecompressionReader { + /// Build a new streaming reader for decompressing data. + /// + /// If decompression is done out-of-process and if there was a problem + /// spawning it, the error is logged and a passthru reader is returned. + /// + /// If the given file path could not be matched with a decompression + /// strategy, then a passthru reader is returned that does no + /// decompression. + /// + /// This uses the default matching rules for determining how to decompress + /// the given file. To change those matching rules, use + /// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html) + /// and + /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html). + /// + /// When creating readers for many paths, it is better to use the builder + /// since it will amortize the cost of constructing the matcher.
+ pub fn new>( + path: P, + ) -> Result { + DecompressionReaderBuilder::new().build(path) + } + + /// Creates a new "passthru" decompression reader that reads from the file + /// corresponding to the given path without doing decompression and without + /// executing another process. + fn new_passthru(path: &Path) -> Result { + let file = File::open(path)?; + Ok(DecompressionReader { rdr: Err(file) }) + } +} + +impl io::Read for DecompressionReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self.rdr { + Ok(ref mut rdr) => rdr.read(buf), + Err(ref mut rdr) => rdr.read(buf), + } + } +} + +fn default_decompression_commands() -> Vec { + const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"]; + const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"]; + const ARGS_XZ: &[&str] = &["xz", "-d", "-c"]; + const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"]; + const ARGS_LZMA: &[&str] = &["xz", "--format=lzma", "-d", "-c"]; + + fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand { + DecompressionCommand { + glob: glob.to_string(), + bin: OsStr::new(&args[0]).to_os_string(), + args: args + .iter() + .skip(1) + .map(|s| OsStr::new(s).to_os_string()) + .collect(), + } + } + vec![ + cmd("*.gz", ARGS_GZIP), + cmd("*.tgz", ARGS_GZIP), + + cmd("*.bz2", ARGS_BZIP), + cmd("*.tbz2", ARGS_BZIP), + + cmd("*.xz", ARGS_XZ), + cmd("*.txz", ARGS_XZ), + + cmd("*.lz4", ARGS_LZ4), + + cmd("*.lzma", ARGS_LZMA), + ] +} diff -Nru ripgrep-0.6.0/grep-cli/src/escape.rs ripgrep-0.10.0.3/grep-cli/src/escape.rs --- ripgrep-0.6.0/grep-cli/src/escape.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/src/escape.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,315 @@ +use std::ffi::OsStr; +use std::str; + +/// A single state in the state machine used by `unescape`. +#[derive(Clone, Copy, Eq, PartialEq)] +enum State { + /// The state after seeing a `\`. + Escape, + /// The state after seeing a `\x`. + HexFirst, + /// The state after seeing a `\x[0-9A-Fa-f]`. 
+ HexSecond(char), + /// Default state. + Literal, +} + +/// Escapes arbitrary bytes into a human readable string. +/// +/// This converts `\t`, `\r` and `\n` into their escaped forms. It also +/// converts the non-printable subset of ASCII in addition to invalid UTF-8 +/// bytes to hexadecimal escape sequences. Everything else is left as is. +/// +/// The dual of this routine is [`unescape`](fn.unescape.html). +/// +/// # Example +/// +/// This example shows how to convert a byte string that contains a `\n` and +/// invalid UTF-8 bytes into a `String`. +/// +/// Pay special attention to the use of raw strings. That is, `r"\n"` is +/// equivalent to `"\\n"`. +/// +/// ``` +/// use grep_cli::escape; +/// +/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz")); +/// ``` +pub fn escape(mut bytes: &[u8]) -> String { + let mut escaped = String::new(); + while let Some(result) = decode_utf8(bytes) { + match result { + Ok(cp) => { + escape_char(cp, &mut escaped); + bytes = &bytes[cp.len_utf8()..]; + } + Err(byte) => { + escape_byte(byte, &mut escaped); + bytes = &bytes[1..]; + } + } + } + escaped +} + +/// Escapes an OS string into a human readable string. +/// +/// This is like [`escape`](fn.escape.html), but accepts an OS string. +pub fn escape_os(string: &OsStr) -> String { + #[cfg(unix)] + fn imp(string: &OsStr) -> String { + use std::os::unix::ffi::OsStrExt; + + escape(string.as_bytes()) + } + + #[cfg(not(unix))] + fn imp(string: &OsStr) -> String { + escape(string.to_string_lossy().as_bytes()) + } + + imp(string) +} + +/// Unescapes a string. +/// +/// It supports a limited set of escape sequences: +/// +/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes. +/// * `\xZZ` hexadecimal escapes are mapped to their byte. +/// +/// Everything else is left as is, including non-hexadecimal escapes like +/// `\xGG`. 
+/// +/// This is useful when it is desirable for a command line argument to be +/// capable of specifying arbitrary bytes or otherwise make it easier to +/// specify non-printable characters. +/// +/// The dual of this routine is [`escape`](fn.escape.html). +/// +/// # Example +/// +/// This example shows how to convert an escaped string (which is valid UTF-8) +/// into a corresponding sequence of bytes. Each escape sequence is mapped to +/// its bytes, which may include invalid UTF-8. +/// +/// Pay special attention to the use of raw strings. That is, `r"\n"` is +/// equivalent to `"\\n"`. +/// +/// ``` +/// use grep_cli::unescape; +/// +/// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz")); +/// ``` +pub fn unescape(s: &str) -> Vec { + use self::State::*; + + let mut bytes = vec![]; + let mut state = Literal; + for c in s.chars() { + match state { + Escape => { + match c { + '\\' => { bytes.push(b'\\'); state = Literal; } + 'n' => { bytes.push(b'\n'); state = Literal; } + 'r' => { bytes.push(b'\r'); state = Literal; } + 't' => { bytes.push(b'\t'); state = Literal; } + 'x' => { state = HexFirst; } + c => { + bytes.extend(format!(r"\{}", c).into_bytes()); + state = Literal; + } + } + } + HexFirst => { + match c { + '0'...'9' | 'A'...'F' | 'a'...'f' => { + state = HexSecond(c); + } + c => { + bytes.extend(format!(r"\x{}", c).into_bytes()); + state = Literal; + } + } + } + HexSecond(first) => { + match c { + '0'...'9' | 'A'...'F' | 'a'...'f' => { + let ordinal = format!("{}{}", first, c); + let byte = u8::from_str_radix(&ordinal, 16).unwrap(); + bytes.push(byte); + state = Literal; + } + c => { + let original = format!(r"\x{}{}", first, c); + bytes.extend(original.into_bytes()); + state = Literal; + } + } + } + Literal => { + match c { + '\\' => { state = Escape; } + c => { bytes.extend(c.to_string().as_bytes()); } + } + } + } + } + match state { + Escape => bytes.push(b'\\'), + HexFirst => bytes.extend(b"\\x"), + HexSecond(c) => 
bytes.extend(format!("\\x{}", c).into_bytes()), + Literal => {} + } + bytes +} + +/// Unescapes an OS string. +/// +/// This is like [`unescape`](fn.unescape.html), but accepts an OS string. +/// +/// Note that this first lossily decodes the given OS string as UTF-8. That +/// is, an escaped string (the thing given) should be valid UTF-8. +pub fn unescape_os(string: &OsStr) -> Vec { + unescape(&string.to_string_lossy()) +} + +/// Adds the given codepoint to the given string, escaping it if necessary. +fn escape_char(cp: char, into: &mut String) { + if cp.is_ascii() { + escape_byte(cp as u8, into); + } else { + into.push(cp); + } +} + +/// Adds the given byte to the given string, escaping it if necessary. +fn escape_byte(byte: u8, into: &mut String) { + match byte { + 0x21...0x5B | 0x5D...0x7D => into.push(byte as char), + b'\n' => into.push_str(r"\n"), + b'\r' => into.push_str(r"\r"), + b'\t' => into.push_str(r"\t"), + b'\\' => into.push_str(r"\\"), + _ => into.push_str(&format!(r"\x{:02X}", byte)), + } +} + +/// Decodes the next UTF-8 encoded codepoint from the given byte slice. +/// +/// If no valid encoding of a codepoint exists at the beginning of the given +/// byte slice, then the first byte is returned instead. +/// +/// This returns `None` if and only if `bytes` is empty. +fn decode_utf8(bytes: &[u8]) -> Option> { + if bytes.is_empty() { + return None; + } + let len = match utf8_len(bytes[0]) { + None => return Some(Err(bytes[0])), + Some(len) if len > bytes.len() => return Some(Err(bytes[0])), + Some(len) => len, + }; + match str::from_utf8(&bytes[..len]) { + Ok(s) => Some(Ok(s.chars().next().unwrap())), + Err(_) => Some(Err(bytes[0])), + } +} + +/// Given a UTF-8 leading byte, this returns the total number of code units +/// in the following encoded codepoint. +/// +/// If the given byte is not a valid UTF-8 leading byte, then this returns +/// `None`. 
+fn utf8_len(byte: u8) -> Option { + if byte <= 0x7F { + Some(1) + } else if byte <= 0b110_11111 { + Some(2) + } else if byte <= 0b1110_1111 { + Some(3) + } else if byte <= 0b1111_0111 { + Some(4) + } else { + None + } +} + +#[cfg(test)] +mod tests { + use super::{escape, unescape}; + + fn b(bytes: &'static [u8]) -> Vec { + bytes.to_vec() + } + + #[test] + fn empty() { + assert_eq!(b(b""), unescape(r"")); + assert_eq!(r"", escape(b"")); + } + + #[test] + fn backslash() { + assert_eq!(b(b"\\"), unescape(r"\\")); + assert_eq!(r"\\", escape(b"\\")); + } + + #[test] + fn nul() { + assert_eq!(b(b"\x00"), unescape(r"\x00")); + assert_eq!(r"\x00", escape(b"\x00")); + } + + #[test] + fn nl() { + assert_eq!(b(b"\n"), unescape(r"\n")); + assert_eq!(r"\n", escape(b"\n")); + } + + #[test] + fn tab() { + assert_eq!(b(b"\t"), unescape(r"\t")); + assert_eq!(r"\t", escape(b"\t")); + } + + #[test] + fn carriage() { + assert_eq!(b(b"\r"), unescape(r"\r")); + assert_eq!(r"\r", escape(b"\r")); + } + + #[test] + fn nothing_simple() { + assert_eq!(b(b"\\a"), unescape(r"\a")); + assert_eq!(b(b"\\a"), unescape(r"\\a")); + assert_eq!(r"\\a", escape(b"\\a")); + } + + #[test] + fn nothing_hex0() { + assert_eq!(b(b"\\x"), unescape(r"\x")); + assert_eq!(b(b"\\x"), unescape(r"\\x")); + assert_eq!(r"\\x", escape(b"\\x")); + } + + #[test] + fn nothing_hex1() { + assert_eq!(b(b"\\xz"), unescape(r"\xz")); + assert_eq!(b(b"\\xz"), unescape(r"\\xz")); + assert_eq!(r"\\xz", escape(b"\\xz")); + } + + #[test] + fn nothing_hex2() { + assert_eq!(b(b"\\xzz"), unescape(r"\xzz")); + assert_eq!(b(b"\\xzz"), unescape(r"\\xzz")); + assert_eq!(r"\\xzz", escape(b"\\xzz")); + } + + #[test] + fn invalid_utf8() { + assert_eq!(r"\xFF", escape(b"\xFF")); + assert_eq!(r"a\xFFb", escape(b"a\xFFb")); + } +} diff -Nru ripgrep-0.6.0/grep-cli/src/human.rs ripgrep-0.10.0.3/grep-cli/src/human.rs --- ripgrep-0.6.0/grep-cli/src/human.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/src/human.rs 2018-09-10 
21:10:55.000000000 +0000 @@ -0,0 +1,171 @@ +use std::error; +use std::fmt; +use std::io; +use std::num::ParseIntError; + +use regex::Regex; + +/// An error that occurs when parsing a human readable size description. +/// +/// This error provides an end user friendly message describing why the +/// description couldn't be parsed and what the expected format is. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ParseSizeError { + original: String, + kind: ParseSizeErrorKind, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +enum ParseSizeErrorKind { + InvalidFormat, + InvalidInt(ParseIntError), + Overflow, +} + +impl ParseSizeError { + fn format(original: &str) -> ParseSizeError { + ParseSizeError { + original: original.to_string(), + kind: ParseSizeErrorKind::InvalidFormat, + } + } + + fn int(original: &str, err: ParseIntError) -> ParseSizeError { + ParseSizeError { + original: original.to_string(), + kind: ParseSizeErrorKind::InvalidInt(err), + } + } + + fn overflow(original: &str) -> ParseSizeError { + ParseSizeError { + original: original.to_string(), + kind: ParseSizeErrorKind::Overflow, + } + } +} + +impl error::Error for ParseSizeError { + fn description(&self) -> &str { "invalid size" } +} + +impl fmt::Display for ParseSizeError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use self::ParseSizeErrorKind::*; + + match self.kind { + InvalidFormat => { + write!( + f, + "invalid format for size '{}', which should be a sequence \ + of digits followed by an optional 'K', 'M' or 'G' \ + suffix", + self.original + ) + } + InvalidInt(ref err) => { + write!( + f, + "invalid integer found in size '{}': {}", + self.original, + err + ) + } + Overflow => { + write!(f, "size too big in '{}'", self.original) + } + } + } +} + +impl From for io::Error { + fn from(size_err: ParseSizeError) -> io::Error { + io::Error::new(io::ErrorKind::Other, size_err) + } +} + +/// Parse a human readable size like `2M` into a corresponding number of bytes.
+/// +/// Supported size suffixes are `K` (for kilobyte), `M` (for megabyte) and `G` +/// (for gigabyte). If a size suffix is missing, then the size is interpreted +/// as bytes. If the size is too big to fit into a `u64`, then this returns an +/// error. +/// +/// Additional suffixes may be added over time. +pub fn parse_human_readable_size(size: &str) -> Result { + lazy_static! { + // Normally I'd just parse something this simple by hand to avoid the + // regex dep, but we bring regex in anyway for glob matching, so might + // as well use it. + static ref RE: Regex = Regex::new(r"^([0-9]+)([KMG])?$").unwrap(); + } + + let caps = match RE.captures(size) { + Some(caps) => caps, + None => return Err(ParseSizeError::format(size)), + }; + let value: u64 = caps[1].parse().map_err(|err| { + ParseSizeError::int(size, err) + })?; + let suffix = match caps.get(2) { + None => return Ok(value), + Some(cap) => cap.as_str(), + }; + let bytes = match suffix { + "K" => value.checked_mul(1<<10), + "M" => value.checked_mul(1<<20), + "G" => value.checked_mul(1<<30), + // Because if the regex matches this group, it must be [KMG].
+ _ => unreachable!(), + }; + bytes.ok_or_else(|| ParseSizeError::overflow(size)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn suffix_none() { + let x = parse_human_readable_size("123").unwrap(); + assert_eq!(123, x); + } + + #[test] + fn suffix_k() { + let x = parse_human_readable_size("123K").unwrap(); + assert_eq!(123 * (1<<10), x); + } + + #[test] + fn suffix_m() { + let x = parse_human_readable_size("123M").unwrap(); + assert_eq!(123 * (1<<20), x); + } + + #[test] + fn suffix_g() { + let x = parse_human_readable_size("123G").unwrap(); + assert_eq!(123 * (1<<30), x); + } + + #[test] + fn invalid_empty() { + assert!(parse_human_readable_size("").is_err()); + } + + #[test] + fn invalid_non_digit() { + assert!(parse_human_readable_size("a").is_err()); + } + + #[test] + fn invalid_overflow() { + assert!(parse_human_readable_size("9999999999999999G").is_err()); + } + + #[test] + fn invalid_suffix() { + assert!(parse_human_readable_size("123T").is_err()); + } +} diff -Nru ripgrep-0.6.0/grep-cli/src/lib.rs ripgrep-0.10.0.3/grep-cli/src/lib.rs --- ripgrep-0.6.0/grep-cli/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/src/lib.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,251 @@ +/*! +This crate provides common routines used in command line applications, with a +focus on routines useful for search oriented applications. As a utility +library, there is no central type or function. However, a key focus of this +crate is to improve failure modes and provide user friendly error messages +when things go wrong. + +To the best extent possible, everything in this crate works on Windows, macOS +and Linux. + + +# Standard I/O + +The +[`is_readable_stdin`](fn.is_readable_stdin.html), +[`is_tty_stderr`](fn.is_tty_stderr.html), +[`is_tty_stdin`](fn.is_tty_stdin.html) +and +[`is_tty_stdout`](fn.is_tty_stdout.html) +routines query aspects of standard I/O. 
`is_readable_stdin` determines whether +stdin can be usefully read from, while the `tty` methods determine whether a +tty is attached to stdin/stdout/stderr. + +`is_readable_stdin` is useful when writing an application that changes behavior +based on whether the application was invoked with data on stdin. For example, +`rg foo` might recursively search the current working directory for +occurrences of `foo`, but `rg foo < file` might only search the contents of +`file`. + +The `tty` methods are useful for similar reasons. Namely, commands like `ls` +will change their output depending on whether they are printing to a terminal +or not. For example, `ls` shows a file on each line when stdout is redirected +to a file or a pipe, but condenses the output to show possibly many files on +each line when stdout is connected to a tty. + + +# Coloring and buffering + +The +[`stdout`](fn.stdout.html), +[`stdout_buffered_block`](fn.stdout_buffered_block.html) +and +[`stdout_buffered_line`](fn.stdout_buffered_line.html) +routines are alternative constructors for +[`StandardStream`](struct.StandardStream.html). +A `StandardStream` implements `termcolor::WriteColor`, which provides a way +to emit colors to terminals. Its key use is the encapsulation of buffering +style. Namely, `stdout` will return a line buffered `StandardStream` if and +only if stdout is connected to a tty, and will otherwise return a block +buffered `StandardStream`. Line buffering is important for use with a tty +because it typically decreases the latency at which the end user sees output. +Block buffering is used otherwise because it is faster, and redirecting stdout +to a file typically doesn't benefit from the decreased latency that line +buffering provides. + +The `stdout_buffered_block` and `stdout_buffered_line` can be used to +explicitly set the buffering strategy regardless of whether stdout is connected +to a tty or not. 
+ + +# Escaping + +The +[`escape`](fn.escape.html), +[`escape_os`](fn.escape_os.html), +[`unescape`](fn.unescape.html) +and +[`unescape_os`](fn.unescape_os.html) +routines provide a user friendly way of dealing with UTF-8 encoded strings that +can express arbitrary bytes. For example, you might want to accept a string +containing arbitrary bytes as a command line argument, but most interactive +shells make such strings difficult to type. Instead, we can ask users to use +escape sequences. + +For example, `a\xFFz` is itself a valid UTF-8 string corresponding to the +following bytes: + +```ignore +[b'a', b'\\', b'x', b'F', b'F', b'z'] +``` + +However, we can +interpret `\xFF` as an escape sequence with the `unescape`/`unescape_os` +routines, which will yield + +```ignore +[b'a', b'\xFF', b'z'] +``` + +instead. For example: + +``` +use grep_cli::unescape; + +// Note the use of a raw string! +assert_eq!(vec![b'a', b'\xFF', b'z'], unescape(r"a\xFFz")); +``` + +The `escape`/`escape_os` routines provide the reverse transformation, which +makes it easy to show user friendly error messages involving arbitrary bytes. + + +# Building patterns + +Typically, regular expression patterns must be valid UTF-8. However, command +line arguments aren't guaranteed to be valid UTF-8. Unfortunately, the +standard library's UTF-8 conversion functions from `OsStr`s do not provide +good error messages. However, the +[`pattern_from_bytes`](fn.pattern_from_bytes.html) +and +[`pattern_from_os`](fn.pattern_from_os.html) +do, including reporting exactly where the first invalid UTF-8 byte is seen. + +Additionally, it can be useful to read patterns from a file while reporting +good error messages that include line numbers. The +[`patterns_from_path`](fn.patterns_from_path.html), +[`patterns_from_reader`](fn.patterns_from_reader.html) +and +[`patterns_from_stdin`](fn.patterns_from_stdin.html) +routines do just that. 
If any pattern is found that is invalid UTF-8, then the +error includes the file path (if available) along with the line number and the +byte offset at which the first invalid UTF-8 byte was observed. + + +# Read process output + +Sometimes a command line application needs to execute another process and read +its stdout in a streaming fashion. The +[`CommandReader`](struct.CommandReader.html) +provides this functionality with an explicit goal of improving failure modes. +In particular, if the process exits with an error code, then stderr is read +and converted into a normal Rust error to show to end users. This makes the +underlying failure modes explicit and gives more information to end users for +debugging the problem. + +As a special case, +[`DecompressionReader`](struct.DecompressionReader.html) +provides a way to decompress arbitrary files by matching their file extensions +up with corresponding decompression programs (such as `gzip` and `xz`). This +is useful as a means of performing simplistic decompression in a portable +manner without binding to specific compression libraries. This does come with +some overhead though, so if you need to decompress lots of small files, this +may not be an appropriate convenience to use. + +Each reader has a corresponding builder for additional configuration, such as +whether to read stderr asynchronously in order to avoid deadlock (which is +enabled by default). + + +# Miscellaneous parsing + +The +[`parse_human_readable_size`](fn.parse_human_readable_size.html) +routine parses strings like `2M` and converts them to the corresponding number +of bytes (`2 * 1<<20` in this case). If an invalid size is found, then a good +error message is crafted that typically tells the user how to fix the problem.
+*/ + +#![deny(missing_docs)] + +extern crate atty; +extern crate globset; +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate log; +extern crate regex; +extern crate same_file; +extern crate termcolor; +#[cfg(windows)] +extern crate winapi_util; + +mod decompress; +mod escape; +mod human; +mod pattern; +mod process; +mod wtr; + +pub use decompress::{ + DecompressionMatcher, DecompressionMatcherBuilder, + DecompressionReader, DecompressionReaderBuilder, +}; +pub use escape::{escape, escape_os, unescape, unescape_os}; +pub use human::{ParseSizeError, parse_human_readable_size}; +pub use pattern::{ + InvalidPatternError, + pattern_from_os, pattern_from_bytes, + patterns_from_path, patterns_from_reader, patterns_from_stdin, +}; +pub use process::{CommandError, CommandReader, CommandReaderBuilder}; +pub use wtr::{ + StandardStream, + stdout, stdout_buffered_line, stdout_buffered_block, +}; + +/// Returns true if and only if stdin is believed to be readable. +/// +/// When stdin is readable, command line programs may choose to behave +/// differently than when stdin is not readable. For example, `command foo` +/// might search the current directory for occurrences of `foo` whereas +/// `command foo < some-file` or `cat some-file | command foo` might instead +/// only search stdin for occurrences of `foo`. +pub fn is_readable_stdin() -> bool { + #[cfg(unix)] + fn imp() -> bool { + use std::os::unix::fs::FileTypeExt; + use same_file::Handle; + + let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) { + Err(_) => return false, + Ok(md) => md.file_type(), + }; + ft.is_file() || ft.is_fifo() + } + + #[cfg(windows)] + fn imp() -> bool { + use winapi_util as winutil; + + winutil::file::typ(winutil::HandleRef::stdin()) + .map(|t| t.is_disk() || t.is_pipe()) + .unwrap_or(false) + } + + !is_tty_stdin() && imp() +} + +/// Returns true if and only if stdin is believed to be connected to a tty +/// or a console.
+pub fn is_tty_stdin() -> bool { + atty::is(atty::Stream::Stdin) +} + +/// Returns true if and only if stdout is believed to be connected to a tty +/// or a console. +/// +/// This is useful for when you want your command line program to produce +/// different output depending on whether it's printing directly to a user's +/// terminal or whether it's being redirected somewhere else. For example, +/// implementations of `ls` will often show one item per line when stdout is +/// redirected, but will condense output when printing to a tty. +pub fn is_tty_stdout() -> bool { + atty::is(atty::Stream::Stdout) +} + +/// Returns true if and only if stderr is believed to be connected to a tty +/// or a console. +pub fn is_tty_stderr() -> bool { + atty::is(atty::Stream::Stderr) +} diff -Nru ripgrep-0.6.0/grep-cli/src/pattern.rs ripgrep-0.10.0.3/grep-cli/src/pattern.rs --- ripgrep-0.6.0/grep-cli/src/pattern.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/src/pattern.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,205 @@ +use std::error; +use std::ffi::OsStr; +use std::fmt; +use std::fs::File; +use std::io::{self, BufRead}; +use std::path::Path; +use std::str; + +use escape::{escape, escape_os}; + +/// An error that occurs when a pattern could not be converted to valid UTF-8. +/// +/// The purpose of this error is to give a more targeted failure mode for +/// patterns written by end users that are not valid UTF-8. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct InvalidPatternError { + original: String, + valid_up_to: usize, +} + +impl InvalidPatternError { + /// Returns the index in the given string up to which valid UTF-8 was + /// verified.
+ pub fn valid_up_to(&self) -> usize { + self.valid_up_to + } +} + +impl error::Error for InvalidPatternError { + fn description(&self) -> &str { "invalid pattern" } +} + +impl fmt::Display for InvalidPatternError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "found invalid UTF-8 in pattern at byte offset {} \ + (use hex escape sequences to match arbitrary bytes \ + in a pattern, e.g., \\xFF): '{}'", + self.valid_up_to, + self.original, + ) + } +} + +impl From for io::Error { + fn from(paterr: InvalidPatternError) -> io::Error { + io::Error::new(io::ErrorKind::Other, paterr) + } +} + +/// Convert an OS string into a regular expression pattern. +/// +/// This conversion fails if the given pattern is not valid UTF-8, in which +/// case, a targeted error with more information about where the invalid UTF-8 +/// occurs is given. The error also suggests the use of hex escape sequences, +/// which are supported by many regex engines. +pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> { + pattern.to_str().ok_or_else(|| { + let valid_up_to = pattern + .to_string_lossy() + .find('\u{FFFD}') + .expect("a Unicode replacement codepoint for invalid UTF-8"); + InvalidPatternError { + original: escape_os(pattern), + valid_up_to: valid_up_to, + } + }) +} + +/// Convert arbitrary bytes into a regular expression pattern. +/// +/// This conversion fails if the given pattern is not valid UTF-8, in which +/// case, a targeted error with more information about where the invalid UTF-8 +/// occurs is given. The error also suggests the use of hex escape sequences, +/// which are supported by many regex engines. +pub fn pattern_from_bytes( + pattern: &[u8], +) -> Result<&str, InvalidPatternError> { + str::from_utf8(pattern).map_err(|err| { + InvalidPatternError { + original: escape(pattern), + valid_up_to: err.valid_up_to(), + } + }) +} + +/// Read patterns from a file path, one per line. 
+/// +/// If there was a problem reading or if any of the patterns contain invalid +/// UTF-8, then an error is returned. If there was a problem with a specific +/// pattern, then the error message will include the line number and the file +/// path. +pub fn patterns_from_path>(path: P) -> io::Result> { + let path = path.as_ref(); + let file = File::open(path).map_err(|err| { + io::Error::new( + io::ErrorKind::Other, + format!("{}: {}", path.display(), err), + ) + })?; + patterns_from_reader(file).map_err(|err| { + io::Error::new( + io::ErrorKind::Other, + format!("{}:{}", path.display(), err), + ) + }) +} + +/// Read patterns from stdin, one per line. +/// +/// If there was a problem reading or if any of the patterns contain invalid +/// UTF-8, then an error is returned. If there was a problem with a specific +/// pattern, then the error message will include the line number and the fact +/// that it came from stdin. +pub fn patterns_from_stdin() -> io::Result> { + let stdin = io::stdin(); + let locked = stdin.lock(); + patterns_from_reader(locked).map_err(|err| { + io::Error::new( + io::ErrorKind::Other, + format!(":{}", err), + ) + }) +} + +/// Read patterns from any reader, one per line. +/// +/// If there was a problem reading or if any of the patterns contain invalid +/// UTF-8, then an error is returned. If there was a problem with a specific +/// pattern, then the error message will include the line number. +/// +/// Note that this routine uses its own internal buffer, so the caller should +/// not provide their own buffered reader if possible. +/// +/// # Example +/// +/// This shows how to parse patterns, one per line. 
+/// +/// ``` +/// use grep_cli::patterns_from_reader; +/// +/// # fn example() -> Result<(), Box<::std::error::Error>> { +/// let patterns = "\ +/// foo +/// bar\\s+foo +/// [a-z]{3} +/// "; +/// +/// assert_eq!(patterns_from_reader(patterns.as_bytes())?, vec![ +/// r"foo", +/// r"bar\s+foo", +/// r"[a-z]{3}", +/// ]); +/// # Ok(()) } +/// ``` +pub fn patterns_from_reader(rdr: R) -> io::Result> { + let mut patterns = vec![]; + let mut bufrdr = io::BufReader::new(rdr); + let mut line = vec![]; + let mut line_number = 0; + while { + line.clear(); + line_number += 1; + bufrdr.read_until(b'\n', &mut line)? > 0 + } { + line.pop().unwrap(); // remove trailing '\n' + if line.last() == Some(&b'\r') { + line.pop().unwrap(); + } + match pattern_from_bytes(&line) { + Ok(pattern) => patterns.push(pattern.to_string()), + Err(err) => { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("{}: {}", line_number, err), + )); + } + } + } + Ok(patterns) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bytes() { + let pat = b"abc\xFFxyz"; + let err = pattern_from_bytes(pat).unwrap_err(); + assert_eq!(3, err.valid_up_to()); + } + + #[test] + #[cfg(unix)] + fn os() { + use std::os::unix::ffi::OsStrExt; + use std::ffi::OsStr; + + let pat = OsStr::from_bytes(b"abc\xFFxyz"); + let err = pattern_from_os(pat).unwrap_err(); + assert_eq!(3, err.valid_up_to()); + } +} diff -Nru ripgrep-0.6.0/grep-cli/src/process.rs ripgrep-0.10.0.3/grep-cli/src/process.rs --- ripgrep-0.6.0/grep-cli/src/process.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/src/process.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,267 @@ +use std::error; +use std::fmt; +use std::io::{self, Read}; +use std::iter; +use std::process; +use std::thread::{self, JoinHandle}; + +/// An error that can occur while running a command and reading its output. +/// +/// This error can be seamlessly converted to an `io::Error` via a `From` +/// implementation. 
+#[derive(Debug)] +pub struct CommandError { + kind: CommandErrorKind, +} + +#[derive(Debug)] +enum CommandErrorKind { + Io(io::Error), + Stderr(Vec), +} + +impl CommandError { + /// Create an error from an I/O error. + pub(crate) fn io(ioerr: io::Error) -> CommandError { + CommandError { kind: CommandErrorKind::Io(ioerr) } + } + + /// Create an error from the contents of stderr (which may be empty). + pub(crate) fn stderr(bytes: Vec) -> CommandError { + CommandError { kind: CommandErrorKind::Stderr(bytes) } + } +} + +impl error::Error for CommandError { + fn description(&self) -> &str { "command error" } +} + +impl fmt::Display for CommandError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.kind { + CommandErrorKind::Io(ref e) => e.fmt(f), + CommandErrorKind::Stderr(ref bytes) => { + let msg = String::from_utf8_lossy(bytes); + if msg.trim().is_empty() { + write!(f, "") + } else { + let div = iter::repeat('-').take(79).collect::(); + write!(f, "\n{div}\n{msg}\n{div}", div=div, msg=msg.trim()) + } + } + } + } +} + +impl From for CommandError { + fn from(ioerr: io::Error) -> CommandError { + CommandError { kind: CommandErrorKind::Io(ioerr) } + } +} + +impl From for io::Error { + fn from(cmderr: CommandError) -> io::Error { + match cmderr.kind { + CommandErrorKind::Io(ioerr) => ioerr, + CommandErrorKind::Stderr(_) => { + io::Error::new(io::ErrorKind::Other, cmderr) + } + } + } +} + +/// Configures and builds a streaming reader for process output. +#[derive(Clone, Debug, Default)] +pub struct CommandReaderBuilder { + async_stderr: bool, +} + +impl CommandReaderBuilder { + /// Create a new builder with the default configuration. + pub fn new() -> CommandReaderBuilder { + CommandReaderBuilder::default() + } + + /// Build a new streaming reader for the given command's output. 
+ /// + /// The caller should set everything that's required on the given command + /// before building a reader, such as its arguments, environment and + /// current working directory. Settings such as the stdout and stderr (but + /// not stdin) pipes will be overridden so that they can be controlled by + /// the reader. + /// + /// If there was a problem spawning the given command, then its error is + /// returned. + pub fn build( + &self, + command: &mut process::Command, + ) -> Result { + let mut child = command + .stdout(process::Stdio::piped()) + .stderr(process::Stdio::piped()) + .spawn()?; + let stdout = child.stdout.take().unwrap(); + let stderr = + if self.async_stderr { + StderrReader::async(child.stderr.take().unwrap()) + } else { + StderrReader::sync(child.stderr.take().unwrap()) + }; + Ok(CommandReader { + child: child, + stdout: stdout, + stderr: stderr, + done: false, + }) + } + + /// When enabled, the reader will asynchronously read the contents of the + /// command's stderr output. When disabled, stderr is only read after the + /// stdout stream has been exhausted (or if the process quits with an error + /// code). + /// + /// Note that when enabled, this may require launching an additional + /// thread in order to read stderr. This is done so that the process being + /// executed is never blocked from writing to stdout or stderr. If this is + /// disabled, then it is possible for the process to fill up the stderr + /// buffer and deadlock. + /// + /// This is enabled by default. + pub fn async_stderr(&mut self, yes: bool) -> &mut CommandReaderBuilder { + self.async_stderr = yes; + self + } +} + +/// A streaming reader for a command's output. +/// +/// The purpose of this reader is to provide an easy way to execute processes +/// whose stdout is read in a streaming way while also making the processes' +/// stderr available when the process fails with an exit code. 
This makes it +/// possible to execute processes while surfacing the underlying failure mode +/// in the case of an error. +/// +/// Moreover, by default, this reader will asynchronously read the processes' +/// stderr. This prevents subtle deadlocking bugs for noisy processes that +/// write a lot to stderr. Currently, the entire contents of stderr is read +/// on to the heap. +/// +/// # Example +/// +/// This example shows how to invoke `gzip` to decompress the contents of a +/// file. If the `gzip` command reports a failing exit status, then its stderr +/// is returned as an error. +/// +/// ```no_run +/// use std::io::Read; +/// use std::process::Command; +/// use grep_cli::CommandReader; +/// +/// # fn example() -> Result<(), Box<::std::error::Error>> { +/// let mut cmd = Command::new("gzip"); +/// cmd.arg("-d").arg("-c").arg("/usr/share/man/man1/ls.1.gz"); +/// +/// let mut rdr = CommandReader::new(&mut cmd)?; +/// let mut contents = vec![]; +/// rdr.read_to_end(&mut contents)?; +/// # Ok(()) } +/// ``` +#[derive(Debug)] +pub struct CommandReader { + child: process::Child, + stdout: process::ChildStdout, + stderr: StderrReader, + done: bool, +} + +impl CommandReader { + /// Create a new streaming reader for the given command using the default + /// configuration. + /// + /// The caller should set everything that's required on the given command + /// before building a reader, such as its arguments, environment and + /// current working directory. Settings such as the stdout and stderr (but + /// not stdin) pipes will be overridden so that they can be controlled by + /// the reader. + /// + /// If there was a problem spawning the given command, then its error is + /// returned. + /// + /// If the caller requires additional configuration for the reader + /// returned, then use + /// [`CommandReaderBuilder`](struct.CommandReaderBuilder.html). 
+ pub fn new( + cmd: &mut process::Command, + ) -> Result { + CommandReaderBuilder::new().build(cmd) + } +} + +impl io::Read for CommandReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if self.done { + return Ok(0); + } + let nread = self.stdout.read(buf)?; + if nread == 0 { + self.done = true; + // Reap the child now that we're done reading. If the command + // failed, report stderr as an error. + if !self.child.wait()?.success() { + return Err(io::Error::from(self.stderr.read_to_end())); + } + } + Ok(nread) + } +} + +/// A reader that encapsulates the asynchronous or synchronous reading of +/// stderr. +#[derive(Debug)] +enum StderrReader { + Async(Option>), + Sync(process::ChildStderr), +} + +impl StderrReader { + /// Create a reader for stderr that reads contents asynchronously. + fn async(mut stderr: process::ChildStderr) -> StderrReader { + let handle = thread::spawn(move || { + stderr_to_command_error(&mut stderr) + }); + StderrReader::Async(Some(handle)) + } + + /// Create a reader for stderr that reads contents synchronously. + fn sync(stderr: process::ChildStderr) -> StderrReader { + StderrReader::Sync(stderr) + } + + /// Consumes all of stderr on to the heap and returns it as an error. + /// + /// If there was a problem reading stderr itself, then this returns an I/O + /// command error. 
+ fn read_to_end(&mut self) -> CommandError { + match *self { + StderrReader::Async(ref mut handle) => { + let handle = handle + .take() + .expect("read_to_end cannot be called more than once"); + handle + .join() + .expect("stderr reading thread does not panic") + } + StderrReader::Sync(ref mut stderr) => { + stderr_to_command_error(stderr) + } + } + } +} + +fn stderr_to_command_error(stderr: &mut process::ChildStderr) -> CommandError { + let mut bytes = vec![]; + match stderr.read_to_end(&mut bytes) { + Ok(_) => CommandError::stderr(bytes), + Err(err) => CommandError::io(err), + } +} diff -Nru ripgrep-0.6.0/grep-cli/src/wtr.rs ripgrep-0.10.0.3/grep-cli/src/wtr.rs --- ripgrep-0.6.0/grep-cli/src/wtr.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/src/wtr.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,133 @@ +use std::io; + +use termcolor; + +use is_tty_stdout; + +/// A writer that supports coloring with either line or block buffering. +pub struct StandardStream(StandardStreamKind); + +/// Returns a possibly buffered writer to stdout for the given color choice. +/// +/// The writer returned is either line buffered or block buffered. The decision +/// between these two is made automatically based on whether a tty is attached +/// to stdout or not. If a tty is attached, then line buffering is used. +/// Otherwise, block buffering is used. In general, block buffering is more +/// efficient, but may increase the time it takes for the end user to see the +/// first bits of output. +/// +/// If you need more fine grained control over the buffering mode, then use one +/// of `stdout_buffered_line` or `stdout_buffered_block`. +/// +/// The color choice given is passed along to the underlying writer. To +/// completely disable colors in all cases, use `ColorChoice::Never`. 
+pub fn stdout(color_choice: termcolor::ColorChoice) -> StandardStream { + if is_tty_stdout() { + stdout_buffered_line(color_choice) + } else { + stdout_buffered_block(color_choice) + } +} + +/// Returns a line buffered writer to stdout for the given color choice. +/// +/// This writer is useful when printing results directly to a tty such that +/// users see output as soon as it's written. The downside of this approach +/// is that it can be slower, especially when there is a lot of output. +/// +/// You might consider using +/// [`stdout`](fn.stdout.html) +/// instead, which chooses the buffering strategy automatically based on +/// whether stdout is connected to a tty. +pub fn stdout_buffered_line( + color_choice: termcolor::ColorChoice, +) -> StandardStream { + let out = termcolor::StandardStream::stdout(color_choice); + StandardStream(StandardStreamKind::LineBuffered(out)) +} + +/// Returns a block buffered writer to stdout for the given color choice. +/// +/// This writer is useful when printing results to a file since it amortizes +/// the cost of writing data. The downside of this approach is that it can +/// increase the latency of display output when writing to a tty. +/// +/// You might consider using +/// [`stdout`](fn.stdout.html) +/// instead, which chooses the buffering strategy automatically based on +/// whether stdout is connected to a tty. 
+pub fn stdout_buffered_block( + color_choice: termcolor::ColorChoice, +) -> StandardStream { + let out = termcolor::BufferedStandardStream::stdout(color_choice); + StandardStream(StandardStreamKind::BlockBuffered(out)) +} + +enum StandardStreamKind { + LineBuffered(termcolor::StandardStream), + BlockBuffered(termcolor::BufferedStandardStream), +} + +impl io::Write for StandardStream { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + use self::StandardStreamKind::*; + + match self.0 { + LineBuffered(ref mut w) => w.write(buf), + BlockBuffered(ref mut w) => w.write(buf), + } + } + + #[inline] + fn flush(&mut self) -> io::Result<()> { + use self::StandardStreamKind::*; + + match self.0 { + LineBuffered(ref mut w) => w.flush(), + BlockBuffered(ref mut w) => w.flush(), + } + } +} + +impl termcolor::WriteColor for StandardStream { + #[inline] + fn supports_color(&self) -> bool { + use self::StandardStreamKind::*; + + match self.0 { + LineBuffered(ref w) => w.supports_color(), + BlockBuffered(ref w) => w.supports_color(), + } + } + + #[inline] + fn set_color(&mut self, spec: &termcolor::ColorSpec) -> io::Result<()> { + use self::StandardStreamKind::*; + + match self.0 { + LineBuffered(ref mut w) => w.set_color(spec), + BlockBuffered(ref mut w) => w.set_color(spec), + } + } + + #[inline] + fn reset(&mut self) -> io::Result<()> { + use self::StandardStreamKind::*; + + match self.0 { + LineBuffered(ref mut w) => w.reset(), + BlockBuffered(ref mut w) => w.reset(), + } + } + + #[inline] + fn is_synchronous(&self) -> bool { + use self::StandardStreamKind::*; + + match self.0 { + LineBuffered(ref w) => w.is_synchronous(), + BlockBuffered(ref w) => w.is_synchronous(), + } + } +} diff -Nru ripgrep-0.6.0/grep-cli/UNLICENSE ripgrep-0.10.0.3/grep-cli/UNLICENSE --- ripgrep-0.6.0/grep-cli/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-cli/UNLICENSE 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software 
released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff -Nru ripgrep-0.6.0/grep-matcher/Cargo.toml ripgrep-0.10.0.3/grep-matcher/Cargo.toml --- ripgrep-0.6.0/grep-matcher/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +[package] +name = "grep-matcher" +version = "0.1.1" #:version +authors = ["Andrew Gallant "] +description = """ +A trait for regular expressions, with a focus on line oriented search. 
+""" +documentation = "https://docs.rs/grep-matcher" +homepage = "https://github.com/BurntSushi/ripgrep" +repository = "https://github.com/BurntSushi/ripgrep" +readme = "README.md" +keywords = ["regex", "pattern", "trait"] +license = "Unlicense/MIT" +autotests = false + +[dependencies] +memchr = "2.0.2" + +[dev-dependencies] +regex = "1.0.5" + +[[test]] +name = "integration" +path = "tests/tests.rs" diff -Nru ripgrep-0.6.0/grep-matcher/LICENSE-MIT ripgrep-0.10.0.3/grep-matcher/LICENSE-MIT --- ripgrep-0.6.0/grep-matcher/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/LICENSE-MIT 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff -Nru ripgrep-0.6.0/grep-matcher/README.md ripgrep-0.10.0.3/grep-matcher/README.md --- ripgrep-0.6.0/grep-matcher/README.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,36 @@ +grep-matcher +------------ +This crate provides a low level interface for describing regular expression +matchers. The `grep` crate uses this interface in order to make the regex +engine it uses pluggable. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep-matcher.svg)](https://crates.io/crates/grep-matcher) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + +### Documentation + +[https://docs.rs/grep-matcher](https://docs.rs/grep-matcher) + +**NOTE:** You probably don't want to use this crate directly. Instead, you +should prefer the facade defined in the +[`grep`](https://docs.rs/grep) +crate. + + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep-matcher = "0.1" +``` + +and this to your crate root: + +```rust +extern crate grep_matcher; +``` diff -Nru ripgrep-0.6.0/grep-matcher/src/interpolate.rs ripgrep-0.10.0.3/grep-matcher/src/interpolate.rs --- ripgrep-0.6.0/grep-matcher/src/interpolate.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/src/interpolate.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,328 @@ +use std::str; + +use memchr::memchr; + +/// Interpolate capture references in `replacement` and write the interpolation +/// result to `dst`. References in `replacement` take the form of $N or $name, +/// where `N` is a capture group index and `name` is a capture group name. The +/// function provided, `name_to_index`, maps capture group names to indices. 
+/// +/// The `append` function given is responsible for writing the replacement +/// to the `dst` buffer. That is, it is called with the capture group index +/// of a capture group reference and is expected to resolve the index to its +/// corresponding matched text. If no such match exists, then `append` should +/// not write anything to its given buffer. +pub fn interpolate( + mut replacement: &[u8], + mut append: A, + mut name_to_index: N, + dst: &mut Vec, +) where + A: FnMut(usize, &mut Vec), + N: FnMut(&str) -> Option +{ + while !replacement.is_empty() { + match memchr(b'$', replacement) { + None => break, + Some(i) => { + dst.extend(&replacement[..i]); + replacement = &replacement[i..]; + } + } + if replacement.get(1).map_or(false, |&b| b == b'$') { + dst.push(b'$'); + replacement = &replacement[2..]; + continue; + } + debug_assert!(!replacement.is_empty()); + let cap_ref = match find_cap_ref(replacement) { + Some(cap_ref) => cap_ref, + None => { + dst.push(b'$'); + replacement = &replacement[1..]; + continue; + } + }; + replacement = &replacement[cap_ref.end..]; + match cap_ref.cap { + Ref::Number(i) => append(i, dst), + Ref::Named(name) => { + if let Some(i) = name_to_index(name) { + append(i, dst); + } + } + } + } + dst.extend(replacement); +} + +/// `CaptureRef` represents a reference to a capture group inside some text. +/// The reference is either a capture group name or a number. +/// +/// It is also tagged with the position in the text immediately proceding the +/// capture reference. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct CaptureRef<'a> { + cap: Ref<'a>, + end: usize, +} + +/// A reference to a capture group in some text. +/// +/// e.g., `$2`, `$foo`, `${foo}`. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum Ref<'a> { + Named(&'a str), + Number(usize), +} + +impl<'a> From<&'a str> for Ref<'a> { + fn from(x: &'a str) -> Ref<'a> { + Ref::Named(x) + } +} + +impl From for Ref<'static> { + fn from(x: usize) -> Ref<'static> { + Ref::Number(x) + } +} + +/// Parses a possible reference to a capture group name in the given text, +/// starting at the beginning of `replacement`. +/// +/// If no such valid reference could be found, None is returned. +fn find_cap_ref(replacement: &[u8]) -> Option { + let mut i = 0; + if replacement.len() <= 1 || replacement[0] != b'$' { + return None; + } + let mut brace = false; + i += 1; + if replacement[i] == b'{' { + brace = true; + i += 1; + } + let mut cap_end = i; + while replacement.get(cap_end).map_or(false, is_valid_cap_letter) { + cap_end += 1; + } + if cap_end == i { + return None; + } + // We just verified that the range 0..cap_end is valid ASCII, so it must + // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 + // check with an unchecked conversion or by parsing the number straight + // from &[u8]. + let cap = str::from_utf8(&replacement[i..cap_end]) + .expect("valid UTF-8 capture name"); + if brace { + if !replacement.get(cap_end).map_or(false, |&b| b == b'}') { + return None; + } + cap_end += 1; + } + Some(CaptureRef { + cap: match cap.parse::() { + Ok(i) => Ref::Number(i as usize), + Err(_) => Ref::Named(cap), + }, + end: cap_end, + }) +} + +/// Returns true if and only if the given byte is allowed in a capture name. +fn is_valid_cap_letter(b: &u8) -> bool { + match *b { + b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z' | b'_' => true, + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::{CaptureRef, find_cap_ref, interpolate}; + + macro_rules! 
find { + ($name:ident, $text:expr) => { + #[test] + fn $name() { + assert_eq!(None, find_cap_ref($text.as_bytes())); + } + }; + ($name:ident, $text:expr, $capref:expr) => { + #[test] + fn $name() { + assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); + } + }; + } + + macro_rules! c { + ($name_or_number:expr, $pos:expr) => { + CaptureRef { cap: $name_or_number.into(), end: $pos } + }; + } + + find!(find_cap_ref1, "$foo", c!("foo", 4)); + find!(find_cap_ref2, "${foo}", c!("foo", 6)); + find!(find_cap_ref3, "$0", c!(0, 2)); + find!(find_cap_ref4, "$5", c!(5, 2)); + find!(find_cap_ref5, "$10", c!(10, 3)); + find!(find_cap_ref6, "$42a", c!("42a", 4)); + find!(find_cap_ref7, "${42}a", c!(42, 5)); + find!(find_cap_ref8, "${42"); + find!(find_cap_ref9, "${42 "); + find!(find_cap_ref10, " $0 "); + find!(find_cap_ref11, "$"); + find!(find_cap_ref12, " "); + find!(find_cap_ref13, ""); + + // A convenience routine for using interpolate's unwieldy but flexible API. + fn interpolate_string( + mut name_to_index: Vec<(&'static str, usize)>, + caps: Vec<&'static str>, + replacement: &str, + ) -> String { + name_to_index.sort_by_key(|x| x.0); + + let mut dst = vec![]; + interpolate( + replacement.as_bytes(), + |i, dst| { + if let Some(&s) = caps.get(i) { + dst.extend(s.as_bytes()); + } + }, + |name| -> Option { + name_to_index + .binary_search_by_key(&name, |x| x.0) + .ok() + .map(|i| name_to_index[i].1) + }, + &mut dst, + ); + String::from_utf8(dst).unwrap() + } + + macro_rules! 
interp { + ($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => { + #[test] + fn $name() { + assert_eq!($expected, interpolate_string($map, $caps, $hay)); + } + } + } + + interp!( + interp1, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $foo test", + "test xxx test", + ); + + interp!( + interp2, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test$footest", + "test", + ); + + interp!( + interp3, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test${foo}test", + "testxxxtest", + ); + + interp!( + interp4, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test$2test", + "test", + ); + + interp!( + interp5, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test${2}test", + "testxxxtest", + ); + + interp!( + interp6, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $$foo test", + "test $foo test", + ); + + interp!( + interp7, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $foo", + "test xxx", + ); + + interp!( + interp8, + vec![("foo", 2)], + vec!["", "", "xxx"], + "$foo test", + "xxx test", + ); + + interp!( + interp9, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test $bar$foo", + "test yyyxxx", + ); + + interp!( + interp10, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test $ test", + "test $ test", + ); + + interp!( + interp11, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${} test", + "test ${} test", + ); + + interp!( + interp12, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${ } test", + "test ${ } test", + ); + + interp!( + interp13, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${a b} test", + "test ${a b} test", + ); + + interp!( + interp14, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${a} test", + "test test", + ); +} diff -Nru ripgrep-0.6.0/grep-matcher/src/lib.rs ripgrep-0.10.0.3/grep-matcher/src/lib.rs --- ripgrep-0.6.0/grep-matcher/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/src/lib.rs 
2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,1136 @@ +/*! +This crate provides an interface for regular expressions, with a focus on line +oriented search. The purpose of this crate is to provide a low level matching +interface that permits any kind of substring or regex implementation to power +the search routines provided by the +[`grep-searcher`](https://docs.rs/grep-searcher) +crate. + +The primary thing provided by this crate is the +[`Matcher`](trait.Matcher.html) +trait. The trait defines an abstract interface for text search. It is robust +enough to support everything from basic substring search all the way to +arbitrarily complex regular expression implementations without sacrificing +performance. + +A key design decision made in this crate is the use of *internal iteration*, +or otherwise known as the "push" model of searching. In this paradigm, +implementations of the `Matcher` trait will drive search and execute callbacks +provided by the caller when a match is found. This is in contrast to the +usual style of *external iteration* (the "pull" model) found throughout the +Rust ecosystem. There are two primary reasons why internal iteration was +chosen: + +* Some search implementations may themselves require internal iteration. + Converting an internal iterator to an external iterator can be non-trivial + and sometimes even practically impossible. +* Rust's type system isn't quite expressive enough to write a generic interface + using external iteration without giving something else up (namely, ease of + use and/or performance). + +In other words, internal iteration was chosen because it is the lowest common +denominator and because it is probably the least bad way of expressing the +interface in today's Rust. As a result, this trait isn't specifically intended +for everyday use, although, you might find it to be a happy price to pay if you +want to write code that is generic over multiple different regex +implementations. 
+*/ + +#![deny(missing_docs)] + +extern crate memchr; + +use std::fmt; +use std::io; +use std::ops; +use std::u64; + +use interpolate::interpolate; + +mod interpolate; + +/// The type of a match. +/// +/// The type of a match is a possibly empty range pointing to a contiguous +/// block of addressable memory. +/// +/// Every `Match` is guaranteed to satisfy the invariant that `start <= end`. +/// +/// # Indexing +/// +/// This type is structurally identical to `std::ops::Range`, but +/// is a bit more ergonomic for dealing with match indices. In particular, +/// this type implements `Copy` and provides methods for building new `Match` +/// values based on old `Match` values. Finally, the invariant that `start` +/// is always less than or equal to `end` is enforced. +/// +/// A `Match` can be used to slice a `&[u8]`, `&mut [u8]` or `&str` using +/// range notation. e.g., +/// +/// ``` +/// use grep_matcher::Match; +/// +/// let m = Match::new(2, 5); +/// let bytes = b"abcdefghi"; +/// assert_eq!(b"cde", &bytes[m]); +/// ``` +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct Match { + start: usize, + end: usize, +} + +impl Match { + /// Create a new match. + /// + /// # Panics + /// + /// This function panics if `start > end`. + #[inline] + pub fn new(start: usize, end: usize) -> Match { + assert!(start <= end); + Match { start, end } + } + + /// Creates a zero width match at the given offset. + #[inline] + pub fn zero(offset: usize) -> Match { + Match { start: offset, end: offset } + } + + /// Return the start offset of this match. + #[inline] + pub fn start(&self) -> usize { + self.start + } + + /// Return the end offset of this match. + #[inline] + pub fn end(&self) -> usize { + self.end + } + + /// Return a new match with the start offset replaced with the given + /// value. + /// + /// # Panics + /// + /// This method panics if `start > self.end`. 
+ #[inline] + pub fn with_start(&self, start: usize) -> Match { + assert!(start <= self.end); + Match { start, ..*self } + } + + /// Return a new match with the end offset replaced with the given + /// value. + /// + /// # Panics + /// + /// This method panics if `self.start > end`. + #[inline] + pub fn with_end(&self, end: usize) -> Match { + assert!(self.start <= end); + Match { end, ..*self } + } + + /// Offset this match by the given amount and return a new match. + /// + /// This adds the given offset to the start and end of this match, and + /// returns the resulting match. + /// + /// # Panics + /// + /// This panics if adding the given amount to either the start or end + /// offset would result in an overflow. + #[inline] + pub fn offset(&self, amount: usize) -> Match { + Match { + start: self.start.checked_add(amount).unwrap(), + end: self.end.checked_add(amount).unwrap(), + } + } + + /// Returns the number of bytes in this match. + #[inline] + pub fn len(&self) -> usize { + self.end - self.start + } + + /// Returns true if and only if this match is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl ops::Index for [u8] { + type Output = [u8]; + + #[inline] + fn index(&self, index: Match) -> &[u8] { + &self[index.start..index.end] + } +} + +impl ops::IndexMut for [u8] { + #[inline] + fn index_mut(&mut self, index: Match) -> &mut [u8] { + &mut self[index.start..index.end] + } +} + +impl ops::Index for str { + type Output = str; + + #[inline] + fn index(&self, index: Match) -> &str { + &self[index.start..index.end] + } +} + +/// A line terminator. +/// +/// A line terminator represents the end of a line. Generally, every line is +/// either "terminated" by the end of a stream or a specific byte (or sequence +/// of bytes). +/// +/// Generally, a line terminator is a single byte, specifically, `\n`, on +/// Unix-like systems. 
On Windows, a line terminator is `\r\n` (referred to +/// as `CRLF` for `Carriage Return; Line Feed`). +/// +/// The default line terminator is `\n` on all platforms. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct LineTerminator(LineTerminatorImp); + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +enum LineTerminatorImp { + /// Any single byte representing a line terminator. + /// + /// We represent this as an array so we can safely convert it to a slice + /// for convenient access. At some point, we can use `std::slice::from_ref` + /// instead. + Byte([u8; 1]), + /// A line terminator represented by `\r\n`. + /// + /// When this option is used, consumers may generally treat a lone `\n` as + /// a line terminator in addition to `\r\n`. + CRLF, +} + +impl LineTerminator { + /// Return a new single-byte line terminator. Any byte is valid. + #[inline] + pub fn byte(byte: u8) -> LineTerminator { + LineTerminator(LineTerminatorImp::Byte([byte])) + } + + /// Return a new line terminator represented by `\r\n`. + /// + /// When this option is used, consumers may generally treat a lone `\n` as + /// a line terminator in addition to `\r\n`. + #[inline] + pub fn crlf() -> LineTerminator { + LineTerminator(LineTerminatorImp::CRLF) + } + + /// Returns true if and only if this line terminator is CRLF. + #[inline] + pub fn is_crlf(&self) -> bool { + self.0 == LineTerminatorImp::CRLF + } + + /// Returns this line terminator as a single byte. + /// + /// If the line terminator is CRLF, then this returns `\n`. This is + /// useful for routines that, for example, find line boundaries by treating + /// `\n` as a line terminator even when it isn't preceded by `\r`. + #[inline] + pub fn as_byte(&self) -> u8 { + match self.0 { + LineTerminatorImp::Byte(array) => array[0], + LineTerminatorImp::CRLF => b'\n', + } + } + + /// Returns this line terminator as a sequence of bytes. 
+ /// + /// This returns a singleton sequence for all line terminators except for + /// `CRLF`, in which case, it returns `\r\n`. + /// + /// The slice returned is guaranteed to have length at least `1`. + #[inline] + pub fn as_bytes(&self) -> &[u8] { + match self.0 { + LineTerminatorImp::Byte(ref array) => array, + LineTerminatorImp::CRLF => &[b'\r', b'\n'], + } + } + + /// Returns true if and only if the given slice ends with this line + /// terminator. + /// + /// If this line terminator is `CRLF`, then this only checks whether the + /// last byte is `\n`. + #[inline] + pub fn is_suffix(&self, slice: &[u8]) -> bool { + slice.last().map_or(false, |&b| b == self.as_byte()) + } +} + +impl Default for LineTerminator { + #[inline] + fn default() -> LineTerminator { + LineTerminator::byte(b'\n') + } +} + +/// A set of bytes. +/// +/// In this crate, byte sets are used to express bytes that can never appear +/// anywhere in a match for a particular implementation of the `Matcher` trait. +/// Specifically, if such a set can be determined, then it's possible for +/// callers to perform additional operations on the basis that certain bytes +/// may never match. +/// +/// For example, if a search is configured to possibly produce results that +/// span multiple lines but a caller provided pattern can never match across +/// multiple lines, then it may make sense to divert to more optimized line +/// oriented routines that don't need to handle the multi-line match case. +#[derive(Clone, Debug)] +pub struct ByteSet(BitSet); + +#[derive(Clone, Copy)] +struct BitSet([u64; 4]); + +impl fmt::Debug for BitSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut fmtd = f.debug_set(); + for b in (0..256).map(|b| b as u8) { + if ByteSet(*self).contains(b) { + fmtd.entry(&b); + } + } + fmtd.finish() + } +} + +impl ByteSet { + /// Create an empty set of bytes. 
+ pub fn empty() -> ByteSet { + ByteSet(BitSet([0; 4])) + } + + /// Create a full set of bytes such that every possible byte is in the set + /// returned. + pub fn full() -> ByteSet { + ByteSet(BitSet([u64::MAX; 4])) + } + + /// Add a byte to this set. + /// + /// If the given byte already belongs to this set, then this is a no-op. + pub fn add(&mut self, byte: u8) { + let bucket = byte / 64; + let bit = byte % 64; + (self.0).0[bucket as usize] |= 1 << bit; + } + + /// Add an inclusive range of bytes. + pub fn add_all(&mut self, start: u8, end: u8) { + for b in (start as u64..end as u64 + 1).map(|b| b as u8) { + self.add(b); + } + } + + /// Remove a byte from this set. + /// + /// If the given byte is not in this set, then this is a no-op. + pub fn remove(&mut self, byte: u8) { + let bucket = byte / 64; + let bit = byte % 64; + (self.0).0[bucket as usize] &= !(1 << bit); + } + + /// Remove an inclusive range of bytes. + pub fn remove_all(&mut self, start: u8, end: u8) { + for b in (start as u64..end as u64 + 1).map(|b| b as u8) { + self.remove(b); + } + } + + /// Return true if and only if the given byte is in this set. + pub fn contains(&self, byte: u8) -> bool { + let bucket = byte / 64; + let bit = byte % 64; + (self.0).0[bucket as usize] & (1 << bit) > 0 + } +} + +/// A trait that describes implementations of capturing groups. +/// +/// When a matcher supports capturing group extraction, then it is the +/// matcher's responsibility to provide an implementation of this trait. +/// +/// Principally, this trait provides a way to access capturing groups +/// in a uniform way that does not require any specific representation. +/// Namely, different matcher implementations may require different in-memory +/// representations of capturing groups. This trait permits matchers to +/// maintain their specific in-memory representation. +/// +/// Note that this trait explicitly does not provide a way to construct a new +/// capture value. 
Instead, it is the responsibility of a `Matcher` to build +/// one, which might require knowledge of the matcher's internal implementation +/// details. +pub trait Captures { + /// Return the total number of capturing groups. This includes capturing + /// groups that have not matched anything. + fn len(&self) -> usize; + + /// Return the capturing group match at the given index. If no match of + /// that capturing group exists, then this returns `None`. + /// + /// When a matcher reports a match with capturing groups, then the first + /// capturing group (at index `0`) must always correspond to the offsets + /// for the overall match. + fn get(&self, i: usize) -> Option; + + /// Returns true if and only if these captures are empty. This occurs + /// when `len` is `0`. + /// + /// Note that capturing groups that have non-zero length but otherwise + /// contain no matching groups are *not* empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Expands all instances of `$name` in `replacement` to the corresponding + /// capture group `name`, and writes them to the `dst` buffer given. + /// + /// (Note: If you're looking for a convenient way to perform replacements + /// with interpolation, then you'll want to use the `replace_with_captures` + /// method on the `Matcher` trait.) + /// + /// `name` may be an integer corresponding to the index of the + /// capture group (counted by order of opening parenthesis where `0` is the + /// entire match) or it can be a name (consisting of letters, digits or + /// underscores) corresponding to a named capture group. + /// + /// A `name` is translated to a capture group index via the given + /// `name_to_index` function. If `name` isn't a valid capture group + /// (whether the name doesn't exist or isn't a valid index), then it is + /// replaced with the empty string. + /// + /// The longest possible name is used. e.g., `$1a` looks up the capture + /// group named `1a` and not the capture group at index `1`. 
To exert + /// more precise control over the name, use braces, e.g., `${1}a`. In all + /// cases, capture group names are limited to ASCII letters, numbers and + /// underscores. + /// + /// To write a literal `$` use `$$`. + /// + /// Note that the capture group match indices are resolved by slicing + /// the given `haystack`. Generally, this means that `haystack` should be + /// the same slice that was searched to get the current capture group + /// matches. + fn interpolate( + &self, + name_to_index: F, + haystack: &[u8], + replacement: &[u8], + dst: &mut Vec, + ) where F: FnMut(&str) -> Option + { + interpolate( + replacement, + |i, dst| { + if let Some(range) = self.get(i) { + dst.extend(&haystack[range]); + } + }, + name_to_index, + dst, + ) + } +} + +/// NoCaptures provides an always-empty implementation of the `Captures` trait. +/// +/// This type is useful for implementations of `Matcher` that don't support +/// capturing groups. +#[derive(Clone, Debug)] +pub struct NoCaptures(()); + +impl NoCaptures { + /// Create an empty set of capturing groups. + pub fn new() -> NoCaptures { NoCaptures(()) } +} + +impl Captures for NoCaptures { + fn len(&self) -> usize { 0 } + fn get(&self, _: usize) -> Option { None } +} + +/// NoError provides an error type for matchers that never produce errors. +/// +/// This error type implements the `std::error::Error` and `fmt::Display` +/// traits for use in matcher implementations that can never produce errors. +/// +/// The `fmt::Display` impl for this type panics.
+#[derive(Debug, Eq, PartialEq)] +pub struct NoError(()); + +impl ::std::error::Error for NoError { + fn description(&self) -> &str { "no error" } +} + +impl fmt::Display for NoError { + fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result { + panic!("BUG for NoError: an impossible error occurred") + } +} + +impl From for io::Error { + fn from(_: NoError) -> io::Error { + panic!("BUG for NoError: an impossible error occurred") + } +} + +/// The type of match for a line oriented matcher. +#[derive(Clone, Copy, Debug)] +pub enum LineMatchKind { + /// A position inside a line that is known to contain a match. + /// + /// This position can be anywhere in the line. It does not need to point + /// at the location of the match. + Confirmed(usize), + /// A position inside a line that may contain a match, and must be searched + /// for verification. + /// + /// This position can be anywhere in the line. It does not need to point + /// at the location of the match. + Candidate(usize), +} + +/// A matcher defines an interface for regular expression implementations. +/// +/// While this trait is large, there are only two required methods that +/// implementors must provide: `find_at` and `new_captures`. If captures +/// aren't supported by your implementation, then `new_captures` can be +/// implemented with +/// [`NoCaptures`](struct.NoCaptures.html). If your implementation does support +/// capture groups, then you should also implement the other capture related +/// methods, as dictated by the documentation. Crucially, this includes +/// `captures_at`. +/// +/// The rest of the methods on this trait provide default implementations on +/// top of `find_at` and `new_captures`. It is not uncommon for implementations +/// to be able to provide faster variants of some methods; in those cases, +/// simply override the default implementation. +pub trait Matcher { + /// The concrete type of capturing groups used for this matcher. 
+ /// + /// If this implementation does not support capturing groups, then set + /// this to `NoCaptures`. + type Captures: Captures; + + /// The error type used by this matcher. + /// + /// For matchers in which an error is not possible, they are encouraged to + /// use the `NoError` type in this crate. In the future, when the "never" + /// (spelled `!`) type is stabilized, then it should probably be used + /// instead. + type Error: fmt::Display; + + /// Returns the start and end byte range of the first match in `haystack` + /// after `at`, where the byte offsets are relative to the start of + /// `haystack` (and not `at`). If no match exists, then `None` is returned. + /// + /// The text encoding of `haystack` is not strictly specified. Matchers are + /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `at == 0`. + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, Self::Error>; + + /// Creates an empty group of captures suitable for use with the capturing + /// APIs of this trait. + /// + /// Implementations that don't support capturing groups should use + /// the `NoCaptures` type and implement this method by calling + /// `NoCaptures::new()`. + fn new_captures(&self) -> Result; + + /// Returns the total number of capturing groups in this matcher. + /// + /// If a matcher supports capturing groups, then this value must always be + /// at least 1, where the first capturing group always corresponds to the + /// overall match. + /// + /// If a matcher does not support capturing groups, then this should + /// always return 0. + /// + /// By default, capturing groups are not supported, so this always + /// returns 0.
+ fn capture_count(&self) -> usize { + 0 + } + + /// Maps the given capture group name to its corresponding capture group + /// index, if one exists. If one does not exist, then `None` is returned. + /// + /// If the given capture group name maps to multiple indices, then it is + /// not specified which one is returned. However, it is guaranteed that + /// one of them is returned. + /// + /// By default, capturing groups are not supported, so this always returns + /// `None`. + fn capture_index(&self, _name: &str) -> Option { + None + } + + /// Returns the start and end byte range of the first match in `haystack`. + /// If no match exists, then `None` is returned. + /// + /// The text encoding of `haystack` is not strictly specified. Matchers are + /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. + fn find( + &self, + haystack: &[u8], + ) -> Result, Self::Error> { + self.find_at(haystack, 0) + } + + /// Executes the given function over successive non-overlapping matches + /// in `haystack`. If no match exists, then the given function is never + /// called. If the function returns `false`, then iteration stops. + fn find_iter( + &self, + haystack: &[u8], + mut matched: F, + ) -> Result<(), Self::Error> + where F: FnMut(Match) -> bool + { + self.try_find_iter(haystack, |m| Ok(matched(m))) + .map(|r: Result<(), ()>| r.unwrap()) + } + + /// Executes the given function over successive non-overlapping matches + /// in `haystack`. If no match exists, then the given function is never + /// called. If the function returns `false`, then iteration stops. + /// Similarly, if the function returns an error then iteration stops and + /// the error is yielded. If an error occurs while executing the search, + /// then it is converted to + /// `E`. 
+ fn try_find_iter( + &self, + haystack: &[u8], + mut matched: F, + ) -> Result, Self::Error> + where F: FnMut(Match) -> Result + { + let mut last_end = 0; + let mut last_match = None; + + loop { + if last_end > haystack.len() { + return Ok(Ok(())); + } + let m = match self.find_at(haystack, last_end)? { + None => return Ok(Ok(())), + Some(m) => m, + }; + if m.start == m.end { + // This is an empty match. To ensure we make progress, start + // the next search at the smallest possible starting position + // of the next match following this one. + last_end = m.end + 1; + // Don't accept empty matches immediately following a match. + // Just move on to the next match. + if Some(m.end) == last_match { + continue; + } + } else { + last_end = m.end; + } + last_match = Some(m.end); + match matched(m) { + Ok(true) => continue, + Ok(false) => return Ok(Ok(())), + Err(err) => return Ok(Err(err)), + } + } + } + + /// Populates the first set of capture group matches from `haystack` into + /// `caps`. If no match exists, then `false` is returned. + /// + /// The text encoding of `haystack` is not strictly specified. Matchers are + /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. + fn captures( + &self, + haystack: &[u8], + caps: &mut Self::Captures, + ) -> Result { + self.captures_at(haystack, 0, caps) + } + + /// Executes the given function over successive non-overlapping matches + /// in `haystack` with capture groups extracted from each match. If no + /// match exists, then the given function is never called. If the function + /// returns `false`, then iteration stops. 
+ fn captures_iter( + &self, + haystack: &[u8], + caps: &mut Self::Captures, + mut matched: F, + ) -> Result<(), Self::Error> + where F: FnMut(&Self::Captures) -> bool + { + self.try_captures_iter(haystack, caps, |caps| Ok(matched(caps))) + .map(|r: Result<(), ()>| r.unwrap()) + } + + /// Executes the given function over successive non-overlapping matches + /// in `haystack` with capture groups extracted from each match. If no + /// match exists, then the given function is never called. If the function + /// returns `false`, then iteration stops. Similarly, if the function + /// returns an error then iteration stops and the error is yielded. If + /// an error occurs while executing the search, then it is converted to + /// `E`. + fn try_captures_iter( + &self, + haystack: &[u8], + caps: &mut Self::Captures, + mut matched: F, + ) -> Result, Self::Error> + where F: FnMut(&Self::Captures) -> Result + { + let mut last_end = 0; + let mut last_match = None; + + loop { + if last_end > haystack.len() { + return Ok(Ok(())); + } + if !self.captures_at(haystack, last_end, caps)? { + return Ok(Ok(())); + } + let m = caps.get(0).unwrap(); + if m.start == m.end { + // This is an empty match. To ensure we make progress, start + // the next search at the smallest possible starting position + // of the next match following this one. + last_end = m.end + 1; + // Don't accept empty matches immediately following a match. + // Just move on to the next match. + if Some(m.end) == last_match { + continue; + } + } else { + last_end = m.end; + } + last_match = Some(m.end); + match matched(caps) { + Ok(true) => continue, + Ok(false) => return Ok(Ok(())), + Err(err) => return Ok(Err(err)), + } + } + } + + /// Populates the first set of capture group matches from `haystack` + /// into `matches` after `at`, where the byte offsets in each capturing + /// group are relative to the start of `haystack` (and not `at`). 
If no + /// match exists, then `false` is returned and the contents of the given + /// capturing groups are unspecified. + /// + /// The text encoding of `haystack` is not strictly specified. Matchers are + /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `at == 0`. + /// + /// By default, capturing groups aren't supported, and this implementation + /// will always behave as if a match were impossible. + /// + /// Implementors that provide support for capturing groups must guarantee + /// that when a match occurs, the first capture match (at index `0`) is + /// always set to the overall match offsets. + /// + /// Note that if implementors seek to support capturing groups, then they + /// should implement this method. Other methods that match based on + /// captures will then work automatically. + fn captures_at( + &self, + _haystack: &[u8], + _at: usize, + _caps: &mut Self::Captures, + ) -> Result { + Ok(false) + } + + /// Replaces every match in the given haystack with the result of calling + /// `append`. `append` is given the start and end of a match, along with + /// a handle to the `dst` buffer provided. + /// + /// If the given `append` function returns `false`, then replacement stops. + fn replace( + &self, + haystack: &[u8], + dst: &mut Vec, + mut append: F, + ) -> Result<(), Self::Error> + where F: FnMut(Match, &mut Vec) -> bool + { + let mut last_match = 0; + self.find_iter(haystack, |m| { + dst.extend(&haystack[last_match..m.start]); + last_match = m.end; + append(m, dst) + })?; + dst.extend(&haystack[last_match..]); + Ok(()) + } + + /// Replaces every match in the given haystack with the result of calling + /// `append` with the matching capture groups. + /// + /// If the given `append` function returns `false`, then replacement stops. 
+ fn replace_with_captures( + &self, + haystack: &[u8], + caps: &mut Self::Captures, + dst: &mut Vec, + mut append: F, + ) -> Result<(), Self::Error> + where F: FnMut(&Self::Captures, &mut Vec) -> bool + { + let mut last_match = 0; + self.captures_iter(haystack, caps, |caps| { + let m = caps.get(0).unwrap(); + dst.extend(&haystack[last_match..m.start]); + last_match = m.end; + append(caps, dst) + })?; + dst.extend(&haystack[last_match..]); + Ok(()) + } + + /// Returns true if and only if the matcher matches the given haystack. + /// + /// By default, this method is implemented by calling `shortest_match`. + fn is_match(&self, haystack: &[u8]) -> Result { + self.is_match_at(haystack, 0) + } + + /// Returns true if and only if the matcher matches the given haystack + /// starting at the given position. + /// + /// By default, this method is implemented by calling `shortest_match_at`. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `at == 0`. + fn is_match_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + Ok(self.shortest_match_at(haystack, at)?.is_some()) + } + + /// Returns an end location of the first match in `haystack`. If no match + /// exists, then `None` is returned. + /// + /// Note that the end location reported by this method may be less than the + /// same end location reported by `find`. For example, running `find` with + /// the pattern `a+` on the haystack `aaa` should report a range of `[0, + /// 3)`, but `shortest_match` may report `1` as the ending location since + /// that is the place at which a match is guaranteed to occur. + /// + /// This method should never report false positives or false negatives. The + /// point of this method is that some implementors may be able to provide + /// a faster implementation of this than what `find` does. + /// + /// By default, this method is implemented by calling `find`. 
+ fn shortest_match( + &self, + haystack: &[u8], + ) -> Result, Self::Error> { + self.shortest_match_at(haystack, 0) + } + + /// Returns an end location of the first match in `haystack` starting at + /// the given position. If no match exists, then `None` is returned. + /// + /// Note that the end location reported by this method may be less than the + /// same end location reported by `find`. For example, running `find` with + /// the pattern `a+` on the haystack `aaa` should report a range of `[0, + /// 3)`, but `shortest_match` may report `1` as the ending location since + /// that is the place at which a match is guaranteed to occur. + /// + /// This method should never report false positives or false negatives. The + /// point of this method is that some implementors may be able to provide + /// a faster implementation of this than what `find` does. + /// + /// By default, this method is implemented by calling `find_at`. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `at == 0`. + fn shortest_match_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, Self::Error> { + Ok(self.find_at(haystack, at)?.map(|m| m.end)) + } + + /// If available, return a set of bytes that will never appear in a match + /// produced by an implementation. + /// + /// Specifically, if such a set can be determined, then it's possible for + /// callers to perform additional operations on the basis that certain + /// bytes may never match. + /// + /// For example, if a search is configured to possibly produce results + /// that span multiple lines but a caller provided pattern can never + /// match across multiple lines, then it may make sense to divert to + /// more optimized line oriented routines that don't need to handle the + /// multi-line match case. 
+ /// + /// Implementations that produce this set must never report false + /// positives, but may produce false negatives. That is, if a byte is in + /// this set then it must be guaranteed that it is never in a match. But, + /// if a byte is not in this set, then callers cannot assume that a match + /// exists with that byte. + /// + /// By default, this returns `None`. + fn non_matching_bytes(&self) -> Option<&ByteSet> { + None + } + + /// If this matcher was compiled as a line oriented matcher, then this + /// method returns the line terminator if and only if the line terminator + /// never appears in any match produced by this matcher. If this wasn't + /// compiled as a line oriented matcher, or if the aforementioned guarantee + /// cannot be made, then this must return `None`, which is the default. + /// It is **never wrong** to return `None`, but returning a line terminator + /// when it can appear in a match results in unspecified behavior. + /// + /// The line terminator is typically `b'\n'`, but can be any single byte or + /// `CRLF`. + /// + /// By default, this returns `None`. + fn line_terminator(&self) -> Option { + None + } + + /// Return one of the following: a confirmed line match, a candidate line + /// match (which may be a false positive) or no match at all (which **must + /// not** be a false negative). When reporting a confirmed or candidate + /// match, the position returned can be any position in the line. + /// + /// By default, this never returns a candidate match, and always either + /// returns a confirmed match or no match at all. + /// + /// When a matcher can match spans over multiple lines, then the behavior + /// of this method is unspecified. Namely, use of this method only + /// makes sense in a context where the caller is looking for the next + /// matching line. That is, callers should only use this method when + /// `line_terminator` does not return `None`.
+ /// + /// # Design rationale + /// + /// A line matcher is, fundamentally, a normal matcher with the addition + /// of one optional method: finding a line. By default, this routine + /// is implemented via the matcher's `shortest_match` method, which + /// always yields either no match or a `LineMatchKind::Confirmed`. However, + /// implementors may provide a routine for this that can return candidate + /// lines that need subsequent verification to be confirmed as a match. + /// This can be useful in cases where it may be quicker to find candidate + /// lines via some other means instead of relying on the more general + /// implementations for `find` and `shortest_match`. + /// + /// For example, consider the regex `\w+foo\s+`. Both `find` and + /// `shortest_match` must consider the entire regex, including the `\w+` + /// and `\s+`, while searching. However, this method could look for lines + /// containing `foo` and return them as candidates. Finding `foo` might + /// be implemented as a highly optimized substring search routine (like + /// `memmem`), which is likely to be faster than whatever more generalized + /// routine is required for resolving `\w+foo\s+`. The caller is then + /// responsible for confirming whether a match exists or not. + /// + /// Note that while this method may report false positives, it must never + /// report false negatives. That is, it can never skip over lines that + /// contain a match. 
+ fn find_candidate_line( + &self, + haystack: &[u8], + ) -> Result, Self::Error> { + Ok(self.shortest_match(haystack)?.map(LineMatchKind::Confirmed)) + } +} + +impl<'a, M: Matcher> Matcher for &'a M { + type Captures = M::Captures; + type Error = M::Error; + + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, Self::Error> { + (*self).find_at(haystack, at) + } + + fn new_captures(&self) -> Result { + (*self).new_captures() + } + + fn captures_at( + &self, + haystack: &[u8], + at: usize, + caps: &mut Self::Captures, + ) -> Result { + (*self).captures_at(haystack, at, caps) + } + + fn capture_index(&self, name: &str) -> Option { + (*self).capture_index(name) + } + + fn capture_count(&self) -> usize { + (*self).capture_count() + } + + fn find( + &self, + haystack: &[u8] + ) -> Result, Self::Error> { + (*self).find(haystack) + } + + fn find_iter( + &self, + haystack: &[u8], + matched: F, + ) -> Result<(), Self::Error> + where F: FnMut(Match) -> bool + { + (*self).find_iter(haystack, matched) + } + + fn try_find_iter( + &self, + haystack: &[u8], + matched: F, + ) -> Result, Self::Error> + where F: FnMut(Match) -> Result + { + (*self).try_find_iter(haystack, matched) + } + + fn captures( + &self, + haystack: &[u8], + caps: &mut Self::Captures, + ) -> Result { + (*self).captures(haystack, caps) + } + + fn captures_iter( + &self, + haystack: &[u8], + caps: &mut Self::Captures, + matched: F, + ) -> Result<(), Self::Error> + where F: FnMut(&Self::Captures) -> bool + { + (*self).captures_iter(haystack, caps, matched) + } + + fn try_captures_iter( + &self, + haystack: &[u8], + caps: &mut Self::Captures, + matched: F, + ) -> Result, Self::Error> + where F: FnMut(&Self::Captures) -> Result + { + (*self).try_captures_iter(haystack, caps, matched) + } + + fn replace( + &self, + haystack: &[u8], + dst: &mut Vec, + append: F, + ) -> Result<(), Self::Error> + where F: FnMut(Match, &mut Vec) -> bool + { + (*self).replace(haystack, dst, append) + } + + fn 
replace_with_captures( + &self, + haystack: &[u8], + caps: &mut Self::Captures, + dst: &mut Vec, + append: F, + ) -> Result<(), Self::Error> + where F: FnMut(&Self::Captures, &mut Vec) -> bool + { + (*self).replace_with_captures(haystack, caps, dst, append) + } + + fn is_match(&self, haystack: &[u8]) -> Result { + (*self).is_match(haystack) + } + + fn is_match_at( + &self, + haystack: &[u8], + at: usize + ) -> Result { + (*self).is_match_at(haystack, at) + } + + fn shortest_match( + &self, + haystack: &[u8], + ) -> Result, Self::Error> { + (*self).shortest_match(haystack) + } + + fn shortest_match_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, Self::Error> { + (*self).shortest_match_at(haystack, at) + } + + fn non_matching_bytes(&self) -> Option<&ByteSet> { + (*self).non_matching_bytes() + } + + fn line_terminator(&self) -> Option { + (*self).line_terminator() + } + + fn find_candidate_line( + &self, + haystack: &[u8], + ) -> Result, Self::Error> { + (*self).find_candidate_line(haystack) + } +} diff -Nru ripgrep-0.6.0/grep-matcher/tests/test_matcher.rs ripgrep-0.10.0.3/grep-matcher/tests/test_matcher.rs --- ripgrep-0.6.0/grep-matcher/tests/test_matcher.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/tests/test_matcher.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,208 @@ +use grep_matcher::{Captures, Match, Matcher}; +use regex::bytes::Regex; + +use util::{RegexMatcher, RegexMatcherNoCaps}; + +fn matcher(pattern: &str) -> RegexMatcher { + RegexMatcher::new(Regex::new(pattern).unwrap()) +} + +fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps { + RegexMatcherNoCaps(Regex::new(pattern).unwrap()) +} + +fn m(start: usize, end: usize) -> Match { + Match::new(start, end) +} + +#[test] +fn find() { + let matcher = matcher(r"(\w+)\s+(\w+)"); + assert_eq!(matcher.find(b" homer simpson ").unwrap(), Some(m(1, 14))); +} + +#[test] +fn find_iter() { + let matcher = matcher(r"(\w+)\s+(\w+)"); + let mut matches = vec![]; + 
matcher.find_iter(b"aa bb cc dd", |m| { + matches.push(m); + true + }).unwrap(); + assert_eq!(matches, vec![m(0, 5), m(6, 11)]); + + // Test that find_iter respects short circuiting. + matches.clear(); + matcher.find_iter(b"aa bb cc dd", |m| { + matches.push(m); + false + }).unwrap(); + assert_eq!(matches, vec![m(0, 5)]); +} + +#[test] +fn try_find_iter() { + #[derive(Clone, Debug, Eq, PartialEq)] + struct MyError; + + let matcher = matcher(r"(\w+)\s+(\w+)"); + let mut matches = vec![]; + let err = matcher.try_find_iter(b"aa bb cc dd", |m| { + if matches.is_empty() { + matches.push(m); + Ok(true) + } else { + Err(MyError) + } + }).unwrap().unwrap_err(); + assert_eq!(matches, vec![m(0, 5)]); + assert_eq!(err, MyError); +} + +#[test] +fn shortest_match() { + let matcher = matcher(r"a+"); + // This tests that the default impl isn't doing anything smart, and simply + // defers to `find`. + assert_eq!(matcher.shortest_match(b"aaa").unwrap(), Some(3)); + // The actual underlying regex is smarter. 
+ assert_eq!(matcher.re.shortest_match(b"aaa"), Some(1)); +} + +#[test] +fn captures() { + let matcher = matcher(r"(?P\w+)\s+(?P\w+)"); + assert_eq!(matcher.capture_count(), 3); + assert_eq!(matcher.capture_index("a"), Some(1)); + assert_eq!(matcher.capture_index("b"), Some(2)); + assert_eq!(matcher.capture_index("nada"), None); + + let mut caps = matcher.new_captures().unwrap(); + assert!(matcher.captures(b" homer simpson ", &mut caps).unwrap()); + assert_eq!(caps.get(0), Some(m(1, 14))); + assert_eq!(caps.get(1), Some(m(1, 6))); + assert_eq!(caps.get(2), Some(m(7, 14))); +} + +#[test] +fn captures_iter() { + let matcher = matcher(r"(?P\w+)\s+(?P\w+)"); + let mut caps = matcher.new_captures().unwrap(); + let mut matches = vec![]; + matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| { + matches.push(caps.get(0).unwrap()); + matches.push(caps.get(1).unwrap()); + matches.push(caps.get(2).unwrap()); + true + }).unwrap(); + assert_eq!(matches, vec![ + m(0, 5), m(0, 2), m(3, 5), + m(6, 11), m(6, 8), m(9, 11), + ]); + + // Test that captures_iter respects short circuiting. 
+ matches.clear(); + matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| { + matches.push(caps.get(0).unwrap()); + matches.push(caps.get(1).unwrap()); + matches.push(caps.get(2).unwrap()); + false + }).unwrap(); + assert_eq!(matches, vec![ + m(0, 5), m(0, 2), m(3, 5), + ]); +} + +#[test] +fn try_captures_iter() { + #[derive(Clone, Debug, Eq, PartialEq)] + struct MyError; + + let matcher = matcher(r"(?P\w+)\s+(?P\w+)"); + let mut caps = matcher.new_captures().unwrap(); + let mut matches = vec![]; + let err = matcher.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| { + if matches.is_empty() { + matches.push(caps.get(0).unwrap()); + matches.push(caps.get(1).unwrap()); + matches.push(caps.get(2).unwrap()); + Ok(true) + } else { + Err(MyError) + } + }).unwrap().unwrap_err(); + assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]); + assert_eq!(err, MyError); +} + +// Test that our default impls for capturing are correct. Namely, when +// capturing isn't supported by the underlying matcher, then all of the +// various capturing related APIs fail fast. +#[test] +fn no_captures() { + let matcher = matcher_no_caps(r"(?P\w+)\s+(?P\w+)"); + assert_eq!(matcher.capture_count(), 0); + assert_eq!(matcher.capture_index("a"), None); + assert_eq!(matcher.capture_index("b"), None); + assert_eq!(matcher.capture_index("nada"), None); + + let mut caps = matcher.new_captures().unwrap(); + assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap()); + + let mut called = false; + matcher.captures_iter(b"homer simpson", &mut caps, |_| { + called = true; + true + }).unwrap(); + assert!(!called); +} + +#[test] +fn replace() { + let matcher = matcher(r"(\w+)\s+(\w+)"); + let mut dst = vec![]; + matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| { + dst.push(b'z'); + true + }).unwrap(); + assert_eq!(dst, b"z z"); + + // Test that replacements respect short circuiting. 
+ dst.clear(); + matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| { + dst.push(b'z'); + false + }).unwrap(); + assert_eq!(dst, b"z cc dd"); +} + +#[test] +fn replace_with_captures() { + let matcher = matcher(r"(\w+)\s+(\w+)"); + let haystack = b"aa bb cc dd"; + let mut caps = matcher.new_captures().unwrap(); + let mut dst = vec![]; + matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| { + caps.interpolate( + |name| matcher.capture_index(name), + haystack, + b"$2 $1", + dst, + ); + true + }).unwrap(); + assert_eq!(dst, b"bb aa dd cc"); + + // Test that replacements respect short circuiting. + dst.clear(); + matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| { + caps.interpolate( + |name| matcher.capture_index(name), + haystack, + b"$2 $1", + dst, + ); + false + }).unwrap(); + assert_eq!(dst, b"bb aa cc dd"); +} diff -Nru ripgrep-0.6.0/grep-matcher/tests/tests.rs ripgrep-0.10.0.3/grep-matcher/tests/tests.rs --- ripgrep-0.6.0/grep-matcher/tests/tests.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/tests/tests.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,6 @@ +extern crate grep_matcher; +extern crate regex; + +mod util; + +mod test_matcher; diff -Nru ripgrep-0.6.0/grep-matcher/tests/util.rs ripgrep-0.10.0.3/grep-matcher/tests/util.rs --- ripgrep-0.6.0/grep-matcher/tests/util.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/tests/util.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,104 @@ +use std::collections::HashMap; +use std::result; + +use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError}; +use regex::bytes::{CaptureLocations, Regex}; + +#[derive(Debug)] +pub struct RegexMatcher { + pub re: Regex, + pub names: HashMap, +} + +impl RegexMatcher { + pub fn new(re: Regex) -> RegexMatcher { + let mut names = HashMap::new(); + for (i, optional_name) in re.capture_names().enumerate() { + if let Some(name) = optional_name { + names.insert(name.to_string(), i); + 
} + } + RegexMatcher { + re: re, + names: names, + } + } +} + +type Result = result::Result; + +impl Matcher for RegexMatcher { + type Captures = RegexCaptures; + type Error = NoError; + + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result> { + Ok(self.re + .find_at(haystack, at) + .map(|m| Match::new(m.start(), m.end()))) + } + + fn new_captures(&self) -> Result { + Ok(RegexCaptures(self.re.capture_locations())) + } + + fn captures_at( + &self, + haystack: &[u8], + at: usize, + caps: &mut RegexCaptures, + ) -> Result { + Ok(self.re.captures_read_at(&mut caps.0, haystack, at).is_some()) + } + + fn capture_count(&self) -> usize { + self.re.captures_len() + } + + fn capture_index(&self, name: &str) -> Option { + self.names.get(name).map(|i| *i) + } + + // We purposely don't implement any other methods, so that we test the + // default impls. The "real" Regex impl for Matcher provides a few more + // impls. e.g., Its `find_iter` impl is faster than what we can do here, + // since the regex crate avoids synchronization overhead. 
+} + +#[derive(Debug)] +pub struct RegexMatcherNoCaps(pub Regex); + +impl Matcher for RegexMatcherNoCaps { + type Captures = NoCaptures; + type Error = NoError; + + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result> { + Ok(self.0 + .find_at(haystack, at) + .map(|m| Match::new(m.start(), m.end()))) + } + + fn new_captures(&self) -> Result { + Ok(NoCaptures::new()) + } +} + +#[derive(Clone, Debug)] +pub struct RegexCaptures(CaptureLocations); + +impl Captures for RegexCaptures { + fn len(&self) -> usize { + self.0.len() + } + + fn get(&self, i: usize) -> Option { + self.0.pos(i).map(|(s, e)| Match::new(s, e)) + } +} diff -Nru ripgrep-0.6.0/grep-matcher/UNLICENSE ripgrep-0.10.0.3/grep-matcher/UNLICENSE --- ripgrep-0.6.0/grep-matcher/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-matcher/UNLICENSE 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> diff -Nru ripgrep-0.6.0/grep-pcre2/Cargo.toml ripgrep-0.10.0.3/grep-pcre2/Cargo.toml --- ripgrep-0.6.0/grep-pcre2/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-pcre2/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,17 @@ +[package] +name = "grep-pcre2" +version = "0.1.2" #:version +authors = ["Andrew Gallant "] +description = """ +Use PCRE2 with the 'grep' crate. +""" +documentation = "https://docs.rs/grep-pcre2" +homepage = "https://github.com/BurntSushi/ripgrep" +repository = "https://github.com/BurntSushi/ripgrep" +readme = "README.md" +keywords = ["regex", "grep", "pcre", "backreference", "look"] +license = "Unlicense/MIT" + +[dependencies] +grep-matcher = { version = "0.1.1", path = "../grep-matcher" } +pcre2 = "0.1.1" diff -Nru ripgrep-0.6.0/grep-pcre2/LICENSE-MIT ripgrep-0.10.0.3/grep-pcre2/LICENSE-MIT --- ripgrep-0.6.0/grep-pcre2/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-pcre2/LICENSE-MIT 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff -Nru ripgrep-0.6.0/grep-pcre2/README.md ripgrep-0.10.0.3/grep-pcre2/README.md --- ripgrep-0.6.0/grep-pcre2/README.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-pcre2/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,39 @@ +grep-pcre2 +---------- +The `grep-pcre2` crate provides an implementation of the `Matcher` trait from +the `grep-matcher` crate. This implementation permits PCRE2 to be used in the +`grep` crate for fast line oriented searching. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep-pcre2.svg)](https://crates.io/crates/grep-pcre2) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + +### Documentation + +[https://docs.rs/grep-pcre2](https://docs.rs/grep-pcre2) + +**NOTE:** You probably don't want to use this crate directly. Instead, you +should prefer the facade defined in the +[`grep`](https://docs.rs/grep) +crate. + +If you're looking to just use PCRE2 from Rust, then you probably want the +[`pcre2`](https://docs.rs/pcre2) +crate, which provides high level safe bindings to PCRE2.
+ +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep-pcre2 = "0.1" +``` + +and this to your crate root: + +```rust +extern crate grep_pcre2; +``` diff -Nru ripgrep-0.6.0/grep-pcre2/src/error.rs ripgrep-0.10.0.3/grep-pcre2/src/error.rs --- ripgrep-0.6.0/grep-pcre2/src/error.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-pcre2/src/error.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,59 @@ +use std::error; +use std::fmt; + +/// An error that can occur in this crate. +/// +/// Generally, this error corresponds to problems building a regular +/// expression, whether it's in parsing, compilation or a problem with +/// guaranteeing a configured optimization. +#[derive(Clone, Debug)] +pub struct Error { + kind: ErrorKind, +} + +impl Error { + pub(crate) fn regex(err: E) -> Error { + Error { kind: ErrorKind::Regex(err.to_string()) } + } + + /// Return the kind of this error. + pub fn kind(&self) -> &ErrorKind { + &self.kind + } +} + +/// The kind of an error that can occur. +#[derive(Clone, Debug)] +pub enum ErrorKind { + /// An error that occurred as a result of parsing a regular expression. + /// This can be a syntax error or an error that results from attempting to + /// compile a regular expression that is too big. + /// + /// The string here is the underlying error converted to a string. + Regex(String), + /// Hints that destructuring should not be exhaustive. + /// + /// This enum may grow additional variants, so this makes sure clients + /// don't count on exhaustive matching. (Otherwise, adding a new variant + /// could break existing code.) 
+ #[doc(hidden)] + __Nonexhaustive, +} + +impl error::Error for Error { + fn description(&self) -> &str { + match self.kind { + ErrorKind::Regex(_) => "regex error", + ErrorKind::__Nonexhaustive => unreachable!(), + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.kind { + ErrorKind::Regex(ref s) => write!(f, "{}", s), + ErrorKind::__Nonexhaustive => unreachable!(), + } + } +} diff -Nru ripgrep-0.6.0/grep-pcre2/src/lib.rs ripgrep-0.10.0.3/grep-pcre2/src/lib.rs --- ripgrep-0.6.0/grep-pcre2/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-pcre2/src/lib.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,15 @@ +/*! +An implementation of `grep-matcher`'s `Matcher` trait for +[PCRE2](https://www.pcre.org/). +*/ + +#![deny(missing_docs)] + +extern crate grep_matcher; +extern crate pcre2; + +pub use error::{Error, ErrorKind}; +pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder}; + +mod error; +mod matcher; diff -Nru ripgrep-0.6.0/grep-pcre2/src/matcher.rs ripgrep-0.10.0.3/grep-pcre2/src/matcher.rs --- ripgrep-0.6.0/grep-pcre2/src/matcher.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-pcre2/src/matcher.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,443 @@ +use std::collections::HashMap; + +use grep_matcher::{Captures, Match, Matcher}; +use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder}; + +use error::Error; + +/// A builder for configuring the compilation of a PCRE2 regex. +#[derive(Clone, Debug)] +pub struct RegexMatcherBuilder { + builder: RegexBuilder, + case_smart: bool, + word: bool, +} + +impl RegexMatcherBuilder { + /// Create a new matcher builder with a default configuration. + pub fn new() -> RegexMatcherBuilder { + RegexMatcherBuilder { + builder: RegexBuilder::new(), + case_smart: false, + word: false, + } + } + + /// Compile the given pattern into a PCRE matcher using the current + /// configuration. 
+ /// + /// If there was a problem compiling the pattern, then an error is + /// returned. + pub fn build(&self, pattern: &str) -> Result { + let mut builder = self.builder.clone(); + if self.case_smart && !has_uppercase_literal(pattern) { + builder.caseless(true); + } + let res = + if self.word { + let pattern = format!(r"(? &mut RegexMatcherBuilder { + self.builder.caseless(yes); + self + } + + /// Whether to enable "smart case" or not. + /// + /// When smart case is enabled, the builder will automatically enable + /// case insensitive matching based on how the pattern is written. Namely, + /// case insensitive mode is enabled when both of the following things + /// are believed to be true: + /// + /// 1. The pattern contains at least one literal character. For example, + /// `a\w` contains a literal (`a`) but `\w` does not. + /// 2. Of the literals in the pattern, none of them are considered to be + /// uppercase according to Unicode. For example, `foo\pL` has no + /// uppercase literals but `Foo\pL` does. + /// + /// Note that the implementation of this is not perfect. Namely, `\p{Ll}` + /// will prevent case insensitive matching even though it is part of a meta + /// sequence. This bug will probably never be fixed. + pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.case_smart = yes; + self + } + + /// Enables "dot all" matching. + /// + /// When enabled, the `.` metacharacter in the pattern matches any + /// character, include `\n`. When disabled (the default), `.` will match + /// any character except for `\n`. + /// + /// This option corresponds to the `s` flag. + pub fn dotall(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.builder.dotall(yes); + self + } + + /// Enable "extended" mode in the pattern, where whitespace is ignored. + /// + /// This option corresponds to the `x` flag. 
+ pub fn extended(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.builder.extended(yes); + self + } + + /// Enable multiline matching mode. + /// + /// When enabled, the `^` and `$` anchors will match both at the beginning + /// and end of a subject string, in addition to matching at the start of + /// a line and the end of a line. When disabled, the `^` and `$` anchors + /// will only match at the beginning and end of a subject string. + /// + /// This option corresponds to the `m` flag. + pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.builder.multi_line(yes); + self + } + + /// Enable matching of CRLF as a line terminator. + /// + /// When enabled, anchors such as `^` and `$` will match any of the + /// following as a line terminator: `\r`, `\n` or `\r\n`. + /// + /// This is disabled by default, in which case, only `\n` is recognized as + /// a line terminator. + pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.builder.crlf(yes); + self + } + + /// Require that all matches occur on word boundaries. + /// + /// Enabling this option is subtly different than putting `\b` assertions + /// on both sides of your pattern. In particular, a `\b` assertion requires + /// that one side of it match a word character while the other match a + /// non-word character. This option, in contrast, merely requires that + /// one side match a non-word character. + /// + /// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a + /// word character. However, `-2` with this `word` option enabled will + /// match the `-2` in `foo -2 bar`. + pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.word = yes; + self + } + + /// Enable Unicode matching mode. + /// + /// When enabled, the following patterns become Unicode aware: `\b`, `\B`, + /// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`. + /// + /// When set, this implies UTF matching mode. 
It is not possible to enable + /// Unicode matching mode without enabling UTF matching mode. + /// + /// This is disabled by default. + pub fn ucp(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.builder.ucp(yes); + self + } + + /// Enable UTF matching mode. + /// + /// When enabled, characters are treated as sequences of code units that + /// make up a single codepoint instead of as single bytes. For example, + /// this will cause `.` to match any single UTF-8 encoded codepoint, + /// whereas when this is disabled, `.` will match any single byte (except for `\n` in + /// both cases, unless "dot all" mode is enabled). + /// + /// Note that when UTF matching mode is enabled, every search performed + /// will do a UTF-8 validation check, which can impact performance. The + /// UTF-8 check can be disabled via the `disable_utf_check` option, but it + /// is undefined behavior to enable UTF matching mode and search invalid + /// UTF-8. + /// + /// This is disabled by default. + pub fn utf(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.builder.utf(yes); + self + } + + /// When UTF matching mode is enabled, this will disable the UTF checking + /// that PCRE2 will normally perform automatically. If UTF matching mode + /// is not enabled, then this has no effect. + /// + /// UTF checking is enabled by default when UTF matching mode is enabled. + /// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2 + /// will return an error if you attempt to search a subject string that is + /// not valid UTF-8. + /// + /// # Safety + /// + /// It is undefined behavior to disable the UTF check in UTF matching mode + /// and search a subject string that is not valid UTF-8. When the UTF check + /// is disabled, callers must guarantee that the subject string is valid + /// UTF-8.
+ pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder { + self.builder.disable_utf_check(); + self + } + + /// Enable PCRE2's JIT and return an error if it's not available. + /// + /// This generally speeds up matching quite a bit. The downside is that it + /// can increase the time it takes to compile a pattern. + /// + /// If the JIT isn't available or if JIT compilation returns an error, then + /// regex compilation will fail with the corresponding error. + /// + /// This is disabled by default, and always overrides `jit_if_available`. + pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.builder.jit(yes); + self + } + + /// Enable PCRE2's JIT if it's available. + /// + /// This generally speeds up matching quite a bit. The downside is that it + /// can increase the time it takes to compile a pattern. + /// + /// If the JIT isn't available or if JIT compilation returns an error, + /// then a debug message with the error will be emitted and the regex will + /// otherwise silently fall back to non-JIT matching. + /// + /// This is disabled by default, and always overrides `jit`. + pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.builder.jit_if_available(yes); + self + } +} + +/// An implementation of the `Matcher` trait using PCRE2. +#[derive(Clone, Debug)] +pub struct RegexMatcher { + regex: Regex, + names: HashMap, +} + +impl RegexMatcher { + /// Create a new matcher from the given pattern using the default + /// configuration. + pub fn new(pattern: &str) -> Result { + RegexMatcherBuilder::new().build(pattern) + } +} + +impl Matcher for RegexMatcher { + type Captures = RegexCaptures; + type Error = Error; + + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, Error> { + Ok(self.regex + .find_at(haystack, at) + .map_err(Error::regex)? 
+ .map(|m| Match::new(m.start(), m.end()))) + } + + fn new_captures(&self) -> Result { + Ok(RegexCaptures::new(self.regex.capture_locations())) + } + + fn capture_count(&self) -> usize { + self.regex.captures_len() + } + + fn capture_index(&self, name: &str) -> Option { + self.names.get(name).map(|i| *i) + } + + fn try_find_iter( + &self, + haystack: &[u8], + mut matched: F, + ) -> Result, Error> + where F: FnMut(Match) -> Result + { + for result in self.regex.find_iter(haystack) { + let m = result.map_err(Error::regex)?; + match matched(Match::new(m.start(), m.end())) { + Ok(true) => continue, + Ok(false) => return Ok(Ok(())), + Err(err) => return Ok(Err(err)), + } + } + Ok(Ok(())) + } + + fn captures_at( + &self, + haystack: &[u8], + at: usize, + caps: &mut RegexCaptures, + ) -> Result { + Ok(self.regex + .captures_read_at(&mut caps.locs, haystack, at) + .map_err(Error::regex)? + .is_some()) + } +} + +/// Represents the match offsets of each capturing group in a match. +/// +/// The first, or `0`th capture group, always corresponds to the entire match +/// and is guaranteed to be present when a match occurs. The next capture +/// group, at index `1`, corresponds to the first capturing group in the regex, +/// ordered by the position at which the left opening parenthesis occurs. +/// +/// Note that not all capturing groups are guaranteed to be present in a match. +/// For example, in the regex, `(?P<foo>\w)|(?P<bar>\W)`, only one of `foo` +/// or `bar` will ever be set in any given match. +/// +/// In order to access a capture group by name, you'll need to first find the +/// index of the group using the corresponding matcher's `capture_index` +/// method, and then use that index with `RegexCaptures::get`. +#[derive(Clone, Debug)] +pub struct RegexCaptures { + /// Where the locations are stored.
+ locs: CaptureLocations, +} + +impl Captures for RegexCaptures { + fn len(&self) -> usize { + self.locs.len() + } + + fn get(&self, i: usize) -> Option { + self.locs.get(i).map(|(s, e)| Match::new(s, e)) + } +} + +impl RegexCaptures { + pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures { + RegexCaptures { locs } + } +} + +/// Determine whether the pattern contains an uppercase character which should +/// negate the effect of the smart-case option. +/// +/// Ideally we would be able to check the AST in order to correctly handle +/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly +/// cased), but PCRE doesn't expose enough details for that kind of analysis. +/// For now, our 'good enough' solution is to simply perform a semi-naïve +/// scan of the input pattern and ignore all characters following a '\'. +/// This at least lets us support the most common cases, like 'foo\w' and +/// 'foo\S', in an intuitive manner. +fn has_uppercase_literal(pattern: &str) -> bool { + let mut chars = pattern.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + chars.next(); + } else if c.is_uppercase() { + return true; + } + } + false +} + +#[cfg(test)] +mod tests { + use grep_matcher::{LineMatchKind, Matcher}; + use super::*; + + // Test that enabling word matches does the right thing and demonstrate + // the difference between it and surrounding the regex in `\b`. + #[test] + fn word() { + let matcher = RegexMatcherBuilder::new() + .word(true) + .build(r"-2") + .unwrap(); + assert!(matcher.is_match(b"abc -2 foo").unwrap()); + + let matcher = RegexMatcherBuilder::new() + .word(false) + .build(r"\b-2\b") + .unwrap(); + assert!(!matcher.is_match(b"abc -2 foo").unwrap()); + } + + // Test that enabling CRLF permits `$` to match at the end of a line. + #[test] + fn line_terminator_crlf() { + // Test normal use of `$` with a `\n` line terminator.
+ let matcher = RegexMatcherBuilder::new() + .multi_line(true) + .build(r"abc$") + .unwrap(); + assert!(matcher.is_match(b"abc\n").unwrap()); + + // Test that `$` doesn't match at `\r\n` boundary normally. + let matcher = RegexMatcherBuilder::new() + .multi_line(true) + .build(r"abc$") + .unwrap(); + assert!(!matcher.is_match(b"abc\r\n").unwrap()); + + // Now check the CRLF handling. + let matcher = RegexMatcherBuilder::new() + .multi_line(true) + .crlf(true) + .build(r"abc$") + .unwrap(); + assert!(matcher.is_match(b"abc\r\n").unwrap()); + } + + // Test that smart case works. + #[test] + fn case_smart() { + let matcher = RegexMatcherBuilder::new() + .case_smart(true) + .build(r"abc") + .unwrap(); + assert!(matcher.is_match(b"ABC").unwrap()); + + let matcher = RegexMatcherBuilder::new() + .case_smart(true) + .build(r"aBc") + .unwrap(); + assert!(!matcher.is_match(b"ABC").unwrap()); + } + + // Test that finding candidate lines works as expected. + #[test] + fn candidate_lines() { + fn is_confirmed(m: LineMatchKind) -> bool { + match m { + LineMatchKind::Confirmed(_) => true, + _ => false, + } + } + + let matcher = RegexMatcherBuilder::new() + .build(r"\wfoo\s") + .unwrap(); + let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap(); + assert!(is_confirmed(m)); + } +} diff -Nru ripgrep-0.6.0/grep-pcre2/UNLICENSE ripgrep-0.10.0.3/grep-pcre2/UNLICENSE --- ripgrep-0.6.0/grep-pcre2/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-pcre2/UNLICENSE 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. 
+ +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> diff -Nru ripgrep-0.6.0/grep-printer/Cargo.toml ripgrep-0.10.0.3/grep-printer/Cargo.toml --- ripgrep-0.6.0/grep-printer/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,30 @@ +[package] +name = "grep-printer" +version = "0.1.1" #:version +authors = ["Andrew Gallant "] +description = """ +An implementation of the grep crate's Sink trait that provides standard +printing of search results, similar to grep itself.
+""" +documentation = "https://docs.rs/grep-printer" +homepage = "https://github.com/BurntSushi/ripgrep" +repository = "https://github.com/BurntSushi/ripgrep" +readme = "README.md" +keywords = ["grep", "pattern", "print", "printer", "sink"] +license = "Unlicense/MIT" + +[features] +default = ["serde1"] +serde1 = ["base64", "serde", "serde_derive", "serde_json"] + +[dependencies] +base64 = { version = "0.9.2", optional = true } +grep-matcher = { version = "0.1.1", path = "../grep-matcher" } +grep-searcher = { version = "0.1.1", path = "../grep-searcher" } +termcolor = "1.0.3" +serde = { version = "1.0.77", optional = true } +serde_derive = { version = "1.0.77", optional = true } +serde_json = { version = "1.0.27", optional = true } + +[dev-dependencies] +grep-regex = { version = "0.1.1", path = "../grep-regex" } diff -Nru ripgrep-0.6.0/grep-printer/LICENSE-MIT ripgrep-0.10.0.3/grep-printer/LICENSE-MIT --- ripgrep-0.6.0/grep-printer/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/LICENSE-MIT 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff -Nru ripgrep-0.6.0/grep-printer/README.md ripgrep-0.10.0.3/grep-printer/README.md --- ripgrep-0.6.0/grep-printer/README.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,35 @@ +grep-printer +------------ +Print results from line oriented searching in a human readable, aggregate or +JSON Lines format. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep-printer.svg)](https://crates.io/crates/grep-printer) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + +### Documentation + +[https://docs.rs/grep-printer](https://docs.rs/grep-printer) + +**NOTE:** You probably don't want to use this crate directly. Instead, you +should prefer the facade defined in the +[`grep`](https://docs.rs/grep) +crate. + + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep-printer = "0.1" +``` + +and this to your crate root: + +```rust +extern crate grep_printer; +``` diff -Nru ripgrep-0.6.0/grep-printer/src/color.rs ripgrep-0.10.0.3/grep-printer/src/color.rs --- ripgrep-0.6.0/grep-printer/src/color.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/color.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,394 @@ +use std::error; +use std::fmt; +use std::str::FromStr; + +use termcolor::{Color, ColorSpec, ParseColorError}; + +/// Returns a default set of color specifications. 
+/// +/// This may change over time, but the color choices are meant to be fairly +/// conservative that work across terminal themes. +/// +/// Additional color specifications can be added to the list returned. More +/// recently added specifications override previously added specifications. +pub fn default_color_specs() -> Vec { + vec![ + #[cfg(unix)] + "path:fg:magenta".parse().unwrap(), + #[cfg(windows)] + "path:fg:cyan".parse().unwrap(), + "line:fg:green".parse().unwrap(), + "match:fg:red".parse().unwrap(), + "match:style:bold".parse().unwrap(), + ] +} + +/// An error that can occur when parsing color specifications. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ColorError { + /// This occurs when an unrecognized output type is used. + UnrecognizedOutType(String), + /// This occurs when an unrecognized spec type is used. + UnrecognizedSpecType(String), + /// This occurs when an unrecognized color name is used. + UnrecognizedColor(String, String), + /// This occurs when an unrecognized style attribute is used. + UnrecognizedStyle(String), + /// This occurs when the format of a color specification is invalid. + InvalidFormat(String), +} + +impl error::Error for ColorError { + fn description(&self) -> &str { + match *self { + ColorError::UnrecognizedOutType(_) => "unrecognized output type", + ColorError::UnrecognizedSpecType(_) => "unrecognized spec type", + ColorError::UnrecognizedColor(_, _) => "unrecognized color name", + ColorError::UnrecognizedStyle(_) => "unrecognized style attribute", + ColorError::InvalidFormat(_) => "invalid color spec", + } + } +} + +impl ColorError { + fn from_parse_error(err: ParseColorError) -> ColorError { + ColorError::UnrecognizedColor( + err.invalid().to_string(), + err.to_string(), + ) + } +} + +impl fmt::Display for ColorError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ColorError::UnrecognizedOutType(ref name) => { + write!( + f, + "unrecognized output type '{}'. 
Choose from: \ + path, line, column, match.", + name, + ) + } + ColorError::UnrecognizedSpecType(ref name) => { + write!( + f, + "unrecognized spec type '{}'. Choose from: \ + fg, bg, style, none.", + name, + ) + } + ColorError::UnrecognizedColor(_, ref msg) => { + write!(f, "{}", msg) + } + ColorError::UnrecognizedStyle(ref name) => { + write!( + f, + "unrecognized style attribute '{}'. Choose from: \ + nobold, bold, nointense, intense, nounderline, \ + underline.", + name, + ) + } + ColorError::InvalidFormat(ref original) => { + write!( + f, + "invalid color spec format: '{}'. Valid format \ + is '(path|line|column|match):(fg|bg|style):(value)'.", + original, + ) + } + } + } +} + +/// A merged set of color specifications. +/// +/// This set of color specifications represents the various color types that +/// are supported by the printers in this crate. A set of color specifications +/// can be created from a sequence of +/// [`UserColorSpec`s](struct.UserColorSpec.html). +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct ColorSpecs { + path: ColorSpec, + line: ColorSpec, + column: ColorSpec, + matched: ColorSpec, +} + +/// A single color specification provided by the user. +/// +/// ## Format +/// +/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each +/// component is defined as follows: +/// +/// * `{type}` can be one of `path`, `line`, `column` or `match`. +/// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also +/// be the special value `none`, in which case, `{value}` can be omitted. +/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction. +/// +/// `{type}` controls which part of the output should be styled. +/// +/// When `{attribute}` is `none`, then this should cause any existing style +/// settings to be cleared for the specified `type`. 
+/// +/// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it +/// should be a style instruction when `{attribute}` is `style`. When +/// `{attribute}` is `none`, `{value}` must be omitted. +/// +/// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`, +/// `yellow`, `white`. Extended colors can also be specified, and are formatted +/// as `x` (for 256 colors) or `x,x,x` (for 24-bit true color), where +/// `x` is a number between 0 and 255 inclusive. `x` may be given as a normal +/// decimal number or a hexadecimal number, where the latter is prefixed by +/// `0x`. +/// +/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`, +/// `underline`, `nounderline`. +/// +/// ## Example +/// +/// The standard way to build a `UserColorSpec` is to parse it from a string. +/// Once multiple `UserColorSpec`s have been constructed, they can be provided +/// to the standard printer where they will automatically be applied to the +/// output. +/// +/// A `UserColorSpec` can also be converted to a `termcolor::ColorSpec`: +/// +/// ```rust +/// extern crate grep_printer; +/// extern crate termcolor; +/// +/// # fn main() { +/// use termcolor::{Color, ColorSpec}; +/// use grep_printer::UserColorSpec; +/// +/// let user_spec1: UserColorSpec = "path:fg:blue".parse().unwrap(); +/// let user_spec2: UserColorSpec = "match:bg:0xff,0x7f,0x00".parse().unwrap(); +/// +/// let spec1 = user_spec1.to_color_spec(); +/// let spec2 = user_spec2.to_color_spec(); +/// +/// assert_eq!(spec1.fg(), Some(&Color::Blue)); +/// assert_eq!(spec2.bg(), Some(&Color::Rgb(0xFF, 0x7F, 0x00))); +/// # } +/// ``` +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct UserColorSpec { + ty: OutType, + value: SpecValue, +} + +impl UserColorSpec { + /// Convert this user provided color specification to a specification that + /// can be used with `termcolor`.
This drops the type of this specification + /// (where the type indicates where the color is applied in the standard + /// printer, e.g., to the file path or the line numbers, etc.). + pub fn to_color_spec(&self) -> ColorSpec { + let mut spec = ColorSpec::default(); + self.value.merge_into(&mut spec); + spec + } +} + +/// The actual value given by the specification. +#[derive(Clone, Debug, Eq, PartialEq)] +enum SpecValue { + None, + Fg(Color), + Bg(Color), + Style(Style), +} + +/// The set of configurable portions of ripgrep's output. +#[derive(Clone, Debug, Eq, PartialEq)] +enum OutType { + Path, + Line, + Column, + Match, +} + +/// The specification type. +#[derive(Clone, Debug, Eq, PartialEq)] +enum SpecType { + Fg, + Bg, + Style, + None, +} + +/// The set of available styles for use in the terminal. +#[derive(Clone, Debug, Eq, PartialEq)] +enum Style { + Bold, + NoBold, + Intense, + NoIntense, + Underline, + NoUnderline +} + +impl ColorSpecs { + /// Create color specifications from a list of user supplied + /// specifications. + pub fn new(specs: &[UserColorSpec]) -> ColorSpecs { + let mut merged = ColorSpecs::default(); + for spec in specs { + match spec.ty { + OutType::Path => spec.merge_into(&mut merged.path), + OutType::Line => spec.merge_into(&mut merged.line), + OutType::Column => spec.merge_into(&mut merged.column), + OutType::Match => spec.merge_into(&mut merged.matched), + } + } + merged + } + + /// Create a default set of specifications that have color. + /// + /// This is distinct from `ColorSpecs`'s `Default` implementation in that + /// this provides a set of default color choices, where as the `Default` + /// implementation provides no color choices. + pub fn default_with_color() -> ColorSpecs { + ColorSpecs::new(&default_color_specs()) + } + + /// Return the color specification for coloring file paths. + pub fn path(&self) -> &ColorSpec { + &self.path + } + + /// Return the color specification for coloring line numbers. 
+ pub fn line(&self) -> &ColorSpec { + &self.line + } + + /// Return the color specification for coloring column numbers. + pub fn column(&self) -> &ColorSpec { + &self.column + } + + /// Return the color specification for coloring matched text. + pub fn matched(&self) -> &ColorSpec { + &self.matched + } +} + +impl UserColorSpec { + /// Merge this spec into the given color specification. + fn merge_into(&self, cspec: &mut ColorSpec) { + self.value.merge_into(cspec); + } +} + +impl SpecValue { + /// Merge this spec value into the given color specification. + fn merge_into(&self, cspec: &mut ColorSpec) { + match *self { + SpecValue::None => cspec.clear(), + SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); } + SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); } + SpecValue::Style(ref style) => { + match *style { + Style::Bold => { cspec.set_bold(true); } + Style::NoBold => { cspec.set_bold(false); } + Style::Intense => { cspec.set_intense(true); } + Style::NoIntense => { cspec.set_intense(false); } + Style::Underline => { cspec.set_underline(true); } + Style::NoUnderline => { cspec.set_underline(false); } + } + } + } + } +} + +impl FromStr for UserColorSpec { + type Err = ColorError; + + fn from_str(s: &str) -> Result { + let pieces: Vec<&str> = s.split(':').collect(); + if pieces.len() <= 1 || pieces.len() > 3 { + return Err(ColorError::InvalidFormat(s.to_string())); + } + let otype: OutType = pieces[0].parse()?; + match pieces[1].parse()? 
{ + SpecType::None => { + Ok(UserColorSpec { + ty: otype, + value: SpecValue::None, + }) + } + SpecType::Style => { + if pieces.len() < 3 { + return Err(ColorError::InvalidFormat(s.to_string())); + } + let style: Style = pieces[2].parse()?; + Ok(UserColorSpec { ty: otype, value: SpecValue::Style(style) }) + } + SpecType::Fg => { + if pieces.len() < 3 { + return Err(ColorError::InvalidFormat(s.to_string())); + } + let color: Color = pieces[2] + .parse() + .map_err(ColorError::from_parse_error)?; + Ok(UserColorSpec { ty: otype, value: SpecValue::Fg(color) }) + } + SpecType::Bg => { + if pieces.len() < 3 { + return Err(ColorError::InvalidFormat(s.to_string())); + } + let color: Color = pieces[2] + .parse() + .map_err(ColorError::from_parse_error)?; + Ok(UserColorSpec { ty: otype, value: SpecValue::Bg(color) }) + } + } + } +} + +impl FromStr for OutType { + type Err = ColorError; + + fn from_str(s: &str) -> Result { + match &*s.to_lowercase() { + "path" => Ok(OutType::Path), + "line" => Ok(OutType::Line), + "column" => Ok(OutType::Column), + "match" => Ok(OutType::Match), + _ => Err(ColorError::UnrecognizedOutType(s.to_string())), + } + } +} + +impl FromStr for SpecType { + type Err = ColorError; + + fn from_str(s: &str) -> Result { + match &*s.to_lowercase() { + "fg" => Ok(SpecType::Fg), + "bg" => Ok(SpecType::Bg), + "style" => Ok(SpecType::Style), + "none" => Ok(SpecType::None), + _ => Err(ColorError::UnrecognizedSpecType(s.to_string())), + } + } +} + +impl FromStr for Style { + type Err = ColorError; + + fn from_str(s: &str) -> Result { + match &*s.to_lowercase() { + "bold" => Ok(Style::Bold), + "nobold" => Ok(Style::NoBold), + "intense" => Ok(Style::Intense), + "nointense" => Ok(Style::NoIntense), + "underline" => Ok(Style::Underline), + "nounderline" => Ok(Style::NoUnderline), + _ => Err(ColorError::UnrecognizedStyle(s.to_string())), + } + } +} diff -Nru ripgrep-0.6.0/grep-printer/src/counter.rs ripgrep-0.10.0.3/grep-printer/src/counter.rs --- 
ripgrep-0.6.0/grep-printer/src/counter.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/counter.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,90 @@ +use std::io::{self, Write}; + +use termcolor::{ColorSpec, WriteColor}; + +/// A writer that counts the number of bytes that have been successfully +/// written. +#[derive(Clone, Debug)] +pub struct CounterWriter { + wtr: W, + count: u64, + total_count: u64, +} + +impl CounterWriter { + pub fn new(wtr: W) -> CounterWriter { + CounterWriter { wtr: wtr, count: 0, total_count: 0 } + } +} + +impl CounterWriter { + /// Returns the total number of bytes written since construction or the + /// last time `reset` was called. + pub fn count(&self) -> u64 { + self.count + } + + /// Returns the total number of bytes written since construction. + pub fn total_count(&self) -> u64 { + self.total_count + self.count + } + + /// Resets the number of bytes written to `0`. + pub fn reset_count(&mut self) { + self.total_count += self.count; + self.count = 0; + } + + /// Clear resets all counting related state for this writer. + /// + /// After this call, the total count of bytes written to the underlying + /// writer is erased and reset. 
+ #[allow(dead_code)] + pub fn clear(&mut self) { + self.count = 0; + self.total_count = 0; + } + + #[allow(dead_code)] + pub fn get_ref(&self) -> &W { + &self.wtr + } + + pub fn get_mut(&mut self) -> &mut W { + &mut self.wtr + } + + pub fn into_inner(self) -> W { + self.wtr + } +} + +impl Write for CounterWriter { + fn write(&mut self, buf: &[u8]) -> Result { + let n = self.wtr.write(buf)?; + self.count += n as u64; + Ok(n) + } + + fn flush(&mut self) -> Result<(), io::Error> { + self.wtr.flush() + } +} + +impl WriteColor for CounterWriter { + fn supports_color(&self) -> bool { + self.wtr.supports_color() + } + + fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { + self.wtr.set_color(spec) + } + + fn reset(&mut self) -> io::Result<()> { + self.wtr.reset() + } + + fn is_synchronous(&self) -> bool { + self.wtr.is_synchronous() + } +} diff -Nru ripgrep-0.6.0/grep-printer/src/json.rs ripgrep-0.10.0.3/grep-printer/src/json.rs --- ripgrep-0.6.0/grep-printer/src/json.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/json.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,921 @@ +use std::io::{self, Write}; +use std::path::Path; +use std::time::Instant; + +use grep_matcher::{Match, Matcher}; +use grep_searcher::{ + Searcher, + Sink, SinkError, SinkContext, SinkContextKind, SinkFinish, SinkMatch, +}; +use serde_json as json; + +use counter::CounterWriter; +use jsont; +use stats::Stats; + +/// The configuration for the JSON printer. +/// +/// This is manipulated by the JSONBuilder and then referenced by the actual +/// implementation. Once a printer is build, the configuration is frozen and +/// cannot changed. +#[derive(Debug, Clone)] +struct Config { + pretty: bool, + max_matches: Option, + always_begin_end: bool, +} + +impl Default for Config { + fn default() -> Config { + Config { + pretty: false, + max_matches: None, + always_begin_end: false, + } + } +} + +/// A builder for a JSON lines printer. 
+/// +/// The builder permits configuring how the printer behaves. The JSON printer +/// has fewer configuration options than the standard printer because it is +/// a structured format, and the printer always attempts to find the most +/// information possible. +/// +/// Some configuration options, such as whether line numbers are included or +/// whether contextual lines are shown, are drawn directly from the +/// `grep_searcher::Searcher`'s configuration. +/// +/// Once a `JSON` printer is built, its configuration cannot be changed. +#[derive(Clone, Debug)] +pub struct JSONBuilder { + config: Config, +} + +impl JSONBuilder { + /// Return a new builder for configuring the JSON printer. + pub fn new() -> JSONBuilder { + JSONBuilder { config: Config::default() } + } + + /// Create a JSON printer that writes results to the given writer. + pub fn build(&self, wtr: W) -> JSON { + JSON { + config: self.config.clone(), + wtr: CounterWriter::new(wtr), + matches: vec![], + } + } + + /// Print JSON in a pretty printed format. + /// + /// Enabling this will no longer produce a "JSON lines" format, in that + /// each JSON object printed may span multiple lines. + /// + /// This is disabled by default. + pub fn pretty(&mut self, yes: bool) -> &mut JSONBuilder { + self.config.pretty = yes; + self + } + + /// Set the maximum amount of matches that are printed. + /// + /// If multi line search is enabled and a match spans multiple lines, then + /// that match is counted exactly once for the purposes of enforcing this + /// limit, regardless of how many lines it spans. + pub fn max_matches(&mut self, limit: Option) -> &mut JSONBuilder { + self.config.max_matches = limit; + self + } + + /// When enabled, the `begin` and `end` messages are always emitted, even + /// when no match is found. + /// + /// When disabled, the `begin` and `end` messages are only shown if there + /// is at least one `match` or `context` message. + /// + /// This is disabled by default. 
+ pub fn always_begin_end(&mut self, yes: bool) -> &mut JSONBuilder { + self.config.always_begin_end = yes; + self + } +} + +/// The JSON printer, which emits results in a JSON lines format. +/// +/// This type is generic over `W`, which represents any implementation of +/// the standard library `io::Write` trait. +/// +/// # Format +/// +/// This section describes the JSON format used by this printer. +/// +/// To skip the rigamarole, take a look at the +/// [example](#example) +/// at the end. +/// +/// ## Overview +/// +/// The format of this printer is the [JSON Lines](http://jsonlines.org/) +/// format. Specifically, this printer emits a sequence of messages, where +/// each message is encoded as a single JSON value on a single line. There are +/// four different types of messages (and this number may expand over time): +/// +/// * **begin** - A message that indicates a file is being searched. +/// * **end** - A message that indicates a file is done being searched. This +/// message also includes summary statistics about the search. +/// * **match** - A message that indicates a match was found. This includes +/// the text and offsets of the match. +/// * **context** - A message that indicates a contextual line was found. +/// This includes the text of the line, along with any match information if +/// the search was inverted. +/// +/// Every message is encoded in the same envelope format, which includes a tag +/// indicating the message type along with an object for the payload: +/// +/// ```json +/// { +/// "type": "{begin|end|match|context}", +/// "data": { ... } +/// } +/// ``` +/// +/// The message itself is encoded in the envelope's `data` key. +/// +/// ## Text encoding +/// +/// Before describing each message format, we first must briefly discuss text +/// encoding, since it factors into every type of message. In particular, JSON +/// may only be encoded in UTF-8, UTF-16 or UTF-32. For the purposes of this +/// printer, we need only worry about UTF-8.
The problem here is that searching +/// is not limited to UTF-8 exclusively, which in turn implies that matches +/// may be reported that contain invalid UTF-8. Moreover, this printer may +/// also print file paths, and the encoding of file paths is itself not +/// guaranteed to be valid UTF-8. Therefore, this printer must deal with the +/// presence of invalid UTF-8 somehow. The printer could silently ignore such +/// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8 +/// by replacing all invalid sequences with the Unicode replacement character. +/// However, this would prevent consumers of this format from accessing the +/// original data in a non-lossy way. +/// +/// Therefore, this printer will emit valid UTF-8 encoded bytes as normal +/// JSON strings and otherwise base64 encode data that isn't valid UTF-8. To +/// communicate whether this process occurs or not, strings are keyed by the +/// name `text` whereas arbitrary bytes are keyed by `bytes`. +/// +/// For example, when a path is included in a message, it is formatted like so, +/// if and only if the path is valid UTF-8: +/// +/// ```json +/// { +/// "path": { +/// "text": "/home/ubuntu/lib.rs" +/// } +/// } +/// ``` +/// +/// If instead our path was `/home/ubuntu/lib\xFF.rs`, where the `\xFF` byte +/// makes it invalid UTF-8, the path would instead be encoded like so: +/// +/// ```json +/// { +/// "path": { +/// "bytes": "L2hvbWUvdWJ1bnR1L2xpYv8ucnM=" +/// } +/// } +/// ``` +/// +/// This same representation is used for reporting matches as well. +/// +/// The printer guarantees that the `text` field is used whenever the +/// underlying bytes are valid UTF-8. +/// +/// ## Wire format +/// +/// This section documents the wire format emitted by this printer, starting +/// with the four types of messages. +/// +/// Each message has its own format, and is contained inside an envelope that +/// indicates the type of message.
The envelope has these fields: +/// +/// * **type** - A string indicating the type of this message. It may be one +/// of four possible strings: `begin`, `end`, `match` or `context`. This +/// list may expand over time. +/// * **data** - The actual message data. The format of this field depends on +/// the value of `type`. The possible message formats are +/// [`begin`](#message-begin), +/// [`end`](#message-end), +/// [`match`](#message-match), +/// [`context`](#message-context). +/// +/// #### Message: **begin** +/// +/// This message indicates that a search has begun. It has these fields: +/// +/// * **path** - An +/// [arbitrary data object](#object-arbitrary-data) +/// representing the file path corresponding to the search, if one is +/// present. If no file path is available, then this field is `null`. +/// +/// #### Message: **end** +/// +/// This message indicates that a search has finished. It has these fields: +/// +/// * **path** - An +/// [arbitrary data object](#object-arbitrary-data) +/// representing the file path corresponding to the search, if one is +/// present. If no file path is available, then this field is `null`. +/// * **binary_offset** - The absolute offset in the data searched +/// corresponding to the place at which binary data was detected. If no +/// binary data was detected (or if binary detection was disabled), then this +/// field is `null`. +/// * **stats** - A [`stats` object](#object-stats) that contains summary +/// statistics for the previous search. +/// +/// #### Message: **match** +/// +/// This message indicates that a match has been found. A match generally +/// corresponds to a single line of text, although it may correspond to +/// multiple lines if the search can emit matches over multiple lines. It +/// has these fields: +/// +/// * **path** - An +/// [arbitrary data object](#object-arbitrary-data) +/// representing the file path corresponding to the search, if one is +/// present. 
If no file path is available, then this field is `null`. +/// * **lines** - An +/// [arbitrary data object](#object-arbitrary-data) +/// representing one or more lines contained in this match. +/// * **line_number** - If the searcher has been configured to report line +/// numbers, then this corresponds to the line number of the first line +/// in `lines`. If no line numbers are available, then this is `null`. +/// * **absolute_offset** - The absolute byte offset corresponding to the start +/// of `lines` in the data being searched. +/// * **submatches** - An array of [`submatch` objects](#object-submatch) +/// corresponding to matches in `lines`. The offsets included in each +/// `submatch` correspond to byte offsets into `lines`. (If `lines` is base64 +/// encoded, then the byte offsets correspond to the data after base64 +/// decoding.) The `submatch` objects are guaranteed to be sorted by their +/// starting offsets. Note that it is possible for this array to be empty, +/// for example, when searching reports inverted matches. +/// +/// #### Message: **context** +/// +/// This message indicates that a contextual line has been found. A contextual +/// line is a line that doesn't contain a match, but is generally adjacent to +/// a line that does contain a match. The precise way in which contextual lines +/// are reported is determined by the searcher. It has these fields, which are +/// exactly the same fields found in a [`match`](#message-match): +/// +/// * **path** - An +/// [arbitrary data object](#object-arbitrary-data) +/// representing the file path corresponding to the search, if one is +/// present. If no file path is available, then this field is `null`. +/// * **lines** - An +/// [arbitrary data object](#object-arbitrary-data) +/// representing one or more lines contained in this context. This includes +/// line terminators, if they're present. 
+/// * **line_number** - If the searcher has been configured to report line +/// numbers, then this corresponds to the line number of the first line +/// in `lines`. If no line numbers are available, then this is `null`. +/// * **absolute_offset** - The absolute byte offset corresponding to the start +/// of `lines` in the data being searched. +/// * **submatches** - An array of [`submatch` objects](#object-submatch) +/// corresponding to matches in `lines`. The offsets included in each +/// `submatch` correspond to byte offsets into `lines`. (If `lines` is base64 +/// encoded, then the byte offsets correspond to the data after base64 +/// decoding.) The `submatch` objects are guaranteed to be sorted by +/// their starting offsets. Note that it is possible for this array to be +/// non-empty, for example, when searching reports inverted matches such that +/// the original matcher could match things in the contextual lines. +/// +/// #### Object: **submatch** +/// +/// This object describes submatches found within `match` or `context` +/// messages. The `start` and `end` fields indicate the half-open interval on +/// which the match occurs (`start` is included, but `end` is not). It is +/// guaranteed that `start <= end`. It has these fields: +/// +/// * **match** - An +/// [arbitrary data object](#object-arbitrary-data) +/// corresponding to the text in this submatch. +/// * **start** - A byte offset indicating the start of this match. This offset +/// is generally reported in terms of the parent object's data. For example, +/// the `lines` field in the +/// [`match`](#message-match) or [`context`](#message-context) +/// messages. +/// * **end** - A byte offset indicating the end of this match. This offset +/// is generally reported in terms of the parent object's data. For example, +/// the `lines` field in the +/// [`match`](#message-match) or [`context`](#message-context) +/// messages. 
+/// +/// #### Object: **stats** +/// +/// This object is included in messages and contains summary statistics about +/// a search. It has these fields: +/// +/// * **elapsed** - A [`duration` object](#object-duration) describing the +/// length of time that elapsed while performing the search. +/// * **searches** - The number of searches that have run. For this printer, +/// this value is always `1`. (Implementations may emit additional message +/// types that use this same `stats` object that represents summary +/// statistics over multiple searches.) +/// * **searches_with_match** - The number of searches that have run that have +/// found at least one match. This is never more than `searches`. +/// * **bytes_searched** - The total number of bytes that have been searched. +/// * **bytes_printed** - The total number of bytes that have been printed. +/// This includes everything emitted by this printer. +/// * **matched_lines** - The total number of lines that participated in a +/// match. When matches may contain multiple lines, then this includes every +/// line that is part of every match. +/// * **matches** - The total number of matches. There may be multiple matches +/// per line. When matches may contain multiple lines, each match is counted +/// only once, regardless of how many lines it spans. +/// +/// #### Object: **duration** +/// +/// This object includes a few fields for describing a duration. Two of its +/// fields, `secs` and `nanos`, can be combined to give nanosecond precision +/// on systems that support it. It has these fields: +/// +/// * **secs** - A whole number of seconds indicating the length of this +/// duration. +/// * **nanos** - A fractional part of this duration represented by nanoseconds. +/// If nanosecond precision isn't supported, then this is typically rounded +/// up to the nearest number of nanoseconds. +/// * **human** - A human readable string describing the length of the +/// duration.
The format of the string is itself unspecified. +/// +/// #### Object: **arbitrary data** +/// +/// This object is used whenever arbitrary data needs to be represented as a +/// JSON value. This object contains two fields, where generally only one of +/// the fields is present: +/// +/// * **text** - A normal JSON string that is UTF-8 encoded. This field is +/// populated if and only if the underlying data is valid UTF-8. +/// * **bytes** - A normal JSON string that is a base64 encoding of the +/// underlying bytes. +/// +/// More information on the motivation for this representation can be seen in +/// the section [text encoding](#text-encoding) above. +/// +/// ## Example +/// +/// This section shows a small example that includes all message types. +/// +/// Here's the file we want to search, located at `/home/andrew/sherlock`: +/// +/// ```text +/// For the Doctor Watsons of this world, as opposed to the Sherlock +/// Holmeses, success in the province of detective work must always +/// be, to a very large extent, the result of luck. Sherlock Holmes +/// can extract a clew from a wisp of straw or a flake of cigar ash; +/// but Doctor Watson has to have it taken out for him and dusted, +/// and exhibited clearly, with a label attached. 
+/// ``` +/// +/// Searching for `Watson` with a `before_context` of `1` with line numbers +/// enabled shows something like this using the standard printer: +/// +/// ```text +/// sherlock:1:For the Doctor Watsons of this world, as opposed to the Sherlock +/// -- +/// sherlock-4-can extract a clew from a wisp of straw or a flake of cigar ash; +/// sherlock:5:but Doctor Watson has to have it taken out for him and dusted, +/// ``` +/// +/// Here's what the same search looks like using the JSON wire format described +/// above, wherein we show semi-prettified JSON (instead of a strict JSON +/// Lines format), for illustrative purposes: +/// +/// ```json +/// { +/// "type": "begin", +/// "data": { +/// "path": {"text": "/home/andrew/sherlock"} +/// } +/// } +/// { +/// "type": "match", +/// "data": { +/// "path": {"text": "/home/andrew/sherlock"}, +/// "lines": {"text": "For the Doctor Watsons of this world, as opposed to the Sherlock\n"}, +/// "line_number": 1, +/// "absolute_offset": 0, +/// "submatches": [ +/// {"match": {"text": "Watson"}, "start": 15, "end": 21} +/// ] +/// } +/// } +/// { +/// "type": "context", +/// "data": { +/// "path": {"text": "/home/andrew/sherlock"}, +/// "lines": {"text": "can extract a clew from a wisp of straw or a flake of cigar ash;\n"}, +/// "line_number": 4, +/// "absolute_offset": 193, +/// "submatches": [] +/// } +/// } +/// { +/// "type": "match", +/// "data": { +/// "path": {"text": "/home/andrew/sherlock"}, +/// "lines": {"text": "but Doctor Watson has to have it taken out for him and dusted,\n"}, +/// "line_number": 5, +/// "absolute_offset": 258, +/// "submatches": [ +/// {"match": {"text": "Watson"}, "start": 11, "end": 17} +/// ] +/// } +/// } +/// { +/// "type": "end", +/// "data": { +/// "path": {"text": "/home/andrew/sherlock"}, +/// "binary_offset": null, +/// "stats": { +/// "elapsed": {"secs": 0, "nanos": 36296, "human": "0.0000s"}, +/// "searches": 1, +/// "searches_with_match": 1, +/// "bytes_searched": 367, +///
"bytes_printed": 1151, +/// "matched_lines": 2, +/// "matches": 2 +/// } +/// } +/// } +/// ``` +#[derive(Debug)] +pub struct JSON { + config: Config, + wtr: CounterWriter, + matches: Vec, +} + +impl JSON { + /// Return a JSON lines printer with a default configuration that writes + /// matches to the given writer. + pub fn new(wtr: W) -> JSON { + JSONBuilder::new().build(wtr) + } + + /// Return an implementation of `Sink` for the JSON printer. + /// + /// This does not associate the printer with a file path, which means this + /// implementation will never print a file path along with the matches. + pub fn sink<'s, M: Matcher>( + &'s mut self, + matcher: M, + ) -> JSONSink<'static, 's, M, W> { + JSONSink { + matcher: matcher, + json: self, + path: None, + start_time: Instant::now(), + match_count: 0, + after_context_remaining: 0, + binary_byte_offset: None, + begin_printed: false, + stats: Stats::new(), + } + } + + /// Return an implementation of `Sink` associated with a file path. + /// + /// When the printer is associated with a path, then it may, depending on + /// its configuration, print the path along with the matches found. + pub fn sink_with_path<'p, 's, M, P>( + &'s mut self, + matcher: M, + path: &'p P, + ) -> JSONSink<'p, 's, M, W> + where M: Matcher, + P: ?Sized + AsRef, + { + JSONSink { + matcher: matcher, + json: self, + path: Some(path.as_ref()), + start_time: Instant::now(), + match_count: 0, + after_context_remaining: 0, + binary_byte_offset: None, + begin_printed: false, + stats: Stats::new(), + } + } + + /// Write the given message followed by a new line. The new line is + /// determined from the configuration of the given searcher. 
+ fn write_message(&mut self, message: &jsont::Message) -> io::Result<()> { + if self.config.pretty { + json::to_writer_pretty(&mut self.wtr, message)?; + } else { + json::to_writer(&mut self.wtr, message)?; + } + self.wtr.write(&[b'\n'])?; + Ok(()) + } +} + +impl JSON { + /// Returns true if and only if this printer has written at least one byte + /// to the underlying writer during any of the previous searches. + pub fn has_written(&self) -> bool { + self.wtr.total_count() > 0 + } + + /// Return a mutable reference to the underlying writer. + pub fn get_mut(&mut self) -> &mut W { + self.wtr.get_mut() + } + + /// Consume this printer and return back ownership of the underlying + /// writer. + pub fn into_inner(self) -> W { + self.wtr.into_inner() + } +} + +/// An implementation of `Sink` associated with a matcher and an optional file +/// path for the JSON printer. +/// +/// This type is generic over a few type parameters: +/// +/// * `'p` refers to the lifetime of the file path, if one is provided. When +/// no file path is given, then this is `'static`. +/// * `'s` refers to the lifetime of the +/// [`JSON`](struct.JSON.html) +/// printer that this type borrows. +/// * `M` refers to the type of matcher used by +/// `grep_searcher::Searcher` that is reporting results to this sink. +/// * `W` refers to the underlying writer that this printer is writing its +/// output to. +#[derive(Debug)] +pub struct JSONSink<'p, 's, M: Matcher, W: 's> { + matcher: M, + json: &'s mut JSON, + path: Option<&'p Path>, + start_time: Instant, + match_count: u64, + after_context_remaining: u64, + binary_byte_offset: Option, + begin_printed: bool, + stats: Stats, +} + +impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> { + /// Returns true if and only if this printer received a match in the + /// previous search. + /// + /// This is unaffected by the result of searches before the previous + /// search. 
+ pub fn has_match(&self) -> bool { + self.match_count > 0 + } + + /// Return the total number of matches reported to this sink. + /// + /// This corresponds to the number of times `Sink::matched` is called. + pub fn match_count(&self) -> u64 { + self.match_count + } + + /// If binary data was found in the previous search, this returns the + /// offset at which the binary data was first detected. + /// + /// The offset returned is an absolute offset relative to the entire + /// set of bytes searched. + /// + /// This is unaffected by the result of searches before the previous + /// search. e.g., If the search prior to the previous search found binary + /// data but the previous search found no binary data, then this will + /// return `None`. + pub fn binary_byte_offset(&self) -> Option { + self.binary_byte_offset + } + + /// Return a reference to the stats produced by the printer for all + /// searches executed on this sink. + pub fn stats(&self) -> &Stats { + &self.stats + } + + /// Execute the matcher over the given bytes and record the match + /// locations if the current configuration demands match granularity. + fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> { + self.json.matches.clear(); + // If printing requires knowing the location of each individual match, + // then compute and stored those right now for use later. While this + // adds an extra copy for storing the matches, we do amortize the + // allocation for it and this greatly simplifies the printing logic to + // the extent that it's easy to ensure that we never do more than + // one search to find the matches. + let matches = &mut self.json.matches; + self.matcher.find_iter(bytes, |m| { + matches.push(m); + true + }).map_err(io::Error::error_message)?; + // Don't report empty matches appearing at the end of the bytes. 
+ if !matches.is_empty() + && matches.last().unwrap().is_empty() + && matches.last().unwrap().start() >= bytes.len() + { + matches.pop().unwrap(); + } + Ok(()) + } + + /// Returns true if this printer should quit. + /// + /// This implements the logic for handling quitting after seeing a certain + /// amount of matches. In most cases, the logic is simple, but we must + /// permit all "after" contextual lines to print after reaching the limit. + fn should_quit(&self) -> bool { + let limit = match self.json.config.max_matches { + None => return false, + Some(limit) => limit, + }; + if self.match_count < limit { + return false; + } + self.after_context_remaining == 0 + } + + /// Write the "begin" message. + fn write_begin_message(&mut self) -> io::Result<()> { + if self.begin_printed { + return Ok(()); + } + let msg = jsont::Message::Begin(jsont::Begin { + path: self.path, + }); + self.json.write_message(&msg)?; + self.begin_printed = true; + Ok(()) + } +} + +impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> { + type Error = io::Error; + + fn matched( + &mut self, + searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + self.write_begin_message()?; + + self.match_count += 1; + self.after_context_remaining = searcher.after_context() as u64; + self.record_matches(mat.bytes())?; + self.stats.add_matches(self.json.matches.len() as u64); + self.stats.add_matched_lines(mat.lines().count() as u64); + + let submatches = SubMatches::new(mat.bytes(), &self.json.matches); + let msg = jsont::Message::Match(jsont::Match { + path: self.path, + lines: mat.bytes(), + line_number: mat.line_number(), + absolute_offset: mat.absolute_byte_offset(), + submatches: submatches.as_slice(), + }); + self.json.write_message(&msg)?; + Ok(!self.should_quit()) + } + + fn context( + &mut self, + searcher: &Searcher, + ctx: &SinkContext, + ) -> Result { + self.write_begin_message()?; + self.json.matches.clear(); + + if ctx.kind() == &SinkContextKind::After { + 
self.after_context_remaining = + self.after_context_remaining.saturating_sub(1); + } + let submatches = + if searcher.invert_match() { + self.record_matches(ctx.bytes())?; + SubMatches::new(ctx.bytes(), &self.json.matches) + } else { + SubMatches::empty() + }; + let msg = jsont::Message::Context(jsont::Context { + path: self.path, + lines: ctx.bytes(), + line_number: ctx.line_number(), + absolute_offset: ctx.absolute_byte_offset(), + submatches: submatches.as_slice(), + }); + self.json.write_message(&msg)?; + Ok(!self.should_quit()) + } + + fn begin( + &mut self, + _searcher: &Searcher, + ) -> Result { + self.json.wtr.reset_count(); + self.start_time = Instant::now(); + self.match_count = 0; + self.after_context_remaining = 0; + self.binary_byte_offset = None; + if self.json.config.max_matches == Some(0) { + return Ok(false); + } + + if !self.json.config.always_begin_end { + return Ok(true); + } + self.write_begin_message()?; + Ok(true) + } + + fn finish( + &mut self, + _searcher: &Searcher, + finish: &SinkFinish, + ) -> Result<(), io::Error> { + if !self.begin_printed { + return Ok(()); + } + + self.binary_byte_offset = finish.binary_byte_offset(); + self.stats.add_elapsed(self.start_time.elapsed()); + self.stats.add_searches(1); + if self.match_count > 0 { + self.stats.add_searches_with_match(1); + } + self.stats.add_bytes_searched(finish.byte_count()); + self.stats.add_bytes_printed(self.json.wtr.count()); + + let msg = jsont::Message::End(jsont::End { + path: self.path, + binary_offset: finish.binary_byte_offset(), + stats: self.stats.clone(), + }); + self.json.write_message(&msg)?; + Ok(()) + } +} + +/// SubMatches represents a set of matches in a contiguous range of bytes. +/// +/// A simpler representation for this would just simply be `Vec`, +/// but the common case is exactly one match per range of bytes, which we +/// specialize here using a fixed size array without any allocation. 
+enum SubMatches<'a> { + Empty, + Small([jsont::SubMatch<'a>; 1]), + Big(Vec>), +} + +impl<'a> SubMatches<'a> { + /// Create a new set of match ranges from a set of matches and the + /// corresponding bytes that those matches apply to. + fn new(bytes: &'a[u8], matches: &[Match]) -> SubMatches<'a> { + if matches.len() == 1 { + let mat = matches[0]; + SubMatches::Small([jsont::SubMatch { + m: &bytes[mat], + start: mat.start(), + end: mat.end(), + }]) + } else { + let mut match_ranges = vec![]; + for &mat in matches { + match_ranges.push(jsont::SubMatch { + m: &bytes[mat], + start: mat.start(), + end: mat.end(), + }); + } + SubMatches::Big(match_ranges) + } + } + + /// Create an empty set of match ranges. + fn empty() -> SubMatches<'static> { + SubMatches::Empty + } + + /// Return this set of match ranges as a slice. + fn as_slice(&self) -> &[jsont::SubMatch] { + match *self { + SubMatches::Empty => &[], + SubMatches::Small(ref x) => x, + SubMatches::Big(ref x) => x, + } + } +} + +#[cfg(test)] +mod tests { + use grep_regex::RegexMatcher; + use grep_searcher::SearcherBuilder; + + use super::{JSON, JSONBuilder}; + + const SHERLOCK: &'static [u8] = b"\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + + fn printer_contents( + printer: &mut JSON>, + ) -> String { + String::from_utf8(printer.get_mut().to_owned()).unwrap() + } + + #[test] + fn binary_detection() { + use grep_searcher::BinaryDetection; + + const BINARY: &'static [u8] = b"\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. 
Sherlock Holmes +can extract a clew \x00 from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached.\ +"; + + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = JSONBuilder::new() + .build(vec![]); + SearcherBuilder::new() + .binary_detection(BinaryDetection::quit(b'\x00')) + .heap_limit(Some(80)) + .build() + .search_reader(&matcher, BINARY, printer.sink(&matcher)) + .unwrap(); + let got = printer_contents(&mut printer); + + assert_eq!(got.lines().count(), 3); + let last = got.lines().last().unwrap(); + assert!(last.contains(r#""binary_offset":212,"#)); + } + + #[test] + fn max_matches() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = JSONBuilder::new() + .max_matches(Some(1)) + .build(vec![]); + SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) + .unwrap(); + let got = printer_contents(&mut printer); + + assert_eq!(got.lines().count(), 3); + } + + #[test] + fn no_match() { + let matcher = RegexMatcher::new( + r"DOES NOT MATCH" + ).unwrap(); + let mut printer = JSONBuilder::new() + .build(vec![]); + SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) + .unwrap(); + let got = printer_contents(&mut printer); + + assert!(got.is_empty()); + } + + #[test] + fn always_begin_end_no_match() { + let matcher = RegexMatcher::new( + r"DOES NOT MATCH" + ).unwrap(); + let mut printer = JSONBuilder::new() + .always_begin_end(true) + .build(vec![]); + SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) + .unwrap(); + let got = printer_contents(&mut printer); + + assert_eq!(got.lines().count(), 2); + assert!(got.contains("begin") && got.contains("end")); + } +} diff -Nru ripgrep-0.6.0/grep-printer/src/jsont.rs ripgrep-0.10.0.3/grep-printer/src/jsont.rs --- ripgrep-0.6.0/grep-printer/src/jsont.rs 
1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/jsont.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,147 @@ +// This module defines the types we use for JSON serialization. We specifically +// omit deserialization, partially because there isn't a clear use case for +// them at this time, but also because deserialization will complicate things. +// Namely, the types below are designed in a way that permits JSON +// serialization with little or no allocation. Allocation is often quite +// convenient for deserialization however, so these types would become a bit +// more complex. + +use std::borrow::Cow; +use std::path::Path; +use std::str; + +use base64; +use serde::{Serialize, Serializer}; + +use stats::Stats; + +#[derive(Serialize)] +#[serde(tag = "type", content = "data")] +#[serde(rename_all = "snake_case")] +pub enum Message<'a> { + Begin(Begin<'a>), + End(End<'a>), + Match(Match<'a>), + Context(Context<'a>), +} + +#[derive(Serialize)] +pub struct Begin<'a> { + #[serde(serialize_with = "ser_path")] + pub path: Option<&'a Path>, +} + +#[derive(Serialize)] +pub struct End<'a> { + #[serde(serialize_with = "ser_path")] + pub path: Option<&'a Path>, + pub binary_offset: Option, + pub stats: Stats, +} + +#[derive(Serialize)] +pub struct Match<'a> { + #[serde(serialize_with = "ser_path")] + pub path: Option<&'a Path>, + #[serde(serialize_with = "ser_bytes")] + pub lines: &'a [u8], + pub line_number: Option, + pub absolute_offset: u64, + pub submatches: &'a [SubMatch<'a>], +} + +#[derive(Serialize)] +pub struct Context<'a> { + #[serde(serialize_with = "ser_path")] + pub path: Option<&'a Path>, + #[serde(serialize_with = "ser_bytes")] + pub lines: &'a [u8], + pub line_number: Option, + pub absolute_offset: u64, + pub submatches: &'a [SubMatch<'a>], +} + +#[derive(Serialize)] +pub struct SubMatch<'a> { + #[serde(rename = "match")] + #[serde(serialize_with = "ser_bytes")] + pub m: &'a [u8], + pub start: usize, + pub end: usize, +} + +/// Data 
represents things that look like strings, but may actually not be +/// valid UTF-8. To handle this, `Data` is serialized as an object with one +/// of two keys: `text` (for valid UTF-8) or `bytes` (for invalid UTF-8). +/// +/// The happy path is valid UTF-8, which streams right through as-is, since +/// it is natively supported by JSON. When invalid UTF-8 is found, then it is +/// represented as arbitrary bytes and base64 encoded. +#[derive(Clone, Debug, Hash, PartialEq, Eq, Serialize)] +#[serde(untagged)] +enum Data<'a> { + Text { text: Cow<'a, str> }, + Bytes { + #[serde(serialize_with = "to_base64")] + bytes: &'a [u8], + }, +} + +impl<'a> Data<'a> { + fn from_bytes(bytes: &[u8]) -> Data { + match str::from_utf8(bytes) { + Ok(text) => Data::Text { text: Cow::Borrowed(text) }, + Err(_) => Data::Bytes { bytes }, + } + } + + #[cfg(unix)] + fn from_path(path: &Path) -> Data { + use std::os::unix::ffi::OsStrExt; + + match path.to_str() { + Some(text) => Data::Text { text: Cow::Borrowed(text) }, + None => Data::Bytes { bytes: path.as_os_str().as_bytes() }, + } + } + + #[cfg(not(unix))] + fn from_path(path: &Path) -> Data { + // Using lossy conversion means some paths won't round trip precisely, + // but it's not clear what we should actually do. Serde rejects + // non-UTF-8 paths, and OsStr's are serialized as a sequence of UTF-16 + // code units on Windows. Neither seem appropriate for this use case, + // so we do the easy thing for now. + Data::Text { text: path.to_string_lossy() } + } +} + +fn to_base64( + bytes: T, + ser: S, +) -> Result +where T: AsRef<[u8]>, + S: Serializer +{ + ser.serialize_str(&base64::encode(&bytes)) +} + +fn ser_bytes( + bytes: T, + ser: S, +) -> Result +where T: AsRef<[u8]>, + S: Serializer +{ + Data::from_bytes(bytes.as_ref()).serialize(ser) +} + +fn ser_path( + path: &Option

, + ser: S, +) -> Result +where P: AsRef, + S: Serializer +{ + path.as_ref().map(|p| Data::from_path(p.as_ref())).serialize(ser) +} diff -Nru ripgrep-0.6.0/grep-printer/src/lib.rs ripgrep-0.10.0.3/grep-printer/src/lib.rs --- ripgrep-0.6.0/grep-printer/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/lib.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,106 @@ +/*! +This crate provides featureful and fast printers that interoperate with the +[`grep-searcher`](https://docs.rs/grep-searcher) +crate. + +# Brief overview + +The [`Standard`](struct.Standard.html) printer shows results in a human +readable format, and is modeled after the formats used by standard grep-like +tools. Features include, but are not limited to, cross platform terminal +coloring, search & replace, multi-line result handling and reporting summary +statistics. + +The [`JSON`](struct.JSON.html) printer shows results in a machine readable +format. To facilitate a stream of search results, the format uses +[JSON Lines](http://jsonlines.org/) +by emitting a series of messages as search results are found. + +The [`Summary`](struct.Summary.html) printer shows *aggregate* results for a +single search in a human readable format, and is modeled after similar formats +found in standard grep-like tools. This printer is useful for showing the total +number of matches and/or printing file paths that either contain or don't +contain matches. + +# Example + +This example shows how to create a "standard" printer and execute a search. + +``` +extern crate grep_regex; +extern crate grep_printer; +extern crate grep_searcher; + +use std::error::Error; + +use grep_regex::RegexMatcher; +use grep_printer::Standard; +use grep_searcher::Searcher; + +const SHERLOCK: &'static [u8] = b"\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. 
Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + +# fn main() { example().unwrap(); } +fn example() -> Result<(), Box> { + let matcher = RegexMatcher::new(r"Sherlock")?; + let mut printer = Standard::new_no_color(vec![]); + Searcher::new().search_slice(&matcher, SHERLOCK, printer.sink(&matcher))?; + + // into_inner gives us back the underlying writer we provided to + // new_no_color, which is wrapped in a termcolor::NoColor. Thus, a second + // into_inner gives us back the actual buffer. + let output = String::from_utf8(printer.into_inner().into_inner())?; + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +3:be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq!(output, expected); + Ok(()) +} +``` +*/ + +#![deny(missing_docs)] + +#[cfg(feature = "serde1")] +extern crate base64; +extern crate grep_matcher; +#[cfg(test)] +extern crate grep_regex; +extern crate grep_searcher; +#[cfg(feature = "serde1")] +extern crate serde; +#[cfg(feature = "serde1")] +#[macro_use] +extern crate serde_derive; +#[cfg(feature = "serde1")] +extern crate serde_json; +extern crate termcolor; + +pub use color::{ColorError, ColorSpecs, UserColorSpec, default_color_specs}; +#[cfg(feature = "serde1")] +pub use json::{JSON, JSONBuilder, JSONSink}; +pub use standard::{Standard, StandardBuilder, StandardSink}; +pub use stats::Stats; +pub use summary::{Summary, SummaryBuilder, SummaryKind, SummarySink}; +pub use util::PrinterPath; + +#[macro_use] +mod macros; + +mod color; +mod counter; +#[cfg(feature = "serde1")] +mod json; +#[cfg(feature = "serde1")] +mod jsont; +mod standard; +mod stats; +mod summary; +mod util; diff -Nru ripgrep-0.6.0/grep-printer/src/macros.rs ripgrep-0.10.0.3/grep-printer/src/macros.rs --- ripgrep-0.6.0/grep-printer/src/macros.rs 1970-01-01 00:00:00.000000000 +0000 
+++ ripgrep-0.10.0.3/grep-printer/src/macros.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,23 @@ +#[cfg(test)] +#[macro_export] +macro_rules! assert_eq_printed { + ($expected:expr, $got:expr) => { + let expected = &*$expected; + let got = &*$got; + if expected != got { + panic!(" +printed outputs differ! + +expected: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +got: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +", expected, got); + } + } +} diff -Nru ripgrep-0.6.0/grep-printer/src/standard.rs ripgrep-0.10.0.3/grep-printer/src/standard.rs --- ripgrep-0.6.0/grep-printer/src/standard.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/standard.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,3053 @@ +use std::cell::{Cell, RefCell}; +use std::cmp; +use std::io::{self, Write}; +use std::path::Path; +use std::sync::Arc; +use std::time::Instant; + +use grep_matcher::{Match, Matcher}; +use grep_searcher::{ + LineStep, Searcher, + Sink, SinkError, + SinkContext, SinkContextKind, SinkFinish, SinkMatch, +}; +use termcolor::{ColorSpec, NoColor, WriteColor}; + +use color::ColorSpecs; +use counter::CounterWriter; +use stats::Stats; +use util::{ + PrinterPath, Replacer, Sunk, + trim_ascii_prefix, trim_ascii_prefix_range, +}; + +/// The configuration for the standard printer. +/// +/// This is manipulated by the StandardBuilder and then referenced by the +/// actual implementation. Once a printer is build, the configuration is frozen +/// and cannot changed. 
+#[derive(Debug, Clone)] +struct Config { + colors: ColorSpecs, + stats: bool, + heading: bool, + path: bool, + only_matching: bool, + per_match: bool, + replacement: Arc>>, + max_columns: Option, + max_matches: Option, + column: bool, + byte_offset: bool, + trim_ascii: bool, + separator_search: Arc>>, + separator_context: Arc>>, + separator_field_match: Arc>, + separator_field_context: Arc>, + separator_path: Option, + path_terminator: Option, +} + +impl Default for Config { + fn default() -> Config { + Config { + colors: ColorSpecs::default(), + stats: false, + heading: false, + path: true, + only_matching: false, + per_match: false, + replacement: Arc::new(None), + max_columns: None, + max_matches: None, + column: false, + byte_offset: false, + trim_ascii: false, + separator_search: Arc::new(None), + separator_context: Arc::new(Some(b"--".to_vec())), + separator_field_match: Arc::new(b":".to_vec()), + separator_field_context: Arc::new(b"-".to_vec()), + separator_path: None, + path_terminator: None, + } + } +} + +/// A builder for the "standard" grep-like printer. +/// +/// The builder permits configuring how the printer behaves. Configurable +/// behavior includes, but is not limited to, limiting the number of matches, +/// tweaking separators, executing pattern replacements, recording statistics +/// and setting colors. +/// +/// Some configuration options, such as the display of line numbers or +/// contextual lines, are drawn directly from the +/// `grep_searcher::Searcher`'s configuration. +/// +/// Once a `Standard` printer is built, its configuration cannot be changed. +#[derive(Clone, Debug)] +pub struct StandardBuilder { + config: Config, +} + +impl StandardBuilder { + /// Return a new builder for configuring the standard printer. + pub fn new() -> StandardBuilder { + StandardBuilder { config: Config::default() } + } + + /// Build a printer using any implementation of `termcolor::WriteColor`. 
+ /// + /// The implementation of `WriteColor` used here controls whether colors + /// are used or not when colors have been configured using the + /// `color_specs` method. + /// + /// For maximum portability, callers should generally use either + /// `termcolor::StandardStream` or `termcolor::BufferedStandardStream` + /// where appropriate, which will automatically enable colors on Windows + /// when possible. + /// + /// However, callers may also provide an arbitrary writer using the + /// `termcolor::Ansi` or `termcolor::NoColor` wrappers, which always enable + /// colors via ANSI escapes or always disable colors, respectively. + /// + /// As a convenience, callers may use `build_no_color` to automatically + /// select the `termcolor::NoColor` wrapper to avoid needing to import + /// from `termcolor` explicitly. + pub fn build(&self, wtr: W) -> Standard { + Standard { + config: self.config.clone(), + wtr: RefCell::new(CounterWriter::new(wtr)), + matches: vec![], + } + } + + /// Build a printer from any implementation of `io::Write` and never emit + /// any colors, regardless of the user color specification settings. + /// + /// This is a convenience routine for + /// `StandardBuilder::build(termcolor::NoColor::new(wtr))`. + pub fn build_no_color( + &self, + wtr: W, + ) -> Standard> { + self.build(NoColor::new(wtr)) + } + + /// Set the user color specifications to use for coloring in this printer. + /// + /// A [`UserColorSpec`](struct.UserColorSpec.html) can be constructed from + /// a string in accordance with the color specification format. See the + /// `UserColorSpec` type documentation for more details on the format. + /// A [`ColorSpecs`](struct.ColorSpecs.html) can then be generated from + /// zero or more `UserColorSpec`s. + /// + /// Regardless of the color specifications provided here, whether color + /// is actually used or not is determined by the implementation of + /// `WriteColor` provided to `build`. 
For example, if `termcolor::NoColor` + /// is provided to `build`, then no color will ever be printed regardless + /// of the color specifications provided here. + /// + /// This completely overrides any previous color specifications. This does + /// not add to any previously provided color specifications on this + /// builder. + pub fn color_specs( + &mut self, + specs: ColorSpecs, + ) -> &mut StandardBuilder { + self.config.colors = specs; + self + } + + /// Enable the gathering of various aggregate statistics. + /// + /// When this is enabled (it's disabled by default), statistics will be + /// gathered for all uses of `Standard` printer returned by `build`, + /// including but not limited to, the total number of matches, the total + /// number of bytes searched and the total number of bytes printed. + /// + /// Aggregate statistics can be accessed via the sink's + /// [`StandardSink::stats`](struct.StandardSink.html#method.stats) + /// method. + /// + /// When this is enabled, this printer may need to do extra work in order + /// to compute certain statistics, which could cause the search to take + /// longer. + /// + /// For a complete description of available statistics, see + /// [`Stats`](struct.Stats.html). + pub fn stats(&mut self, yes: bool) -> &mut StandardBuilder { + self.config.stats = yes; + self + } + + /// Enable the use of "headings" in the printer. + /// + /// When this is enabled, and if a file path has been given to the printer, + /// then the file path will be printed once on its own line before showing + /// any matches. If the heading is not the first thing emitted by the + /// printer, then a line terminator is printed before the heading. + /// + /// By default, this option is disabled. When disabled, the printer will + /// not show any heading and will instead print the file path (if one is + /// given) on the same line as each matching (or context) line. 
+ pub fn heading(&mut self, yes: bool) -> &mut StandardBuilder { + self.config.heading = yes; + self + } + + /// When enabled, if a path was given to the printer, then it is shown in + /// the output (either as a heading or as a prefix to each matching line). + /// When disabled, then no paths are ever included in the output even when + /// a path is provided to the printer. + /// + /// This is enabled by default. + pub fn path(&mut self, yes: bool) -> &mut StandardBuilder { + self.config.path = yes; + self + } + + /// Only print the specific matches instead of the entire line containing + /// each match. Each match is printed on its own line. When multi line + /// search is enabled, then matches spanning multiple lines are printed + /// such that only the matching portions of each line are shown. + pub fn only_matching(&mut self, yes: bool) -> &mut StandardBuilder { + self.config.only_matching = yes; + self + } + + /// Print at least one line for every match. + /// + /// This is similar to the `only_matching` option, except the entire line + /// is printed for each match. This is typically useful in conjunction with + /// the `column` option, which will show the starting column number for + /// every match on every line. + /// + /// When multi-line mode is enabled, each match and its accompanying lines + /// are printed. As with single line matches, if a line contains multiple + /// matches (even if only partially), then that line is printed once for + /// each match it participates in. + pub fn per_match(&mut self, yes: bool) -> &mut StandardBuilder { + self.config.per_match = yes; + self + } + + /// Set the bytes that will be used to replace each occurrence of a match + /// found. + /// + /// The replacement bytes given may include references to capturing groups, + /// which may either be in index form (e.g., `$2`) or can reference named + /// capturing groups if present in the original pattern (e.g., `$foo`). 
+ /// + /// For documentation on the full format, please see the `Capture` trait's + /// `interpolate` method in the + /// [grep-printer](https://docs.rs/grep-printer) crate. + pub fn replacement( + &mut self, + replacement: Option>, + ) -> &mut StandardBuilder { + self.config.replacement = Arc::new(replacement); + self + } + + /// Set the maximum number of columns allowed for each line printed. A + /// single column is heuristically defined as a single byte. + /// + /// If a line is found which exceeds this maximum, then it is replaced + /// with a message indicating that the line has been omitted. + /// + /// The default is to not specify a limit, in which each matching or + /// contextual line is printed regardless of how long it is. + pub fn max_columns(&mut self, limit: Option) -> &mut StandardBuilder { + self.config.max_columns = limit; + self + } + + /// Set the maximum amount of matching lines that are printed. + /// + /// If multi line search is enabled and a match spans multiple lines, then + /// that match is counted exactly once for the purposes of enforcing this + /// limit, regardless of how many lines it spans. + pub fn max_matches(&mut self, limit: Option) -> &mut StandardBuilder { + self.config.max_matches = limit; + self + } + + /// Print the column number of the first match in a line. + /// + /// This option is convenient for use with `per_match` which will print a + /// line for every match along with the starting offset for that match. + /// + /// Column numbers are computed in terms of bytes from the start of the + /// line being printed. + /// + /// For matches that span multiple lines, the column number for each + /// matching line is in terms of the first matching line. + /// + /// This is disabled by default. + pub fn column(&mut self, yes: bool) -> &mut StandardBuilder { + self.config.column = yes; + self + } + + /// Print the absolute byte offset of the beginning of each line printed. 
+ /// + /// The absolute byte offset starts from the beginning of each search and + /// is zero based. + /// + /// If the `only_matching` option is set, then this will print the absolute + /// byte offset of the beginning of each match. + pub fn byte_offset(&mut self, yes: bool) -> &mut StandardBuilder { + self.config.byte_offset = yes; + self + } + + /// When enabled, all lines will have prefix ASCII whitespace trimmed + /// before being written. + /// + /// This is disabled by default. + pub fn trim_ascii(&mut self, yes: bool) -> &mut StandardBuilder { + self.config.trim_ascii = yes; + self + } + + /// Set the separator used between sets of search results. + /// + /// When this is set, then it will be printed on its own line immediately + /// before the results for a single search if and only if a previous search + /// had already printed results. In effect, this permits showing a divider + /// between sets of search results that does not appear at the beginning + /// or end of all search results. + /// + /// To reproduce the classic grep format, this is typically set to `--` + /// (the same as the context separator) if and only if contextual lines + /// have been requested, but disabled otherwise. + /// + /// By default, this is disabled. + pub fn separator_search( + &mut self, + sep: Option>, + ) -> &mut StandardBuilder { + self.config.separator_search = Arc::new(sep); + self + } + + /// Set the separator used between discontiguous runs of search context, + /// but only when the searcher is configured to report contextual lines. + /// + /// The separator is always printed on its own line, even if it's empty. + /// + /// If no separator is set, then nothing is printed when a context break + /// occurs. + /// + /// By default, this is set to `--`. 
+ pub fn separator_context( + &mut self, + sep: Option>, + ) -> &mut StandardBuilder { + self.config.separator_context = Arc::new(sep); + self + } + + /// Set the separator used between fields emitted for matching lines. + /// + /// For example, when the searcher has line numbers enabled, this printer + /// will print the line number before each matching line. The bytes given + /// here will be written after the line number but before the matching + /// line. + /// + /// By default, this is set to `:`. + pub fn separator_field_match( + &mut self, + sep: Vec, + ) -> &mut StandardBuilder { + self.config.separator_field_match = Arc::new(sep); + self + } + + /// Set the separator used between fields emitted for context lines. + /// + /// For example, when the searcher has line numbers enabled, this printer + /// will print the line number before each context line. The bytes given + /// here will be written after the line number but before the context + /// line. + /// + /// By default, this is set to `-`. + pub fn separator_field_context( + &mut self, + sep: Vec, + ) -> &mut StandardBuilder { + self.config.separator_field_context = Arc::new(sep); + self + } + + /// Set the path separator used when printing file paths. + /// + /// When a printer is configured with a file path, and when a match is + /// found, that file path will be printed (either as a heading or as a + /// prefix to each matching or contextual line, depending on other + /// configuration settings). Typically, printing is done by emitting the + /// file path as is. However, this setting provides the ability to use a + /// different path separator from what the current environment has + /// configured. + /// + /// A typical use for this option is to permit cygwin users on Windows to + /// set the path separator to `/` instead of using the system default of + /// `\`. 
+ pub fn separator_path( + &mut self, + sep: Option, + ) -> &mut StandardBuilder { + self.config.separator_path = sep; + self + } + + /// Set the path terminator used. + /// + /// The path terminator is a byte that is printed after every file path + /// emitted by this printer. + /// + /// If no path terminator is set (the default), then paths are terminated + /// by either new lines (for when `heading` is enabled) or the match or + /// context field separators (e.g., `:` or `-`). + pub fn path_terminator( + &mut self, + terminator: Option, + ) -> &mut StandardBuilder { + self.config.path_terminator = terminator; + self + } +} + +/// The standard printer, which implements grep-like formatting, including +/// color support. +/// +/// A default printer can be created with either of the `Standard::new` or +/// `Standard::new_no_color` constructors. However, there are a considerable +/// number of options that configure this printer's output. Those options can +/// be configured using [`StandardBuilder`](struct.StandardBuilder.html). +/// +/// This type is generic over `W`, which represents any implementation +/// of the `termcolor::WriteColor` trait. If colors are not desired, +/// then the `new_no_color` constructor can be used, or, alternatively, +/// the `termcolor::NoColor` adapter can be used to wrap any `io::Write` +/// implementation without enabling any colors. +#[derive(Debug)] +pub struct Standard { + config: Config, + wtr: RefCell>, + matches: Vec, +} + +impl Standard { + /// Return a standard printer with a default configuration that writes + /// matches to the given writer. + /// + /// The writer should be an implementation of `termcolor::WriteColor` + /// and not just a bare implementation of `io::Write`. To use a normal + /// `io::Write` implementation (simultaneously sacrificing colors), use + /// the `new_no_color` constructor. 
+    pub fn new(wtr: W) -> Standard<W> {
+        StandardBuilder::new().build(wtr)
+    }
+}
+
+impl<W: io::Write> Standard<NoColor<W>> {
+    /// Return a standard printer with a default configuration that writes
+    /// matches to the given writer.
+    ///
+    /// The writer can be any implementation of `io::Write`. With this
+    /// constructor, the printer will never emit colors.
+    pub fn new_no_color(wtr: W) -> Standard<NoColor<W>> {
+        StandardBuilder::new().build_no_color(wtr)
+    }
+}
+
+impl<W: WriteColor> Standard<W> {
+    /// Return an implementation of `Sink` for the standard printer.
+    ///
+    /// This does not associate the printer with a file path, which means this
+    /// implementation will never print a file path along with the matches.
+    pub fn sink<'s, M: Matcher>(
+        &'s mut self,
+        matcher: M,
+    ) -> StandardSink<'static, 's, M, W> {
+        // Statistics are only tracked when they were requested.
+        let stats =
+            if self.config.stats {
+                Some(Stats::new())
+            } else {
+                None
+            };
+        // Computed once up front and stored on the sink, which consults it
+        // for every reported line.
+        let needs_match_granularity = self.needs_match_granularity();
+        StandardSink {
+            matcher: matcher,
+            standard: self,
+            replacer: Replacer::new(),
+            path: None,
+            start_time: Instant::now(),
+            match_count: 0,
+            after_context_remaining: 0,
+            binary_byte_offset: None,
+            stats: stats,
+            needs_match_granularity: needs_match_granularity,
+        }
+    }
+
+    /// Return an implementation of `Sink` associated with a file path.
+    ///
+    /// When the printer is associated with a path, then it may, depending on
+    /// its configuration, print the path along with the matches found.
+ pub fn sink_with_path<'p, 's, M, P>( + &'s mut self, + matcher: M, + path: &'p P, + ) -> StandardSink<'p, 's, M, W> + where M: Matcher, + P: ?Sized + AsRef, + { + if !self.config.path { + return self.sink(matcher); + } + let stats = + if self.config.stats { + Some(Stats::new()) + } else { + None + }; + let ppath = PrinterPath::with_separator( + path.as_ref(), self.config.separator_path); + let needs_match_granularity = self.needs_match_granularity(); + StandardSink { + matcher: matcher, + standard: self, + replacer: Replacer::new(), + path: Some(ppath), + start_time: Instant::now(), + match_count: 0, + after_context_remaining: 0, + binary_byte_offset: None, + stats: stats, + needs_match_granularity: needs_match_granularity, + } + } + + /// Returns true if and only if the configuration of the printer requires + /// us to find each individual match in the lines reported by the searcher. + /// + /// We care about this distinction because finding each individual match + /// costs more, so we only do it when we need to. + fn needs_match_granularity(&self) -> bool { + let supports_color = self.wtr.borrow().supports_color(); + let match_colored = !self.config.colors.matched().is_none(); + + // Coloring requires identifying each individual match. + (supports_color && match_colored) + // The column feature requires finding the position of the first match. + || self.config.column + // Requires finding each match for performing replacement. + || self.config.replacement.is_some() + // Emitting a line for each match requires finding each match. + || self.config.per_match + // Emitting only the match requires finding each match. + || self.config.only_matching + // Computing certain statistics requires finding each match. + || self.config.stats + } +} + +impl Standard { + /// Returns true if and only if this printer has written at least one byte + /// to the underlying writer during any of the previous searches. 
+    pub fn has_written(&self) -> bool {
+        // `total_count` is the running total kept by the counting writer,
+        // as opposed to `count`, which is reset at the start of each search.
+        self.wtr.borrow().total_count() > 0
+    }
+
+    /// Return a mutable reference to the underlying writer.
+    pub fn get_mut(&mut self) -> &mut W {
+        // The first `get_mut` goes through the `RefCell`, the second through
+        // the counting wrapper around the caller's writer.
+        self.wtr.get_mut().get_mut()
+    }
+
+    /// Consume this printer and return back ownership of the underlying
+    /// writer.
+    pub fn into_inner(self) -> W {
+        // Unwrap the `RefCell` first, then the counting wrapper.
+        self.wtr.into_inner().into_inner()
+    }
+}
+
+/// An implementation of `Sink` associated with a matcher and an optional file
+/// path for the standard printer.
+///
+/// A `Sink` can be created via the
+/// [`Standard::sink`](struct.Standard.html#method.sink)
+/// or
+/// [`Standard::sink_with_path`](struct.Standard.html#method.sink_with_path)
+/// methods, depending on whether you want to include a file path in the
+/// printer's output.
+///
+/// Building a `StandardSink` is cheap, and callers should create a new one
+/// for each thing that is searched. After a search has completed, callers may
+/// query this sink for information such as whether a match occurred or whether
+/// binary data was found (and if so, the offset at which it occurred).
+///
+/// This type is generic over a few type parameters:
+///
+/// * `'p` refers to the lifetime of the file path, if one is provided. When
+///   no file path is given, then this is `'static`.
+/// * `'s` refers to the lifetime of the
+///   [`Standard`](struct.Standard.html)
+///   printer that this type borrows.
+/// * `M` refers to the type of matcher used by
+///   `grep_searcher::Searcher` that is reporting results to this sink.
+/// * `W` refers to the underlying writer that this printer is writing its
+///   output to.
+#[derive(Debug)]
+pub struct StandardSink<'p, 's, M: Matcher, W: 's> {
+    // The matcher used to locate individual matches and run replacements.
+    matcher: M,
+    // The printer this sink writes through.
+    standard: &'s mut Standard<W>,
+    // Holds the result of the most recent replacement, if any.
+    replacer: Replacer<M>,
+    // The path printed with matches; `None` when no path is associated.
+    path: Option<PrinterPath<'p>>,
+    // When the current search began; used to report elapsed time in stats.
+    start_time: Instant,
+    // The number of times `Sink::matched` was called in the current search.
+    match_count: u64,
+    // How many "after" context lines may still follow the last match.
+    after_context_remaining: u64,
+    // Where binary data was detected in the last search, if it was.
+    binary_byte_offset: Option<u64>,
+    // Aggregate statistics; `None` unless stats were requested.
+    stats: Option<Stats>,
+    // Whether each individual match must be located before printing.
+    needs_match_granularity: bool,
+}
+
+impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
+    /// Returns true if and only if this printer received a match in the
+    /// previous search.
+    ///
+    /// This is unaffected by the result of searches before the previous
+    /// search on this sink.
+    pub fn has_match(&self) -> bool {
+        self.match_count > 0
+    }
+
+    /// Return the total number of matches reported to this sink.
+    ///
+    /// This corresponds to the number of times `Sink::matched` is called
+    /// on the previous search.
+    ///
+    /// This is unaffected by the result of searches before the previous
+    /// search on this sink.
+    pub fn match_count(&self) -> u64 {
+        self.match_count
+    }
+
+    /// If binary data was found in the previous search, this returns the
+    /// offset at which the binary data was first detected.
+    ///
+    /// The offset returned is an absolute offset relative to the entire
+    /// set of bytes searched.
+    ///
+    /// This is unaffected by the result of searches before the previous
+    /// search. e.g., If the search prior to the previous search found binary
+    /// data but the previous search found no binary data, then this will
+    /// return `None`.
+    pub fn binary_byte_offset(&self) -> Option<u64> {
+        self.binary_byte_offset
+    }
+
+    /// Return a reference to the stats produced by the printer for all
+    /// searches executed on this sink.
+    ///
+    /// This only returns stats if they were requested via the
+    /// [`StandardBuilder`](struct.StandardBuilder.html)
+    /// configuration.
+ pub fn stats(&self) -> Option<&Stats> { + self.stats.as_ref() + } + + /// Execute the matcher over the given bytes and record the match + /// locations if the current configuration demands match granularity. + fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> { + self.standard.matches.clear(); + if !self.needs_match_granularity { + return Ok(()); + } + // If printing requires knowing the location of each individual match, + // then compute and stored those right now for use later. While this + // adds an extra copy for storing the matches, we do amortize the + // allocation for it and this greatly simplifies the printing logic to + // the extent that it's easy to ensure that we never do more than + // one search to find the matches (well, for replacements, we do one + // additional search to perform the actual replacement). + let matches = &mut self.standard.matches; + self.matcher.find_iter(bytes, |m| { + matches.push(m); + true + }).map_err(io::Error::error_message)?; + // Don't report empty matches appearing at the end of the bytes. + if !matches.is_empty() + && matches.last().unwrap().is_empty() + && matches.last().unwrap().start() >= bytes.len() + { + matches.pop().unwrap(); + } + Ok(()) + } + + /// If the configuration specifies a replacement, then this executes the + /// replacement, lazily allocating memory if necessary. + /// + /// To access the result of a replacement, use `replacer.replacement()`. + fn replace(&mut self, bytes: &[u8]) -> io::Result<()> { + self.replacer.clear(); + if self.standard.config.replacement.is_some() { + let replacement = (*self.standard.config.replacement) + .as_ref() + .map(|r| &*r) + .unwrap(); + self.replacer.replace_all( + &self.matcher, + bytes, + replacement, + )?; + } + Ok(()) + } + + /// Returns true if this printer should quit. + /// + /// This implements the logic for handling quitting after seeing a certain + /// amount of matches. 
In most cases, the logic is simple, but we must + /// permit all "after" contextual lines to print after reaching the limit. + fn should_quit(&self) -> bool { + let limit = match self.standard.config.max_matches { + None => return false, + Some(limit) => limit, + }; + if self.match_count < limit { + return false; + } + self.after_context_remaining == 0 + } +} + +impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { + type Error = io::Error; + + fn matched( + &mut self, + searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + self.match_count += 1; + self.after_context_remaining = searcher.after_context() as u64; + + self.record_matches(mat.bytes())?; + self.replace(mat.bytes())?; + + if let Some(ref mut stats) = self.stats { + stats.add_matches(self.standard.matches.len() as u64); + stats.add_matched_lines(mat.lines().count() as u64); + } + + StandardImpl::from_match(searcher, self, mat).sink()?; + Ok(!self.should_quit()) + } + + fn context( + &mut self, + searcher: &Searcher, + ctx: &SinkContext, + ) -> Result { + self.standard.matches.clear(); + self.replacer.clear(); + + if ctx.kind() == &SinkContextKind::After { + self.after_context_remaining = + self.after_context_remaining.saturating_sub(1); + } + if searcher.invert_match() { + self.record_matches(ctx.bytes())?; + self.replace(ctx.bytes())?; + } + StandardImpl::from_context(searcher, self, ctx).sink()?; + Ok(!self.should_quit()) + } + + fn context_break( + &mut self, + searcher: &Searcher, + ) -> Result { + StandardImpl::new(searcher, self).write_context_separator()?; + Ok(true) + } + + fn begin( + &mut self, + _searcher: &Searcher, + ) -> Result { + self.standard.wtr.borrow_mut().reset_count(); + self.start_time = Instant::now(); + self.match_count = 0; + self.after_context_remaining = 0; + self.binary_byte_offset = None; + if self.standard.config.max_matches == Some(0) { + return Ok(false); + } + Ok(true) + } + + fn finish( + &mut self, + _searcher: &Searcher, + finish: 
&SinkFinish, + ) -> Result<(), io::Error> { + self.binary_byte_offset = finish.binary_byte_offset(); + if let Some(stats) = self.stats.as_mut() { + stats.add_elapsed(self.start_time.elapsed()); + stats.add_searches(1); + if self.match_count > 0 { + stats.add_searches_with_match(1); + } + stats.add_bytes_searched(finish.byte_count()); + stats.add_bytes_printed(self.standard.wtr.borrow().count()); + } + Ok(()) + } +} + +/// The actual implementation of the standard printer. This couples together +/// the searcher, the sink implementation and information about the match. +/// +/// A StandardImpl is initialized every time a match or a contextual line is +/// reported. +#[derive(Debug)] +struct StandardImpl<'a, M: 'a + Matcher, W: 'a> { + searcher: &'a Searcher, + sink: &'a StandardSink<'a, 'a, M, W>, + sunk: Sunk<'a>, + /// Set to true if and only if we are writing a match with color. + in_color_match: Cell, +} + +impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { + /// Bundle self with a searcher and return the core implementation of Sink. + fn new( + searcher: &'a Searcher, + sink: &'a StandardSink, + ) -> StandardImpl<'a, M, W> { + StandardImpl { + searcher: searcher, + sink: sink, + sunk: Sunk::empty(), + in_color_match: Cell::new(false), + } + } + + /// Bundle self with a searcher and return the core implementation of Sink + /// for use with handling matching lines. + fn from_match( + searcher: &'a Searcher, + sink: &'a StandardSink, + mat: &'a SinkMatch<'a>, + ) -> StandardImpl<'a, M, W> { + let sunk = Sunk::from_sink_match( + mat, + &sink.standard.matches, + sink.replacer.replacement(), + ); + StandardImpl { + sunk: sunk, + ..StandardImpl::new(searcher, sink) + } + } + + /// Bundle self with a searcher and return the core implementation of Sink + /// for use with handling contextual lines. 
+ fn from_context( + searcher: &'a Searcher, + sink: &'a StandardSink, + ctx: &'a SinkContext<'a>, + ) -> StandardImpl<'a, M, W> { + let sunk = Sunk::from_sink_context( + ctx, + &sink.standard.matches, + sink.replacer.replacement(), + ); + StandardImpl { + sunk: sunk, + ..StandardImpl::new(searcher, sink) + } + } + + fn sink(&self) -> io::Result<()> { + self.write_search_prelude()?; + if self.sunk.matches().is_empty() { + if self.multi_line() && !self.is_context() { + self.sink_fast_multi_line() + } else { + self.sink_fast() + } + } else { + if self.multi_line() && !self.is_context() { + self.sink_slow_multi_line() + } else { + self.sink_slow() + } + } + } + + /// Print matches (limited to one line) quickly by avoiding the detection + /// of each individual match in the lines reported in the given + /// `SinkMatch`. + /// + /// This should only be used when the configuration does not demand match + /// granularity and the searcher is not in multi line mode. + fn sink_fast(&self) -> io::Result<()> { + debug_assert!(self.sunk.matches().is_empty()); + debug_assert!(!self.multi_line() || self.is_context()); + + self.write_prelude( + self.sunk.absolute_byte_offset(), + self.sunk.line_number(), + None, + )?; + self.write_line(self.sunk.bytes()) + } + + /// Print matches (possibly spanning more than one line) quickly by + /// avoiding the detection of each individual match in the lines reported + /// in the given `SinkMatch`. + /// + /// This should only be used when the configuration does not demand match + /// granularity. This may be used when the searcher is in multi line mode. + fn sink_fast_multi_line(&self) -> io::Result<()> { + debug_assert!(self.sunk.matches().is_empty()); + // This isn't actually a required invariant for using this method, + // but if we wind up here and multi line mode is disabled, then we + // should still treat it as a bug since we should be using matched_fast + // instead. 
+ debug_assert!(self.multi_line()); + + let line_term = self.searcher.line_terminator().as_byte(); + let mut absolute_byte_offset = self.sunk.absolute_byte_offset(); + for (i, line) in self.sunk.lines(line_term).enumerate() { + self.write_prelude( + absolute_byte_offset, + self.sunk.line_number().map(|n| n + i as u64), + None, + )?; + absolute_byte_offset += line.len() as u64; + + self.write_line(line)?; + } + Ok(()) + } + + /// Print a matching line where the configuration of the printer requires + /// finding each individual match (e.g., for coloring). + fn sink_slow(&self) -> io::Result<()> { + debug_assert!(!self.sunk.matches().is_empty()); + debug_assert!(!self.multi_line() || self.is_context()); + + if self.config().only_matching { + for &m in self.sunk.matches() { + self.write_prelude( + self.sunk.absolute_byte_offset() + m.start() as u64, + self.sunk.line_number(), + Some(m.start() as u64 + 1), + )?; + + let buf = &self.sunk.bytes()[m]; + self.write_colored_line(&[Match::new(0, buf.len())], buf)?; + } + } else if self.config().per_match { + for &m in self.sunk.matches() { + self.write_prelude( + self.sunk.absolute_byte_offset() + m.start() as u64, + self.sunk.line_number(), + Some(m.start() as u64 + 1), + )?; + self.write_colored_line(&[m], self.sunk.bytes())?; + } + } else { + self.write_prelude( + self.sunk.absolute_byte_offset(), + self.sunk.line_number(), + Some(self.sunk.matches()[0].start() as u64 + 1), + )?; + self.write_colored_line(self.sunk.matches(), self.sunk.bytes())?; + } + Ok(()) + } + + fn sink_slow_multi_line(&self) -> io::Result<()> { + debug_assert!(!self.sunk.matches().is_empty()); + debug_assert!(self.multi_line()); + + if self.config().only_matching { + return self.sink_slow_multi_line_only_matching(); + } else if self.config().per_match { + return self.sink_slow_multi_per_match(); + } + + let line_term = self.searcher.line_terminator().as_byte(); + let bytes = self.sunk.bytes(); + let matches = self.sunk.matches(); + let mut midx = 0; 
+ let mut count = 0; + let mut stepper = LineStep::new(line_term, 0, bytes.len()); + while let Some((start, end)) = stepper.next(bytes) { + let mut line = Match::new(start, end); + self.write_prelude( + self.sunk.absolute_byte_offset() + line.start() as u64, + self.sunk.line_number().map(|n| n + count), + Some(matches[0].start() as u64 + 1), + )?; + count += 1; + if self.exceeds_max_columns(&bytes[line]) { + self.write_exceeded_line()?; + continue; + } + if self.has_line_terminator(&bytes[line]) { + line = line.with_end(line.end() - 1); + } + if self.config().trim_ascii { + line = self.trim_ascii_prefix_range(bytes, line); + } + + while !line.is_empty() { + if matches[midx].end() <= line.start() { + if midx + 1 < matches.len() { + midx += 1; + continue; + } else { + self.end_color_match()?; + self.write(&bytes[line])?; + break; + } + } + let m = matches[midx]; + + if line.start() < m.start() { + let upto = cmp::min(line.end(), m.start()); + self.end_color_match()?; + self.write(&bytes[line.with_end(upto)])?; + line = line.with_start(upto); + } else { + let upto = cmp::min(line.end(), m.end()); + self.start_color_match()?; + self.write(&bytes[line.with_end(upto)])?; + line = line.with_start(upto); + } + } + self.end_color_match()?; + self.write_line_term()?; + } + Ok(()) + } + + fn sink_slow_multi_line_only_matching(&self) -> io::Result<()> { + let line_term = self.searcher.line_terminator().as_byte(); + let spec = self.config().colors.matched(); + let bytes = self.sunk.bytes(); + let matches = self.sunk.matches(); + let mut midx = 0; + let mut count = 0; + let mut stepper = LineStep::new(line_term, 0, bytes.len()); + while let Some((start, end)) = stepper.next(bytes) { + let mut line = Match::new(start, end); + if self.has_line_terminator(&bytes[line]) { + line = line.with_end(line.end() - 1); + } + if self.config().trim_ascii { + line = self.trim_ascii_prefix_range(bytes, line); + } + while !line.is_empty() { + if matches[midx].end() <= line.start() { + if midx + 
1 < matches.len() { + midx += 1; + continue; + } else { + break; + } + } + let m = matches[midx]; + + if line.start() < m.start() { + let upto = cmp::min(line.end(), m.start()); + line = line.with_start(upto); + } else { + let upto = cmp::min(line.end(), m.end()); + self.write_prelude( + self.sunk.absolute_byte_offset() + m.start() as u64, + self.sunk.line_number().map(|n| n + count), + Some(m.start() as u64 + 1), + )?; + + let buf = &bytes[line.with_end(upto)]; + line = line.with_start(upto); + if self.exceeds_max_columns(&buf) { + self.write_exceeded_line()?; + continue; + } + self.write_spec(spec, buf)?; + self.write_line_term()?; + } + } + count += 1; + } + Ok(()) + } + + fn sink_slow_multi_per_match(&self) -> io::Result<()> { + let line_term = self.searcher.line_terminator().as_byte(); + let spec = self.config().colors.matched(); + let bytes = self.sunk.bytes(); + for &m in self.sunk.matches() { + let mut m = m; + let mut count = 0; + let mut stepper = LineStep::new(line_term, 0, bytes.len()); + while let Some((start, end)) = stepper.next(bytes) { + let mut line = Match::new(start, end); + if line.start() >= m.end() { + break; + } else if line.end() <= m.start() { + count += 1; + continue; + } + self.write_prelude( + self.sunk.absolute_byte_offset() + line.start() as u64, + self.sunk.line_number().map(|n| n + count), + Some(m.start() as u64 + 1), + )?; + count += 1; + if self.exceeds_max_columns(&bytes[line]) { + self.write_exceeded_line()?; + continue; + } + if self.has_line_terminator(&bytes[line]) { + line = line.with_end(line.end() - 1); + } + if self.config().trim_ascii { + line = self.trim_ascii_prefix_range(bytes, line); + } + + while !line.is_empty() { + if m.end() <= line.start() { + self.write(&bytes[line])?; + line = line.with_start(line.end()); + } else if line.start() < m.start() { + let upto = cmp::min(line.end(), m.start()); + self.write(&bytes[line.with_end(upto)])?; + line = line.with_start(upto); + } else { + let upto = cmp::min(line.end(), 
m.end()); + self.write_spec(spec, &bytes[line.with_end(upto)])?; + line = line.with_start(upto); + } + } + self.write_line_term()?; + } + } + Ok(()) + } + + /// Write the beginning part of a matching line. This (may) include things + /// like the file path, line number among others, depending on the + /// configuration and the parameters given. + #[inline(always)] + fn write_prelude( + &self, + absolute_byte_offset: u64, + line_number: Option, + column: Option, + ) -> io::Result<()> { + let sep = self.separator_field(); + + if !self.config().heading { + self.write_path_field(sep)?; + } + if let Some(n) = line_number { + self.write_line_number(n, sep)?; + } + if let Some(n) = column { + if self.config().column { + self.write_column_number(n, sep)?; + } + } + if self.config().byte_offset { + self.write_byte_offset(absolute_byte_offset, sep)?; + } + Ok(()) + } + + #[inline(always)] + fn write_line( + &self, + line: &[u8], + ) -> io::Result<()> { + if self.exceeds_max_columns(line) { + self.write_exceeded_line()?; + } else { + self.write_trim(line)?; + if !self.has_line_terminator(line) { + self.write_line_term()?; + } + } + Ok(()) + } + + fn write_colored_line( + &self, + matches: &[Match], + line: &[u8], + ) -> io::Result<()> { + // If we know we aren't going to emit color, then we can go faster. 
+ let spec = self.config().colors.matched(); + if !self.wtr().borrow().supports_color() || spec.is_none() { + return self.write_line(line); + } + if self.exceeds_max_columns(line) { + return self.write_exceeded_line(); + } + + let mut last_written = + if !self.config().trim_ascii { + 0 + } else { + self.trim_ascii_prefix_range( + line, + Match::new(0, line.len()), + ).start() + }; + for mut m in matches.iter().map(|&m| m) { + if last_written < m.start() { + self.end_color_match()?; + self.write(&line[last_written..m.start()])?; + } else if last_written < m.end() { + m = m.with_start(last_written); + } else { + continue; + } + if !m.is_empty() { + self.start_color_match()?; + self.write(&line[m])?; + } + last_written = m.end(); + } + self.end_color_match()?; + self.write(&line[last_written..])?; + if !self.has_line_terminator(line) { + self.write_line_term()?; + } + Ok(()) + } + + fn write_exceeded_line(&self) -> io::Result<()> { + if self.sunk.original_matches().is_empty() { + if self.is_context() { + self.write(b"[Omitted long context line]")?; + } else { + self.write(b"[Omitted long matching line]")?; + } + } else { + if self.config().only_matching { + if self.is_context() { + self.write(b"[Omitted long context line]")?; + } else { + self.write(b"[Omitted long matching line]")?; + } + } else { + write!( + self.wtr().borrow_mut(), + "[Omitted long line with {} matches]", + self.sunk.original_matches().len(), + )?; + } + } + self.write_line_term()?; + Ok(()) + } + + /// If this printer has a file path associated with it, then this will + /// write that path to the underlying writer followed by a line terminator. + /// (If a path terminator is set, then that is used instead of the line + /// terminator.) 
+    fn write_path_line(&self) -> io::Result<()> {
+        if let Some(path) = self.path() {
+            self.write_spec(self.config().colors.path(), path.as_bytes())?;
+            if let Some(term) = self.config().path_terminator {
+                self.write(&[term])?;
+            } else {
+                self.write_line_term()?;
+            }
+        }
+        Ok(())
+    }
+
+    /// If this printer has a file path associated with it, then this will
+    /// write that path to the underlying writer followed by the given field
+    /// separator. (If a path terminator is set, then that is used instead of
+    /// the field separator.)
+    fn write_path_field(&self, field_separator: &[u8]) -> io::Result<()> {
+        if let Some(path) = self.path() {
+            self.write_spec(self.config().colors.path(), path.as_bytes())?;
+            if let Some(term) = self.config().path_terminator {
+                self.write(&[term])?;
+            } else {
+                self.write(field_separator)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Write whatever must precede the first line printed for a search.
+    ///
+    /// This does nothing if the current search has already written output.
+    /// Otherwise it writes the search separator (only if one is configured
+    /// and an earlier search wrote output) and, when headings are enabled,
+    /// the file path on a line of its own.
+    fn write_search_prelude(&self) -> io::Result<()> {
+        let this_search_written = self.wtr().borrow().count() > 0;
+        if this_search_written {
+            return Ok(());
+        }
+        if let Some(ref sep) = *self.config().separator_search {
+            let ever_written = self.wtr().borrow().total_count() > 0;
+            if ever_written {
+                self.write(sep)?;
+                self.write_line_term()?;
+            }
+        }
+        if self.config().heading {
+            self.write_path_line()?;
+        }
+        Ok(())
+    }
+
+    /// Write the context separator followed by a line terminator. If no
+    /// context separator is configured, then nothing is written.
+    fn write_context_separator(&self) -> io::Result<()> {
+        if let Some(ref sep) = *self.config().separator_context {
+            self.write(sep)?;
+            self.write_line_term()?;
+        }
+        Ok(())
+    }
+
+    /// Write the line number using the line number color, followed by the
+    /// given field separator.
+    fn write_line_number(
+        &self,
+        line_number: u64,
+        field_separator: &[u8],
+    ) -> io::Result<()> {
+        let n = line_number.to_string();
+        self.write_spec(self.config().colors.line(), n.as_bytes())?;
+        self.write(field_separator)?;
+        Ok(())
+    }
+
+    /// Write the column number using the column color, followed by the given
+    /// field separator.
+    fn write_column_number(
+        &self,
+        column_number: u64,
+        field_separator: &[u8],
+    ) -> io::Result<()> {
+        let n = column_number.to_string();
+        self.write_spec(self.config().colors.column(), n.as_bytes())?;
+        self.write(field_separator)?;
+        Ok(())
+    }
+
+    /// Write the byte offset followed by the given field separator.
+    ///
+    /// Note that the byte offset is written using the column color.
+    fn write_byte_offset(
+        &self,
+        offset: u64,
+        field_separator: &[u8],
+    ) -> io::Result<()> {
+        let n = offset.to_string();
+        self.write_spec(self.config().colors.column(), n.as_bytes())?;
+        self.write(field_separator)?;
+        Ok(())
+    }
+
+    /// Write the searcher's line terminator.
+    fn write_line_term(&self) -> io::Result<()> {
+        self.write(self.searcher.line_terminator().as_bytes())
+    }
+
+    /// Write the given bytes using the given color specification and then
+    /// reset the writer's color settings.
+    fn write_spec(&self, spec: &ColorSpec, buf: &[u8]) -> io::Result<()> {
+        let mut wtr = self.wtr().borrow_mut();
+        wtr.set_color(spec)?;
+        wtr.write_all(buf)?;
+        wtr.reset()?;
+        Ok(())
+    }
+
+    /// Switch the writer to the match color, unless it is already active.
+    fn start_color_match(&self) -> io::Result<()> {
+        if self.in_color_match.get() {
+            return Ok(());
+        }
+        self.wtr().borrow_mut().set_color(self.config().colors.matched())?;
+        self.in_color_match.set(true);
+        Ok(())
+    }
+
+    /// Reset the writer's color settings, unless the match color is not
+    /// currently active.
+    fn end_color_match(&self) -> io::Result<()> {
+        if !self.in_color_match.get() {
+            return Ok(());
+        }
+        self.wtr().borrow_mut().reset()?;
+        self.in_color_match.set(false);
+        Ok(())
+    }
+
+    /// Write the given bytes, first dropping their ASCII whitespace prefix
+    /// if trimming is enabled.
+    fn write_trim(&self, buf: &[u8]) -> io::Result<()> {
+        if !self.config().trim_ascii {
+            return self.write(buf);
+        }
+        self.write(self.trim_ascii_prefix(buf))
+    }
+
+    /// Write the given bytes as is to the underlying writer.
+    fn write(&self, buf: &[u8]) -> io::Result<()> {
+        self.wtr().borrow_mut().write_all(buf)
+    }
+
+    /// Returns true if and only if the given bytes end with the searcher's
+    /// line terminator.
+    fn has_line_terminator(&self, buf: &[u8]) -> bool {
+        self.searcher.line_terminator().is_suffix(buf)
+    }
+
+    /// Returns true if and only if a context line (as opposed to a matching
+    /// line) is being printed.
+    fn is_context(&self) -> bool {
+        self.sunk.context_kind().is_some()
+    }
+
+    /// Return the underlying configuration for this printer.
+    fn config(&self) -> &'a Config {
+        &self.sink.standard.config
+    }
+
+    /// Return the underlying writer that we are printing to.
+    fn wtr(&self) -> &'a RefCell<CounterWriter<W>> {
+        &self.sink.standard.wtr
+    }
+
+    /// Return the path associated with this printer, if one exists.
+    fn path(&self) -> Option<&'a PrinterPath<'a>> {
+        self.sink.path.as_ref()
+    }
+
+    /// Return the appropriate field separator based on whether we are emitting
+    /// matching or contextual lines.
+ fn separator_field(&self) -> &[u8] { + if self.is_context() { + &self.config().separator_field_context + } else { + &self.config().separator_field_match + } + } + + /// Returns true if and only if the given line exceeds the maximum number + /// of columns set. If no maximum is set, then this always returns false. + fn exceeds_max_columns(&self, line: &[u8]) -> bool { + self.config().max_columns.map_or(false, |m| line.len() as u64 > m) + } + + /// Returns true if and only if the searcher may report matches over + /// multiple lines. + /// + /// Note that this doesn't just return whether the searcher is in multi + /// line mode, but also checks if the mater can match over multiple lines. + /// If it can't, then we don't need multi line handling, even if the + /// searcher has multi line mode enabled. + fn multi_line(&self) -> bool { + self.searcher.multi_line_with_matcher(&self.sink.matcher) + } + + /// Trim prefix ASCII spaces from the given slice and return the + /// corresponding range. + /// + /// This stops trimming a prefix as soon as it sees non-whitespace or a + /// line terminator. + fn trim_ascii_prefix_range(&self, slice: &[u8], range: Match) -> Match { + trim_ascii_prefix_range(self.searcher.line_terminator(), slice, range) + } + + /// Trim prefix ASCII spaces from the given slice and return the + /// corresponding sub-slice. + fn trim_ascii_prefix<'s>(&self, slice: &'s [u8]) -> &'s [u8] { + trim_ascii_prefix(self.searcher.line_terminator(), slice) + } +} + +#[cfg(test)] +mod tests { + use grep_regex::RegexMatcher; + use grep_searcher::SearcherBuilder; + use termcolor::NoColor; + + use super::{Standard, StandardBuilder}; + + const SHERLOCK: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. 
Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached.\ +"; + + #[allow(dead_code)] + const SHERLOCK_CRLF: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock\r +Holmeses, success in the province of detective work must always\r +be, to a very large extent, the result of luck. Sherlock Holmes\r +can extract a clew from a wisp of straw or a flake of cigar ash;\r +but Doctor Watson has to have it taken out for him and dusted,\r +and exhibited clearly, with a label attached.\ +"; + + fn printer_contents( + printer: &mut Standard>>, + ) -> String { + String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap() + } + + #[test] + fn reports_match() { + let matcher = RegexMatcher::new("Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + let mut sink = printer.sink(&matcher); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) + .unwrap(); + assert!(sink.has_match()); + + let matcher = RegexMatcher::new("zzzzz").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + let mut sink = printer.sink(&matcher); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) + .unwrap(); + assert!(!sink.has_match()); + } + + #[test] + fn reports_binary() { + use grep_searcher::BinaryDetection; + + let matcher = RegexMatcher::new("Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + let mut sink = printer.sink(&matcher); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) + .unwrap(); + assert!(sink.binary_byte_offset().is_none()); + + let matcher = RegexMatcher::new(".+").unwrap(); + let mut printer = 
StandardBuilder::new() + .build(NoColor::new(vec![])); + let mut sink = printer.sink(&matcher); + SearcherBuilder::new() + .line_number(false) + .binary_detection(BinaryDetection::quit(b'\x00')) + .build() + .search_reader(&matcher, &b"abc\x00"[..], &mut sink) + .unwrap(); + assert_eq!(sink.binary_byte_offset(), Some(3)); + } + + #[test] + fn reports_stats() { + use std::time::Duration; + + let matcher = RegexMatcher::new("Sherlock|opposed").unwrap(); + let mut printer = StandardBuilder::new() + .stats(true) + .build(NoColor::new(vec![])); + let stats = { + let mut sink = printer.sink(&matcher); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) + .unwrap(); + sink.stats().unwrap().clone() + }; + let buf = printer_contents(&mut printer); + + assert!(stats.elapsed() > Duration::default()); + assert_eq!(stats.searches(), 1); + assert_eq!(stats.searches_with_match(), 1); + assert_eq!(stats.bytes_searched(), SHERLOCK.len() as u64); + assert_eq!(stats.bytes_printed(), buf.len() as u64); + assert_eq!(stats.matched_lines(), 2); + assert_eq!(stats.matches(), 3); + + } + + #[test] + fn reports_stats_multiple() { + use std::time::Duration; + + let matcher = RegexMatcher::new("Sherlock|opposed").unwrap(); + let mut printer = StandardBuilder::new() + .stats(true) + .build(NoColor::new(vec![])); + let stats = { + let mut sink = printer.sink(&matcher); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) + .unwrap(); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader(&matcher, &b"zzzzzzzzzz"[..], &mut sink) + .unwrap(); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) + .unwrap(); + sink.stats().unwrap().clone() + }; + let buf = printer_contents(&mut printer); + + assert!(stats.elapsed() > Duration::default()); + assert_eq!(stats.searches(), 3); + 
assert_eq!(stats.searches_with_match(), 2); + assert_eq!(stats.bytes_searched(), 10 + 2 * SHERLOCK.len() as u64); + assert_eq!(stats.bytes_printed(), buf.len() as u64); + assert_eq!(stats.matched_lines(), 4); + assert_eq!(stats.matches(), 6); + } + + #[test] + fn context_break() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .separator_context(Some(b"--abc--".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +--abc-- +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn context_break_multiple_no_heading() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .separator_search(Some(b"--xyz--".to_vec())) + .separator_context(Some(b"--abc--".to_vec())) + .build(NoColor::new(vec![])); + + SearcherBuilder::new() + .line_number(false) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + SearcherBuilder::new() + .line_number(false) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +--abc-- +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +--xyz-- +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +--abc-- +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn context_break_multiple_heading() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .heading(true) + .separator_search(Some(b"--xyz--".to_vec())) + .separator_context(Some(b"--abc--".to_vec())) + .build(NoColor::new(vec![])); + + SearcherBuilder::new() + .line_number(false) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + SearcherBuilder::new() + .line_number(false) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +--abc-- +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +--xyz-- +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +--abc-- +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn path() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .path(false) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +5:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn separator_field() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .separator_field_match(b"!!".to_vec()) + .separator_field_context(b"^^".to_vec()) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +sherlock!!For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock^^Holmeses, success in the province of detective work must always +-- +sherlock^^can extract a clew from a wisp of straw or a flake of cigar ash; +sherlock!!but Doctor Watson has to have it taken out for him and dusted, +sherlock^^and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn separator_path() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .separator_path(Some(b'Z')) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "books/sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +booksZsherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +booksZsherlock:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn path_terminator() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .path_terminator(Some(b'Z')) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "books/sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +books/sherlockZFor the Doctor Watsons of this world, as opposed to the Sherlock +books/sherlockZbut Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn heading() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .heading(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +sherlock +For the Doctor Watsons of this world, as opposed to the Sherlock +but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn no_heading() { + let matcher = 
RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .heading(false) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn no_heading_multiple() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .heading(false) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let matcher = RegexMatcher::new("Sherlock").unwrap(); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:but Doctor Watson has to have it taken out for him and dusted, +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn heading_multiple() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .heading(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let matcher = RegexMatcher::new("Sherlock").unwrap(); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +sherlock +For the Doctor Watsons of this world, as opposed to the Sherlock +but Doctor Watson has to have it taken out for him and dusted, +sherlock +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn trim_ascii() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .trim_ascii(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + " Watson".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +Watson +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn trim_ascii_multi_line() { + let matcher = RegexMatcher::new("(?s:.{0})Watson").unwrap(); + let mut printer = StandardBuilder::new() + .trim_ascii(true) + .stats(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .build() + .search_reader( + &matcher, + " Watson".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +Watson +"; + 
assert_eq_printed!(expected, got); + } + + #[test] + fn trim_ascii_with_line_term() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .trim_ascii(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .before_context(1) + .build() + .search_reader( + &matcher, + "\n Watson".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1- +2:Watson +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn line_number() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +5:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn line_number_multi_line() { + let matcher = RegexMatcher::new("(?s)Watson.+Watson").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .multi_line(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +3:be, to a very large extent, the result of luck. 
Sherlock Holmes +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn column_number() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +16:For the Doctor Watsons of this world, as opposed to the Sherlock +12:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn column_number_multi_line() { + let matcher = RegexMatcher::new("(?s)Watson.+Watson").unwrap(); + let mut printer = StandardBuilder::new() + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +16:For the Doctor Watsons of this world, as opposed to the Sherlock +16:Holmeses, success in the province of detective work must always +16:be, to a very large extent, the result of luck. 
Sherlock Holmes +16:can extract a clew from a wisp of straw or a flake of cigar ash; +16:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn byte_offset() { + let matcher = RegexMatcher::new("Watson").unwrap(); + let mut printer = StandardBuilder::new() + .byte_offset(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +258:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn byte_offset_multi_line() { + let matcher = RegexMatcher::new("(?s)Watson.+Watson").unwrap(); + let mut printer = StandardBuilder::new() + .byte_offset(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. 
Sherlock Holmes +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_columns() { + let matcher = RegexMatcher::new("ash|dusted").unwrap(); + let mut printer = StandardBuilder::new() + .max_columns(Some(63)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +[Omitted long matching line] +but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_columns_with_count() { + let matcher = RegexMatcher::new("cigar|ash|dusted").unwrap(); + let mut printer = StandardBuilder::new() + .stats(true) + .max_columns(Some(63)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +[Omitted long line with 2 matches] +but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_columns_multi_line() { + let matcher = RegexMatcher::new("(?s)ash.+dusted").unwrap(); + let mut printer = StandardBuilder::new() + .max_columns(Some(63)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +[Omitted long matching line] +but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_matches() { + let matcher = 
RegexMatcher::new("Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .max_matches(Some(1)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_matches_context() { + // after context: 1 + let matcher = RegexMatcher::new("Doctor Watsons").unwrap(); + let mut printer = StandardBuilder::new() + .max_matches(Some(1)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +"; + assert_eq_printed!(expected, got); + + // after context: 4 + let mut printer = StandardBuilder::new() + .max_matches(Some(1)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .after_context(4) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. 
Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + + // after context: 1, max matches: 2 + let matcher = RegexMatcher::new("Doctor Watsons|but Doctor").unwrap(); + let mut printer = StandardBuilder::new() + .max_matches(Some(2)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +-- +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + assert_eq_printed!(expected, got); + + // after context: 4, max matches: 2 + let mut printer = StandardBuilder::new() + .max_matches(Some(2)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .after_context(4) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_matches_multi_line1() { + let matcher = RegexMatcher::new("(?s:.{0})Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .max_matches(Some(1)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_matches_multi_line2() { + let matcher = RegexMatcher::new( + r"(?s)Watson.+?(Holmeses|clearly)" + ).unwrap(); + let mut printer = StandardBuilder::new() + .max_matches(Some(1)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn only_matching() { + let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:9:Doctor Watsons +1:57:Sherlock +3:49:Sherlock +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn only_matching_multi_line1() { + let matcher = RegexMatcher::new( + r"(?s:.{0})(Doctor Watsons|Sherlock)" + ).unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + 
.column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:9:Doctor Watsons +1:57:Sherlock +3:49:Sherlock +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn only_matching_multi_line2() { + let matcher = RegexMatcher::new( + r"(?s)Watson.+?(Holmeses|clearly)" + ).unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:16:Watsons of this world, as opposed to the Sherlock +2:16:Holmeses +5:12:Watson has to have it taken out for him and dusted, +6:12:and exhibited clearly +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn only_matching_max_columns() { + let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + .max_columns(Some(10)) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:9:[Omitted long matching line] +1:57:Sherlock +3:49:Sherlock +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn only_matching_max_columns_multi_line1() { + let matcher = RegexMatcher::new( + r"(?s:.{0})(Doctor Watsons|Sherlock)" + ).unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + .max_columns(Some(10)) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + 
.line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:9:[Omitted long matching line] +1:57:Sherlock +3:49:Sherlock +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn only_matching_max_columns_multi_line2() { + let matcher = RegexMatcher::new( + r"(?s)Watson.+?(Holmeses|clearly)" + ).unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + .max_columns(Some(50)) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:16:Watsons of this world, as opposed to the Sherlock +2:16:Holmeses +5:12:[Omitted long matching line] +6:12:and exhibited clearly +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn per_match() { + let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .per_match(true) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:9:For the Doctor Watsons of this world, as opposed to the Sherlock +1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +3:49:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn per_match_multi_line1() { + let matcher = RegexMatcher::new( + r"(?s:.{0})(Doctor Watsons|Sherlock)" + ).unwrap(); + let mut printer = StandardBuilder::new() + .per_match(true) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:9:For the Doctor Watsons of this world, as opposed to the Sherlock +1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +3:49:be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn per_match_multi_line2() { + let matcher = RegexMatcher::new( + r"(?s)Watson.+?(Holmeses|clearly)", + ).unwrap(); + let mut printer = StandardBuilder::new() + .per_match(true) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +2:16:Holmeses, success in the province of detective work must always +5:12:but Doctor Watson has to have it taken out for him and dusted, +6:12:and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn per_match_multi_line3() { + let matcher = RegexMatcher::new( + r"(?s)Watson.+?Holmeses|always.+?be", + ).unwrap(); + let mut printer = StandardBuilder::new() + .per_match(true) + .column(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +2:16:Holmeses, success in the province of detective work must always +2:123:Holmeses, success in the province of detective work must always +3:123:be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn replacement_passthru() { + let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); + let mut printer = StandardBuilder::new() + .replacement(Some(b"doctah $1 MD".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .passthru(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:For the doctah Watsons MD of this world, as opposed to the doctah MD +2-Holmeses, success in the province of detective work must always +3:be, to a very large extent, the result of luck. doctah MD Holmes +4-can extract a clew from a wisp of straw or a flake of cigar ash; +5:but doctah Watson MD has to have it taken out for him and dusted, +6-and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn replacement() { + let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); + let mut printer = StandardBuilder::new() + .replacement(Some(b"doctah $1 MD".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:For the doctah Watsons MD of this world, as opposed to the doctah MD +3:be, to a very large extent, the result of luck. doctah MD Holmes +5:but doctah Watson MD has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn replacement_max_columns() { + let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); + let mut printer = StandardBuilder::new() + .max_columns(Some(67)) + .replacement(Some(b"doctah $1 MD".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:[Omitted long line with 2 matches] +3:be, to a very large extent, the result of luck. 
doctah MD Holmes +5:but doctah Watson MD has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn replacement_only_matching() { + let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + .replacement(Some(b"doctah $1 MD".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:doctah Watsons MD +1:doctah MD +3:doctah MD +5:doctah Watson MD +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn replacement_per_match() { + let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); + let mut printer = StandardBuilder::new() + .per_match(true) + .replacement(Some(b"doctah $1 MD".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1:For the doctah Watsons MD of this world, as opposed to the doctah MD +1:For the doctah Watsons MD of this world, as opposed to the doctah MD +3:be, to a very large extent, the result of luck. 
doctah MD Holmes +5:but doctah Watson MD has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn invert() { + let matcher = RegexMatcher::new(r"Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .invert_match(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +2:Holmeses, success in the province of detective work must always +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn invert_multi_line() { + let matcher = RegexMatcher::new(r"(?s:.{0})Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .invert_match(true) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +2:Holmeses, success in the province of detective work must always +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn invert_context() { + let matcher = RegexMatcher::new(r"Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .invert_match(true) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1-For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +3-be, to a very large extent, the result of luck. Sherlock Holmes +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn invert_context_multi_line() { + let matcher = RegexMatcher::new(r"(?s:.{0})Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .invert_match(true) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1-For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +3-be, to a very large extent, the result of luck. Sherlock Holmes +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn invert_context_only_matching() { + let matcher = RegexMatcher::new(r"Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .invert_match(true) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1-Sherlock +2:Holmeses, success in the province of detective work must always +3-Sherlock +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn invert_context_only_matching_multi_line() { + let matcher = RegexMatcher::new(r"(?s:.{0})Sherlock").unwrap(); + let mut printer = StandardBuilder::new() + .only_matching(true) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .multi_line(true) + .line_number(true) + .invert_match(true) + .before_context(1) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +1-Sherlock +2:Holmeses, success in the province of detective work must always +3-Sherlock +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } +} diff -Nru ripgrep-0.6.0/grep-printer/src/stats.rs ripgrep-0.10.0.3/grep-printer/src/stats.rs --- ripgrep-0.6.0/grep-printer/src/stats.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/stats.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,147 @@ +use std::ops::{Add, AddAssign}; +use std::time::Duration; + +use util::NiceDuration; + +/// Summary statistics produced at the end of a search. +/// +/// When statistics are reported by a printer, they correspond to all searches +/// executed with that printer. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[cfg_attr(feature = "serde1", derive(Serialize))] +pub struct Stats { + elapsed: NiceDuration, + searches: u64, + searches_with_match: u64, + bytes_searched: u64, + bytes_printed: u64, + matched_lines: u64, + matches: u64, +} + +impl Add for Stats { + type Output = Stats; + + fn add(self, rhs: Stats) -> Stats { + self + &rhs + } +} + +impl<'a> Add<&'a Stats> for Stats { + type Output = Stats; + + fn add(self, rhs: &'a Stats) -> Stats { + Stats { + elapsed: NiceDuration(self.elapsed.0 + rhs.elapsed.0), + searches: self.searches + rhs.searches, + searches_with_match: + self.searches_with_match + rhs.searches_with_match, + bytes_searched: self.bytes_searched + rhs.bytes_searched, + bytes_printed: self.bytes_printed + rhs.bytes_printed, + matched_lines: self.matched_lines + rhs.matched_lines, + matches: self.matches + rhs.matches, + } + } +} + +impl AddAssign for Stats { + fn add_assign(&mut self, rhs: Stats) { + *self += &rhs; + } +} + +impl<'a> AddAssign<&'a Stats> for Stats { + fn add_assign(&mut self, rhs: &'a Stats) { + self.elapsed.0 += rhs.elapsed.0; + self.searches += rhs.searches; + self.searches_with_match += rhs.searches_with_match; + self.bytes_searched += rhs.bytes_searched; + self.bytes_printed += rhs.bytes_printed; + self.matched_lines += rhs.matched_lines; + self.matches += rhs.matches; + } +} + +impl Stats { + /// Return a new 
value for tracking aggregate statistics across searches. + /// + /// All statistics are set to `0`. + pub fn new() -> Stats { + Stats::default() + } + + /// Return the total amount of time elapsed. + pub fn elapsed(&self) -> Duration { + self.elapsed.0 + } + + /// Return the total number of searches executed. + pub fn searches(&self) -> u64 { + self.searches + } + + /// Return the total number of searches that found at least one match. + pub fn searches_with_match(&self) -> u64 { + self.searches_with_match + } + + /// Return the total number of bytes searched. + pub fn bytes_searched(&self) -> u64 { + self.bytes_searched + } + + /// Return the total number of bytes printed. + pub fn bytes_printed(&self) -> u64 { + self.bytes_printed + } + + /// Return the total number of lines that participated in a match. + /// + /// When matches may contain multiple lines then this includes every line + /// that is part of every match. + pub fn matched_lines(&self) -> u64 { + self.matched_lines + } + + /// Return the total number of matches. + /// + /// There may be multiple matches per line. + pub fn matches(&self) -> u64 { + self.matches + } + + /// Add to the elapsed time. + pub fn add_elapsed(&mut self, duration: Duration) { + self.elapsed.0 += duration; + } + + /// Add to the number of searches executed. + pub fn add_searches(&mut self, n: u64) { + self.searches += n; + } + + /// Add to the number of searches that found at least one match. + pub fn add_searches_with_match(&mut self, n: u64) { + self.searches_with_match += n; + } + + /// Add to the total number of bytes searched. + pub fn add_bytes_searched(&mut self, n: u64) { + self.bytes_searched += n; + } + + /// Add to the total number of bytes printed. + pub fn add_bytes_printed(&mut self, n: u64) { + self.bytes_printed += n; + } + + /// Add to the total number of lines that participated in a match. + pub fn add_matched_lines(&mut self, n: u64) { + self.matched_lines += n; + } + + /// Add to the total number of matches. 
+ pub fn add_matches(&mut self, n: u64) { + self.matches += n; + } +} diff -Nru ripgrep-0.6.0/grep-printer/src/summary.rs ripgrep-0.10.0.3/grep-printer/src/summary.rs --- ripgrep-0.6.0/grep-printer/src/summary.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/summary.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,1068 @@ +use std::cell::RefCell; +use std::io::{self, Write}; +use std::path::Path; +use std::sync::Arc; +use std::time::Instant; + +use grep_matcher::Matcher; +use grep_searcher::{Searcher, Sink, SinkError, SinkFinish, SinkMatch}; +use termcolor::{ColorSpec, NoColor, WriteColor}; + +use color::ColorSpecs; +use counter::CounterWriter; +use stats::Stats; +use util::PrinterPath; + +/// The configuration for the summary printer. +/// +/// This is manipulated by the SummaryBuilder and then referenced by the actual +/// implementation. Once a printer is built, the configuration is frozen and +/// cannot be changed. +#[derive(Debug, Clone)] +struct Config { + kind: SummaryKind, + colors: ColorSpecs, + stats: bool, + path: bool, + max_matches: Option, + exclude_zero: bool, + separator_field: Arc>, + separator_path: Option, + path_terminator: Option, +} + +impl Default for Config { + fn default() -> Config { + Config { + kind: SummaryKind::Count, + colors: ColorSpecs::default(), + stats: false, + path: true, + max_matches: None, + exclude_zero: true, + separator_field: Arc::new(b":".to_vec()), + separator_path: None, + path_terminator: None, + } + } +} + +/// The type of summary output (if any) to print. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum SummaryKind { + /// Show only a count of the total number of matches (counting each line + /// at most once) found. + /// + /// If the `path` setting is enabled, then the count is prefixed by the + /// corresponding file path. + Count, + /// Show only a count of the total number of matches (counting possibly + /// many matches on each line) found. 
+ /// + /// If the `path` setting is enabled, then the count is prefixed by the + /// corresponding file path. + CountMatches, + /// Show only the file path if and only if a match was found. + /// + /// This ignores the `path` setting and always shows the file path. If no + /// file path is provided, then searching will immediately stop and return + /// an error. + PathWithMatch, + /// Show only the file path if and only if no match was found. + /// + /// This ignores the `path` setting and always shows the file path. If no + /// file path is provided, then searching will immediately stop and return + /// an error. + PathWithoutMatch, + /// Don't show any output and stop the search once a match is found. + /// + /// Note that if `stats` is enabled, then searching continues in order to + /// compute statistics. + Quiet, +} + +impl SummaryKind { + /// Returns true if and only if this output mode requires a file path. + /// + /// When an output mode requires a file path, then the summary printer + /// will report an error at the start of every search that lacks a file + /// path. + fn requires_path(&self) -> bool { + use self::SummaryKind::*; + + match *self { + PathWithMatch | PathWithoutMatch => true, + Count | CountMatches | Quiet => false, + } + } + + /// Returns true if and only if this output mode requires computing + /// statistics, regardless of whether they have been enabled or not. + fn requires_stats(&self) -> bool { + use self::SummaryKind::*; + + match *self { + CountMatches => true, + Count | PathWithMatch | PathWithoutMatch | Quiet => false, + } + } + + /// Returns true if and only if a printer using this output mode can + /// quit after seeing the first match. + fn quit_early(&self) -> bool { + use self::SummaryKind::*; + + match *self { + PathWithMatch | Quiet => true, + Count | CountMatches | PathWithoutMatch => false, + } + } +} + +/// A builder for the summary printer. +/// +/// The builder permits configuring how the printer behaves. 
The summary +/// printer has fewer configuration options than the standard printer because +/// it aims to produce aggregate output about a single search (typically just +/// one line) instead of output for each match. +/// +/// Once a `Summary` printer is built, its configuration cannot be changed. +#[derive(Clone, Debug)] +pub struct SummaryBuilder { + config: Config, +} + +impl SummaryBuilder { + /// Return a new builder for configuring the summary printer. + pub fn new() -> SummaryBuilder { + SummaryBuilder { config: Config::default() } + } + + /// Build a printer using any implementation of `termcolor::WriteColor`. + /// + /// The implementation of `WriteColor` used here controls whether colors + /// are used or not when colors have been configured using the + /// `color_specs` method. + /// + /// For maximum portability, callers should generally use either + /// `termcolor::StandardStream` or `termcolor::BufferedStandardStream` + /// where appropriate, which will automatically enable colors on Windows + /// when possible. + /// + /// However, callers may also provide an arbitrary writer using the + /// `termcolor::Ansi` or `termcolor::NoColor` wrappers, which always enable + /// colors via ANSI escapes or always disable colors, respectively. + /// + /// As a convenience, callers may use `build_no_color` to automatically + /// select the `termcolor::NoColor` wrapper to avoid needing to import + /// from `termcolor` explicitly. + pub fn build(&self, wtr: W) -> Summary { + Summary { + config: self.config.clone(), + wtr: RefCell::new(CounterWriter::new(wtr)), + } + } + + /// Build a printer from any implementation of `io::Write` and never emit + /// any colors, regardless of the user color specification settings. + /// + /// This is a convenience routine for + /// `SummaryBuilder::build(termcolor::NoColor::new(wtr))`. + pub fn build_no_color( + &self, + wtr: W, + ) -> Summary> { + self.build(NoColor::new(wtr)) + } + + /// Set the output mode for this printer. 
+ /// + /// The output mode controls how aggregate results of a search are printed. + /// + /// By default, this printer uses the `Count` mode. + pub fn kind(&mut self, kind: SummaryKind) -> &mut SummaryBuilder { + self.config.kind = kind; + self + } + + /// Set the user color specifications to use for coloring in this printer. + /// + /// A [`UserColorSpec`](struct.UserColorSpec.html) can be constructed from + /// a string in accordance with the color specification format. See the + /// `UserColorSpec` type documentation for more details on the format. + /// A [`ColorSpecs`](struct.ColorSpecs.html) can then be generated from + /// zero or more `UserColorSpec`s. + /// + /// Regardless of the color specifications provided here, whether color + /// is actually used or not is determined by the implementation of + /// `WriteColor` provided to `build`. For example, if `termcolor::NoColor` + /// is provided to `build`, then no color will ever be printed regardless + /// of the color specifications provided here. + /// + /// This completely overrides any previous color specifications. This does + /// not add to any previously provided color specifications on this + /// builder. + pub fn color_specs( + &mut self, + specs: ColorSpecs, + ) -> &mut SummaryBuilder { + self.config.colors = specs; + self + } + + /// Enable the gathering of various aggregate statistics. + /// + /// When this is enabled (it's disabled by default), statistics will be + /// gathered for all uses of `Summary` printer returned by `build`, + /// including but not limited to, the total number of matches, the total + /// number of bytes searched and the total number of bytes printed. + /// + /// Aggregate statistics can be accessed via the sink's + /// [`SummarySink::stats`](struct.SummarySink.html#method.stats) + /// method. + /// + /// When this is enabled, this printer may need to do extra work in order + /// to compute certain statistics, which could cause the search to take + /// longer. 
For example, in `Quiet` mode, a search can quit after finding + /// the first match, but if `stats` is enabled, then the search will + /// continue after the first match in order to compute statistics. + /// + /// For a complete description of available statistics, see + /// [`Stats`](struct.Stats.html). + /// + /// Note that some output modes, such as `CountMatches`, automatically + /// enable this option even if it has been explicitly disabled. + pub fn stats(&mut self, yes: bool) -> &mut SummaryBuilder { + self.config.stats = yes; + self + } + + /// When enabled, if a path was given to the printer, then it is shown in + /// the output (either as a heading or as a prefix to each matching line). + /// When disabled, then no paths are ever included in the output even when + /// a path is provided to the printer. + /// + /// This setting has no effect in `PathWithMatch` and `PathWithoutMatch` + /// modes. + /// + /// This is enabled by default. + pub fn path(&mut self, yes: bool) -> &mut SummaryBuilder { + self.config.path = yes; + self + } + + /// Set the maximum amount of matches that are printed. + /// + /// If multi line search is enabled and a match spans multiple lines, then + /// that match is counted exactly once for the purposes of enforcing this + /// limit, regardless of how many lines it spans. + pub fn max_matches(&mut self, limit: Option) -> &mut SummaryBuilder { + self.config.max_matches = limit; + self + } + + /// Exclude count-related summary results with no matches. + /// + /// When enabled and the mode is either `Count` or `CountMatches`, then + /// results are not printed if no matches were found. Otherwise, every + /// search prints a result with a possibly `0` number of matches. + pub fn exclude_zero(&mut self, yes: bool) -> &mut SummaryBuilder { + self.config.exclude_zero = yes; + self + } + + /// Set the separator used between fields for the `Count` and + /// `CountMatches` modes. + /// + /// By default, this is set to `:`. 
+ pub fn separator_field( + &mut self, + sep: Vec, + ) -> &mut SummaryBuilder { + self.config.separator_field = Arc::new(sep); + self + } + + /// Set the path separator used when printing file paths. + /// + /// Typically, printing is done by emitting the file path as is. However, + /// this setting provides the ability to use a different path separator + /// from what the current environment has configured. + /// + /// A typical use for this option is to permit cygwin users on Windows to + /// set the path separator to `/` instead of using the system default of + /// `\`. + pub fn separator_path( + &mut self, + sep: Option, + ) -> &mut SummaryBuilder { + self.config.separator_path = sep; + self + } + + /// Set the path terminator used. + /// + /// The path terminator is a byte that is printed after every file path + /// emitted by this printer. + /// + /// If no path terminator is set (the default), then paths are terminated + /// by either new lines or the configured field separator. + pub fn path_terminator( + &mut self, + terminator: Option, + ) -> &mut SummaryBuilder { + self.config.path_terminator = terminator; + self + } +} + +/// The summary printer, which emits aggregate results from a search. +/// +/// Aggregate results generally correspond to file paths and/or the number of +/// matches found. +/// +/// A default printer can be created with either of the `Summary::new` or +/// `Summary::new_no_color` constructors. However, there are a number of +/// options that configure this printer's output. Those options can be +/// configured using [`SummaryBuilder`](struct.SummaryBuilder.html). +/// +/// This type is generic over `W`, which represents any implementation of +/// the `termcolor::WriteColor` trait. +#[derive(Debug)] +pub struct Summary { + config: Config, + wtr: RefCell>, +} + +impl Summary { + /// Return a summary printer with a default configuration that writes + /// matches to the given writer. 
+ /// + /// The writer should be an implementation of `termcolor::WriteColor` + /// and not just a bare implementation of `io::Write`. To use a normal + /// `io::Write` implementation (simultaneously sacrificing colors), use + /// the `new_no_color` constructor. + /// + /// The default configuration uses the `Count` summary mode. + pub fn new(wtr: W) -> Summary { + SummaryBuilder::new().build(wtr) + } +} + +impl Summary> { + /// Return a summary printer with a default configuration that writes + /// matches to the given writer. + /// + /// The writer can be any implementation of `io::Write`. With this + /// constructor, the printer will never emit colors. + /// + /// The default configuration uses the `Count` summary mode. + pub fn new_no_color(wtr: W) -> Summary> { + SummaryBuilder::new().build_no_color(wtr) + } +} + +impl Summary { + /// Return an implementation of `Sink` for the summary printer. + /// + /// This does not associate the printer with a file path, which means this + /// implementation will never print a file path. If the output mode of + /// this summary printer does not make sense without a file path (such as + /// `PathWithMatch` or `PathWithoutMatch`), then any searches executed + /// using this sink will immediately quit with an error. + pub fn sink<'s, M: Matcher>( + &'s mut self, + matcher: M, + ) -> SummarySink<'static, 's, M, W> { + let stats = + if self.config.stats || self.config.kind.requires_stats() { + Some(Stats::new()) + } else { + None + }; + SummarySink { + matcher: matcher, + summary: self, + path: None, + start_time: Instant::now(), + match_count: 0, + binary_byte_offset: None, + stats: stats, + } + } + + /// Return an implementation of `Sink` associated with a file path. + /// + /// When the printer is associated with a path, then it may, depending on + /// its configuration, print the path. 
+ pub fn sink_with_path<'p, 's, M, P>( + &'s mut self, + matcher: M, + path: &'p P, + ) -> SummarySink<'p, 's, M, W> + where M: Matcher, + P: ?Sized + AsRef, + { + if !self.config.path { + return self.sink(matcher); + } + let stats = + if self.config.stats || self.config.kind.requires_stats() { + Some(Stats::new()) + } else { + None + }; + let ppath = PrinterPath::with_separator( + path.as_ref(), self.config.separator_path); + SummarySink { + matcher: matcher, + summary: self, + path: Some(ppath), + start_time: Instant::now(), + match_count: 0, + binary_byte_offset: None, + stats: stats, + } + } +} + +impl Summary { + /// Returns true if and only if this printer has written at least one byte + /// to the underlying writer during any of the previous searches. + pub fn has_written(&self) -> bool { + self.wtr.borrow().total_count() > 0 + } + + /// Return a mutable reference to the underlying writer. + pub fn get_mut(&mut self) -> &mut W { + self.wtr.get_mut().get_mut() + } + + /// Consume this printer and return back ownership of the underlying + /// writer. + pub fn into_inner(self) -> W { + self.wtr.into_inner().into_inner() + } +} + +/// An implementation of `Sink` associated with a matcher and an optional file +/// path for the summary printer. +/// +/// This type is generic over a few type parameters: +/// +/// * `'p` refers to the lifetime of the file path, if one is provided. When +/// no file path is given, then this is `'static`. +/// * `'s` refers to the lifetime of the +/// [`Summary`](struct.Summary.html) +/// printer that this type borrows. +/// * `M` refers to the type of matcher used by +/// `grep_searcher::Searcher` that is reporting results to this sink. +/// * `W` refers to the underlying writer that this printer is writing its +/// output to. 
+#[derive(Debug)] +pub struct SummarySink<'p, 's, M: Matcher, W: 's> { + matcher: M, + summary: &'s mut Summary, + path: Option>, + start_time: Instant, + match_count: u64, + binary_byte_offset: Option, + stats: Option, +} + +impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> { + /// Returns true if and only if this printer received a match in the + /// previous search. + /// + /// This is unaffected by the result of searches before the previous + /// search. + pub fn has_match(&self) -> bool { + self.match_count > 0 + } + + /// If binary data was found in the previous search, this returns the + /// offset at which the binary data was first detected. + /// + /// The offset returned is an absolute offset relative to the entire + /// set of bytes searched. + /// + /// This is unaffected by the result of searches before the previous + /// search. e.g., If the search prior to the previous search found binary + /// data but the previous search found no binary data, then this will + /// return `None`. + pub fn binary_byte_offset(&self) -> Option { + self.binary_byte_offset + } + + /// Return a reference to the stats produced by the printer for all + /// searches executed on this sink. + /// + /// This only returns stats if they were requested via the + /// [`SummaryBuilder`](struct.SummaryBuilder.html) + /// configuration. + pub fn stats(&self) -> Option<&Stats> { + self.stats.as_ref() + } + + /// Returns true if this printer should quit. + /// + /// This implements the logic for handling quitting after seeing a certain + /// amount of matches. In most cases, the logic is simple, but we must + /// permit all "after" contextual lines to print after reaching the limit. 
+ fn should_quit(&self) -> bool { + let limit = match self.summary.config.max_matches { + None => return false, + Some(limit) => limit, + }; + self.match_count >= limit + } + + /// If this printer has a file path associated with it, then this will + /// write that path to the underlying writer followed by a line terminator. + /// (If a path terminator is set, then that is used instead of the line + /// terminator.) + fn write_path_line(&self, searcher: &Searcher) -> io::Result<()> { + if let Some(ref path) = self.path { + self.write_spec( + self.summary.config.colors.path(), + path.as_bytes(), + )?; + if let Some(term) = self.summary.config.path_terminator { + self.write(&[term])?; + } else { + self.write_line_term(searcher)?; + } + } + Ok(()) + } + + /// If this printer has a file path associated with it, then this will + /// write that path to the underlying writer followed by the field + /// separator. (If a path terminator is set, then that is used instead of + /// the field separator.) + fn write_path_field(&self) -> io::Result<()> { + if let Some(ref path) = self.path { + self.write_spec( + self.summary.config.colors.path(), + path.as_bytes(), + )?; + if let Some(term) = self.summary.config.path_terminator { + self.write(&[term])?; + } else { + self.write(&self.summary.config.separator_field)?; + } + } + Ok(()) + } + + /// Write the line terminator configured on the given searcher. + fn write_line_term(&self, searcher: &Searcher) -> io::Result<()> { + self.write(searcher.line_terminator().as_bytes()) + } + + /// Write the given bytes using the given style. + fn write_spec(&self, spec: &ColorSpec, buf: &[u8]) -> io::Result<()> { + self.summary.wtr.borrow_mut().set_color(spec)?; + self.write(buf)?; + self.summary.wtr.borrow_mut().reset()?; + Ok(()) + } + + /// Write all of the given bytes. 
+ fn write(&self, buf: &[u8]) -> io::Result<()> { + self.summary.wtr.borrow_mut().write_all(buf) + } +} + +impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { + type Error = io::Error; + + fn matched( + &mut self, + _searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + self.match_count += 1; + if let Some(ref mut stats) = self.stats { + let mut match_count = 0; + self.matcher.find_iter(mat.bytes(), |_| { + match_count += 1; + true + }).map_err(io::Error::error_message)?; + stats.add_matches(match_count); + stats.add_matched_lines(mat.lines().count() as u64); + } else if self.summary.config.kind.quit_early() { + return Ok(false); + } + Ok(!self.should_quit()) + } + + fn begin( + &mut self, + _searcher: &Searcher, + ) -> Result { + if self.path.is_none() && self.summary.config.kind.requires_path() { + return Err(io::Error::error_message(format!( + "output kind {:?} requires a file path", + self.summary.config.kind, + ))); + } + self.summary.wtr.borrow_mut().reset_count(); + self.start_time = Instant::now(); + self.match_count = 0; + self.binary_byte_offset = None; + if self.summary.config.max_matches == Some(0) { + return Ok(false); + } + + Ok(true) + } + + fn finish( + &mut self, + searcher: &Searcher, + finish: &SinkFinish, + ) -> Result<(), io::Error> { + self.binary_byte_offset = finish.binary_byte_offset(); + if let Some(ref mut stats) = self.stats { + stats.add_elapsed(self.start_time.elapsed()); + stats.add_searches(1); + if self.match_count > 0 { + stats.add_searches_with_match(1); + } + stats.add_bytes_searched(finish.byte_count()); + stats.add_bytes_printed(self.summary.wtr.borrow().count()); + } + + let show_count = + !self.summary.config.exclude_zero + || self.match_count > 0; + match self.summary.config.kind { + SummaryKind::Count => { + if show_count { + self.write_path_field()?; + self.write(self.match_count.to_string().as_bytes())?; + self.write_line_term(searcher)?; + } + } + SummaryKind::CountMatches => { + if 
show_count { + let stats = self.stats + .as_ref() + .expect("CountMatches should enable stats tracking"); + self.write_path_field()?; + self.write(stats.matches().to_string().as_bytes())?; + self.write_line_term(searcher)?; + } + } + SummaryKind::PathWithMatch => { + if self.match_count > 0 { + self.write_path_line(searcher)?; + } + } + SummaryKind::PathWithoutMatch => { + if self.match_count == 0 { + self.write_path_line(searcher)?; + } + } + SummaryKind::Quiet => {} + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use grep_regex::RegexMatcher; + use grep_searcher::SearcherBuilder; + use termcolor::NoColor; + + use super::{Summary, SummaryKind, SummaryBuilder}; + + const SHERLOCK: &'static [u8] = b"\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. 
+"; + + fn printer_contents( + printer: &mut Summary>>, + ) -> String { + String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap() + } + + #[test] + fn path_with_match_error() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::PathWithMatch) + .build_no_color(vec![]); + let res = SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)); + assert!(res.is_err()); + } + + #[test] + fn path_without_match_error() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::PathWithoutMatch) + .build_no_color(vec![]); + let res = SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)); + assert!(res.is_err()); + } + + #[test] + fn count_no_path() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("2\n", got); + } + + #[test] + fn count_no_path_even_with_path() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .path(false) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("2\n", got); + } + + #[test] + fn count_path() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) 
+ .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("sherlock:2\n", got); + } + + #[test] + fn count_path_with_zero() { + let matcher = RegexMatcher::new( + r"NO MATCH" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .exclude_zero(false) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("sherlock:0\n", got); + } + + #[test] + fn count_path_without_zero() { + let matcher = RegexMatcher::new( + r"NO MATCH" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .exclude_zero(true) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("", got); + } + + #[test] + fn count_path_field_separator() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .separator_field(b"ZZ".to_vec()) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("sherlockZZ2\n", got); + } + + #[test] + fn count_path_terminator() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .path_terminator(Some(b'\x00')) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("sherlock\x002\n", got); + } + + #[test] + fn 
count_path_separator() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .separator_path(Some(b'\\')) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "/home/andrew/sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("\\home\\andrew\\sherlock:2\n", got); + } + + #[test] + fn count_max_matches() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Count) + .max_matches(Some(1)) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("1\n", got); + } + + #[test] + fn count_matches() { + let matcher = RegexMatcher::new( + r"Watson|Sherlock" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::CountMatches) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("sherlock:4\n", got); + } + + #[test] + fn path_with_match_found() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::PathWithMatch) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("sherlock\n", got); + } + + #[test] + fn path_with_match_not_found() { + let matcher = RegexMatcher::new( + r"ZZZZZZZZ" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::PathWithMatch) + .build_no_color(vec![]); + 
SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("", got); + } + + + #[test] + fn path_without_match_found() { + let matcher = RegexMatcher::new( + r"ZZZZZZZZZ" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::PathWithoutMatch) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("sherlock\n", got); + } + + #[test] + fn path_without_match_not_found() { + let matcher = RegexMatcher::new( + r"Watson" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::PathWithoutMatch) + .build_no_color(vec![]); + SearcherBuilder::new() + .build() + .search_reader( + &matcher, + SHERLOCK, + printer.sink_with_path(&matcher, "sherlock"), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + assert_eq_printed!("", got); + } + + #[test] + fn quiet() { + let matcher = RegexMatcher::new( + r"Watson|Sherlock" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Quiet) + .build_no_color(vec![]); + let match_count = { + let mut sink = printer.sink_with_path(&matcher, "sherlock"); + SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, &mut sink) + .unwrap(); + sink.match_count + }; + + let got = printer_contents(&mut printer); + assert_eq_printed!("", got); + // There is actually more than one match, but Quiet should quit after + // finding the first one. 
+ assert_eq!(1, match_count); + } + + #[test] + fn quiet_with_stats() { + let matcher = RegexMatcher::new( + r"Watson|Sherlock" + ).unwrap(); + let mut printer = SummaryBuilder::new() + .kind(SummaryKind::Quiet) + .stats(true) + .build_no_color(vec![]); + let match_count = { + let mut sink = printer.sink_with_path(&matcher, "sherlock"); + SearcherBuilder::new() + .build() + .search_reader(&matcher, SHERLOCK, &mut sink) + .unwrap(); + sink.match_count + }; + + let got = printer_contents(&mut printer); + assert_eq_printed!("", got); + // There is actually more than one match, and Quiet will usually quit + // after finding the first one, but since we request stats, it will + // mush on to find all matches. + assert_eq!(3, match_count); + } +} diff -Nru ripgrep-0.6.0/grep-printer/src/util.rs ripgrep-0.10.0.3/grep-printer/src/util.rs --- ripgrep-0.6.0/grep-printer/src/util.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/src/util.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,392 @@ +use std::borrow::Cow; +use std::fmt; +use std::io; +use std::path::Path; +use std::time; + +use grep_matcher::{Captures, LineTerminator, Match, Matcher}; +use grep_searcher::{ + LineIter, + SinkError, SinkContext, SinkContextKind, SinkMatch, +}; +#[cfg(feature = "serde1")] +use serde::{Serialize, Serializer}; + +/// A type for handling replacements while amortizing allocation. +pub struct Replacer { + space: Option>, +} + +struct Space { + /// The place to store capture locations. + caps: M::Captures, + /// The place to write a replacement to. + dst: Vec, + /// The place to store match offsets in terms of `dst`. + matches: Vec, +} + +impl fmt::Debug for Replacer { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let (dst, matches) = self.replacement().unwrap_or((&[], &[])); + f.debug_struct("Replacer") + .field("dst", &dst) + .field("matches", &matches) + .finish() + } +} + +impl Replacer { + /// Create a new replacer for use with a particular matcher. 
+ /// + /// This constructor does not allocate. Instead, space for dealing with + /// replacements is allocated lazily only when needed. + pub fn new() -> Replacer { + Replacer { space: None } + } + + /// Executes a replacement on the given subject string by replacing all + /// matches with the given replacement. To access the result of the + /// replacement, use the `replacement` method. + /// + /// This can fail if the underlying matcher reports an error. + pub fn replace_all<'a>( + &'a mut self, + matcher: &M, + subject: &[u8], + replacement: &[u8], + ) -> io::Result<()> { + { + let &mut Space { + ref mut dst, + ref mut caps, + ref mut matches, + } = self.allocate(matcher)?; + dst.clear(); + matches.clear(); + + matcher.replace_with_captures( + subject, + caps, + dst, + |caps, dst| { + let start = dst.len(); + caps.interpolate( + |name| matcher.capture_index(name), + subject, + replacement, + dst, + ); + let end = dst.len(); + matches.push(Match::new(start, end)); + true + }, + ).map_err(io::Error::error_message)?; + } + Ok(()) + } + + /// Return the result of the prior replacement and the match offsets for + /// all replacement occurrences within the returned replacement buffer. + /// + /// If no replacement has occurred then `None` is returned. + pub fn replacement<'a>(&'a self) -> Option<(&'a [u8], &'a [Match])> { + match self.space { + None => None, + Some(ref space) => { + if space.matches.is_empty() { + None + } else { + Some((&space.dst, &space.matches)) + } + } + } + } + + /// Clear space used for performing a replacement. + /// + /// Subsequent calls to `replacement` after calling `clear` (but before + /// executing another replacement) will always return `None`. + pub fn clear(&mut self) { + if let Some(ref mut space) = self.space { + space.dst.clear(); + space.matches.clear(); + } + } + + /// Allocate space for replacements when used with the given matcher and + /// return a mutable reference to that space. 
+ /// + /// This can fail if allocating space for capture locations from the given + /// matcher fails. + fn allocate(&mut self, matcher: &M) -> io::Result<&mut Space> { + if self.space.is_none() { + let caps = matcher + .new_captures() + .map_err(io::Error::error_message)?; + self.space = Some(Space { + caps: caps, + dst: vec![], + matches: vec![], + }); + } + Ok(self.space.as_mut().unwrap()) + } +} + +/// A simple layer of abstraction over either a match or a contextual line +/// reported by the searcher. +/// +/// In particular, this provides an API that unions the `SinkMatch` and +/// `SinkContext` types while also exposing a list of all individual match +/// locations. +/// +/// While this serves as a convenient mechanism to abstract over `SinkMatch` +/// and `SinkContext`, this also provides a way to abstract over replacements. +/// Namely, after a replacement, a `Sunk` value can be constructed using the +/// results of the replacement instead of the bytes reported directly by the +/// searcher. 
+#[derive(Debug)] +pub struct Sunk<'a> { + bytes: &'a [u8], + absolute_byte_offset: u64, + line_number: Option, + context_kind: Option<&'a SinkContextKind>, + matches: &'a [Match], + original_matches: &'a [Match], +} + +impl<'a> Sunk<'a> { + #[inline] + pub fn empty() -> Sunk<'static> { + Sunk { + bytes: &[], + absolute_byte_offset: 0, + line_number: None, + context_kind: None, + matches: &[], + original_matches: &[], + } + } + + #[inline] + pub fn from_sink_match( + sunk: &'a SinkMatch<'a>, + original_matches: &'a [Match], + replacement: Option<(&'a [u8], &'a [Match])>, + ) -> Sunk<'a> { + let (bytes, matches) = replacement.unwrap_or_else(|| { + (sunk.bytes(), original_matches) + }); + Sunk { + bytes: bytes, + absolute_byte_offset: sunk.absolute_byte_offset(), + line_number: sunk.line_number(), + context_kind: None, + matches: matches, + original_matches: original_matches, + } + } + + #[inline] + pub fn from_sink_context( + sunk: &'a SinkContext<'a>, + original_matches: &'a [Match], + replacement: Option<(&'a [u8], &'a [Match])>, + ) -> Sunk<'a> { + let (bytes, matches) = replacement.unwrap_or_else(|| { + (sunk.bytes(), original_matches) + }); + Sunk { + bytes: bytes, + absolute_byte_offset: sunk.absolute_byte_offset(), + line_number: sunk.line_number(), + context_kind: Some(sunk.kind()), + matches: matches, + original_matches: original_matches, + } + } + + #[inline] + pub fn context_kind(&self) -> Option<&'a SinkContextKind> { + self.context_kind + } + + #[inline] + pub fn bytes(&self) -> &'a [u8] { + self.bytes + } + + #[inline] + pub fn matches(&self) -> &'a [Match] { + self.matches + } + + #[inline] + pub fn original_matches(&self) -> &'a [Match] { + self.original_matches + } + + #[inline] + pub fn lines(&self, line_term: u8) -> LineIter<'a> { + LineIter::new(line_term, self.bytes()) + } + + #[inline] + pub fn absolute_byte_offset(&self) -> u64 { + self.absolute_byte_offset + } + + #[inline] + pub fn line_number(&self) -> Option { + self.line_number + } +} + 
+/// A simple encapsulation of a file path used by a printer. +/// +/// This represents any transforms that we might want to perform on the path, +/// such as converting it to valid UTF-8 and/or replacing its separator with +/// something else. This allows us to amortize work if we are printing the +/// file path for every match. +/// +/// In the common case, no transformation is needed, which lets us avoid the +/// allocation. Typically, only Windows requires a transform, since we can't +/// access the raw bytes of a path directly and first need to lossily convert +/// to UTF-8. Windows is also typically where the path separator replacement +/// is used, e.g., in cygwin environments to use `/` instead of `\`. +/// +/// Users of this type are expected to construct it from a normal `Path` +/// found in the standard library. It can then be written to any `io::Write` +/// implementation using the `as_bytes` method. This achieves platform +/// portability with a small cost: on Windows, paths that are not valid UTF-16 +/// will not roundtrip correctly. +#[derive(Clone, Debug)] +pub struct PrinterPath<'a>(Cow<'a, [u8]>); + +impl<'a> PrinterPath<'a> { + /// Create a new path suitable for printing. + pub fn new(path: &'a Path) -> PrinterPath<'a> { + PrinterPath::new_impl(path) + } + + #[cfg(unix)] + fn new_impl(path: &'a Path) -> PrinterPath<'a> { + use std::os::unix::ffi::OsStrExt; + PrinterPath(Cow::Borrowed(path.as_os_str().as_bytes())) + } + + #[cfg(not(unix))] + fn new_impl(path: &'a Path) -> PrinterPath<'a> { + PrinterPath(match path.to_string_lossy() { + Cow::Owned(path) => Cow::Owned(path.into_bytes()), + Cow::Borrowed(path) => Cow::Borrowed(path.as_bytes()), + }) + } + + /// Create a new printer path from the given path which can be efficiently + /// written to a writer without allocation. + /// + /// If the given separator is present, then any separators in `path` are + /// replaced with it. 
+ pub fn with_separator(path: &'a Path, sep: Option) -> PrinterPath<'a> { + let mut ppath = PrinterPath::new(path); + if let Some(sep) = sep { + ppath.replace_separator(sep); + } + ppath + } + + /// Replace the path separator in this path with the given separator + /// and do it in place. On Windows, both `/` and `\` are treated as + /// path separators that are both replaced by `new_sep`. In all other + /// environments, only `/` is treated as a path separator. + fn replace_separator(&mut self, new_sep: u8) { + let transformed_path: Vec<_> = self.as_bytes().iter().map(|&b| { + if b == b'/' || (cfg!(windows) && b == b'\\') { + new_sep + } else { + b + } + }).collect(); + self.0 = Cow::Owned(transformed_path); + } + + /// Return the raw bytes for this path. + pub fn as_bytes(&self) -> &[u8] { + &*self.0 + } +} + +/// A type that provides "nicer" Display and Serialize impls for +/// std::time::Duration. The serialization format should actually be compatible +/// with the Deserialize impl for std::time::Duration, since this type only +/// adds new fields. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub struct NiceDuration(pub time::Duration); + +impl fmt::Display for NiceDuration { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:0.6}s", self.fractional_seconds()) + } +} + +impl NiceDuration { + /// Returns the number of seconds in this duration in fraction form. + /// The number to the left of the decimal point is the number of seconds, + /// and the number to the right is the fractional part of a second.
+ fn fractional_seconds(&self) -> f64 { + let fractional = (self.0.subsec_nanos() as f64) / 1_000_000_000.0; + self.0.as_secs() as f64 + fractional + } +} + +#[cfg(feature = "serde1")] +impl Serialize for NiceDuration { + fn serialize(&self, ser: S) -> Result { + use serde::ser::SerializeStruct; + + let mut state = ser.serialize_struct("Duration", 2)?; + state.serialize_field("secs", &self.0.as_secs())?; + state.serialize_field("nanos", &self.0.subsec_nanos())?; + state.serialize_field("human", &format!("{}", self))?; + state.end() + } +} + +/// Trim prefix ASCII spaces from the given slice and return the corresponding +/// range. +/// +/// This stops trimming a prefix as soon as it sees non-whitespace or a line +/// terminator. +pub fn trim_ascii_prefix_range( + line_term: LineTerminator, + slice: &[u8], + range: Match, +) -> Match { + fn is_space(b: u8) -> bool { + match b { + b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ' => true, + _ => false, + } + } + + let count = slice[range] + .iter() + .take_while(|&&b| -> bool { + is_space(b) && !line_term.as_bytes().contains(&b) + }) + .count(); + range.with_start(range.start() + count) +} + +/// Trim prefix ASCII spaces from the given slice and return the corresponding +/// sub-slice. +pub fn trim_ascii_prefix(line_term: LineTerminator, slice: &[u8]) -> &[u8] { + let range = trim_ascii_prefix_range( + line_term, + slice, + Match::new(0, slice.len()), + ); + &slice[range] +} diff -Nru ripgrep-0.6.0/grep-printer/UNLICENSE ripgrep-0.10.0.3/grep-printer/UNLICENSE --- ripgrep-0.6.0/grep-printer/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-printer/UNLICENSE 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. 
+ +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> diff -Nru ripgrep-0.6.0/grep-regex/Cargo.toml ripgrep-0.10.0.3/grep-regex/Cargo.toml --- ripgrep-0.6.0/grep-regex/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,21 @@ +[package] +name = "grep-regex" +version = "0.1.1" #:version +authors = ["Andrew Gallant "] +description = """ +Use Rust's regex library with the 'grep' crate.
+""" +documentation = "https://docs.rs/grep-regex" +homepage = "https://github.com/BurntSushi/ripgrep" +repository = "https://github.com/BurntSushi/ripgrep" +readme = "README.md" +keywords = ["regex", "grep", "search", "pattern", "line"] +license = "Unlicense/MIT" + +[dependencies] +log = "0.4.5" +grep-matcher = { version = "0.1.1", path = "../grep-matcher" } +regex = "1.0.5" +regex-syntax = "0.6.2" +thread_local = "0.3.6" +utf8-ranges = "1.0.1" diff -Nru ripgrep-0.6.0/grep-regex/LICENSE-MIT ripgrep-0.10.0.3/grep-regex/LICENSE-MIT --- ripgrep-0.6.0/grep-regex/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/LICENSE-MIT 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff -Nru ripgrep-0.6.0/grep-regex/README.md ripgrep-0.10.0.3/grep-regex/README.md --- ripgrep-0.6.0/grep-regex/README.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,35 @@ +grep-regex +---------- +The `grep-regex` crate provides an implementation of the `Matcher` trait from +the `grep-matcher` crate. This implementation permits Rust's regex engine to +be used in the `grep` crate for fast line oriented searching. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep-regex.svg)](https://crates.io/crates/grep-regex) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + +### Documentation + +[https://docs.rs/grep-regex](https://docs.rs/grep-regex) + +**NOTE:** You probably don't want to use this crate directly. Instead, you +should prefer the facade defined in the +[`grep`](https://docs.rs/grep) +crate. + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep-regex = "0.1" +``` + +and this to your crate root: + +```rust +extern crate grep_regex; +``` diff -Nru ripgrep-0.6.0/grep-regex/src/ast.rs ripgrep-0.10.0.3/grep-regex/src/ast.rs --- ripgrep-0.6.0/grep-regex/src/ast.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/ast.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,263 @@ +use regex_syntax::ast::{self, Ast}; +use regex_syntax::ast::parse::Parser; + +/// The results of analyzing AST of a regular expression (e.g., for supporting +/// smart case). +#[derive(Clone, Debug)] +pub struct AstAnalysis { + /// True if and only if a literal uppercase character occurs in the regex. + any_uppercase: bool, + /// True if and only if the regex contains any literal at all. 
+ any_literal: bool, + /// True if and only if the regex consists entirely of a literal and no + /// other special regex characters. + all_verbatim_literal: bool, +} + +impl AstAnalysis { + /// Returns an `AstAnalysis` value by doing analysis on the AST of `pattern`. + /// + /// If `pattern` is not a valid regular expression, then `None` is + /// returned. + #[allow(dead_code)] + pub fn from_pattern(pattern: &str) -> Option { + Parser::new() + .parse(pattern) + .map(|ast| AstAnalysis::from_ast(&ast)) + .ok() + } + + /// Perform an AST analysis given the AST. + pub fn from_ast(ast: &Ast) -> AstAnalysis { + let mut analysis = AstAnalysis::new(); + analysis.from_ast_impl(ast); + analysis + } + + /// Returns true if and only if a literal uppercase character occurs in + /// the pattern. + /// + /// For example, a pattern like `\pL` contains no uppercase literals, + /// even though `L` is uppercase and the `\pL` class contains uppercase + /// characters. + pub fn any_uppercase(&self) -> bool { + self.any_uppercase + } + + /// Returns true if and only if the regex contains any literal at all. + /// + /// For example, a pattern like `\pL` reports `false`, but a pattern like + /// `\pLfoo` reports `true`. + pub fn any_literal(&self) -> bool { + self.any_literal + } + + /// Returns true if and only if the entire pattern is a verbatim literal + /// with no special meta characters. + /// + /// When this is true, then the pattern satisfies the following law: + /// `escape(pattern) == pattern`. Notable examples where this returns + /// `false` include patterns like `a\u0061` even though `\u0061` is just + /// a literal `a`. + /// + /// The purpose of this flag is to determine whether the patterns can be + /// given to non-regex substring search algorithms as-is. + #[allow(dead_code)] + pub fn all_verbatim_literal(&self) -> bool { + self.all_verbatim_literal + } + + /// Creates a new `AstAnalysis` value with an initial configuration.
+ fn new() -> AstAnalysis { + AstAnalysis { + any_uppercase: false, + any_literal: false, + all_verbatim_literal: true, + } + } + + fn from_ast_impl(&mut self, ast: &Ast) { + if self.done() { + return; + } + match *ast { + Ast::Empty(_) => {} + Ast::Flags(_) + | Ast::Dot(_) + | Ast::Assertion(_) + | Ast::Class(ast::Class::Unicode(_)) + | Ast::Class(ast::Class::Perl(_)) => { + self.all_verbatim_literal = false; + } + Ast::Literal(ref x) => { + self.from_ast_literal(x); + } + Ast::Class(ast::Class::Bracketed(ref x)) => { + self.all_verbatim_literal = false; + self.from_ast_class_set(&x.kind); + } + Ast::Repetition(ref x) => { + self.all_verbatim_literal = false; + self.from_ast_impl(&x.ast); + } + Ast::Group(ref x) => { + self.all_verbatim_literal = false; + self.from_ast_impl(&x.ast); + } + Ast::Alternation(ref alt) => { + self.all_verbatim_literal = false; + for x in &alt.asts { + self.from_ast_impl(x); + } + } + Ast::Concat(ref alt) => { + for x in &alt.asts { + self.from_ast_impl(x); + } + } + } + } + + fn from_ast_class_set(&mut self, ast: &ast::ClassSet) { + if self.done() { + return; + } + match *ast { + ast::ClassSet::Item(ref item) => { + self.from_ast_class_set_item(item); + } + ast::ClassSet::BinaryOp(ref x) => { + self.from_ast_class_set(&x.lhs); + self.from_ast_class_set(&x.rhs); + } + } + } + + fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) { + if self.done() { + return; + } + match *ast { + ast::ClassSetItem::Empty(_) + | ast::ClassSetItem::Ascii(_) + | ast::ClassSetItem::Unicode(_) + | ast::ClassSetItem::Perl(_) => {} + ast::ClassSetItem::Literal(ref x) => { + self.from_ast_literal(x); + } + ast::ClassSetItem::Range(ref x) => { + self.from_ast_literal(&x.start); + self.from_ast_literal(&x.end); + } + ast::ClassSetItem::Bracketed(ref x) => { + self.from_ast_class_set(&x.kind); + } + ast::ClassSetItem::Union(ref union) => { + for x in &union.items { + self.from_ast_class_set_item(x); + } + } + } + } + + fn from_ast_literal(&mut self, 
ast: &ast::Literal) { + if ast.kind != ast::LiteralKind::Verbatim { + self.all_verbatim_literal = false; + } + self.any_literal = true; + self.any_uppercase = self.any_uppercase || ast.c.is_uppercase(); + } + + /// Returns true if and only if the attributes can never change no matter + /// what other AST it might see. + fn done(&self) -> bool { + self.any_uppercase && self.any_literal && !self.all_verbatim_literal + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn analysis(pattern: &str) -> AstAnalysis { + AstAnalysis::from_pattern(pattern).unwrap() + } + + #[test] + fn various() { + let x = analysis(""); + assert!(!x.any_uppercase); + assert!(!x.any_literal); + assert!(x.all_verbatim_literal); + + let x = analysis("foo"); + assert!(!x.any_uppercase); + assert!(x.any_literal); + assert!(x.all_verbatim_literal); + + let x = analysis("Foo"); + assert!(x.any_uppercase); + assert!(x.any_literal); + assert!(x.all_verbatim_literal); + + let x = analysis("foO"); + assert!(x.any_uppercase); + assert!(x.any_literal); + assert!(x.all_verbatim_literal); + + let x = analysis(r"foo\\"); + assert!(!x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"foo\w"); + assert!(!x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"foo\S"); + assert!(!x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"foo\p{Ll}"); + assert!(!x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"foo[a-z]"); + assert!(!x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"foo[A-Z]"); + assert!(x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"foo[\S\t]"); + assert!(!x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"foo\\S"); + 
assert!(x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"\p{Ll}"); + assert!(!x.any_uppercase); + assert!(!x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"aBc\w"); + assert!(x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + + let x = analysis(r"a\u0061"); + assert!(!x.any_uppercase); + assert!(x.any_literal); + assert!(!x.all_verbatim_literal); + } +} diff -Nru ripgrep-0.6.0/grep-regex/src/config.rs ripgrep-0.10.0.3/grep-regex/src/config.rs --- ripgrep-0.6.0/grep-regex/src/config.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/config.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,265 @@ +use grep_matcher::{ByteSet, LineTerminator}; +use regex::bytes::{Regex, RegexBuilder}; +use regex_syntax::ast::{self, Ast}; +use regex_syntax::hir::Hir; + +use ast::AstAnalysis; +use crlf::crlfify; +use error::Error; +use literal::LiteralSets; +use non_matching::non_matching_bytes; +use strip::strip_from_match; + +/// Config represents the configuration of a regex matcher in this crate. +/// The configuration is itself a rough combination of the knobs found in +/// the `regex` crate itself, along with additional `grep-matcher` specific +/// options. +/// +/// The configuration can be used to build a "configured" HIR expression. A +/// configured HIR expression is an HIR expression that is aware of the +/// configuration which generated it, and provides transformation on that HIR +/// such that the configuration is preserved. 
+#[derive(Clone, Debug)] +pub struct Config { + pub case_insensitive: bool, + pub case_smart: bool, + pub multi_line: bool, + pub dot_matches_new_line: bool, + pub swap_greed: bool, + pub ignore_whitespace: bool, + pub unicode: bool, + pub octal: bool, + pub size_limit: usize, + pub dfa_size_limit: usize, + pub nest_limit: u32, + pub line_terminator: Option, + pub crlf: bool, + pub word: bool, +} + +impl Default for Config { + fn default() -> Config { + Config { + case_insensitive: false, + case_smart: false, + multi_line: false, + dot_matches_new_line: false, + swap_greed: false, + ignore_whitespace: false, + unicode: true, + octal: false, + // These size limits are much bigger than what's in the regex + // crate. + size_limit: 100 * (1<<20), + dfa_size_limit: 1000 * (1<<20), + nest_limit: 250, + line_terminator: None, + crlf: false, + word: false, + } + } +} + +impl Config { + /// Parse the given pattern and return its HIR expression along with + /// the current configuration. + /// + /// If there was a problem parsing the given expression then an error + /// is returned. + pub fn hir(&self, pattern: &str) -> Result { + let analysis = self.analysis(pattern)?; + let expr = ::regex_syntax::ParserBuilder::new() + .nest_limit(self.nest_limit) + .octal(self.octal) + .allow_invalid_utf8(true) + .ignore_whitespace(self.ignore_whitespace) + .case_insensitive(self.is_case_insensitive(&analysis)?) + .multi_line(self.multi_line) + .dot_matches_new_line(self.dot_matches_new_line) + .swap_greed(self.swap_greed) + .unicode(self.unicode) + .build() + .parse(pattern) + .map_err(Error::regex)?; + let expr = match self.line_terminator { + None => expr, + Some(line_term) => strip_from_match(expr, line_term)?, + }; + Ok(ConfiguredHIR { + original: pattern.to_string(), + config: self.clone(), + analysis: analysis, + // If CRLF mode is enabled, replace `$` with `(?:\r?$)`.
+ expr: if self.crlf { crlfify(expr) } else { expr }, + }) + } + + /// Accounting for the `smart_case` config knob, return true if and only if + /// this pattern should be matched case insensitively. + fn is_case_insensitive( + &self, + analysis: &AstAnalysis, + ) -> Result { + if self.case_insensitive { + return Ok(true); + } + if !self.case_smart { + return Ok(false); + } + Ok(analysis.any_literal() && !analysis.any_uppercase()) + } + + /// Perform analysis on the AST of this pattern. + /// + /// This returns an error if the given pattern failed to parse. + fn analysis(&self, pattern: &str) -> Result { + Ok(AstAnalysis::from_ast(&self.ast(pattern)?)) + } + + /// Parse the given pattern into its abstract syntax. + /// + /// This returns an error if the given pattern failed to parse. + fn ast(&self, pattern: &str) -> Result { + ast::parse::ParserBuilder::new() + .nest_limit(self.nest_limit) + .octal(self.octal) + .ignore_whitespace(self.ignore_whitespace) + .build() + .parse(pattern) + .map_err(Error::regex) + } +} + +/// A "configured" HIR expression, which is aware of the configuration which +/// produced this HIR. +/// +/// Since the configuration is tracked, values with this type can be +/// transformed into other HIR expressions (or regular expressions) in a way +/// that preserves the configuration. For example, the `fast_line_regex` +/// method will apply literal extraction to the inner HIR and use that to build +/// a new regex that matches the extracted literals in a way that is +/// consistent with the configuration that produced this HIR. For example, the +/// size limits set on the configured HIR will be propagated out to any +/// subsequently constructed HIR or regular expression. +#[derive(Clone, Debug)] +pub struct ConfiguredHIR { + original: String, + config: Config, + analysis: AstAnalysis, + expr: Hir, +} + +impl ConfiguredHIR { + /// Return the configuration for this HIR expression. 
+ pub fn config(&self) -> &Config { + &self.config + } + + /// Compute the set of non-matching bytes for this HIR expression. + pub fn non_matching_bytes(&self) -> ByteSet { + non_matching_bytes(&self.expr) + } + + /// Builds a regular expression from this HIR expression. + pub fn regex(&self) -> Result { + self.pattern_to_regex(&self.expr.to_string()) + } + + /// Applies the given function to the concrete syntax of this HIR and then + /// generates a new HIR based on the result of the function in a way that + /// preserves the configuration. + /// + /// For example, this can be used to wrap a user provided regular + /// expression with additional semantics. e.g., See the `WordMatcher`. + pub fn with_pattern String>( + &self, + mut f: F, + ) -> Result + { + self.pattern_to_hir(&f(&self.expr.to_string())) + } + + /// If the current configuration has a line terminator set and if useful + /// literals could be extracted, then a regular expression matching those + /// literals is returned. If no line terminator is set, then `None` is + /// returned. + /// + /// If compiling the resulting regular expression failed, then an error + /// is returned. + /// + /// This method only returns something when a line terminator is set + /// because matches from this regex are generally candidates that must be + /// confirmed before reporting a match. When performing a line oriented + /// search, confirmation is easy: just extend the candidate match to its + /// respective line boundaries and then re-search that line for a full + /// match. This only works when the line terminator is set because the line + /// terminator setting guarantees that the regex itself can never match + /// through the line terminator byte. 
+ pub fn fast_line_regex(&self) -> Result, Error> { + if self.config.line_terminator.is_none() { + return Ok(None); + } + match LiteralSets::new(&self.expr).one_regex() { + None => Ok(None), + Some(pattern) => self.pattern_to_regex(&pattern).map(Some), + } + } + + /// Create a regex from the given pattern using this HIR's configuration. + fn pattern_to_regex(&self, pattern: &str) -> Result { + // The settings we explicitly set here are intentionally a subset + // of the settings we have. The key point here is that our HIR + // expression is computed with the settings in mind, such that setting + // them here could actually lead to unintended behavior. For example, + // consider the pattern `(?U)a+`. This will get folded into the HIR + // as a non-greedy repetition operator which will in turn get printed + // to the concrete syntax as `a+?`, which is correct. But if we + // set the `swap_greed` option again, then we'll wind up with `(?U)a+?` + // which is equal to `a+` which is not the same as what we were given. + // + // We also don't need to apply `case_insensitive` since this gets + // folded into the HIR and would just cause us to do redundant work. + // + // Finally, we don't need to set `ignore_whitespace` since the concrete + // syntax emitted by the HIR printer never needs it. + // + // We set the rest of the options. Some of them are important, such as + // the size limit, and some of them are necessary to preserve the + // intention of the original pattern. For example, the Unicode flag + // will impact how the WordMatcher functions, namely, whether its + // word boundaries are Unicode aware or not. 
+ RegexBuilder::new(&pattern) + .nest_limit(self.config.nest_limit) + .octal(self.config.octal) + .multi_line(self.config.multi_line) + .dot_matches_new_line(self.config.dot_matches_new_line) + .unicode(self.config.unicode) + .size_limit(self.config.size_limit) + .dfa_size_limit(self.config.dfa_size_limit) + .build() + .map_err(Error::regex) + } + + /// Create an HIR expression from the given pattern using this HIR's + /// configuration. + fn pattern_to_hir(&self, pattern: &str) -> Result { + // See `pattern_to_regex` comment for explanation of why we only set + // a subset of knobs here. e.g., `swap_greed` is explicitly left out. + let expr = ::regex_syntax::ParserBuilder::new() + .nest_limit(self.config.nest_limit) + .octal(self.config.octal) + .allow_invalid_utf8(true) + .multi_line(self.config.multi_line) + .dot_matches_new_line(self.config.dot_matches_new_line) + .unicode(self.config.unicode) + .build() + .parse(pattern) + .map_err(Error::regex)?; + Ok(ConfiguredHIR { + original: self.original.clone(), + config: self.config.clone(), + analysis: self.analysis.clone(), + expr: expr, + }) + } +} diff -Nru ripgrep-0.6.0/grep-regex/src/crlf.rs ripgrep-0.10.0.3/grep-regex/src/crlf.rs --- ripgrep-0.6.0/grep-regex/src/crlf.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/crlf.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,83 @@ +use regex_syntax::hir::{self, Hir, HirKind}; + +/// Substitutes all occurrences of multi-line enabled `$` with `(?:\r?$)`. +/// +/// This does not preserve the exact semantics of the given expression, +/// however, it does have the useful property that anything that matched the +/// given expression will also match the returned expression. The difference is +/// that the returned expression can match possibly other things as well. +/// +/// The principle reason why we do this is because the underlying regex engine +/// doesn't support CRLF aware `$` look-around. 
It's planned to fix it at that +/// level, but we perform this kludge in the meantime. +/// +/// Note that while the match preserving semantics are nice and neat, the +/// match position semantics are quite a bit messier. Namely, `$` only ever +/// matches the position between characters whereas `\r??` can match a +/// character and change the offset. This is regrettable, but works out pretty +/// nicely in most cases, especially when a match is limited to a single line. +pub fn crlfify(expr: Hir) -> Hir { + match expr.into_kind() { + HirKind::Anchor(hir::Anchor::EndLine) => { + let concat = Hir::concat(vec![ + Hir::repetition(hir::Repetition { + kind: hir::RepetitionKind::ZeroOrOne, + greedy: false, + hir: Box::new(Hir::literal(hir::Literal::Unicode('\r'))), + }), + Hir::anchor(hir::Anchor::EndLine), + ]); + Hir::group(hir::Group { + kind: hir::GroupKind::NonCapturing, + hir: Box::new(concat), + }) + } + HirKind::Empty => Hir::empty(), + HirKind::Literal(x) => Hir::literal(x), + HirKind::Class(x) => Hir::class(x), + HirKind::Anchor(x) => Hir::anchor(x), + HirKind::WordBoundary(x) => Hir::word_boundary(x), + HirKind::Repetition(mut x) => { + x.hir = Box::new(crlfify(*x.hir)); + Hir::repetition(x) + } + HirKind::Group(mut x) => { + x.hir = Box::new(crlfify(*x.hir)); + Hir::group(x) + } + HirKind::Concat(xs) => { + Hir::concat(xs.into_iter().map(crlfify).collect()) + } + HirKind::Alternation(xs) => { + Hir::alternation(xs.into_iter().map(crlfify).collect()) + } + } +} + +#[cfg(test)] +mod tests { + use regex_syntax::Parser; + use super::crlfify; + + fn roundtrip(pattern: &str) -> String { + let expr1 = Parser::new().parse(pattern).unwrap(); + let expr2 = crlfify(expr1); + expr2.to_string() + } + + #[test] + fn various() { + assert_eq!(roundtrip(r"(?m)$"), "(?:\r??(?m:$))"); + assert_eq!(roundtrip(r"(?m)$$"), "(?:\r??(?m:$))(?:\r??(?m:$))"); + assert_eq!( + roundtrip(r"(?m)(?:foo$|bar$)"), + "(?:foo(?:\r??(?m:$))|bar(?:\r??(?m:$)))" + ); +
assert_eq!(roundtrip(r"(?m)$a"), "(?:\r??(?m:$))a"); + + // Not a multiline `$`, so no crlfifying occurs. + assert_eq!(roundtrip(r"$"), "\\z"); + // It's a literal, derp. + assert_eq!(roundtrip(r"\$"), "\\$"); + } +} diff -Nru ripgrep-0.6.0/grep-regex/src/error.rs ripgrep-0.10.0.3/grep-regex/src/error.rs --- ripgrep-0.6.0/grep-regex/src/error.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/error.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,88 @@ +use std::error; +use std::fmt; + +use util; + +/// An error that can occur in this crate. +/// +/// Generally, this error corresponds to problems building a regular +/// expression, whether it's in parsing, compilation or a problem with +/// guaranteeing a configured optimization. +#[derive(Clone, Debug)] +pub struct Error { + kind: ErrorKind, +} + +impl Error { + pub(crate) fn new(kind: ErrorKind) -> Error { + Error { kind } + } + + pub(crate) fn regex(err: E) -> Error { + Error { kind: ErrorKind::Regex(err.to_string()) } + } + + /// Return the kind of this error. + pub fn kind(&self) -> &ErrorKind { + &self.kind + } +} + +/// The kind of an error that can occur. +#[derive(Clone, Debug)] +pub enum ErrorKind { + /// An error that occurred as a result of parsing a regular expression. + /// This can be a syntax error or an error that results from attempting to + /// compile a regular expression that is too big. + /// + /// The string here is the underlying error converted to a string. + Regex(String), + /// An error that occurs when building a regex that isn't permitted to + /// match a line terminator. In general, building the regex will do its + /// best to make matching a line terminator impossible (e.g., by removing + /// `\n` from the `\s` character class), but if the regex contains a + /// `\n` literal, then there is no reasonable choice that can be made and + /// therefore an error is reported.
+ /// + /// The string is the literal sequence found in the regex that is not + /// allowed. + NotAllowed(String), + /// This error occurs when a non-ASCII line terminator was provided. + /// + /// The invalid byte is included in this error. + InvalidLineTerminator(u8), + /// Hints that destructuring should not be exhaustive. + /// + /// This enum may grow additional variants, so this makes sure clients + /// don't count on exhaustive matching. (Otherwise, adding a new variant + /// could break existing code.) + #[doc(hidden)] + __Nonexhaustive, +} + +impl error::Error for Error { + fn description(&self) -> &str { + match self.kind { + ErrorKind::Regex(_) => "regex error", + ErrorKind::NotAllowed(_) => "literal not allowed", + ErrorKind::InvalidLineTerminator(_) => "invalid line terminator", + ErrorKind::__Nonexhaustive => unreachable!(), + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.kind { + ErrorKind::Regex(ref s) => write!(f, "{}", s), + ErrorKind::NotAllowed(ref lit) => { + write!(f, "the literal '{:?}' is not allowed in a regex", lit) + } + ErrorKind::InvalidLineTerminator(byte) => { + let x = util::show_bytes(&[byte]); + write!(f, "line terminators must be ASCII, but '{}' is not", x) + } + ErrorKind::__Nonexhaustive => unreachable!(), + } + } +} diff -Nru ripgrep-0.6.0/grep-regex/src/lib.rs ripgrep-0.10.0.3/grep-regex/src/lib.rs --- ripgrep-0.6.0/grep-regex/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/lib.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,27 @@ +/*! +An implementation of `grep-matcher`'s `Matcher` trait for Rust's regex engine. 
+*/ + +#![deny(missing_docs)] + +extern crate grep_matcher; +#[macro_use] +extern crate log; +extern crate regex; +extern crate regex_syntax; +extern crate thread_local; +extern crate utf8_ranges; + +pub use error::{Error, ErrorKind}; +pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder}; + +mod ast; +mod config; +mod crlf; +mod error; +mod literal; +mod matcher; +mod non_matching; +mod strip; +mod util; +mod word; diff -Nru ripgrep-0.6.0/grep-regex/src/literal.rs ripgrep-0.10.0.3/grep-regex/src/literal.rs --- ripgrep-0.6.0/grep-regex/src/literal.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/literal.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,304 @@ +/* +This module is responsible for extracting *inner* literals out of the AST of a +regular expression. Normally this is the job of the regex engine itself, but +the regex engine doesn't look for inner literals. Since we're doing line based +searching, we can use them, so we need to do it ourselves. +*/ + +use std::cmp; + +use regex_syntax::hir::{self, Hir, HirKind}; +use regex_syntax::hir::literal::{Literal, Literals}; + +use util; + +/// Represents prefix, suffix and inner "required" literals for a regular +/// expression. +/// +/// Prefixes and suffixes are detected using regex-syntax. The inner required +/// literals are detected using something custom (but based on the code in +/// regex-syntax). +#[derive(Clone, Debug)] +pub struct LiteralSets { + /// A set of prefix literals. + prefixes: Literals, + /// A set of suffix literals. + suffixes: Literals, + /// A set of literals such that at least one of them must appear in every + /// match. A literal in this set may be neither a prefix nor a suffix. + required: Literals, +} + +impl LiteralSets { + /// Create a set of literals from the given HIR expression. 
+ pub fn new(expr: &Hir) -> LiteralSets { + let mut required = Literals::empty(); + union_required(expr, &mut required); + LiteralSets { + prefixes: Literals::prefixes(expr), + suffixes: Literals::suffixes(expr), + required: required, + } + } + + /// If it is deemed advantageous to do so (via various suspicious + /// heuristics), this will return a single regular expression pattern that + /// matches a subset of the language matched by the regular expression that + /// generated these literal sets. The idea here is that the pattern + /// returned by this method is much cheaper to search for. i.e., It is + /// usually a single literal or an alternation of literals. + pub fn one_regex(&self) -> Option { + // TODO: The logic in this function is basically inscrutable. It grew + // organically in the old grep 0.1 crate. Ideally, it would be + // re-worked. In fact, the entire inner literal extraction should be + // re-worked. Actually, most of regex-syntax's literal extraction + // should also be re-worked. Alas... only so much time in the day. + + if self.prefixes.all_complete() && !self.prefixes.is_empty() { + debug!("literal prefixes detected: {:?}", self.prefixes); + // When this is true, the regex engine will do a literal scan, + // so we don't need to return anything. + return None; + } + + // Out of inner required literals, prefixes and suffixes, which one + // is the longest? We pick the longest to do fast literal scan under + // the assumption that a longer literal will have a lower false + // positive rate.
+ let pre_lcp = self.prefixes.longest_common_prefix(); + let pre_lcs = self.prefixes.longest_common_suffix(); + let suf_lcp = self.suffixes.longest_common_prefix(); + let suf_lcs = self.suffixes.longest_common_suffix(); + + let req_lits = self.required.literals(); + let req = match req_lits.iter().max_by_key(|lit| lit.len()) { + None => &[], + Some(req) => &***req, + }; + + let mut lit = pre_lcp; + if pre_lcs.len() > lit.len() { + lit = pre_lcs; + } + if suf_lcp.len() > lit.len() { + lit = suf_lcp; + } + if suf_lcs.len() > lit.len() { + lit = suf_lcs; + } + if req_lits.len() == 1 && req.len() > lit.len() { + lit = req; + } + + // Special case: if we detected an alternation of inner required + // literals and its longest literal is bigger than the longest + // prefix/suffix, then choose the alternation. In practice, this + // helps with case insensitive matching, which can generate lots of + // inner required literals. + let any_empty = req_lits.iter().any(|lit| lit.is_empty()); + if req.len() > lit.len() && req_lits.len() > 1 && !any_empty { + debug!("required literals found: {:?}", req_lits); + let alts: Vec = req_lits + .into_iter() + .map(|x| util::bytes_to_regex(x)) + .collect(); + // We're matching raw bytes, so disable Unicode mode. 
+ Some(format!("(?-u:{})", alts.join("|"))) + } else if lit.is_empty() { + None + } else { + debug!("required literal found: {:?}", util::show_bytes(lit)); + Some(format!("(?-u:{})", util::bytes_to_regex(&lit))) + } + } +} + +fn union_required(expr: &Hir, lits: &mut Literals) { + match *expr.kind() { + HirKind::Literal(hir::Literal::Unicode(c)) => { + let mut buf = [0u8; 4]; + lits.cross_add(c.encode_utf8(&mut buf).as_bytes()); + } + HirKind::Literal(hir::Literal::Byte(b)) => { + lits.cross_add(&[b]); + } + HirKind::Class(hir::Class::Unicode(ref cls)) => { + if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) { + lits.cut(); + } + } + HirKind::Class(hir::Class::Bytes(ref cls)) => { + if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) { + lits.cut(); + } + } + HirKind::Group(hir::Group { ref hir, .. }) => { + union_required(&**hir, lits); + } + HirKind::Repetition(ref x) => { + match x.kind { + hir::RepetitionKind::ZeroOrOne => lits.cut(), + hir::RepetitionKind::ZeroOrMore => lits.cut(), + hir::RepetitionKind::OneOrMore => { + union_required(&x.hir, lits); + lits.cut(); + } + hir::RepetitionKind::Range(ref rng) => { + let (min, max) = match *rng { + hir::RepetitionRange::Exactly(m) => (m, Some(m)), + hir::RepetitionRange::AtLeast(m) => (m, None), + hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), + }; + repeat_range_literals( + &x.hir, min, max, x.greedy, lits, union_required); + } + } + } + HirKind::Concat(ref es) if es.is_empty() => {} + HirKind::Concat(ref es) if es.len() == 1 => { + union_required(&es[0], lits) + } + HirKind::Concat(ref es) => { + for e in es { + let mut lits2 = lits.to_empty(); + union_required(e, &mut lits2); + if lits2.is_empty() { + lits.cut(); + continue; + } + if lits2.contains_empty() { + lits.cut(); + } + if !lits.cross_product(&lits2) { + // If this expression couldn't yield any literal that + // could be extended, then we need to quit. Since we're + // short-circuiting, we also need to freeze every member. 
+ lits.cut(); + break; + } + } + } + HirKind::Alternation(ref es) => { + alternate_literals(es, lits, union_required); + } + _ => lits.cut(), + } +} + +fn repeat_range_literals( + e: &Hir, + min: u32, + max: Option, + _greedy: bool, + lits: &mut Literals, + mut f: F, +) { + if min == 0 { + // This is a bit conservative. If `max` is set, then we could + // treat this as a finite set of alternations. For now, we + // just treat it as `e*`. + lits.cut(); + } else { + let n = cmp::min(lits.limit_size(), min as usize); + // We only extract literals from a single repetition, even though + // we could do more. e.g., `a{3}` will have `a` extracted instead of + // `aaa`. The reason is that inner literal extraction can't be unioned + // across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}` + // is wrong. + f(e, lits); + if n < min as usize { + lits.cut(); + } + if max.map_or(true, |max| min < max) { + lits.cut(); + } + } +} + +fn alternate_literals( + es: &[Hir], + lits: &mut Literals, + mut f: F, +) { + let mut lits2 = lits.to_empty(); + for e in es { + let mut lits3 = lits.to_empty(); + lits3.set_limit_size(lits.limit_size() / 5); + f(e, &mut lits3); + if lits3.is_empty() || !lits2.union(lits3) { + // If we couldn't find suffixes for *any* of the + // alternates, then the entire alternation has to be thrown + // away and any existing members must be frozen. Similarly, + // if the union couldn't complete, stop and freeze. + lits.cut(); + return; + } + } + // All we do at the moment is look for prefixes and suffixes. If both + // are empty, then we report nothing. We should be able to do better than + // this, but we'll need something more expressive than just a "set of + // literals." 
+ let lcp = lits2.longest_common_prefix(); + let lcs = lits2.longest_common_suffix(); + if !lcp.is_empty() { + lits.cross_add(lcp); + } + lits.cut(); + if !lcs.is_empty() { + lits.add(Literal::empty()); + lits.add(Literal::new(lcs.to_vec())); + } +} + +/// Return the number of characters in the given class. +fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 { + cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() +} + +/// Return the number of bytes in the given class. +fn count_byte_class(cls: &hir::ClassBytes) -> u32 { + cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() +} + +#[cfg(test)] +mod tests { + use regex_syntax::Parser; + use super::LiteralSets; + + fn sets(pattern: &str) -> LiteralSets { + let hir = Parser::new().parse(pattern).unwrap(); + LiteralSets::new(&hir) + } + + fn one_regex(pattern: &str) -> Option { + sets(pattern).one_regex() + } + + // Put a pattern into the same format as the one returned by `one_regex`. + fn pat(pattern: &str) -> Option { + Some(format!("(?-u:{})", pattern)) + } + + #[test] + fn various() { + // Obviously no literals. + assert!(one_regex(r"\w").is_none()); + assert!(one_regex(r"\pL").is_none()); + + // Tantalizingly close. + assert!(one_regex(r"\w|foo").is_none()); + + // There's a literal, but it's better if the regex engine handles it + // internally. + assert!(one_regex(r"abc").is_none()); + + // Core use cases. + assert_eq!(one_regex(r"\wabc\w"), pat("abc")); + assert_eq!(one_regex(r"abc\w"), pat("abc")); + + // TODO: Make these pass. We're missing some potentially big wins + // without these. 
+ // assert_eq!(one_regex(r"\w(foo|bar|baz)"), pat("foo|bar|baz")); + // assert_eq!(one_regex(r"\w(foo|bar|baz)\w"), pat("foo|bar|baz")); + } +} diff -Nru ripgrep-0.6.0/grep-regex/src/matcher.rs ripgrep-0.10.0.3/grep-regex/src/matcher.rs --- ripgrep-0.6.0/grep-regex/src/matcher.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/matcher.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,871 @@ +use std::collections::HashMap; + +use grep_matcher::{ + Captures, LineMatchKind, LineTerminator, Match, Matcher, NoError, ByteSet, +}; +use regex::bytes::{CaptureLocations, Regex}; + +use config::{Config, ConfiguredHIR}; +use error::Error; +use word::WordMatcher; + +/// A builder for constructing a `Matcher` using regular expressions. +/// +/// This builder re-exports many of the same options found on the regex crate's +/// builder, in addition to a few other options such as smart case, word +/// matching and the ability to set a line terminator which may enable certain +/// types of optimizations. +/// +/// The syntax supported is documented as part of the regex crate: +/// https://docs.rs/regex/*/regex/#syntax +#[derive(Clone, Debug)] +pub struct RegexMatcherBuilder { + config: Config, +} + +impl Default for RegexMatcherBuilder { + fn default() -> RegexMatcherBuilder { + RegexMatcherBuilder::new() + } +} + +impl RegexMatcherBuilder { + /// Create a new builder for configuring a regex matcher. + pub fn new() -> RegexMatcherBuilder { + RegexMatcherBuilder { + config: Config::default(), + } + } + + /// Build a new matcher using the current configuration for the provided + /// pattern. 
+ /// + /// The syntax supported is documented as part of the regex crate: + /// https://docs.rs/regex/*/regex/#syntax + pub fn build(&self, pattern: &str) -> Result { + let chir = self.config.hir(pattern)?; + let fast_line_regex = chir.fast_line_regex()?; + let non_matching_bytes = chir.non_matching_bytes(); + if let Some(ref re) = fast_line_regex { + trace!("extracted fast line regex: {:?}", re); + } + Ok(RegexMatcher { + config: self.config.clone(), + matcher: RegexMatcherImpl::new(&chir)?, + fast_line_regex: fast_line_regex, + non_matching_bytes: non_matching_bytes, + }) + } + + /// Set the value for the case insensitive (`i`) flag. + /// + /// When enabled, letters in the pattern will match both upper case and + /// lower case variants. + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.config.case_insensitive = yes; + self + } + + /// Whether to enable "smart case" or not. + /// + /// When smart case is enabled, the builder will automatically enable + /// case insensitive matching based on how the pattern is written. Namely, + /// case insensitive mode is enabled when both of the following things + /// are true: + /// + /// 1. The pattern contains at least one literal character. For example, + /// `a\w` contains a literal (`a`) but `\w` does not. + /// 2. Of the literals in the pattern, none of them are considered to be + /// uppercase according to Unicode. For example, `foo\pL` has no + /// uppercase literals but `Foo\pL` does. + pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.config.case_smart = yes; + self + } + + /// Set the value for the multi-line matching (`m`) flag. + /// + /// When enabled, `^` matches the beginning of lines and `$` matches the + /// end of lines. + /// + /// By default, they match beginning/end of the input. 
+ pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.config.multi_line = yes; + self + } + + /// Set the value for the any character (`s`) flag, where in `.` matches + /// anything when `s` is set and matches anything except for new line when + /// it is not set (the default). + /// + /// N.B. "matches anything" means "any byte" when Unicode is disabled and + /// means "any valid UTF-8 encoding of any Unicode scalar value" when + /// Unicode is enabled. + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexMatcherBuilder { + self.config.dot_matches_new_line = yes; + self + } + + /// Set the value for the greedy swap (`U`) flag. + /// + /// When enabled, a pattern like `a*` is lazy (tries to find shortest + /// match) and `a*?` is greedy (tries to find longest match). + /// + /// By default, `a*` is greedy and `a*?` is lazy. + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.config.swap_greed = yes; + self + } + + /// Set the value for the ignore whitespace (`x`) flag. + /// + /// When enabled, whitespace such as new lines and spaces will be ignored + /// between expressions of the pattern, and `#` can be used to start a + /// comment until the next new line. + pub fn ignore_whitespace( + &mut self, + yes: bool, + ) -> &mut RegexMatcherBuilder { + self.config.ignore_whitespace = yes; + self + } + + /// Set the value for the Unicode (`u`) flag. + /// + /// Enabled by default. When disabled, character classes such as `\w` only + /// match ASCII word characters instead of all Unicode word characters. + pub fn unicode(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.config.unicode = yes; + self + } + + /// Whether to support octal syntax or not. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints in + /// a regular expression. 
For example, `a`, `\x61`, `\u0061` and + /// `\141` are all equivalent regular expressions, where the last example + /// shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, it does + /// make good error messages harder. That is, in PCRE based regex engines, + /// syntax like `\0` invokes a backreference, which is explicitly + /// unsupported in Rust's regex engine. However, many users expect it to + /// be supported. Therefore, when octal support is disabled, the error + /// message will explicitly mention that backreferences aren't supported. + /// + /// Octal syntax is disabled by default. + pub fn octal(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.config.octal = yes; + self + } + + /// Set the approximate size limit of the compiled regular expression. + /// + /// This roughly corresponds to the number of bytes occupied by a single + /// compiled program. If the program exceeds this number, then a + /// compilation error is returned. + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexMatcherBuilder { + self.config.size_limit = bytes; + self + } + + /// Set the approximate size of the cache used by the DFA. + /// + /// This roughly corresponds to the number of bytes that the DFA will + /// use while searching. + /// + /// Note that this is a *per thread* limit. There is no way to set a global + /// limit. In particular, if a regex is used from multiple threads + /// simultaneously, then each thread may use up to the number of bytes + /// specified here. + pub fn dfa_size_limit( + &mut self, + bytes: usize, + ) -> &mut RegexMatcherBuilder { + self.config.dfa_size_limit = bytes; + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is allowed + /// to be. If the AST exceeds the given limit (e.g., with too many nested + /// groups), then an error is returned by the parser. 
+ /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an `Ast` using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire Ast is parsed. Therefore, + /// if callers want to put a limit on the amount of heap space used, then + /// they should impose a limit on the length, in bytes, of the concrete + /// pattern string. In particular, this is viable since this parser + /// implementation will limit itself to heap space proportional to the + /// length of the pattern string. + /// + /// Note that a nest limit of `0` will return a nest limit error for most + /// patterns but not all. For example, a nest limit of `0` permits `a` but + /// not `ab`, since `ab` requires a concatenation, which results in a nest + /// depth of `1`. In general, a nest limit is not something that manifests + /// in an obvious way in the concrete syntax, therefore, it should not be + /// used in a granular way. + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexMatcherBuilder { + self.config.nest_limit = limit; + self + } + + /// Set an ASCII line terminator for the matcher. + /// + /// The purpose of setting a line terminator is to enable a certain class + /// of optimizations that can make line oriented searching faster. Namely, + /// when a line terminator is enabled, then the builder will guarantee that + /// the resulting matcher will never be capable of producing a match that + /// contains the line terminator. Because of this guarantee, users of the + /// resulting matcher do not need to slowly execute a search line by line + /// for line oriented search.
+ /// + /// If the aforementioned guarantee about not matching a line terminator + /// cannot be made because of how the pattern was written, then the builder + /// will return an error when attempting to construct the matcher. For + /// example, the pattern `a\sb` will be transformed such that it can never + /// match `a\nb` (when `\n` is the line terminator), but the pattern `a\nb` + /// will result in an error since the `\n` cannot be easily removed without + /// changing the fundamental intent of the pattern. + /// + /// If the given line terminator isn't an ASCII byte (`<=127`), then the + /// builder will return an error when constructing the matcher. + pub fn line_terminator( + &mut self, + line_term: Option, + ) -> &mut RegexMatcherBuilder { + self.config.line_terminator = line_term.map(LineTerminator::byte); + self + } + + /// Set the line terminator to `\r\n` and enable CRLF matching for `$` in + /// regex patterns. + /// + /// This method sets two distinct settings: + /// + /// 1. It causes the line terminator for the matcher to be `\r\n`. Namely, + /// this prevents the matcher from ever producing a match that contains + /// a `\r` or `\n`. + /// 2. It translates all instances of `$` in the pattern to `(?:\r??$)`. + /// This works around the fact that the regex engine does not support + /// matching CRLF as a line terminator when using `$`. + /// + /// In particular, because of (2), the matches produced by the matcher may + /// be slightly different than what one would expect given the pattern. + /// This is the trade off made: in many cases, `$` will "just work" in the + /// presence of `\r\n` line terminators, but matches may require some + /// trimming to faithfully represent the intended match. + /// + /// Note that if you do not wish to set the line terminator but would still + /// like `$` to match `\r\n` line terminators, then it is valid to call + /// `crlf(true)` followed by `line_terminator(None)`. 
Ordering is + /// important, since `crlf` and `line_terminator` override each other. + pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + if yes { + self.config.line_terminator = Some(LineTerminator::crlf()); + } else { + self.config.line_terminator = None; + } + self.config.crlf = yes; + self + } + + /// Require that all matches occur on word boundaries. + /// + /// Enabling this option is subtly different than putting `\b` assertions + /// on both sides of your pattern. In particular, a `\b` assertion requires + /// that one side of it match a word character while the other match a + /// non-word character. This option, in contrast, merely requires that + /// one side match a non-word character. + /// + /// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a + /// word character. However, `-2` with this `word` option enabled will + /// match the `-2` in `foo -2 bar`. + pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder { + self.config.word = yes; + self + } +} + +/// An implementation of the `Matcher` trait using Rust's standard regex +/// library. +#[derive(Clone, Debug)] +pub struct RegexMatcher { + /// The configuration specified by the caller. + config: Config, + /// The underlying matcher implementation. + matcher: RegexMatcherImpl, + /// A regex that never reports false negatives but may report false + /// positives that is believed to be capable of being matched more quickly + /// than `regex`. Typically, this is a single literal or an alternation + /// of literals. + fast_line_regex: Option, + /// A set of bytes that will never appear in a match. + non_matching_bytes: ByteSet, +} + +impl RegexMatcher { + /// Create a new matcher from the given pattern using the default + /// configuration. + pub fn new(pattern: &str) -> Result { + RegexMatcherBuilder::new().build(pattern) + } + + /// Create a new matcher from the given pattern using the default + /// configuration, but matches lines terminated by `\n`. 
+ /// + /// This is meant to be a convenience constructor for using a + /// `RegexMatcherBuilder` and setting its + /// [`line_terminator`](struct.RegexMatcherBuilder.html#method.line_terminator) + /// to `\n`. The purpose of using this constructor is to permit special + /// optimizations that help speed up line oriented search. These types of + /// optimizations are only appropriate when matches span no more than one + /// line. For this reason, this constructor will return an error if the + /// given pattern contains a literal `\n`. Other uses of `\n` (such as in + /// `\s`) are removed transparently. + pub fn new_line_matcher(pattern: &str) -> Result { + RegexMatcherBuilder::new() + .line_terminator(Some(b'\n')) + .build(pattern) + } +} + +/// An encapsulation of the type of matcher we use in `RegexMatcher`. +#[derive(Clone, Debug)] +enum RegexMatcherImpl { + /// The standard matcher used for all regular expressions. + Standard(StandardMatcher), + /// A matcher that only matches at word boundaries. This transforms the + /// regex to `(^|\W)(...)($|\W)` instead of the more intuitive `\b(...)\b`. + /// Because of this, the WordMatcher provides its own implementation of + /// `Matcher` to encapsulate its use of capture groups to make them + /// invisible to the caller. + Word(WordMatcher), +} + +impl RegexMatcherImpl { + /// Based on the configuration, create a new implementation of the + /// `Matcher` trait. + fn new(expr: &ConfiguredHIR) -> Result { + if expr.config().word { + Ok(RegexMatcherImpl::Word(WordMatcher::new(expr)?)) + } else { + Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?)) + } + } +} + +// This implementation just dispatches on the internal matcher impl except +// for the line terminator optimization, which is possibly executed via +// `fast_line_regex`. 
+impl Matcher for RegexMatcher { + type Captures = RegexCaptures; + type Error = NoError; + + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, NoError> { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.find_at(haystack, at), + Word(ref m) => m.find_at(haystack, at), + } + } + + fn new_captures(&self) -> Result { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.new_captures(), + Word(ref m) => m.new_captures(), + } + } + + fn capture_count(&self) -> usize { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.capture_count(), + Word(ref m) => m.capture_count(), + } + } + + fn capture_index(&self, name: &str) -> Option { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.capture_index(name), + Word(ref m) => m.capture_index(name), + } + } + + fn find(&self, haystack: &[u8]) -> Result, NoError> { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.find(haystack), + Word(ref m) => m.find(haystack), + } + } + + fn find_iter( + &self, + haystack: &[u8], + matched: F, + ) -> Result<(), NoError> + where F: FnMut(Match) -> bool + { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.find_iter(haystack, matched), + Word(ref m) => m.find_iter(haystack, matched), + } + } + + fn try_find_iter( + &self, + haystack: &[u8], + matched: F, + ) -> Result, NoError> + where F: FnMut(Match) -> Result + { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.try_find_iter(haystack, matched), + Word(ref m) => m.try_find_iter(haystack, matched), + } + } + + fn captures( + &self, + haystack: &[u8], + caps: &mut RegexCaptures, + ) -> Result { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.captures(haystack, caps), + Word(ref m) => m.captures(haystack, caps), + } + } + + fn captures_iter( + &self, + haystack: &[u8], + caps: &mut RegexCaptures, + 
matched: F, + ) -> Result<(), NoError> + where F: FnMut(&RegexCaptures) -> bool + { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.captures_iter(haystack, caps, matched), + Word(ref m) => m.captures_iter(haystack, caps, matched), + } + } + + fn try_captures_iter( + &self, + haystack: &[u8], + caps: &mut RegexCaptures, + matched: F, + ) -> Result, NoError> + where F: FnMut(&RegexCaptures) -> Result + { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.try_captures_iter(haystack, caps, matched), + Word(ref m) => m.try_captures_iter(haystack, caps, matched), + } + } + + fn captures_at( + &self, + haystack: &[u8], + at: usize, + caps: &mut RegexCaptures, + ) -> Result { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.captures_at(haystack, at, caps), + Word(ref m) => m.captures_at(haystack, at, caps), + } + } + + fn replace( + &self, + haystack: &[u8], + dst: &mut Vec, + append: F, + ) -> Result<(), NoError> + where F: FnMut(Match, &mut Vec) -> bool + { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.replace(haystack, dst, append), + Word(ref m) => m.replace(haystack, dst, append), + } + } + + fn replace_with_captures( + &self, + haystack: &[u8], + caps: &mut RegexCaptures, + dst: &mut Vec, + append: F, + ) -> Result<(), NoError> + where F: FnMut(&Self::Captures, &mut Vec) -> bool + { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => { + m.replace_with_captures(haystack, caps, dst, append) + } + Word(ref m) => { + m.replace_with_captures(haystack, caps, dst, append) + } + } + } + + fn is_match(&self, haystack: &[u8]) -> Result { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.is_match(haystack), + Word(ref m) => m.is_match(haystack), + } + } + + fn is_match_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => 
m.is_match_at(haystack, at), + Word(ref m) => m.is_match_at(haystack, at), + } + } + + fn shortest_match( + &self, + haystack: &[u8], + ) -> Result, NoError> { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.shortest_match(haystack), + Word(ref m) => m.shortest_match(haystack), + } + } + + fn shortest_match_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, NoError> { + use self::RegexMatcherImpl::*; + match self.matcher { + Standard(ref m) => m.shortest_match_at(haystack, at), + Word(ref m) => m.shortest_match_at(haystack, at), + } + } + + fn non_matching_bytes(&self) -> Option<&ByteSet> { + Some(&self.non_matching_bytes) + } + + fn line_terminator(&self) -> Option { + self.config.line_terminator + } + + fn find_candidate_line( + &self, + haystack: &[u8], + ) -> Result, NoError> { + Ok(match self.fast_line_regex { + Some(ref regex) => { + regex.shortest_match(haystack).map(LineMatchKind::Candidate) + } + None => { + self.shortest_match(haystack)?.map(LineMatchKind::Confirmed) + } + }) + } +} + +/// The implementation of the standard regex matcher. +#[derive(Clone, Debug)] +struct StandardMatcher { + /// The regular expression compiled from the pattern provided by the + /// caller. + regex: Regex, + /// A map from capture group name to its corresponding index. 
+ names: HashMap, +} + +impl StandardMatcher { + fn new(expr: &ConfiguredHIR) -> Result { + let regex = expr.regex()?; + let mut names = HashMap::new(); + for (i, optional_name) in regex.capture_names().enumerate() { + if let Some(name) = optional_name { + names.insert(name.to_string(), i); + } + } + Ok(StandardMatcher { regex, names }) + } +} + +impl Matcher for StandardMatcher { + type Captures = RegexCaptures; + type Error = NoError; + + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, NoError> { + Ok(self.regex + .find_at(haystack, at) + .map(|m| Match::new(m.start(), m.end()))) + } + + fn new_captures(&self) -> Result { + Ok(RegexCaptures::new(self.regex.capture_locations())) + } + + fn capture_count(&self) -> usize { + self.regex.captures_len() + } + + fn capture_index(&self, name: &str) -> Option { + self.names.get(name).map(|i| *i) + } + + fn try_find_iter( + &self, + haystack: &[u8], + mut matched: F, + ) -> Result, NoError> + where F: FnMut(Match) -> Result + { + for m in self.regex.find_iter(haystack) { + match matched(Match::new(m.start(), m.end())) { + Ok(true) => continue, + Ok(false) => return Ok(Ok(())), + Err(err) => return Ok(Err(err)), + } + } + Ok(Ok(())) + } + + fn captures_at( + &self, + haystack: &[u8], + at: usize, + caps: &mut RegexCaptures, + ) -> Result { + Ok(self.regex.captures_read_at(&mut caps.locs, haystack, at).is_some()) + } + + fn shortest_match_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, NoError> { + Ok(self.regex.shortest_match_at(haystack, at)) + } +} + +/// Represents the match offsets of each capturing group in a match. +/// +/// The first, or `0`th capture group, always corresponds to the entire match +/// and is guaranteed to be present when a match occurs. The next capture +/// group, at index `1`, corresponds to the first capturing group in the regex, +/// ordered by the position at which the left opening parenthesis occurs. 
+/// +/// Note that not all capturing groups are guaranteed to be present in a match. +/// For example, in the regex, `(?P\w)|(?P\W)`, only one of `foo` +/// or `bar` will ever be set in any given match. +/// +/// In order to access a capture group by name, you'll need to first find the +/// index of the group using the corresponding matcher's `capture_index` +/// method, and then use that index with `RegexCaptures::get`. +#[derive(Clone, Debug)] +pub struct RegexCaptures { + /// Where the locations are stored. + locs: CaptureLocations, + /// These captures behave as if the capturing groups begin at the given + /// offset. When set to `0`, this has no affect and capture groups are + /// indexed like normal. + /// + /// This is useful when building matchers that wrap arbitrary regular + /// expressions. For example, `WordMatcher` takes an existing regex `re` + /// and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that the regex + /// has been wrapped from the caller. In order to do this, the matcher + /// and the capturing groups must behave as if `(re)` is the `0`th capture + /// group. + offset: usize, +} + +impl Captures for RegexCaptures { + fn len(&self) -> usize { + self.locs.len().checked_sub(self.offset).unwrap() + } + + fn get(&self, i: usize) -> Option { + let actual = i.checked_add(self.offset).unwrap(); + self.locs.pos(actual).map(|(s, e)| Match::new(s, e)) + } +} + +impl RegexCaptures { + pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures { + RegexCaptures::with_offset(locs, 0) + } + + pub(crate) fn with_offset( + locs: CaptureLocations, + offset: usize, + ) -> RegexCaptures { + RegexCaptures { locs, offset } + } + + pub(crate) fn locations(&mut self) -> &mut CaptureLocations { + &mut self.locs + } +} + +#[cfg(test)] +mod tests { + use grep_matcher::{LineMatchKind, Matcher}; + use super::*; + + // Test that enabling word matches does the right thing and demonstrate + // the difference between it and surrounding the regex in `\b`. 
+ #[test] + fn word() { + let matcher = RegexMatcherBuilder::new() + .word(true) + .build(r"-2") + .unwrap(); + assert!(matcher.is_match(b"abc -2 foo").unwrap()); + + let matcher = RegexMatcherBuilder::new() + .word(false) + .build(r"\b-2\b") + .unwrap(); + assert!(!matcher.is_match(b"abc -2 foo").unwrap()); + } + + // Test that enabling a line terminator prevents it from matching through + // said line terminator. + #[test] + fn line_terminator() { + // This works, because there's no line terminator specified. + let matcher = RegexMatcherBuilder::new() + .build(r"abc\sxyz") + .unwrap(); + assert!(matcher.is_match(b"abc\nxyz").unwrap()); + + // This doesn't. + let matcher = RegexMatcherBuilder::new() + .line_terminator(Some(b'\n')) + .build(r"abc\sxyz") + .unwrap(); + assert!(!matcher.is_match(b"abc\nxyz").unwrap()); + } + + // Ensure that the builder returns an error if a line terminator is set + // and the regex could not be modified to remove a line terminator. + #[test] + fn line_terminator_error() { + assert!(RegexMatcherBuilder::new() + .line_terminator(Some(b'\n')) + .build(r"a\nz") + .is_err()) + } + + // Test that enabling CRLF permits `$` to match at the end of a line. + #[test] + fn line_terminator_crlf() { + // Test normal use of `$` with a `\n` line terminator. + let matcher = RegexMatcherBuilder::new() + .multi_line(true) + .build(r"abc$") + .unwrap(); + assert!(matcher.is_match(b"abc\n").unwrap()); + + // Test that `$` doesn't match at `\r\n` boundary normally. + let matcher = RegexMatcherBuilder::new() + .multi_line(true) + .build(r"abc$") + .unwrap(); + assert!(!matcher.is_match(b"abc\r\n").unwrap()); + + // Now check the CRLF handling. + let matcher = RegexMatcherBuilder::new() + .multi_line(true) + .crlf(true) + .build(r"abc$") + .unwrap(); + assert!(matcher.is_match(b"abc\r\n").unwrap()); + } + + // Test that smart case works. 
+ #[test] + fn case_smart() { + let matcher = RegexMatcherBuilder::new() + .case_smart(true) + .build(r"abc") + .unwrap(); + assert!(matcher.is_match(b"ABC").unwrap()); + + let matcher = RegexMatcherBuilder::new() + .case_smart(true) + .build(r"aBc") + .unwrap(); + assert!(!matcher.is_match(b"ABC").unwrap()); + } + + // Test that finding candidate lines works as expected. + #[test] + fn candidate_lines() { + fn is_confirmed(m: LineMatchKind) -> bool { + match m { + LineMatchKind::Confirmed(_) => true, + _ => false, + } + } + fn is_candidate(m: LineMatchKind) -> bool { + match m { + LineMatchKind::Candidate(_) => true, + _ => false, + } + } + + // With no line terminator set, we can't employ any optimizations, + // so we get a confirmed match. + let matcher = RegexMatcherBuilder::new() + .build(r"\wfoo\s") + .unwrap(); + let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap(); + assert!(is_confirmed(m)); + + // With a line terminator and a regex specially crafted to have an + // easy-to-detect inner literal, we can apply an optimization that + // quickly finds candidate matches. + let matcher = RegexMatcherBuilder::new() + .line_terminator(Some(b'\n')) + .build(r"\wfoo\s") + .unwrap(); + let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap(); + assert!(is_candidate(m)); + } +} diff -Nru ripgrep-0.6.0/grep-regex/src/non_matching.rs ripgrep-0.10.0.3/grep-regex/src/non_matching.rs --- ripgrep-0.6.0/grep-regex/src/non_matching.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/non_matching.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,128 @@ +use grep_matcher::ByteSet; +use regex_syntax::hir::{self, Hir, HirKind}; +use utf8_ranges::Utf8Sequences; + +/// Return a confirmed set of non-matching bytes from the given expression. 
+pub fn non_matching_bytes(expr: &Hir) -> ByteSet { + let mut set = ByteSet::full(); + remove_matching_bytes(expr, &mut set); + set +} + +/// Remove any bytes from the given set that can occur in a matched produced by +/// the given expression. +fn remove_matching_bytes( + expr: &Hir, + set: &mut ByteSet, +) { + match *expr.kind() { + HirKind::Empty + | HirKind::Anchor(_) + | HirKind::WordBoundary(_) => {} + HirKind::Literal(hir::Literal::Unicode(c)) => { + for &b in c.encode_utf8(&mut [0; 4]).as_bytes() { + set.remove(b); + } + } + HirKind::Literal(hir::Literal::Byte(b)) => { + set.remove(b); + } + HirKind::Class(hir::Class::Unicode(ref cls)) => { + for range in cls.iter() { + // This is presumably faster than encoding every codepoint + // to UTF-8 and then removing those bytes from the set. + for seq in Utf8Sequences::new(range.start(), range.end()) { + for byte_range in seq.as_slice() { + set.remove_all(byte_range.start, byte_range.end); + } + } + } + } + HirKind::Class(hir::Class::Bytes(ref cls)) => { + for range in cls.iter() { + set.remove_all(range.start(), range.end()); + } + } + HirKind::Repetition(ref x) => { + remove_matching_bytes(&x.hir, set); + } + HirKind::Group(ref x) => { + remove_matching_bytes(&x.hir, set); + } + HirKind::Concat(ref xs) => { + for x in xs { + remove_matching_bytes(x, set); + } + } + HirKind::Alternation(ref xs) => { + for x in xs { + remove_matching_bytes(x, set); + } + } + } +} + +#[cfg(test)] +mod tests { + use grep_matcher::ByteSet; + use regex_syntax::ParserBuilder; + + use super::non_matching_bytes; + + fn extract(pattern: &str) -> ByteSet { + let expr = ParserBuilder::new() + .allow_invalid_utf8(true) + .build() + .parse(pattern) + .unwrap(); + non_matching_bytes(&expr) + } + + fn sparse(set: &ByteSet) -> Vec { + let mut sparse_set = vec![]; + for b in (0..256).map(|b| b as u8) { + if set.contains(b) { + sparse_set.push(b); + } + } + sparse_set + } + + fn sparse_except(except: &[u8]) -> Vec { + let mut except_set = 
vec![false; 256]; + for &b in except { + except_set[b as usize] = true; + } + + let mut set = vec![]; + for b in (0..256).map(|b| b as u8) { + if !except_set[b as usize] { + set.push(b); + } + } + set + } + + #[test] + fn dot() { + assert_eq!(sparse(&extract(".")), vec![ + b'\n', + 192, 193, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, + ]); + assert_eq!(sparse(&extract("(?s).")), vec![ + 192, 193, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, + ]); + assert_eq!(sparse(&extract("(?-u).")), vec![b'\n']); + assert_eq!(sparse(&extract("(?s-u).")), vec![]); + } + + #[test] + fn literal() { + assert_eq!(sparse(&extract("a")), sparse_except(&[b'a'])); + assert_eq!(sparse(&extract("☃")), sparse_except(&[0xE2, 0x98, 0x83])); + assert_eq!(sparse(&extract(r"\xFF")), sparse_except(&[0xC3, 0xBF])); + assert_eq!(sparse(&extract(r"(?-u)\xFF")), sparse_except(&[0xFF])); + } +} diff -Nru ripgrep-0.6.0/grep-regex/src/strip.rs ripgrep-0.10.0.3/grep-regex/src/strip.rs --- ripgrep-0.6.0/grep-regex/src/strip.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/strip.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,154 @@ +use grep_matcher::LineTerminator; +use regex_syntax::hir::{self, Hir, HirKind}; + +use error::{Error, ErrorKind}; + +/// Return an HIR that is guaranteed to never match the given line terminator, +/// if possible. +/// +/// If the transformation isn't possible, then an error is returned. +/// +/// In general, if a literal line terminator occurs anywhere in the HIR, then +/// this will return an error. However, if the line terminator occurs within +/// a character class with at least one other character (that isn't also a line +/// terminator), then the line terminator is simply stripped from that class. +/// +/// If the given line terminator is not ASCII, then this function returns an +/// error. 
+pub fn strip_from_match( + expr: Hir, + line_term: LineTerminator, +) -> Result { + if line_term.is_crlf() { + let expr1 = strip_from_match_ascii(expr, b'\r')?; + strip_from_match_ascii(expr1, b'\n') + } else { + let b = line_term.as_byte(); + if b > 0x7F { + return Err(Error::new(ErrorKind::InvalidLineTerminator(b))); + } + strip_from_match_ascii(expr, b) + } +} + +/// The implementation of strip_from_match. The given byte must be ASCII. This +/// function panics otherwise. +fn strip_from_match_ascii( + expr: Hir, + byte: u8, +) -> Result { + assert!(byte <= 0x7F); + let chr = byte as char; + assert_eq!(chr.len_utf8(), 1); + + let invalid = || Err(Error::new(ErrorKind::NotAllowed(chr.to_string()))); + + Ok(match expr.into_kind() { + HirKind::Empty => Hir::empty(), + HirKind::Literal(hir::Literal::Unicode(c)) => { + if c == chr { + return invalid(); + } + Hir::literal(hir::Literal::Unicode(c)) + } + HirKind::Literal(hir::Literal::Byte(b)) => { + if b as char == chr { + return invalid(); + } + Hir::literal(hir::Literal::Byte(b)) + } + HirKind::Class(hir::Class::Unicode(mut cls)) => { + let remove = hir::ClassUnicode::new(Some( + hir::ClassUnicodeRange::new(chr, chr), + )); + cls.difference(&remove); + if cls.ranges().is_empty() { + return invalid(); + } + Hir::class(hir::Class::Unicode(cls)) + } + HirKind::Class(hir::Class::Bytes(mut cls)) => { + let remove = hir::ClassBytes::new(Some( + hir::ClassBytesRange::new(byte, byte), + )); + cls.difference(&remove); + if cls.ranges().is_empty() { + return invalid(); + } + Hir::class(hir::Class::Bytes(cls)) + } + HirKind::Anchor(x) => Hir::anchor(x), + HirKind::WordBoundary(x) => Hir::word_boundary(x), + HirKind::Repetition(mut x) => { + x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?); + Hir::repetition(x) + } + HirKind::Group(mut x) => { + x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?); + Hir::group(x) + } + HirKind::Concat(xs) => { + let xs = xs.into_iter() + .map(|e| strip_from_match_ascii(e, byte)) + 
.collect::, Error>>()?; + Hir::concat(xs) + } + HirKind::Alternation(xs) => { + let xs = xs.into_iter() + .map(|e| strip_from_match_ascii(e, byte)) + .collect::, Error>>()?; + Hir::alternation(xs) + } + }) +} + +#[cfg(test)] +mod tests { + use regex_syntax::Parser; + + use error::Error; + use super::{LineTerminator, strip_from_match}; + + fn roundtrip(pattern: &str, byte: u8) -> String { + roundtrip_line_term(pattern, LineTerminator::byte(byte)).unwrap() + } + + fn roundtrip_crlf(pattern: &str) -> String { + roundtrip_line_term(pattern, LineTerminator::crlf()).unwrap() + } + + fn roundtrip_err(pattern: &str, byte: u8) -> Result { + roundtrip_line_term(pattern, LineTerminator::byte(byte)) + } + + fn roundtrip_line_term( + pattern: &str, + line_term: LineTerminator, + ) -> Result { + let expr1 = Parser::new().parse(pattern).unwrap(); + let expr2 = strip_from_match(expr1, line_term)?; + Ok(expr2.to_string()) + } + + #[test] + fn various() { + assert_eq!(roundtrip(r"[a\n]", b'\n'), "[a]"); + assert_eq!(roundtrip(r"[a\n]", b'a'), "[\n]"); + assert_eq!(roundtrip_crlf(r"[a\n]"), "[a]"); + assert_eq!(roundtrip_crlf(r"[a\r]"), "[a]"); + assert_eq!(roundtrip_crlf(r"[a\r\n]"), "[a]"); + + assert_eq!(roundtrip(r"(?-u)\s", b'a'), r"(?-u:[\x09-\x0D\x20])"); + assert_eq!(roundtrip(r"(?-u)\s", b'\n'), r"(?-u:[\x09\x0B-\x0D\x20])"); + + assert!(roundtrip_err(r"\n", b'\n').is_err()); + assert!(roundtrip_err(r"abc\n", b'\n').is_err()); + assert!(roundtrip_err(r"\nabc", b'\n').is_err()); + assert!(roundtrip_err(r"abc\nxyz", b'\n').is_err()); + assert!(roundtrip_err(r"\x0A", b'\n').is_err()); + assert!(roundtrip_err(r"\u000A", b'\n').is_err()); + assert!(roundtrip_err(r"\U0000000A", b'\n').is_err()); + assert!(roundtrip_err(r"\u{A}", b'\n').is_err()); + assert!(roundtrip_err("\n", b'\n').is_err()); + } +} diff -Nru ripgrep-0.6.0/grep-regex/src/util.rs ripgrep-0.10.0.3/grep-regex/src/util.rs --- ripgrep-0.6.0/grep-regex/src/util.rs 1970-01-01 00:00:00.000000000 +0000 +++ 
ripgrep-0.10.0.3/grep-regex/src/util.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,29 @@ +/// Converts an arbitrary sequence of bytes to a literal suitable for building +/// a regular expression. +pub fn bytes_to_regex(bs: &[u8]) -> String { + use std::fmt::Write; + use regex_syntax::is_meta_character; + + let mut s = String::with_capacity(bs.len()); + for &b in bs { + if b <= 0x7F && !is_meta_character(b as char) { + write!(s, r"{}", b as char).unwrap(); + } else { + write!(s, r"\x{:02x}", b).unwrap(); + } + } + s +} + +/// Converts arbitrary bytes to a nice string. +pub fn show_bytes(bs: &[u8]) -> String { + use std::ascii::escape_default; + use std::str; + + let mut nice = String::new(); + for &b in bs { + let part: Vec = escape_default(b).collect(); + nice.push_str(str::from_utf8(&part).unwrap()); + } + nice +} diff -Nru ripgrep-0.6.0/grep-regex/src/word.rs ripgrep-0.10.0.3/grep-regex/src/word.rs --- ripgrep-0.6.0/grep-regex/src/word.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/src/word.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,196 @@ +use std::collections::HashMap; +use std::cell::RefCell; +use std::sync::Arc; + +use grep_matcher::{Match, Matcher, NoError}; +use regex::bytes::{CaptureLocations, Regex}; +use thread_local::CachedThreadLocal; + +use config::ConfiguredHIR; +use error::Error; +use matcher::RegexCaptures; + +/// A matcher for implementing "word match" semantics. +#[derive(Debug)] +pub struct WordMatcher { + /// The regex which is roughly `(?:^|\W)()(?:$|\W)`. + regex: Regex, + /// A map from capture group name to capture group index. + names: HashMap, + /// A reusable buffer for finding the match location of the inner group. + locs: Arc>>, +} + +impl Clone for WordMatcher { + fn clone(&self) -> WordMatcher { + // We implement Clone manually so that we get a fresh CachedThreadLocal + // such that it can set its own thread owner. This permits each thread + // usings `locs` to hit the fast path. 
+ WordMatcher { + regex: self.regex.clone(), + names: self.names.clone(), + locs: Arc::new(CachedThreadLocal::new()), + } + } +} + +impl WordMatcher { + /// Create a new matcher from the given pattern that only produces matches + /// that are considered "words." + /// + /// The given options are used to construct the regular expression + /// internally. + pub fn new(expr: &ConfiguredHIR) -> Result { + let word_expr = expr.with_pattern(|pat| { + format!(r"(?:(?m:^)|\W)({})(?:(?m:$)|\W)", pat) + })?; + let regex = word_expr.regex()?; + let locs = Arc::new(CachedThreadLocal::new()); + + let mut names = HashMap::new(); + for (i, optional_name) in regex.capture_names().enumerate() { + if let Some(name) = optional_name { + names.insert(name.to_string(), i.checked_sub(1).unwrap()); + } + } + Ok(WordMatcher { regex, names, locs }) + } +} + +impl Matcher for WordMatcher { + type Captures = RegexCaptures; + type Error = NoError; + + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, NoError> { + // To make this easy to get right, we extract captures here instead of + // calling `find_at`. The actual match is at capture group `1` instead + // of `0`. We *could* use `find_at` here and then trim the match after + // the fact, but that's a bit harder to get right, and it's not clear + // if it's worth it. 
+ + let cell = self.locs.get_or(|| { + Box::new(RefCell::new(self.regex.capture_locations())) + }); + let mut caps = cell.borrow_mut(); + self.regex.captures_read_at(&mut caps, haystack, at); + Ok(caps.get(1).map(|m| Match::new(m.0, m.1))) + } + + fn new_captures(&self) -> Result { + Ok(RegexCaptures::with_offset(self.regex.capture_locations(), 1)) + } + + fn capture_count(&self) -> usize { + self.regex.captures_len().checked_sub(1).unwrap() + } + + fn capture_index(&self, name: &str) -> Option { + self.names.get(name).map(|i| *i) + } + + fn captures_at( + &self, + haystack: &[u8], + at: usize, + caps: &mut RegexCaptures, + ) -> Result { + let r = self.regex.captures_read_at(caps.locations(), haystack, at); + Ok(r.is_some()) + } + + // We specifically do not implement other methods like find_iter or + // captures_iter. Namely, the iter methods are guaranteed to be correct + // by virtue of implementing find_at and captures_at above. +} + +#[cfg(test)] +mod tests { + use grep_matcher::{Captures, Match, Matcher}; + use config::Config; + use super::WordMatcher; + + fn matcher(pattern: &str) -> WordMatcher { + let chir = Config::default().hir(pattern).unwrap(); + WordMatcher::new(&chir).unwrap() + } + + fn find(pattern: &str, haystack: &str) -> Option<(usize, usize)> { + matcher(pattern) + .find(haystack.as_bytes()) + .unwrap() + .map(|m| (m.start(), m.end())) + } + + fn find_by_caps(pattern: &str, haystack: &str) -> Option<(usize, usize)> { + let m = matcher(pattern); + let mut caps = m.new_captures().unwrap(); + if !m.captures(haystack.as_bytes(), &mut caps).unwrap() { + None + } else { + caps.get(0).map(|m| (m.start(), m.end())) + } + } + + // Test that the standard `find` API reports offsets correctly. 
+ #[test] + fn various_find() { + assert_eq!(Some((0, 3)), find(r"foo", "foo")); + assert_eq!(Some((0, 3)), find(r"foo", "foo(")); + assert_eq!(Some((1, 4)), find(r"foo", "!foo(")); + assert_eq!(None, find(r"foo", "!afoo(")); + + assert_eq!(Some((0, 3)), find(r"foo", "foo☃")); + assert_eq!(None, find(r"foo", "fooб")); + // assert_eq!(Some((0, 3)), find(r"foo", "fooб")); + + // See: https://github.com/BurntSushi/ripgrep/issues/389 + assert_eq!(Some((0, 2)), find(r"-2", "-2")); + } + + // Test that the captures API also reports offsets correctly, just as + // find does. This exercises a different path in the code since captures + // are handled differently. + #[test] + fn various_captures() { + assert_eq!(Some((0, 3)), find_by_caps(r"foo", "foo")); + assert_eq!(Some((0, 3)), find_by_caps(r"foo", "foo(")); + assert_eq!(Some((1, 4)), find_by_caps(r"foo", "!foo(")); + assert_eq!(None, find_by_caps(r"foo", "!afoo(")); + + assert_eq!(Some((0, 3)), find_by_caps(r"foo", "foo☃")); + assert_eq!(None, find_by_caps(r"foo", "fooб")); + // assert_eq!(Some((0, 3)), find_by_caps(r"foo", "fooб")); + + // See: https://github.com/BurntSushi/ripgrep/issues/389 + assert_eq!(Some((0, 2)), find_by_caps(r"-2", "-2")); + } + + // Test that the capture reporting methods work as advertised. 
+ #[test] + fn capture_indexing() { + let m = matcher(r"(a)(?Pb)(c)"); + assert_eq!(4, m.capture_count()); + assert_eq!(Some(2), m.capture_index("foo")); + + let mut caps = m.new_captures().unwrap(); + assert_eq!(4, caps.len()); + + assert!(m.captures(b"abc", &mut caps).unwrap()); + assert_eq!(caps.get(0), Some(Match::new(0, 3))); + assert_eq!(caps.get(1), Some(Match::new(0, 1))); + assert_eq!(caps.get(2), Some(Match::new(1, 2))); + assert_eq!(caps.get(3), Some(Match::new(2, 3))); + assert_eq!(caps.get(4), None); + + assert!(m.captures(b"#abc#", &mut caps).unwrap()); + assert_eq!(caps.get(0), Some(Match::new(1, 4))); + assert_eq!(caps.get(1), Some(Match::new(1, 2))); + assert_eq!(caps.get(2), Some(Match::new(2, 3))); + assert_eq!(caps.get(3), Some(Match::new(3, 4))); + assert_eq!(caps.get(4), None); + } +} diff -Nru ripgrep-0.6.0/grep-regex/UNLICENSE ripgrep-0.10.0.3/grep-regex/UNLICENSE --- ripgrep-0.6.0/grep-regex/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-regex/UNLICENSE 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff -Nru ripgrep-0.6.0/grep-searcher/Cargo.toml ripgrep-0.10.0.3/grep-searcher/Cargo.toml --- ripgrep-0.6.0/grep-searcher/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,35 @@ +[package] +name = "grep-searcher" +version = "0.1.1" #:version +authors = ["Andrew Gallant "] +description = """ +Fast line oriented regex searching as a library. +""" +documentation = "https://docs.rs/grep-searcher" +homepage = "https://github.com/BurntSushi/ripgrep" +repository = "https://github.com/BurntSushi/ripgrep" +readme = "README.md" +keywords = ["regex", "grep", "egrep", "search", "pattern"] +license = "Unlicense/MIT" + +[dependencies] +bytecount = "0.3.2" +encoding_rs = "0.8.6" +encoding_rs_io = "0.1.2" +grep-matcher = { version = "0.1.1", path = "../grep-matcher" } +log = "0.4.5" +memchr = "2.0.2" +memmap = "0.6.2" + +[dev-dependencies] +grep-regex = { version = "0.1.1", path = "../grep-regex" } +regex = "1.0.5" + +[features] +avx-accel = [ + "bytecount/avx-accel", +] +simd-accel = [ + "bytecount/simd-accel", + "encoding_rs/simd-accel", +] diff -Nru ripgrep-0.6.0/grep-searcher/examples/search-stdin.rs ripgrep-0.10.0.3/grep-searcher/examples/search-stdin.rs --- ripgrep-0.6.0/grep-searcher/examples/search-stdin.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/examples/search-stdin.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,33 @@ +extern crate grep_regex; +extern crate grep_searcher; + +use std::env; +use std::error::Error; +use std::io; +use std::process; + +use grep_regex::RegexMatcher; +use grep_searcher::Searcher; +use grep_searcher::sinks::UTF8; + +fn main() { + if let Err(err) 
= example() { + eprintln!("{}", err); + process::exit(1); + } +} + +fn example() -> Result<(), Box> { + let pattern = match env::args().nth(1) { + Some(pattern) => pattern, + None => return Err(From::from(format!( + "Usage: search-stdin " + ))), + }; + let matcher = RegexMatcher::new(&pattern)?; + Searcher::new().search_reader(&matcher, io::stdin(), UTF8(|lnum, line| { + print!("{}:{}", lnum, line); + Ok(true) + }))?; + Ok(()) +} diff -Nru ripgrep-0.6.0/grep-searcher/LICENSE-MIT ripgrep-0.10.0.3/grep-searcher/LICENSE-MIT --- ripgrep-0.6.0/grep-searcher/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/LICENSE-MIT 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff -Nru ripgrep-0.6.0/grep-searcher/README.md ripgrep-0.10.0.3/grep-searcher/README.md --- ripgrep-0.6.0/grep-searcher/README.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,37 @@ +grep-searcher +------------- +A high level library for executing fast line oriented searches. This handles +things like reporting contextual lines, counting lines, inverting a search, +detecting binary data, automatic UTF-16 transcoding and deciding whether or not +to use memory maps. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep-searcher.svg)](https://crates.io/crates/grep-searcher) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + +### Documentation + +[https://docs.rs/grep-searcher](https://docs.rs/grep-searcher) + +**NOTE:** You probably don't want to use this crate directly. Instead, you +should prefer the facade defined in the +[`grep`](https://docs.rs/grep) +crate. + + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep-searcher = "0.1" +``` + +and this to your crate root: + +```rust +extern crate grep_searcher; +``` diff -Nru ripgrep-0.6.0/grep-searcher/src/lib.rs ripgrep-0.10.0.3/grep-searcher/src/lib.rs --- ripgrep-0.6.0/grep-searcher/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/lib.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,132 @@ +/*! +This crate provides an implementation of line oriented search, with optional +support for multi-line search. + +# Brief overview + +The principal type in this crate is a +[`Searcher`](struct.Searcher.html), +which can be configured and built by a +[`SearcherBuilder`](struct.SearcherBuilder.html). 
+A `Searcher` is responsible for reading bytes from a source (e.g., a file), +executing a search of those bytes using a `Matcher` (e.g., a regex) and then +reporting the results of that search to a +[`Sink`](trait.Sink.html) +(e.g., stdout). The `Searcher` itself is principally responsible for managing +the consumption of bytes from a source and applying a `Matcher` over those +bytes in an efficient way. The `Searcher` is also responsible for inverting +a search, counting lines, reporting contextual lines, detecting binary data +and even deciding whether or not to use memory maps. + +A `Matcher` (which is defined in the +[`grep-matcher`](https://crates.io/crates/grep-matcher) +crate) is a trait for describing the lowest levels of pattern search in a +generic way. The interface itself is very similar to the interface of a regular +expression. For example, the +[`grep-regex`](https://crates.io/crates/grep-regex) +crate provides an implementation of the `Matcher` trait using Rust's +[`regex`](https://crates.io/crates/regex) +crate. + +Finally, a `Sink` describes how callers receive search results produced by a +`Searcher`. This includes routines that are called at the beginning and end of +a search, in addition to routines that are called when matching or contextual +lines are found by the `Searcher`. Implementations of `Sink` can be trivially +simple, or extraordinarily complex, such as the +`Standard` printer found in the +[`grep-printer`](https://crates.io/crates/grep-printer) +crate, which effectively implements grep-like output. +This crate also provides convenience `Sink` implementations in the +[`sinks`](sinks/index.html) +sub-module for easy searching with closures. + +# Example + +This example shows how to execute the searcher and read the search results +using the +[`UTF8`](sinks/struct.UTF8.html) +implementation of `Sink`. 
+ +``` +extern crate grep_matcher; +extern crate grep_regex; +extern crate grep_searcher; + +use std::error::Error; + +use grep_matcher::Matcher; +use grep_regex::RegexMatcher; +use grep_searcher::Searcher; +use grep_searcher::sinks::UTF8; + +const SHERLOCK: &'static [u8] = b"\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + +# fn main() { example().unwrap() } +fn example() -> Result<(), Box> { + let matcher = RegexMatcher::new(r"Doctor \w+")?; + let mut matches: Vec<(u64, String)> = vec![]; + Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| { + // We are guaranteed to find a match, so the unwrap is OK. + let mymatch = matcher.find(line.as_bytes())?.unwrap(); + matches.push((lnum, line[mymatch].to_string())); + Ok(true) + }))?; + + assert_eq!(matches.len(), 2); + assert_eq!( + matches[0], + (1, "Doctor Watsons".to_string()) + ); + assert_eq!( + matches[1], + (5, "Doctor Watson".to_string()) + ); + Ok(()) +} +``` + +See also `examples/search-stdin.rs` from the root of this crate's directory +to see a similar example that accepts a pattern on the command line and +searches stdin. 
+*/ + +#![deny(missing_docs)] + +extern crate bytecount; +extern crate encoding_rs; +extern crate encoding_rs_io; +extern crate grep_matcher; +#[macro_use] +extern crate log; +extern crate memchr; +extern crate memmap; +#[cfg(test)] +extern crate regex; + +pub use lines::{LineIter, LineStep}; +pub use searcher::{ + BinaryDetection, ConfigError, Encoding, MmapChoice, + Searcher, SearcherBuilder, +}; +pub use sink::{ + Sink, SinkError, + SinkContext, SinkContextKind, SinkFinish, SinkMatch, +}; +pub use sink::sinks; + +#[macro_use] +mod macros; + +mod line_buffer; +mod lines; +mod searcher; +mod sink; +#[cfg(test)] +mod testutil; diff -Nru ripgrep-0.6.0/grep-searcher/src/line_buffer.rs ripgrep-0.10.0.3/grep-searcher/src/line_buffer.rs --- ripgrep-0.6.0/grep-searcher/src/line_buffer.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/line_buffer.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,968 @@ +use std::cmp; +use std::io; +use std::ptr; + +use memchr::{memchr, memrchr}; + +/// The default buffer capacity that we use for the line buffer. +pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB + +/// The behavior of a searcher in the face of long lines and big contexts. +/// +/// When searching data incrementally using a fixed size buffer, this controls +/// the amount of *additional* memory to allocate beyond the size of the buffer +/// to accommodate lines (which may include the lines in a context window, when +/// enabled) that do not fit in the buffer. +/// +/// The default is to eagerly allocate without a limit. +#[derive(Clone, Copy, Debug)] +pub enum BufferAllocation { + /// Attempt to expand the size of the buffer until either at least the next + /// line fits into memory or until all available memory is exhausted. + /// + /// This is the default. + Eager, + /// Limit the amount of additional memory allocated to the given size. 
If + /// a line is found that requires more memory than is allowed here, then + /// stop reading and return an error. + Error(usize), +} + +impl Default for BufferAllocation { + fn default() -> BufferAllocation { + BufferAllocation::Eager + } +} + +/// Create a new error to be used when a configured allocation limit has been +/// reached. +pub fn alloc_error(limit: usize) -> io::Error { + let msg = format!("configured allocation limit ({}) exceeded", limit); + io::Error::new(io::ErrorKind::Other, msg) +} + +/// The behavior of binary detection in the line buffer. +/// +/// Binary detection is the process of _heuristically_ identifying whether a +/// given chunk of data is binary or not, and then taking an action based on +/// the result of that heuristic. The motivation behind detecting binary data +/// is that binary data often indicates data that is undesirable to search +/// using textual patterns. Of course, there are many cases in which this isn't +/// true, which is why binary detection is disabled by default. +#[derive(Clone, Copy, Debug)] +pub enum BinaryDetection { + /// No binary detection is performed. Data reported by the line buffer may + /// contain arbitrary bytes. + None, + /// The given byte is searched in all contents read by the line buffer. If + /// it occurs, then the data is considered binary and the line buffer acts + /// as if it reached EOF. The line buffer guarantees that this byte will + /// never be observable by callers. + Quit(u8), + /// The given byte is searched in all contents read by the line buffer. If + /// it occurs, then it is replaced by the line terminator. The line buffer + /// guarantees that this byte will never be observable by callers. + Convert(u8), +} + +impl Default for BinaryDetection { + fn default() -> BinaryDetection { + BinaryDetection::None + } +} + +impl BinaryDetection { + /// Returns true if and only if the detection heuristic demands that + /// the line buffer stop reading data once binary data is observed. 
+ fn is_quit(&self) -> bool { + match *self { + BinaryDetection::Quit(_) => true, + _ => false, + } + } +} + +/// The configuration of a buffer. This contains options that are fixed once +/// a buffer has been constructed. +#[derive(Clone, Copy, Debug)] +struct Config { + /// The number of bytes to attempt to read at a time. + capacity: usize, + /// The line terminator. + lineterm: u8, + /// The behavior for handling long lines. + buffer_alloc: BufferAllocation, + /// When set, the presence of the given byte indicates binary content. + binary: BinaryDetection, +} + +impl Default for Config { + fn default() -> Config { + Config { + capacity: DEFAULT_BUFFER_CAPACITY, + lineterm: b'\n', + buffer_alloc: BufferAllocation::default(), + binary: BinaryDetection::default(), + } + } +} + +/// A builder for constructing line buffers. +#[derive(Clone, Debug, Default)] +pub struct LineBufferBuilder { + config: Config, +} + +impl LineBufferBuilder { + /// Create a new builder for a buffer. + pub fn new() -> LineBufferBuilder { + LineBufferBuilder { config: Config::default() } + } + + /// Create a new line buffer from this builder's configuration. + pub fn build(&self) -> LineBuffer { + LineBuffer { + config: self.config, + buf: vec![0; self.config.capacity], + pos: 0, + last_lineterm: 0, + end: 0, + absolute_byte_offset: 0, + binary_byte_offset: None, + } + } + + /// Set the default capacity to use for a buffer. + /// + /// In general, the capacity of a buffer corresponds to the amount of data + /// to hold in memory, and the size of the reads to make to the underlying + /// reader. + /// + /// This is set to a reasonable default and probably shouldn't be changed + /// unless there's a specific reason to do so. + pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder { + self.config.capacity = capacity; + self + } + + /// Set the line terminator for the buffer. 
+ /// + /// Every buffer has a line terminator, and this line terminator is used + /// to determine how to roll the buffer forward. For example, when a read + /// to the buffer's underlying reader occurs, the end of the data that is + /// read is likely to correspond to an incomplete line. As a line buffer, + /// callers should not access this data since it is incomplete. The line + /// terminator is how the line buffer determines the part of the read that + /// is incomplete. + /// + /// By default, this is set to `b'\n'`. + pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder { + self.config.lineterm = lineterm; + self + } + + /// Set the maximum amount of additional memory to allocate for long lines. + /// + /// In order to enable line oriented search, a fundamental requirement is + /// that, at a minimum, each line must be able to fit into memory. This + /// setting controls how big that line is allowed to be. By default, this + /// is set to `BufferAllocation::Eager`, which means a line buffer will + /// attempt to allocate as much memory as possible to fit a line, and will + /// only be limited by available memory. + /// + /// Note that this setting only applies to the amount of *additional* + /// memory to allocate, beyond the capacity of the buffer. That means that + /// a value of `0` is sensible, and in particular, will guarantee that a + /// line buffer will never allocate additional memory beyond its initial + /// capacity. + pub fn buffer_alloc( + &mut self, + behavior: BufferAllocation, + ) -> &mut LineBufferBuilder { + self.config.buffer_alloc = behavior; + self + } + + /// Whether to enable binary detection or not. Depending on the setting, + /// this can either cause the line buffer to report EOF early or it can + /// cause the line buffer to clean the data. + /// + /// By default, this is disabled. In general, binary detection should be + /// viewed as an imperfect heuristic. 
+ pub fn binary_detection( + &mut self, + detection: BinaryDetection, + ) -> &mut LineBufferBuilder { + self.config.binary = detection; + self + } +} + +/// A line buffer reader efficiently reads a line oriented buffer from an +/// arbitrary reader. +#[derive(Debug)] +pub struct LineBufferReader<'b, R> { + rdr: R, + line_buffer: &'b mut LineBuffer, +} + +impl<'b, R: io::Read> LineBufferReader<'b, R> { + /// Create a new buffered reader that reads from `rdr` and uses the given + /// `line_buffer` as an intermediate buffer. + /// + /// This does not change the binary detection behavior of the given line + /// buffer. + pub fn new( + rdr: R, + line_buffer: &'b mut LineBuffer, + ) -> LineBufferReader<'b, R> { + line_buffer.clear(); + LineBufferReader { rdr, line_buffer } + } + + /// The absolute byte offset which corresponds to the starting offsets + /// of the data returned by `buffer` relative to the beginning of the + /// underlying reader's contents. As such, this offset does not generally + /// correspond to an offset in memory. It is typically used for reporting + /// purposes. It can also be used for counting the number of bytes that + /// have been searched. + pub fn absolute_byte_offset(&self) -> u64 { + self.line_buffer.absolute_byte_offset() + } + + /// If binary data was detected, then this returns the absolute byte offset + /// at which binary data was initially found. + pub fn binary_byte_offset(&self) -> Option { + self.line_buffer.binary_byte_offset() + } + + /// Fill the contents of this buffer by discarding the part of the buffer + /// that has been consumed. The free space created by discarding the + /// consumed part of the buffer is then filled with new data from the + /// reader. + /// + /// If EOF is reached, then `false` is returned. Otherwise, `true` is + /// returned. 
(Note that if this line buffer's binary detection is set to + /// `Quit`, then the presence of binary data will cause this buffer to + /// behave as if it had seen EOF at the first occurrence of binary data.) + /// + /// This forwards any errors returned by the underlying reader, and will + /// also return an error if the buffer must be expanded past its allocation + /// limit, as governed by the buffer allocation strategy. + pub fn fill(&mut self) -> Result { + self.line_buffer.fill(&mut self.rdr) + } + + /// Return the contents of this buffer. + pub fn buffer(&self) -> &[u8] { + self.line_buffer.buffer() + } + + /// Consume the number of bytes provided. This must be less than or equal + /// to the number of bytes returned by `buffer`. + pub fn consume(&mut self, amt: usize) { + self.line_buffer.consume(amt); + } + + /// Consumes the remainder of the buffer. Subsequent calls to `buffer` are + /// guaranteed to return an empty slice until the buffer is refilled. + /// + /// This is a convenience function for `consume(buffer.len())`. + #[cfg(test)] + fn consume_all(&mut self) { + self.line_buffer.consume_all(); + } +} + +/// A line buffer manages a (typically fixed) buffer for holding lines. +/// +/// Callers should create line buffers sparingly and reuse them when possible. +/// Line buffers cannot be used directly, but instead must be used via the +/// LineBufferReader. +#[derive(Clone, Debug)] +pub struct LineBuffer { + /// The configuration of this buffer. + config: Config, + /// The primary buffer with which to hold data. + buf: Vec, + /// The current position of this buffer. This is always a valid sliceable + /// index into `buf`, and its maximum value is the length of `buf`. + pos: usize, + /// The end position of searchable content in this buffer. This is either + /// set to just after the final line terminator in the buffer, or to just + /// after the end of the last byte emitted by the reader when the reader + /// has been exhausted. 
+ last_lineterm: usize, + /// The end position of the buffer. This is always greater than or equal to + /// last_lineterm. The bytes between last_lineterm and end, if any, always + /// correspond to a partial line. + end: usize, + /// The absolute byte offset corresponding to `pos`. This is most typically + /// not a valid index into addressable memory, but rather, an offset that + /// is relative to all data that passes through a line buffer (since + /// construction or since the last time `clear` was called). + /// + /// When the line buffer reaches EOF, this is set to the position just + /// after the last byte read from the underlying reader. That is, it + /// becomes the total count of bytes that have been read. + absolute_byte_offset: u64, + /// If binary data was found, this records the absolute byte offset at + /// which it was first detected. + binary_byte_offset: Option, +} + +impl LineBuffer { + /// Reset this buffer, such that it can be used with a new reader. + fn clear(&mut self) { + self.pos = 0; + self.last_lineterm = 0; + self.end = 0; + self.absolute_byte_offset = 0; + self.binary_byte_offset = None; + } + + /// The absolute byte offset which corresponds to the starting offsets + /// of the data returned by `buffer` relative to the beginning of the + /// reader's contents. As such, this offset does not generally correspond + /// to an offset in memory. It is typically used for reporting purposes, + /// particularly in error messages. + /// + /// This is reset to `0` when `clear` is called. + fn absolute_byte_offset(&self) -> u64 { + self.absolute_byte_offset + } + + /// If binary data was detected, then this returns the absolute byte offset + /// at which binary data was initially found. + fn binary_byte_offset(&self) -> Option { + self.binary_byte_offset + } + + /// Return the contents of this buffer. 
+ fn buffer(&self) -> &[u8] { + &self.buf[self.pos..self.last_lineterm] + } + + /// Return the contents of the free space beyond the end of the buffer as + /// a mutable slice. + fn free_buffer(&mut self) -> &mut [u8] { + &mut self.buf[self.end..] + } + + /// Consume the number of bytes provided. This must be less than or equal + /// to the number of bytes returned by `buffer`. + fn consume(&mut self, amt: usize) { + assert!(amt <= self.buffer().len()); + self.pos += amt; + self.absolute_byte_offset += amt as u64; + } + + /// Consumes the remainder of the buffer. Subsequent calls to `buffer` are + /// guaranteed to return an empty slice until the buffer is refilled. + /// + /// This is a convenience function for `consume(buffer.len())`. + #[cfg(test)] + fn consume_all(&mut self) { + let amt = self.buffer().len(); + self.consume(amt); + } + + /// Fill the contents of this buffer by discarding the part of the buffer + /// that has been consumed. The free space created by discarding the + /// consumed part of the buffer is then filled with new data from the given + /// reader. + /// + /// Callers should provide the same reader to this line buffer in + /// subsequent calls to fill. A different reader can only be used + /// immediately following a call to `clear`. + /// + /// If EOF is reached, then `false` is returned. Otherwise, `true` is + /// returned. (Note that if this line buffer's binary detection is set to + /// `Quit`, then the presence of binary data will cause this buffer to + /// behave as if it had seen EOF.) + /// + /// This forwards any errors returned by `rdr`, and will also return an + /// error if the buffer must be expanded past its allocation limit, as + /// governed by the buffer allocation strategy. + fn fill(&mut self, mut rdr: R) -> Result { + // If the binary detection heuristic tells us to quit once binary data + // has been observed, then we no longer read new data and reach EOF + // once the current buffer has been consumed. 
+ if self.config.binary.is_quit() && self.binary_byte_offset.is_some() { + return Ok(!self.buffer().is_empty()); + } + + self.roll(); + assert_eq!(self.pos, 0); + loop { + self.ensure_capacity()?; + let readlen = rdr.read(self.free_buffer())?; + if readlen == 0 { + // We're only done reading for good once the caller has + // consumed everything. + self.last_lineterm = self.end; + return Ok(!self.buffer().is_empty()); + } + + // Get a mutable view into the bytes we've just read. These are + // the bytes that we do binary detection on, and also the bytes we + // search to find the last line terminator. We need a mutable slice + // in the case of binary conversion. + let oldend = self.end; + self.end += readlen; + let newbytes = &mut self.buf[oldend..self.end]; + + // Binary detection. + match self.config.binary { + BinaryDetection::None => {} // nothing to do + BinaryDetection::Quit(byte) => { + if let Some(i) = memchr(byte, newbytes) { + self.end = oldend + i; + self.last_lineterm = self.end; + self.binary_byte_offset = + Some(self.absolute_byte_offset + self.end as u64); + // If the first byte in our buffer is a binary byte, + // then our buffer is empty and we should report as + // such to the caller. + return Ok(self.pos < self.end); + } + } + BinaryDetection::Convert(byte) => { + if let Some(i) = replace_bytes( + newbytes, + byte, + self.config.lineterm, + ) { + // Record only the first binary offset. + if self.binary_byte_offset.is_none() { + self.binary_byte_offset = + Some(self.absolute_byte_offset + + (oldend + i) as u64); + } + } + } + } + + // Update our `last_lineterm` positions if we read one. + if let Some(i) = memrchr(self.config.lineterm, newbytes) { + self.last_lineterm = oldend + i + 1; + return Ok(true); + } + // At this point, if we couldn't find a line terminator, then we + // don't have a complete line. Therefore, we try to read more! + } + } + + /// Roll the unconsumed parts of the buffer to the front. + /// + /// This operation is idempotent. 
+ /// + /// After rolling, `last_lineterm` and `end` point to the same location, + /// and `pos` is always set to `0`. + fn roll(&mut self) { + if self.pos == self.end { + self.pos = 0; + self.last_lineterm = 0; + self.end = 0; + return; + } + + assert!(self.pos < self.end && self.end <= self.buf.len()); + let roll_len = self.end - self.pos; + unsafe { + // SAFETY: A buffer contains Copy data, so there's no problem + // moving it around. Safety also depends on our indices being + // in bounds, which they should always be, and we enforce with + // an assert above. + // + // It seems like it should be possible to do this in safe code that + // results in the same codegen. I tried the obvious: + // + // for (src, dst) in (self.pos..self.end).zip(0..) { + // self.buf[dst] = self.buf[src]; + // } + // + // But the above does not work, and in fact compiles down to a slow + // byte-by-byte loop. I tried a few other minor variations, but + // alas, better minds might prevail. + // + // Overall, this doesn't save us *too* much. It mostly matters when + // the number of bytes we're copying is large, which can happen + // if the searcher is asked to produce a lot of context. We could + // decide this isn't worth it, but it does make an appreciable + // impact at or around the context=30 range on my machine. + // + // We could also use a temporary buffer that compiles down to two + // memcpys and is faster than the byte-at-a-time loop, but it + // complicates our options for limiting memory allocation a bit. + ptr::copy( + self.buf[self.pos..].as_ptr(), + self.buf.as_mut_ptr(), + roll_len, + ); + } + self.pos = 0; + self.last_lineterm = roll_len; + self.end = roll_len; + } + + /// Ensures that the internal buffer has a non-zero amount of free space + /// in which to read more data. If there is no free space, then more is + /// allocated. If the allocation must exceed the configured limit, then + /// this returns an error. 
+ fn ensure_capacity(&mut self) -> Result<(), io::Error> { + if !self.free_buffer().is_empty() { + return Ok(()); + } + // `len` is used for computing the next allocation size. The capacity + // is permitted to start at `0`, so we make sure it's at least `1`. + let len = cmp::max(1, self.buf.len()); + let additional = match self.config.buffer_alloc { + BufferAllocation::Eager => len * 2, + BufferAllocation::Error(limit) => { + let used = self.buf.len() - self.config.capacity; + let n = cmp::min(len * 2, limit - used); + if n == 0 { + return Err(alloc_error(self.config.capacity + limit)); + } + n + } + }; + assert!(additional > 0); + let newlen = self.buf.len() + additional; + self.buf.resize(newlen, 0); + assert!(!self.free_buffer().is_empty()); + Ok(()) + } +} + +/// Replaces `src` with `replacement` in bytes. +fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option { + if src == replacement { + return None; + } + let mut first_pos = None; + let mut pos = 0; + while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) { + if first_pos.is_none() { + first_pos = Some(i); + } + bytes[i] = replacement; + pos = i + 1; + while bytes.get(pos) == Some(&src) { + bytes[pos] = replacement; + pos += 1; + } + } + first_pos +} + +#[cfg(test)] +mod tests { + use std::str; + use super::*; + + const SHERLOCK: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. 
Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached.\ +"; + + fn s(slice: &str) -> String { + slice.to_string() + } + + fn btos(slice: &[u8]) -> &str { + str::from_utf8(slice).unwrap() + } + + fn replace_str( + slice: &str, + src: u8, + replacement: u8, + ) -> (String, Option) { + let mut dst = slice.to_string().into_bytes(); + let result = replace_bytes(&mut dst, src, replacement); + (String::from_utf8(dst).unwrap(), result) + } + + #[test] + fn replace() { + assert_eq!(replace_str("abc", b'b', b'z'), (s("azc"), Some(1))); + assert_eq!(replace_str("abb", b'b', b'z'), (s("azz"), Some(1))); + assert_eq!(replace_str("aba", b'a', b'z'), (s("zbz"), Some(0))); + assert_eq!(replace_str("bbb", b'b', b'z'), (s("zzz"), Some(0))); + assert_eq!(replace_str("bac", b'b', b'z'), (s("zac"), Some(0))); + } + + #[test] + fn buffer_basics1() { + let bytes = "homer\nlisa\nmaggie"; + let mut linebuf = LineBufferBuilder::new().build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nlisa\n"); + assert_eq!(rdr.absolute_byte_offset(), 0); + rdr.consume(5); + assert_eq!(rdr.absolute_byte_offset(), 5); + rdr.consume_all(); + assert_eq!(rdr.absolute_byte_offset(), 11); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "maggie"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), None); + } + + #[test] + fn buffer_basics2() { + let bytes = "homer\nlisa\nmaggie\n"; + let mut linebuf = LineBufferBuilder::new().build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n"); + rdr.consume_all(); + 
+ assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), None); + } + + #[test] + fn buffer_basics3() { + let bytes = "\n"; + let mut linebuf = LineBufferBuilder::new().build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "\n"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), None); + } + + #[test] + fn buffer_basics4() { + let bytes = "\n\n"; + let mut linebuf = LineBufferBuilder::new().build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "\n\n"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), None); + } + + #[test] + fn buffer_empty() { + let bytes = ""; + let mut linebuf = LineBufferBuilder::new().build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), None); + } + + #[test] + fn buffer_zero_capacity() { + let bytes = "homer\nlisa\nmaggie"; + let mut linebuf = LineBufferBuilder::new().capacity(0).build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + while rdr.fill().unwrap() { + rdr.consume_all(); + } + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), None); + } + + #[test] + fn buffer_small_capacity() { + let bytes = "homer\nlisa\nmaggie"; + let mut linebuf = LineBufferBuilder::new().capacity(1).build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + let mut got = vec![]; + while rdr.fill().unwrap() { + 
got.extend(rdr.buffer()); + rdr.consume_all(); + } + assert_eq!(bytes, btos(&got)); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), None); + } + + #[test] + fn buffer_limited_capacity1() { + let bytes = "homer\nlisa\nmaggie"; + let mut linebuf = LineBufferBuilder::new() + .capacity(1) + .buffer_alloc(BufferAllocation::Error(5)) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\n"); + rdr.consume_all(); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "lisa\n"); + rdr.consume_all(); + + // This returns an error because while we have just enough room to + // store maggie in the buffer, we *don't* have enough room to read one + // more byte, so we don't know whether we're at EOF or not, and + // therefore must give up. + assert!(rdr.fill().is_err()); + + // We can mush on though! + assert_eq!(btos(rdr.buffer()), "m"); + rdr.consume_all(); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "aggie"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + } + + #[test] + fn buffer_limited_capacity2() { + let bytes = "homer\nlisa\nmaggie"; + let mut linebuf = LineBufferBuilder::new() + .capacity(1) + .buffer_alloc(BufferAllocation::Error(6)) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\n"); + rdr.consume_all(); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "lisa\n"); + rdr.consume_all(); + + // We have just enough space. 
+ assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "maggie"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + } + + #[test] + fn buffer_limited_capacity3() { + let bytes = "homer\nlisa\nmaggie"; + let mut linebuf = LineBufferBuilder::new() + .capacity(1) + .buffer_alloc(BufferAllocation::Error(0)) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.fill().is_err()); + assert_eq!(btos(rdr.buffer()), ""); + } + + #[test] + fn buffer_binary_none() { + let bytes = "homer\nli\x00sa\nmaggie\n"; + let mut linebuf = LineBufferBuilder::new().build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), None); + } + + #[test] + fn buffer_binary_quit1() { + let bytes = "homer\nli\x00sa\nmaggie\n"; + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Quit(b'\x00')) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nli"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), 8); + assert_eq!(rdr.binary_byte_offset(), Some(8)); + } + + #[test] + fn buffer_binary_quit2() { + let bytes = "\x00homer\nlisa\nmaggie\n"; + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Quit(b'\x00')) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(!rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), ""); + assert_eq!(rdr.absolute_byte_offset(), 0); + assert_eq!(rdr.binary_byte_offset(), Some(0)); + } + + #[test] + fn 
buffer_binary_quit3() { + let bytes = "homer\nlisa\nmaggie\n\x00"; + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Quit(b'\x00')) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64 - 1); + assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 1)); + } + + #[test] + fn buffer_binary_quit4() { + let bytes = "homer\nlisa\nmaggie\x00\n"; + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Quit(b'\x00')) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64 - 2); + assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 2)); + } + + #[test] + fn buffer_binary_quit5() { + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Quit(b'u')) + .build(); + let mut rdr = LineBufferReader::new(SHERLOCK.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, s\ +"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), 76); + assert_eq!(rdr.binary_byte_offset(), Some(76)); + assert_eq!(SHERLOCK.as_bytes()[76], b'u'); + } + + #[test] + fn buffer_binary_convert1() { + let bytes = "homer\nli\x00sa\nmaggie\n"; + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Convert(b'\x00')) + .build(); + let mut rdr = 
LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), Some(8)); + } + + #[test] + fn buffer_binary_convert2() { + let bytes = "\x00homer\nlisa\nmaggie\n"; + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Convert(b'\x00')) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), Some(0)); + } + + #[test] + fn buffer_binary_convert3() { + let bytes = "homer\nlisa\nmaggie\n\x00"; + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Convert(b'\x00')) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 1)); + } + + #[test] + fn buffer_binary_convert4() { + let bytes = "homer\nlisa\nmaggie\x00\n"; + let mut linebuf = LineBufferBuilder::new() + .binary_detection(BinaryDetection::Convert(b'\x00')) + .build(); + let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); + + assert!(rdr.buffer().is_empty()); + + assert!(rdr.fill().unwrap()); + assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n"); + rdr.consume_all(); + + assert!(!rdr.fill().unwrap()); + 
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); + assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 2)); + } +} diff -Nru ripgrep-0.6.0/grep-searcher/src/lines.rs ripgrep-0.10.0.3/grep-searcher/src/lines.rs --- ripgrep-0.6.0/grep-searcher/src/lines.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/lines.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,462 @@ +/*! +A collection of routines for performing operations on lines. +*/ + +use bytecount; +use memchr::{memchr, memrchr}; +use grep_matcher::{LineTerminator, Match}; + +/// An iterator over lines in a particular slice of bytes. +/// +/// Line terminators are considered part of the line they terminate. All lines +/// yielded by the iterator are guaranteed to be non-empty. +/// +/// `'b` refers to the lifetime of the underlying bytes. +#[derive(Debug)] +pub struct LineIter<'b> { + bytes: &'b [u8], + stepper: LineStep, +} + +impl<'b> LineIter<'b> { + /// Create a new line iterator that yields lines in the given bytes that + /// are terminated by `line_term`. + pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> { + LineIter { + bytes: bytes, + stepper: LineStep::new(line_term, 0, bytes.len()), + } + } +} + +impl<'b> Iterator for LineIter<'b> { + type Item = &'b [u8]; + + fn next(&mut self) -> Option<&'b [u8]> { + self.stepper.next_match(self.bytes).map(|m| &self.bytes[m]) + } +} + +/// An explicit iterator over lines in a particular slice of bytes. +/// +/// This iterator avoids borrowing the bytes themselves, and instead requires +/// callers to explicitly provide the bytes when moving through the iterator. +/// While not idiomatic, this provides a simple way of iterating over lines +/// that doesn't require borrowing the slice itself, which can be convenient. +/// +/// Line terminators are considered part of the line they terminate. All lines +/// yielded by the iterator are guaranteed to be non-empty. 
+#[derive(Debug)] +pub struct LineStep { + line_term: u8, + pos: usize, + end: usize, +} + +impl LineStep { + /// Create a new line iterator over the given range of bytes using the + /// given line terminator. + /// + /// Callers should provide the actual bytes for each call to `next`. The + /// same slice must be provided to each call. + /// + /// This panics if `start` is not less than or equal to `end`. + pub fn new(line_term: u8, start: usize, end: usize) -> LineStep { + LineStep { line_term, pos: start, end: end } + } + + /// Return the start and end position of the next line in the given bytes. + /// + /// The caller must past exactly the same slice of bytes for each call to + /// `next`. + /// + /// The range returned includes the line terminator. Ranges are always + /// non-empty. + pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> { + self.next_impl(bytes) + } + + /// Like next, but returns a `Match` instead of a tuple. + #[inline(always)] + pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option { + self.next_impl(bytes).map(|(s, e)| Match::new(s, e)) + } + + #[inline(always)] + fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> { + bytes = &bytes[..self.end]; + match memchr(self.line_term, &bytes[self.pos..]) { + None => { + if self.pos < bytes.len() { + let m = (self.pos, bytes.len()); + assert!(m.0 <= m.1); + + self.pos = m.1; + Some(m) + } else { + None + } + } + Some(line_end) => { + let m = (self.pos, self.pos + line_end + 1); + assert!(m.0 <= m.1); + + self.pos = m.1; + Some(m) + } + } + } +} + +/// Count the number of occurrences of `line_term` in `bytes`. +pub fn count(bytes: &[u8], line_term: u8) -> u64 { + bytecount::count(bytes, line_term) as u64 +} + +/// Given a line that possibly ends with a terminator, return that line without +/// the terminator. 
+#[inline(always)] +pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] { + let line_term = line_term.as_bytes(); + let start = bytes.len().saturating_sub(line_term.len()); + if bytes.get(start..) == Some(line_term) { + return &bytes[..bytes.len() - line_term.len()]; + } + bytes +} + +/// Return the start and end offsets of the lines containing the given range +/// of bytes. +/// +/// Line terminators are considered part of the line they terminate. +#[inline(always)] +pub fn locate( + bytes: &[u8], + line_term: u8, + range: Match, +) -> Match { + let line_start = memrchr(line_term, &bytes[0..range.start()]) + .map_or(0, |i| i + 1); + let line_end = + if range.end() > line_start && bytes[range.end() - 1] == line_term { + range.end() + } else { + memchr(line_term, &bytes[range.end()..]) + .map_or(bytes.len(), |i| range.end() + i + 1) + }; + Match::new(line_start, line_end) +} + +/// Returns the minimal starting offset of the line that occurs `count` lines +/// before the last line in `bytes`. +/// +/// Lines are terminated by `line_term`. If `count` is zero, then this returns +/// the starting offset of the last line in `bytes`. +/// +/// If `bytes` ends with a line terminator, then the terminator itself is +/// considered part of the last line. +pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize { + preceding_by_pos(bytes, bytes.len(), line_term, count) +} + +/// Returns the minimal starting offset of the line that occurs `count` lines +/// before the line containing `pos`. Lines are terminated by `line_term`. +/// If `count` is zero, then this returns the starting offset of the line +/// containing `pos`. +/// +/// If `pos` points just past a line terminator, then it is considered part of +/// the line that it terminates. For example, given `bytes = b"abc\nxyz\n"` +/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos +/// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`. 
+fn preceding_by_pos( + bytes: &[u8], + mut pos: usize, + line_term: u8, + mut count: usize, +) -> usize { + if pos == 0 { + return 0; + } else if bytes[pos - 1] == line_term { + pos -= 1; + } + loop { + match memrchr(line_term, &bytes[..pos]) { + None => { + return 0; + } + Some(i) => { + if count == 0 { + return i + 1; + } else if i == 0 { + return 0; + } + count -= 1; + pos = i; + } + } + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + use std::str; + use grep_matcher::Match; + use super::*; + + const SHERLOCK: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached.\ +"; + + fn m(start: usize, end: usize) -> Match { + Match::new(start, end) + } + + fn lines(text: &str) -> Vec<&str> { + let mut results = vec![]; + let mut it = LineStep::new(b'\n', 0, text.len()); + while let Some(m) = it.next_match(text.as_bytes()) { + results.push(&text[m]); + } + results + } + + fn line_ranges(text: &str) -> Vec> { + let mut results = vec![]; + let mut it = LineStep::new(b'\n', 0, text.len()); + while let Some(m) = it.next_match(text.as_bytes()) { + results.push(m.start()..m.end()); + } + results + } + + fn prev(text: &str, pos: usize, count: usize) -> usize { + preceding_by_pos(text.as_bytes(), pos, b'\n', count) + } + + fn loc(text: &str, start: usize, end: usize) -> Match { + locate(text.as_bytes(), b'\n', Match::new(start, end)) + } + + #[test] + fn line_count() { + assert_eq!(0, count(b"", b'\n')); + assert_eq!(1, count(b"\n", b'\n')); + assert_eq!(2, count(b"\n\n", b'\n')); + assert_eq!(2, count(b"a\nb\nc", b'\n')); + } + + #[test] + fn line_locate() { + let t = SHERLOCK; + let lines = line_ranges(t); + + assert_eq!( + loc(t, 
lines[0].start, lines[0].end), + m(lines[0].start, lines[0].end)); + assert_eq!( + loc(t, lines[0].start + 1, lines[0].end), + m(lines[0].start, lines[0].end)); + assert_eq!( + loc(t, lines[0].end - 1, lines[0].end), + m(lines[0].start, lines[0].end)); + assert_eq!( + loc(t, lines[0].end, lines[0].end), + m(lines[1].start, lines[1].end)); + + assert_eq!( + loc(t, lines[5].start, lines[5].end), + m(lines[5].start, lines[5].end)); + assert_eq!( + loc(t, lines[5].start + 1, lines[5].end), + m(lines[5].start, lines[5].end)); + assert_eq!( + loc(t, lines[5].end - 1, lines[5].end), + m(lines[5].start, lines[5].end)); + assert_eq!( + loc(t, lines[5].end, lines[5].end), + m(lines[5].start, lines[5].end)); + } + + #[test] + fn line_locate_weird() { + assert_eq!(loc("", 0, 0), m(0, 0)); + + assert_eq!(loc("\n", 0, 1), m(0, 1)); + assert_eq!(loc("\n", 1, 1), m(1, 1)); + + assert_eq!(loc("\n\n", 0, 0), m(0, 1)); + assert_eq!(loc("\n\n", 0, 1), m(0, 1)); + assert_eq!(loc("\n\n", 1, 1), m(1, 2)); + assert_eq!(loc("\n\n", 1, 2), m(1, 2)); + assert_eq!(loc("\n\n", 2, 2), m(2, 2)); + + assert_eq!(loc("a\nb\nc", 0, 1), m(0, 2)); + assert_eq!(loc("a\nb\nc", 1, 2), m(0, 2)); + assert_eq!(loc("a\nb\nc", 2, 3), m(2, 4)); + assert_eq!(loc("a\nb\nc", 3, 4), m(2, 4)); + assert_eq!(loc("a\nb\nc", 4, 5), m(4, 5)); + assert_eq!(loc("a\nb\nc", 5, 5), m(4, 5)); + } + + #[test] + fn line_iter() { + assert_eq!(lines("abc"), vec!["abc"]); + + assert_eq!(lines("abc\n"), vec!["abc\n"]); + assert_eq!(lines("abc\nxyz"), vec!["abc\n", "xyz"]); + assert_eq!(lines("abc\nxyz\n"), vec!["abc\n", "xyz\n"]); + + assert_eq!(lines("abc\n\n"), vec!["abc\n", "\n"]); + assert_eq!(lines("abc\n\n\n"), vec!["abc\n", "\n", "\n"]); + assert_eq!(lines("abc\n\nxyz"), vec!["abc\n", "\n", "xyz"]); + assert_eq!(lines("abc\n\nxyz\n"), vec!["abc\n", "\n", "xyz\n"]); + assert_eq!(lines("abc\nxyz\n\n"), vec!["abc\n", "xyz\n", "\n"]); + + assert_eq!(lines("\n"), vec!["\n"]); + assert_eq!(lines(""), Vec::<&str>::new()); + } + + 
#[test] + fn line_iter_empty() { + let mut it = LineStep::new(b'\n', 0, 0); + assert_eq!(it.next(b"abc"), None); + } + + #[test] + fn preceding_lines_doc() { + // These are the examples mentions in the documentation of `preceding`. + let bytes = b"abc\nxyz\n"; + assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0)); + assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0)); + assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1)); + assert_eq!(0, preceding_by_pos(bytes, 8, b'\n', 1)); + } + + #[test] + fn preceding_lines_sherlock() { + let t = SHERLOCK; + let lines = line_ranges(t); + + // The following tests check the count == 0 case, i.e., finding the + // beginning of the line containing the given position. + assert_eq!(0, prev(t, 0, 0)); + assert_eq!(0, prev(t, 1, 0)); + // The line terminator is addressed by `end-1` and terminates the line + // it is part of. + assert_eq!(0, prev(t, lines[0].end - 1, 0)); + assert_eq!(lines[0].start, prev(t, lines[0].end, 0)); + // The end position of line addresses the byte immediately following a + // line terminator, which puts it on the following line. + assert_eq!(lines[1].start, prev(t, lines[0].end + 1, 0)); + + // Now tests for count > 0. + assert_eq!(0, prev(t, 0, 1)); + assert_eq!(0, prev(t, 0, 2)); + assert_eq!(0, prev(t, 1, 1)); + assert_eq!(0, prev(t, 1, 2)); + assert_eq!(0, prev(t, lines[0].end - 1, 1)); + assert_eq!(0, prev(t, lines[0].end - 1, 2)); + assert_eq!(0, prev(t, lines[0].end, 1)); + assert_eq!(0, prev(t, lines[0].end, 2)); + assert_eq!(lines[3].start, prev(t, lines[4].end - 1, 1)); + assert_eq!(lines[3].start, prev(t, lines[4].end, 1)); + assert_eq!(lines[4].start, prev(t, lines[4].end + 1, 1)); + + // The last line has no line terminator. 
+ assert_eq!(lines[5].start, prev(t, lines[5].end, 0)); + assert_eq!(lines[5].start, prev(t, lines[5].end - 1, 0)); + assert_eq!(lines[4].start, prev(t, lines[5].end, 1)); + assert_eq!(lines[0].start, prev(t, lines[5].end, 5)); + } + + #[test] + fn preceding_lines_short() { + let t = "a\nb\nc\nd\ne\nf\n"; + let lines = line_ranges(t); + assert_eq!(12, t.len()); + + assert_eq!(lines[5].start, prev(t, lines[5].end, 0)); + assert_eq!(lines[4].start, prev(t, lines[5].end, 1)); + assert_eq!(lines[3].start, prev(t, lines[5].end, 2)); + assert_eq!(lines[2].start, prev(t, lines[5].end, 3)); + assert_eq!(lines[1].start, prev(t, lines[5].end, 4)); + assert_eq!(lines[0].start, prev(t, lines[5].end, 5)); + assert_eq!(lines[0].start, prev(t, lines[5].end, 6)); + + assert_eq!(lines[5].start, prev(t, lines[5].end - 1, 0)); + assert_eq!(lines[4].start, prev(t, lines[5].end - 1, 1)); + assert_eq!(lines[3].start, prev(t, lines[5].end - 1, 2)); + assert_eq!(lines[2].start, prev(t, lines[5].end - 1, 3)); + assert_eq!(lines[1].start, prev(t, lines[5].end - 1, 4)); + assert_eq!(lines[0].start, prev(t, lines[5].end - 1, 5)); + assert_eq!(lines[0].start, prev(t, lines[5].end - 1, 6)); + + assert_eq!(lines[4].start, prev(t, lines[5].start, 0)); + assert_eq!(lines[3].start, prev(t, lines[5].start, 1)); + assert_eq!(lines[2].start, prev(t, lines[5].start, 2)); + assert_eq!(lines[1].start, prev(t, lines[5].start, 3)); + assert_eq!(lines[0].start, prev(t, lines[5].start, 4)); + assert_eq!(lines[0].start, prev(t, lines[5].start, 5)); + + assert_eq!(lines[3].start, prev(t, lines[4].end - 1, 1)); + assert_eq!(lines[2].start, prev(t, lines[4].start, 1)); + + assert_eq!(lines[2].start, prev(t, lines[3].end - 1, 1)); + assert_eq!(lines[1].start, prev(t, lines[3].start, 1)); + + assert_eq!(lines[1].start, prev(t, lines[2].end - 1, 1)); + assert_eq!(lines[0].start, prev(t, lines[2].start, 1)); + + assert_eq!(lines[0].start, prev(t, lines[1].end - 1, 1)); + assert_eq!(lines[0].start, prev(t, 
lines[1].start, 1)); + + assert_eq!(lines[0].start, prev(t, lines[0].end - 1, 1)); + assert_eq!(lines[0].start, prev(t, lines[0].start, 1)); + } + + #[test] + fn preceding_lines_empty1() { + let t = "\n\n\nd\ne\nf\n"; + let lines = line_ranges(t); + assert_eq!(9, t.len()); + + assert_eq!(lines[0].start, prev(t, lines[0].end, 0)); + assert_eq!(lines[0].start, prev(t, lines[0].end, 1)); + assert_eq!(lines[1].start, prev(t, lines[1].end, 0)); + assert_eq!(lines[0].start, prev(t, lines[1].end, 1)); + + assert_eq!(lines[5].start, prev(t, lines[5].end, 0)); + assert_eq!(lines[4].start, prev(t, lines[5].end, 1)); + assert_eq!(lines[3].start, prev(t, lines[5].end, 2)); + assert_eq!(lines[2].start, prev(t, lines[5].end, 3)); + assert_eq!(lines[1].start, prev(t, lines[5].end, 4)); + assert_eq!(lines[0].start, prev(t, lines[5].end, 5)); + assert_eq!(lines[0].start, prev(t, lines[5].end, 6)); + } + + #[test] + fn preceding_lines_empty2() { + let t = "a\n\n\nd\ne\nf\n"; + let lines = line_ranges(t); + assert_eq!(10, t.len()); + + assert_eq!(lines[0].start, prev(t, lines[0].end, 0)); + assert_eq!(lines[0].start, prev(t, lines[0].end, 1)); + assert_eq!(lines[1].start, prev(t, lines[1].end, 0)); + assert_eq!(lines[0].start, prev(t, lines[1].end, 1)); + + assert_eq!(lines[5].start, prev(t, lines[5].end, 0)); + assert_eq!(lines[4].start, prev(t, lines[5].end, 1)); + assert_eq!(lines[3].start, prev(t, lines[5].end, 2)); + assert_eq!(lines[2].start, prev(t, lines[5].end, 3)); + assert_eq!(lines[1].start, prev(t, lines[5].end, 4)); + assert_eq!(lines[0].start, prev(t, lines[5].end, 5)); + assert_eq!(lines[0].start, prev(t, lines[5].end, 6)); + } +} diff -Nru ripgrep-0.6.0/grep-searcher/src/macros.rs ripgrep-0.10.0.3/grep-searcher/src/macros.rs --- ripgrep-0.6.0/grep-searcher/src/macros.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/macros.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +#[cfg(test)] +#[macro_export] +macro_rules! 
assert_eq_printed { + ($expected:expr, $got:expr, $($tt:tt)*) => { + let expected = &*$expected; + let got = &*$got; + let label = format!($($tt)*); + if expected != got { + panic!(" +printed outputs differ! (label: {}) + +expected: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +got: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +", label, expected, got); + } + } +} diff -Nru ripgrep-0.6.0/grep-searcher/src/searcher/core.rs ripgrep-0.10.0.3/grep-searcher/src/searcher/core.rs --- ripgrep-0.6.0/grep-searcher/src/searcher/core.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/searcher/core.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,592 @@ +use std::cmp; + +use memchr::memchr; + +use grep_matcher::{LineMatchKind, Matcher}; +use lines::{self, LineStep}; +use line_buffer::BinaryDetection; +use searcher::{Config, Range, Searcher}; +use sink::{ + Sink, SinkError, + SinkFinish, SinkContext, SinkContextKind, SinkMatch, +}; + +#[derive(Debug)] +pub struct Core<'s, M: 's, S> { + config: &'s Config, + matcher: M, + searcher: &'s Searcher, + sink: S, + binary: bool, + pos: usize, + absolute_byte_offset: u64, + binary_byte_offset: Option, + line_number: Option, + last_line_counted: usize, + last_line_visited: usize, + after_context_left: usize, + has_sunk: bool, +} + +impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { + pub fn new( + searcher: &'s Searcher, + matcher: M, + sink: S, + binary: bool, + ) -> Core<'s, M, S> { + let line_number = + if searcher.config.line_number { + Some(1) + } else { + None + }; + let core = Core { + config: &searcher.config, + matcher: matcher, + searcher: searcher, + sink: sink, + binary: binary, + pos: 0, + absolute_byte_offset: 0, + binary_byte_offset: None, + line_number: 
line_number, + last_line_counted: 0, + last_line_visited: 0, + after_context_left: 0, + has_sunk: false, + }; + if !core.searcher.multi_line_with_matcher(&core.matcher) { + if core.is_line_by_line_fast() { + trace!("searcher core: will use fast line searcher"); + } else { + trace!("searcher core: will use slow line searcher"); + } + } + core + } + + pub fn pos(&self) -> usize { + self.pos + } + + pub fn set_pos(&mut self, pos: usize) { + self.pos = pos; + } + + pub fn binary_byte_offset(&self) -> Option { + self.binary_byte_offset.map(|offset| offset as u64) + } + + pub fn matcher(&self) -> &M { + &self.matcher + } + + pub fn matched( + &mut self, + buf: &[u8], + range: &Range, + ) -> Result { + self.sink_matched(buf, range) + } + + pub fn begin(&mut self) -> Result { + self.sink.begin(&self.searcher) + } + + pub fn finish( + &mut self, + byte_count: u64, + binary_byte_offset: Option, + ) -> Result<(), S::Error> { + self.sink.finish( + &self.searcher, + &SinkFinish { + byte_count, + binary_byte_offset, + }) + } + + pub fn match_by_line(&mut self, buf: &[u8]) -> Result { + if self.is_line_by_line_fast() { + self.match_by_line_fast(buf) + } else { + self.match_by_line_slow(buf) + } + } + + pub fn roll(&mut self, buf: &[u8]) -> usize { + let consumed = + if self.config.max_context() == 0 { + buf.len() + } else { + // It might seem like all we need to care about here is just + // the "before context," but in order to sink the context + // separator (when before_context==0 and after_context>0), we + // need to know something about the position of the previous + // line visited, even if we're at the beginning of the buffer. 
+ let context_start = lines::preceding( + buf, + self.config.line_term.as_byte(), + self.config.max_context(), + ); + let consumed = cmp::max(context_start, self.last_line_visited); + consumed + }; + self.count_lines(buf, consumed); + self.absolute_byte_offset += consumed as u64; + self.last_line_counted = 0; + self.last_line_visited = 0; + self.set_pos(buf.len() - consumed); + consumed + } + + pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool { + if self.binary_byte_offset.is_some() { + return true; + } + let binary_byte = match self.config.binary.0 { + BinaryDetection::Quit(b) => b, + _ => return false, + }; + if let Some(i) = memchr(binary_byte, &buf[*range]) { + self.binary_byte_offset = Some(range.start() + i); + true + } else { + false + } + } + + pub fn before_context_by_line( + &mut self, + buf: &[u8], + upto: usize, + ) -> Result { + if self.config.before_context == 0 { + return Ok(true); + } + let range = Range::new(self.last_line_visited, upto); + if range.is_empty() { + return Ok(true); + } + let before_context_start = range.start() + lines::preceding( + &buf[range], + self.config.line_term.as_byte(), + self.config.before_context - 1, + ); + + let range = Range::new(before_context_start, range.end()); + let mut stepper = LineStep::new( + self.config.line_term.as_byte(), + range.start(), + range.end(), + ); + while let Some(line) = stepper.next_match(buf) { + if !self.sink_break_context(line.start())? { + return Ok(false); + } + if !self.sink_before_context(buf, &line)? { + return Ok(false); + } + } + Ok(true) + } + + pub fn after_context_by_line( + &mut self, + buf: &[u8], + upto: usize, + ) -> Result { + if self.after_context_left == 0 { + return Ok(true); + } + let range = Range::new(self.last_line_visited, upto); + let mut stepper = LineStep::new( + self.config.line_term.as_byte(), + range.start(), + range.end(), + ); + while let Some(line) = stepper.next_match(buf) { + if !self.sink_after_context(buf, &line)? 
{ + return Ok(false); + } + if self.after_context_left == 0 { + break; + } + } + Ok(true) + } + + pub fn other_context_by_line( + &mut self, + buf: &[u8], + upto: usize, + ) -> Result { + let range = Range::new(self.last_line_visited, upto); + let mut stepper = LineStep::new( + self.config.line_term.as_byte(), + range.start(), + range.end(), + ); + while let Some(line) = stepper.next_match(buf) { + if !self.sink_other_context(buf, &line)? { + return Ok(false); + } + } + Ok(true) + } + + fn match_by_line_slow(&mut self, buf: &[u8]) -> Result { + debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher)); + + let range = Range::new(self.pos(), buf.len()); + let mut stepper = LineStep::new( + self.config.line_term.as_byte(), + range.start(), + range.end(), + ); + while let Some(line) = stepper.next_match(buf) { + let matched = { + // Stripping the line terminator is necessary to prevent some + // classes of regexes from matching the empty position *after* + // the end of the line. For example, `(?m)^$` will match at + // position (2, 2) in the string `a\n`. + let slice = lines::without_terminator( + &buf[line], + self.config.line_term, + ); + match self.matcher.shortest_match(slice) { + Err(err) => return Err(S::Error::error_message(err)), + Ok(result) => result.is_some(), + } + }; + self.set_pos(line.end()); + if matched != self.config.invert_match { + if !self.before_context_by_line(buf, line.start())? { + return Ok(false); + } + if !self.sink_matched(buf, &line)? { + return Ok(false); + } + } else if self.after_context_left >= 1 { + if !self.sink_after_context(buf, &line)? { + return Ok(false); + } + } else if self.config.passthru { + if !self.sink_other_context(buf, &line)? { + return Ok(false); + } + } + } + Ok(true) + } + + fn match_by_line_fast(&mut self, buf: &[u8]) -> Result { + debug_assert!(!self.config.passthru); + + while !buf[self.pos()..].is_empty() { + if self.config.invert_match { + if !self.match_by_line_fast_invert(buf)? 
{ + return Ok(false); + } + } else if let Some(line) = self.find_by_line_fast(buf)? { + if self.config.max_context() > 0 { + if !self.after_context_by_line(buf, line.start())? { + return Ok(false); + } + if !self.before_context_by_line(buf, line.start())? { + return Ok(false); + } + } + self.set_pos(line.end()); + if !self.sink_matched(buf, &line)? { + return Ok(false); + } + } else { + break; + } + } + if !self.after_context_by_line(buf, buf.len())? { + return Ok(false); + } + self.set_pos(buf.len()); + Ok(true) + } + + #[inline(always)] + fn match_by_line_fast_invert( + &mut self, + buf: &[u8], + ) -> Result { + assert!(self.config.invert_match); + + let invert_match = match self.find_by_line_fast(buf)? { + None => { + let range = Range::new(self.pos(), buf.len()); + self.set_pos(range.end()); + range + } + Some(line) => { + let range = Range::new(self.pos(), line.start()); + self.set_pos(line.end()); + range + } + }; + if invert_match.is_empty() { + return Ok(true); + } + if !self.after_context_by_line(buf, invert_match.start())? { + return Ok(false); + } + if !self.before_context_by_line(buf, invert_match.start())? { + return Ok(false); + } + let mut stepper = LineStep::new( + self.config.line_term.as_byte(), + invert_match.start(), + invert_match.end(), + ); + while let Some(line) = stepper.next_match(buf) { + if !self.sink_matched(buf, &line)? 
{ + return Ok(false); + } + } + Ok(true) + } + + #[inline(always)] + fn find_by_line_fast( + &self, + buf: &[u8], + ) -> Result, S::Error> { + debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher)); + debug_assert!(self.is_line_by_line_fast()); + + let mut pos = self.pos(); + while !buf[pos..].is_empty() { + match self.matcher.find_candidate_line(&buf[pos..]) { + Err(err) => return Err(S::Error::error_message(err)), + Ok(None) => return Ok(None), + Ok(Some(LineMatchKind::Confirmed(i))) => { + let line = lines::locate( + buf, + self.config.line_term.as_byte(), + Range::zero(i).offset(pos), + ); + // If we matched beyond the end of the buffer, then we + // don't report this as a match. + if line.start() == buf.len() { + pos = buf.len(); + continue; + } + return Ok(Some(line)); + } + Ok(Some(LineMatchKind::Candidate(i))) => { + let line = lines::locate( + buf, + self.config.line_term.as_byte(), + Range::zero(i).offset(pos), + ); + // We need to strip the line terminator here to match the + // semantics of line-by-line searching. Namely, regexes + // like `(?m)^$` can match at the final position beyond a + // line terminator, which is non-sensical in line oriented + // matching. + let slice = lines::without_terminator( + &buf[line], + self.config.line_term, + ); + match self.matcher.is_match(slice) { + Err(err) => return Err(S::Error::error_message(err)), + Ok(true) => return Ok(Some(line)), + Ok(false) => { + pos = line.end(); + continue; + } + } + } + } + } + Ok(None) + } + + #[inline(always)] + fn sink_matched( + &mut self, + buf: &[u8], + range: &Range, + ) -> Result { + if self.binary && self.detect_binary(buf, range) { + return Ok(false); + } + if !self.sink_break_context(range.start())? 
{ + return Ok(false); + } + self.count_lines(buf, range.start()); + let offset = self.absolute_byte_offset + range.start() as u64; + let linebuf = + if self.config.line_term.is_crlf() { + // Normally, a line terminator is never part of a match, but + // if the line terminator is CRLF, then it's possible for `\r` + // to end up in the match, which we generally don't want. So + // we strip it here. + lines::without_terminator(&buf[*range], self.config.line_term) + } else { + &buf[*range] + }; + let keepgoing = self.sink.matched( + &self.searcher, + &SinkMatch { + line_term: self.config.line_term, + bytes: linebuf, + absolute_byte_offset: offset, + line_number: self.line_number, + }, + )?; + if !keepgoing { + return Ok(false); + } + self.last_line_visited = range.end(); + self.after_context_left = self.config.after_context; + self.has_sunk = true; + Ok(true) + } + + fn sink_before_context( + &mut self, + buf: &[u8], + range: &Range, + ) -> Result { + if self.binary && self.detect_binary(buf, range) { + return Ok(false); + } + self.count_lines(buf, range.start()); + let offset = self.absolute_byte_offset + range.start() as u64; + let keepgoing = self.sink.context( + &self.searcher, + &SinkContext { + line_term: self.config.line_term, + bytes: &buf[*range], + kind: SinkContextKind::Before, + absolute_byte_offset: offset, + line_number: self.line_number, + }, + )?; + if !keepgoing { + return Ok(false); + } + self.last_line_visited = range.end(); + self.has_sunk = true; + Ok(true) + } + + fn sink_after_context( + &mut self, + buf: &[u8], + range: &Range, + ) -> Result { + assert!(self.after_context_left >= 1); + + if self.binary && self.detect_binary(buf, range) { + return Ok(false); + } + self.count_lines(buf, range.start()); + let offset = self.absolute_byte_offset + range.start() as u64; + let keepgoing = self.sink.context( + &self.searcher, + &SinkContext { + line_term: self.config.line_term, + bytes: &buf[*range], + kind: SinkContextKind::After, + 
absolute_byte_offset: offset, + line_number: self.line_number, + }, + )?; + if !keepgoing { + return Ok(false); + } + self.last_line_visited = range.end(); + self.after_context_left -= 1; + self.has_sunk = true; + Ok(true) + } + + fn sink_other_context( + &mut self, + buf: &[u8], + range: &Range, + ) -> Result { + if self.binary && self.detect_binary(buf, range) { + return Ok(false); + } + self.count_lines(buf, range.start()); + let offset = self.absolute_byte_offset + range.start() as u64; + let keepgoing = self.sink.context( + &self.searcher, + &SinkContext { + line_term: self.config.line_term, + bytes: &buf[*range], + kind: SinkContextKind::Other, + absolute_byte_offset: offset, + line_number: self.line_number, + }, + )?; + if !keepgoing { + return Ok(false); + } + self.last_line_visited = range.end(); + self.has_sunk = true; + Ok(true) + } + + fn sink_break_context( + &mut self, + start_of_line: usize, + ) -> Result { + let is_gap = self.last_line_visited < start_of_line; + let any_context = + self.config.before_context > 0 + || self.config.after_context > 0; + + if !any_context || !self.has_sunk || !is_gap { + Ok(true) + } else { + self.sink.context_break(&self.searcher) + } + } + + fn count_lines(&mut self, buf: &[u8], upto: usize) { + if let Some(ref mut line_number) = self.line_number { + if self.last_line_counted >= upto { + return; + } + let slice = &buf[self.last_line_counted..upto]; + let count = lines::count(slice, self.config.line_term.as_byte()); + *line_number += count; + self.last_line_counted = upto; + } + } + + fn is_line_by_line_fast(&self) -> bool { + debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher)); + + if self.config.passthru { + return false; + } + if let Some(line_term) = self.matcher.line_terminator() { + if line_term == self.config.line_term { + return true; + } + } + if let Some(non_matching) = self.matcher.non_matching_bytes() { + // If the line terminator is CRLF, we don't actually need to care + // whether the 
regex can match `\r` or not. Namely, a `\r` is + // neither necessary nor sufficient to terminate a line. A `\n` is + // always required. + if non_matching.contains(self.config.line_term.as_byte()) { + return true; + } + } + false + } +} diff -Nru ripgrep-0.6.0/grep-searcher/src/searcher/glue.rs ripgrep-0.10.0.3/grep-searcher/src/searcher/glue.rs --- ripgrep-0.6.0/grep-searcher/src/searcher/glue.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/searcher/glue.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,1506 @@ +use std::cmp; +use std::io; + +use grep_matcher::Matcher; +use lines::{self, LineStep}; +use line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader}; +use sink::{Sink, SinkError}; + +use searcher::{Config, Range, Searcher}; +use searcher::core::Core; + +#[derive(Debug)] +pub struct ReadByLine<'s, M: 's, R, S> { + config: &'s Config, + core: Core<'s, M, S>, + rdr: LineBufferReader<'s, R>, +} + +impl<'s, M, R, S> ReadByLine<'s, M, R, S> +where M: Matcher, + R: io::Read, + S: Sink +{ + pub fn new( + searcher: &'s Searcher, + matcher: M, + read_from: LineBufferReader<'s, R>, + write_to: S, + ) -> ReadByLine<'s, M, R, S> { + debug_assert!(!searcher.multi_line_with_matcher(&matcher)); + + ReadByLine { + config: &searcher.config, + core: Core::new(searcher, matcher, write_to, false), + rdr: read_from, + } + } + + pub fn run(mut self) -> Result<(), S::Error> { + if self.core.begin()? { + while + self.fill()? && self.core.match_by_line(self.rdr.buffer())? 
+ {} + } + self.core.finish( + self.rdr.absolute_byte_offset(), + self.rdr.binary_byte_offset(), + ) + } + + fn fill(&mut self) -> Result { + assert!(self.rdr.buffer()[self.core.pos()..].is_empty()); + + let old_buf_len = self.rdr.buffer().len(); + let consumed = self.core.roll(self.rdr.buffer()); + self.rdr.consume(consumed); + let didread = match self.rdr.fill() { + Err(err) => return Err(S::Error::error_io(err)), + Ok(didread) => didread, + }; + if !didread || self.rdr.binary_byte_offset().is_some() { + return Ok(false); + } + // If rolling the buffer didn't result in consuming anything and if + // re-filling the buffer didn't add any bytes, then the only thing in + // our buffer is leftover context, which we no longer need since there + // is nothing left to search. So forcefully quit. + if consumed == 0 && old_buf_len == self.rdr.buffer().len() { + self.rdr.consume(old_buf_len); + return Ok(false); + } + Ok(true) + } +} + +#[derive(Debug)] +pub struct SliceByLine<'s, M: 's, S> { + config: &'s Config, + core: Core<'s, M, S>, + slice: &'s [u8], +} + +impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> { + pub fn new( + searcher: &'s Searcher, + matcher: M, + slice: &'s [u8], + write_to: S, + ) -> SliceByLine<'s, M, S> { + debug_assert!(!searcher.multi_line_with_matcher(&matcher)); + + SliceByLine { + config: &searcher.config, + core: Core::new(searcher, matcher, write_to, true), + slice: slice, + } + } + + pub fn run(mut self) -> Result<(), S::Error> { + if self.core.begin()? { + let binary_upto = cmp::min( + self.slice.len(), + DEFAULT_BUFFER_CAPACITY, + ); + let binary_range = Range::new(0, binary_upto); + if !self.core.detect_binary(self.slice, &binary_range) { + while + !self.slice[self.core.pos()..].is_empty() + && self.core.match_by_line(self.slice)? 
+ {} + } + } + let byte_count = self.byte_count(); + let binary_byte_offset = self.core.binary_byte_offset(); + self.core.finish(byte_count, binary_byte_offset) + } + + fn byte_count(&mut self) -> u64 { + match self.core.binary_byte_offset() { + Some(offset) if offset < self.core.pos() as u64 => offset, + _ => self.core.pos() as u64, + } + } +} + +#[derive(Debug)] +pub struct MultiLine<'s, M: 's, S> { + config: &'s Config, + core: Core<'s, M, S>, + slice: &'s [u8], + last_match: Option, +} + +impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { + pub fn new( + searcher: &'s Searcher, + matcher: M, + slice: &'s [u8], + write_to: S, + ) -> MultiLine<'s, M, S> { + debug_assert!(searcher.multi_line_with_matcher(&matcher)); + + MultiLine { + config: &searcher.config, + core: Core::new(searcher, matcher, write_to, true), + slice: slice, + last_match: None, + } + } + + pub fn run(mut self) -> Result<(), S::Error> { + if self.core.begin()? { + let binary_upto = cmp::min( + self.slice.len(), + DEFAULT_BUFFER_CAPACITY, + ); + let binary_range = Range::new(0, binary_upto); + if !self.core.detect_binary(self.slice, &binary_range) { + let mut keepgoing = true; + while !self.slice[self.core.pos()..].is_empty() && keepgoing { + keepgoing = self.sink()?; + } + if keepgoing { + keepgoing = match self.last_match.take() { + None => true, + Some(last_match) => { + if self.sink_context(&last_match)? { + self.sink_matched(&last_match)?; + } + true + } + }; + } + // Take care of any remaining context after the last match. 
+ if keepgoing { + if self.config.passthru { + self.core.other_context_by_line( + self.slice, + self.slice.len(), + )?; + } else { + self.core.after_context_by_line( + self.slice, + self.slice.len(), + )?; + } + } + } + } + let byte_count = self.byte_count(); + let binary_byte_offset = self.core.binary_byte_offset(); + self.core.finish(byte_count, binary_byte_offset) + } + + fn sink(&mut self) -> Result { + if self.config.invert_match { + return self.sink_matched_inverted(); + } + let mat = match self.find()? { + Some(range) => range, + None => { + self.core.set_pos(self.slice.len()); + return Ok(true); + } + }; + self.advance(&mat); + + let line = lines::locate( + self.slice, + self.config.line_term.as_byte(), + mat, + ); + // We delay sinking the match to make sure we group adjacent matches + // together in a single sink. Adjacent matches are distinct matches + // that start and end on the same line, respectively. This guarantees + // that a single line is never sinked more than once. + match self.last_match.take() { + None => { + self.last_match = Some(line); + Ok(true) + } + Some(last_match) => { + // If the lines in the previous match overlap with the lines + // in this match, then simply grow the match and move on. + // This happens when the next match begins on the same line + // that the last match ends on. + if last_match.end() > line.start() { + self.last_match = Some(last_match.with_end(line.end())); + Ok(true) + } else { + self.last_match = Some(line); + if !self.sink_context(&last_match)? { + return Ok(false); + } + self.sink_matched(&last_match) + } + } + } + } + + fn sink_matched_inverted(&mut self) -> Result { + assert!(self.config.invert_match); + + let invert_match = match self.find()? 
{ + None => { + let range = Range::new(self.core.pos(), self.slice.len()); + self.core.set_pos(range.end()); + range + } + Some(mat) => { + let line = lines::locate( + self.slice, + self.config.line_term.as_byte(), + mat, + ); + let range = Range::new(self.core.pos(), line.start()); + self.advance(&line); + range + } + }; + if invert_match.is_empty() { + return Ok(true); + } + if !self.sink_context(&invert_match)? { + return Ok(false); + } + let mut stepper = LineStep::new( + self.config.line_term.as_byte(), + invert_match.start(), + invert_match.end(), + ); + while let Some(line) = stepper.next_match(self.slice) { + if !self.sink_matched(&line)? { + return Ok(false); + } + } + Ok(true) + } + + fn sink_matched(&mut self, range: &Range) -> Result { + if range.is_empty() { + // The only way we can produce an empty line for a match is if we + // match the position immediately following the last byte that we + // search, and where that last byte is also the line terminator. We + // never want to report that match, and we know we're done at that + // point anyway, so stop the search. + return Ok(false); + } + self.core.matched(self.slice, range) + } + + fn sink_context(&mut self, range: &Range) -> Result { + if self.config.passthru { + if !self.core.other_context_by_line(self.slice, range.start())? { + return Ok(false); + } + } else { + if !self.core.after_context_by_line(self.slice, range.start())? { + return Ok(false); + } + if !self.core.before_context_by_line(self.slice, range.start())? { + return Ok(false); + } + } + Ok(true) + } + + fn find(&mut self) -> Result, S::Error> { + match self.core.matcher().find(&self.slice[self.core.pos()..]) { + Err(err) => Err(S::Error::error_message(err)), + Ok(None) => Ok(None), + Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))), + } + } + + /// Advance the search position based on the previous match. + /// + /// If the previous match is zero width, then this advances the search + /// position one byte past the end of the match. 
+ fn advance(&mut self, range: &Range) { + self.core.set_pos(range.end()); + if range.is_empty() && self.core.pos() < self.slice.len() { + let newpos = self.core.pos() + 1; + self.core.set_pos(newpos); + } + } + + fn byte_count(&mut self) -> u64 { + match self.core.binary_byte_offset() { + Some(offset) if offset < self.core.pos() as u64 => offset, + _ => self.core.pos() as u64, + } + } +} + +#[cfg(test)] +mod tests { + use searcher::{BinaryDetection, SearcherBuilder}; + use testutil::{KitchenSink, RegexMatcher, SearcherTester}; + + use super::*; + + const SHERLOCK: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached.\ +"; + + const CODE: &'static str = "\ +extern crate snap; + +use std::io; + +fn main() { + let stdin = io::stdin(); + let stdout = io::stdout(); + + // Wrap the stdin reader in a Snappy reader. + let mut rdr = snap::Reader::new(stdin.lock()); + let mut wtr = stdout.lock(); + io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); +} +"; + + #[test] + fn basic1() { + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +129:be, to a very large extent, the result of luck. 
Sherlock Holmes + +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn basic2() { + let exp = "\nbyte count:366\n"; + SearcherTester::new(SHERLOCK, "NADA") + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn basic3() { + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, "a") + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn basic4() { + let haystack = "\ +a +b + +c + + +d +"; + let byte_count = haystack.len(); + let exp = format!("0:a\n\nbyte count:{}\n", byte_count); + SearcherTester::new(haystack, "a") + .line_number(false) + .expected_no_line_number(&exp) + .test(); + } + + #[test] + fn invert1() { + let exp = "\ +65:Holmeses, success in the province of detective work must always +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .line_number(false) + .invert_match(true) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn line_number1() { + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +129:be, to a very large extent, the result of luck. Sherlock Holmes + +byte count:366 +"; + let exp_line = "\ +1:0:For the Doctor Watsons of this world, as opposed to the Sherlock +3:129:be, to a very large extent, the result of luck. 
Sherlock Holmes + +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .expected_no_line_number(exp) + .expected_with_line_number(exp_line) + .test(); + } + + #[test] + fn line_number_invert1() { + let exp = "\ +65:Holmeses, success in the province of detective work must always +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + let exp_line = "\ +2:65:Holmeses, success in the province of detective work must always +4:193:can extract a clew from a wisp of straw or a flake of cigar ash; +5:258:but Doctor Watson has to have it taken out for him and dusted, +6:321:and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .invert_match(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_line) + .test(); + } + + #[test] + fn multi_line_overlap1() { + let haystack = "xxx\nabc\ndefxxxabc\ndefxxx\nxxx"; + let byte_count = haystack.len(); + let exp = format!( + "4:abc\n8:defxxxabc\n18:defxxx\n\nbyte count:{}\n", + byte_count); + + SearcherTester::new(haystack, "abc\ndef") + .by_line(false) + .line_number(false) + .expected_no_line_number(&exp) + .test(); + } + + #[test] + fn multi_line_overlap2() { + let haystack = "xxx\nabc\ndefabc\ndefxxx\nxxx"; + let byte_count = haystack.len(); + let exp = format!( + "4:abc\n8:defabc\n15:defxxx\n\nbyte count:{}\n", + byte_count); + + SearcherTester::new(haystack, "abc\ndef") + .by_line(false) + .line_number(false) + .expected_no_line_number(&exp) + .test(); + } + + #[test] + fn empty_line1() { + let exp = "\nbyte count:0\n"; + SearcherTester::new("", r"^$") + .expected_no_line_number(exp) + .expected_with_line_number(exp) + .test(); + } + + #[test] + fn empty_line2() { + let exp = "0:\n\nbyte count:1\n"; + let exp_line = "1:0:\n\nbyte count:1\n"; + + SearcherTester::new("\n", r"^$") + 
.expected_no_line_number(exp) + .expected_with_line_number(exp_line) + .test(); + } + + #[test] + fn empty_line3() { + let exp = "0:\n1:\n\nbyte count:2\n"; + let exp_line = "1:0:\n2:1:\n\nbyte count:2\n"; + + SearcherTester::new("\n\n", r"^$") + .expected_no_line_number(exp) + .expected_with_line_number(exp_line) + .test(); + } + + #[test] + fn empty_line4() { + // See: https://github.com/BurntSushi/ripgrep/issues/441 + let haystack = "\ +a +b + +c + + +d +"; + let byte_count = haystack.len(); + let exp = format!("4:\n7:\n8:\n\nbyte count:{}\n", byte_count); + let exp_line = format!( + "3:4:\n5:7:\n6:8:\n\nbyte count:{}\n", + byte_count); + + SearcherTester::new(haystack, r"^$") + .expected_no_line_number(&exp) + .expected_with_line_number(&exp_line) + .test(); + } + + #[test] + fn empty_line5() { + // See: https://github.com/BurntSushi/ripgrep/issues/441 + // This is like empty_line4, but lacks the trailing line terminator. + let haystack = "\ +a +b + +c + + +d"; + let byte_count = haystack.len(); + let exp = format!("4:\n7:\n8:\n\nbyte count:{}\n", byte_count); + let exp_line = format!( + "3:4:\n5:7:\n6:8:\n\nbyte count:{}\n", + byte_count); + + SearcherTester::new(haystack, r"^$") + .expected_no_line_number(&exp) + .expected_with_line_number(&exp_line) + .test(); + } + + #[test] + fn empty_line6() { + // See: https://github.com/BurntSushi/ripgrep/issues/441 + // This is like empty_line4, but includes an empty line at the end. + let haystack = "\ +a +b + +c + + +d + +"; + let byte_count = haystack.len(); + let exp = format!( + "4:\n7:\n8:\n11:\n\nbyte count:{}\n", + byte_count); + let exp_line = format!( + "3:4:\n5:7:\n6:8:\n8:11:\n\nbyte count:{}\n", + byte_count); + + SearcherTester::new(haystack, r"^$") + .expected_no_line_number(&exp) + .expected_with_line_number(&exp_line) + .test(); + } + + #[test] + fn big1() { + let mut haystack = String::new(); + haystack.push_str("a\n"); + // Pick an arbitrary number above the capacity. 
+ for _ in 0..(4 * (DEFAULT_BUFFER_CAPACITY + 7)) { + haystack.push_str("zzz\n"); + } + haystack.push_str("a\n"); + + let byte_count = haystack.len(); + let exp = format!("0:a\n131186:a\n\nbyte count:{}\n", byte_count); + + SearcherTester::new(&haystack, "a") + .line_number(false) + .expected_no_line_number(&exp) + .test(); + } + + #[test] + fn big_error_one_line() { + let mut haystack = String::new(); + haystack.push_str("a\n"); + // Pick an arbitrary number above the capacity. + for _ in 0..(4 * (DEFAULT_BUFFER_CAPACITY + 7)) { + haystack.push_str("zzz\n"); + } + haystack.push_str("a\n"); + + let matcher = RegexMatcher::new("a"); + let mut sink = KitchenSink::new(); + let mut searcher = SearcherBuilder::new() + .heap_limit(Some(3)) // max line length is 4, one byte short + .build(); + let result = searcher.search_reader( + &matcher, + haystack.as_bytes(), + &mut sink, + ); + assert!(result.is_err()); + } + + #[test] + fn big_error_multi_line() { + let mut haystack = String::new(); + haystack.push_str("a\n"); + // Pick an arbitrary number above the capacity. 
+ for _ in 0..(4 * (DEFAULT_BUFFER_CAPACITY + 7)) { + haystack.push_str("zzz\n"); + } + haystack.push_str("a\n"); + + let matcher = RegexMatcher::new("a"); + let mut sink = KitchenSink::new(); + let mut searcher = SearcherBuilder::new() + .multi_line(true) + .heap_limit(Some(haystack.len())) // actually need one more byte + .build(); + let result = searcher.search_reader( + &matcher, + haystack.as_bytes(), + &mut sink, + ); + assert!(result.is_err()); + } + + #[test] + fn binary1() { + let haystack = "\x00a"; + let exp = "\nbyte count:0\nbinary offset:0\n"; + + SearcherTester::new(haystack, "a") + .binary_detection(BinaryDetection::quit(0)) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn binary2() { + let haystack = "a\x00"; + let exp = "\nbyte count:0\nbinary offset:1\n"; + + SearcherTester::new(haystack, "a") + .binary_detection(BinaryDetection::quit(0)) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn binary3() { + let mut haystack = String::new(); + haystack.push_str("a\n"); + for _ in 0..DEFAULT_BUFFER_CAPACITY { + haystack.push_str("zzz\n"); + } + haystack.push_str("a\n"); + haystack.push_str("a\x00a\n"); + haystack.push_str("a\n"); + + // The line buffered searcher has slightly different semantics here. + // Namely, it will *always* detect binary data in the current buffer + // before searching it. Thus, the total number of bytes searched is + // smaller than below. + let exp = "0:a\n\nbyte count:32770\nbinary offset:32773\n"; + // In contrast, the slice readers (for multi line as well) will only + // look for binary data in the initial chunk of bytes. After that + // point, it only looks for binary data in matches. Note though that + // the binary offset remains the same. (See the binary4 test for a case + // where the offset is explicitly different.) 
+ let exp_slice = + "0:a\n32770:a\n\nbyte count:32773\nbinary offset:32773\n"; + + SearcherTester::new(&haystack, "a") + .binary_detection(BinaryDetection::quit(0)) + .line_number(false) + .auto_heap_limit(false) + .expected_no_line_number(exp) + .expected_slice_no_line_number(exp_slice) + .test(); + } + + #[test] + fn binary4() { + let mut haystack = String::new(); + haystack.push_str("a\n"); + for _ in 0..DEFAULT_BUFFER_CAPACITY { + haystack.push_str("zzz\n"); + } + haystack.push_str("a\n"); + // The Read searcher will detect binary data here, but since this is + // beyond the initial buffer size and doesn't otherwise contain a + // match, the Slice reader won't detect the binary data until the next + // line (which is a match). + haystack.push_str("b\x00b\n"); + haystack.push_str("a\x00a\n"); + haystack.push_str("a\n"); + + let exp = "0:a\n\nbyte count:32770\nbinary offset:32773\n"; + // The binary offset for the Slice readers corresponds to the binary + // data in `a\x00a\n` since the first line with binary data + // (`b\x00b\n`) isn't part of a match, and is therefore undetected. + let exp_slice = + "0:a\n32770:a\n\nbyte count:32777\nbinary offset:32777\n"; + + SearcherTester::new(&haystack, "a") + .binary_detection(BinaryDetection::quit(0)) + .line_number(false) + .auto_heap_limit(false) + .expected_no_line_number(exp) + .expected_slice_no_line_number(exp_slice) + .test(); + } + + #[test] + fn passthru_sherlock1() { + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258-but Doctor Watson has to have it taken out for him and dusted, +321-and exhibited clearly, with a label attached. 
+byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .passthru(true) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn passthru_sherlock_invert1() { + let exp = "\ +0-For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +129-be, to a very large extent, the result of luck. Sherlock Holmes +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .passthru(true) + .line_number(false) + .invert_match(true) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_sherlock1() { + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; + +byte count:366 +"; + let exp_lines = "\ +1:0:For the Doctor Watsons of this world, as opposed to the Sherlock +2-65-Holmeses, success in the province of detective work must always +3:129:be, to a very large extent, the result of luck. 
Sherlock Holmes +4-193-can extract a clew from a wisp of straw or a flake of cigar ash; + +byte count:366 +"; + // before and after + line numbers + SearcherTester::new(SHERLOCK, "Sherlock") + .after_context(1) + .before_context(1) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // after + SearcherTester::new(SHERLOCK, "Sherlock") + .after_context(1) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // before + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes + +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .before_context(1) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_sherlock_invert1() { + let exp = "\ +0-For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +129-be, to a very large extent, the result of luck. Sherlock Holmes +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + let exp_lines = "\ +1-0-For the Doctor Watsons of this world, as opposed to the Sherlock +2:65:Holmeses, success in the province of detective work must always +3-129-be, to a very large extent, the result of luck. Sherlock Holmes +4:193:can extract a clew from a wisp of straw or a flake of cigar ash; +5:258:but Doctor Watson has to have it taken out for him and dusted, +6:321:and exhibited clearly, with a label attached. 
+byte count:366 +"; + // before and after + line numbers + SearcherTester::new(SHERLOCK, "Sherlock") + .after_context(1) + .before_context(1) + .line_number(true) + .invert_match(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // before + SearcherTester::new(SHERLOCK, "Sherlock") + .before_context(1) + .line_number(false) + .invert_match(true) + .expected_no_line_number(exp) + .test(); + + // after + let exp = "\ +65:Holmeses, success in the province of detective work must always +129-be, to a very large extent, the result of luck. Sherlock Holmes +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .after_context(1) + .line_number(false) + .invert_match(true) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_sherlock2() { + let exp = "\ +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258-but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + let exp_lines = "\ +2-65-Holmeses, success in the province of detective work must always +3:129:be, to a very large extent, the result of luck. Sherlock Holmes +4:193:can extract a clew from a wisp of straw or a flake of cigar ash; +5-258-but Doctor Watson has to have it taken out for him and dusted, +6:321:and exhibited clearly, with a label attached. 
+byte count:366 +"; + // before + after + line numbers + SearcherTester::new(SHERLOCK, " a ") + .after_context(1) + .before_context(1) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // before + SearcherTester::new(SHERLOCK, " a ") + .before_context(1) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // after + let exp = "\ +129:be, to a very large extent, the result of luck. Sherlock Holmes +193:can extract a clew from a wisp of straw or a flake of cigar ash; +258-but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, " a ") + .after_context(1) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_sherlock_invert2() { + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +129-be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +321-and exhibited clearly, with a label attached. +byte count:366 +"; + let exp_lines = "\ +1:0:For the Doctor Watsons of this world, as opposed to the Sherlock +2:65:Holmeses, success in the province of detective work must always +3-129-be, to a very large extent, the result of luck. Sherlock Holmes +4-193-can extract a clew from a wisp of straw or a flake of cigar ash; +5:258:but Doctor Watson has to have it taken out for him and dusted, +6-321-and exhibited clearly, with a label attached. 
+byte count:366 +"; + // before + after + line numbers + SearcherTester::new(SHERLOCK, " a ") + .after_context(1) + .before_context(1) + .line_number(true) + .invert_match(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // before + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +-- +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, + +byte count:366 +"; + SearcherTester::new(SHERLOCK, " a ") + .before_context(1) + .line_number(false) + .invert_match(true) + .expected_no_line_number(exp) + .test(); + + // after + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +129-be, to a very large extent, the result of luck. Sherlock Holmes +-- +258:but Doctor Watson has to have it taken out for him and dusted, +321-and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, " a ") + .after_context(1) + .line_number(false) + .invert_match(true) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_sherlock3() { + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258-but Doctor Watson has to have it taken out for him and dusted, + +byte count:366 +"; + let exp_lines = "\ +1:0:For the Doctor Watsons of this world, as opposed to the Sherlock +2-65-Holmeses, success in the province of detective work must always +3:129:be, to a very large extent, the result of luck. 
Sherlock Holmes +4-193-can extract a clew from a wisp of straw or a flake of cigar ash; +5-258-but Doctor Watson has to have it taken out for him and dusted, + +byte count:366 +"; + // before and after + line numbers + SearcherTester::new(SHERLOCK, "Sherlock") + .after_context(2) + .before_context(2) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // after + SearcherTester::new(SHERLOCK, "Sherlock") + .after_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // before + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes + +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .before_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_sherlock4() { + let exp = "\ +129-be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, +321-and exhibited clearly, with a label attached. +byte count:366 +"; + let exp_lines = "\ +3-129-be, to a very large extent, the result of luck. Sherlock Holmes +4-193-can extract a clew from a wisp of straw or a flake of cigar ash; +5:258:but Doctor Watson has to have it taken out for him and dusted, +6-321-and exhibited clearly, with a label attached. +byte count:366 +"; + // before and after + line numbers + SearcherTester::new(SHERLOCK, "dusted") + .after_context(2) + .before_context(2) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // after + let exp = "\ +258:but Doctor Watson has to have it taken out for him and dusted, +321-and exhibited clearly, with a label attached. 
+byte count:366 +"; + SearcherTester::new(SHERLOCK, "dusted") + .after_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // before + let exp = "\ +129-be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258:but Doctor Watson has to have it taken out for him and dusted, + +byte count:366 +"; + SearcherTester::new(SHERLOCK, "dusted") + .before_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_sherlock5() { + let exp = "\ +0-For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +129-be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258-but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + let exp_lines = "\ +1-0-For the Doctor Watsons of this world, as opposed to the Sherlock +2:65:Holmeses, success in the province of detective work must always +3-129-be, to a very large extent, the result of luck. Sherlock Holmes +4-193-can extract a clew from a wisp of straw or a flake of cigar ash; +5-258-but Doctor Watson has to have it taken out for him and dusted, +6:321:and exhibited clearly, with a label attached. +byte count:366 +"; + // before and after + line numbers + SearcherTester::new(SHERLOCK, "success|attached") + .after_context(2) + .before_context(2) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // after + let exp = "\ +65:Holmeses, success in the province of detective work must always +129-be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +-- +321:and exhibited clearly, with a label attached. 
+byte count:366 +"; + SearcherTester::new(SHERLOCK, "success|attached") + .after_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // before + let exp = "\ +0-For the Doctor Watsons of this world, as opposed to the Sherlock +65:Holmeses, success in the province of detective work must always +-- +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258-but Doctor Watson has to have it taken out for him and dusted, +321:and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, "success|attached") + .before_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_sherlock6() { + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258-but Doctor Watson has to have it taken out for him and dusted, +321-and exhibited clearly, with a label attached. +byte count:366 +"; + let exp_lines = "\ +1:0:For the Doctor Watsons of this world, as opposed to the Sherlock +2-65-Holmeses, success in the province of detective work must always +3:129:be, to a very large extent, the result of luck. Sherlock Holmes +4-193-can extract a clew from a wisp of straw or a flake of cigar ash; +5-258-but Doctor Watson has to have it taken out for him and dusted, +6-321-and exhibited clearly, with a label attached. 
+byte count:366 +"; + // before and after + line numbers + SearcherTester::new(SHERLOCK, "Sherlock") + .after_context(3) + .before_context(3) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // after + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes +193-can extract a clew from a wisp of straw or a flake of cigar ash; +258-but Doctor Watson has to have it taken out for him and dusted, +321-and exhibited clearly, with a label attached. +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .after_context(3) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // before + let exp = "\ +0:For the Doctor Watsons of this world, as opposed to the Sherlock +65-Holmeses, success in the province of detective work must always +129:be, to a very large extent, the result of luck. Sherlock Holmes + +byte count:366 +"; + SearcherTester::new(SHERLOCK, "Sherlock") + .before_context(3) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_code1() { + // before and after + let exp = "\ +33- +34-fn main() { +46: let stdin = io::stdin(); +75- let stdout = io::stdout(); +106- +107: // Wrap the stdin reader in a Snappy reader. +156: let mut rdr = snap::Reader::new(stdin.lock()); +207- let mut wtr = stdout.lock(); +240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); + +byte count:307 +"; + let exp_lines = "\ +4-33- +5-34-fn main() { +6:46: let stdin = io::stdin(); +7-75- let stdout = io::stdout(); +8-106- +9:107: // Wrap the stdin reader in a Snappy reader. 
+10:156: let mut rdr = snap::Reader::new(stdin.lock()); +11-207- let mut wtr = stdout.lock(); +12-240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); + +byte count:307 +"; + // before and after + line numbers + SearcherTester::new(CODE, "stdin") + .after_context(2) + .before_context(2) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // after + let exp = "\ +46: let stdin = io::stdin(); +75- let stdout = io::stdout(); +106- +107: // Wrap the stdin reader in a Snappy reader. +156: let mut rdr = snap::Reader::new(stdin.lock()); +207- let mut wtr = stdout.lock(); +240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); + +byte count:307 +"; + SearcherTester::new(CODE, "stdin") + .after_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // before + let exp = "\ +33- +34-fn main() { +46: let stdin = io::stdin(); +75- let stdout = io::stdout(); +106- +107: // Wrap the stdin reader in a Snappy reader. +156: let mut rdr = snap::Reader::new(stdin.lock()); + +byte count:307 +"; + SearcherTester::new(CODE, "stdin") + .before_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_code2() { + let exp = "\ +34-fn main() { +46- let stdin = io::stdin(); +75: let stdout = io::stdout(); +106- +107- // Wrap the stdin reader in a Snappy reader. +156- let mut rdr = snap::Reader::new(stdin.lock()); +207: let mut wtr = stdout.lock(); +240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); +305-} + +byte count:307 +"; + let exp_lines = "\ +5-34-fn main() { +6-46- let stdin = io::stdin(); +7:75: let stdout = io::stdout(); +8-106- +9-107- // Wrap the stdin reader in a Snappy reader. 
+10-156- let mut rdr = snap::Reader::new(stdin.lock()); +11:207: let mut wtr = stdout.lock(); +12-240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); +13-305-} + +byte count:307 +"; + // before and after + line numbers + SearcherTester::new(CODE, "stdout") + .after_context(2) + .before_context(2) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // after + let exp = "\ +75: let stdout = io::stdout(); +106- +107- // Wrap the stdin reader in a Snappy reader. +-- +207: let mut wtr = stdout.lock(); +240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); +305-} + +byte count:307 +"; + SearcherTester::new(CODE, "stdout") + .after_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // before + let exp = "\ +34-fn main() { +46- let stdin = io::stdin(); +75: let stdout = io::stdout(); +-- +107- // Wrap the stdin reader in a Snappy reader. +156- let mut rdr = snap::Reader::new(stdin.lock()); +207: let mut wtr = stdout.lock(); + +byte count:307 +"; + SearcherTester::new(CODE, "stdout") + .before_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn context_code3() { + let exp = "\ +20-use std::io; +33- +34:fn main() { +46- let stdin = io::stdin(); +75- let stdout = io::stdout(); +106- +107- // Wrap the stdin reader in a Snappy reader. +156: let mut rdr = snap::Reader::new(stdin.lock()); +207- let mut wtr = stdout.lock(); +240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); + +byte count:307 +"; + let exp_lines = "\ +3-20-use std::io; +4-33- +5:34:fn main() { +6-46- let stdin = io::stdin(); +7-75- let stdout = io::stdout(); +8-106- +9-107- // Wrap the stdin reader in a Snappy reader. 
+10:156: let mut rdr = snap::Reader::new(stdin.lock()); +11-207- let mut wtr = stdout.lock(); +12-240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); + +byte count:307 +"; + // before and after + line numbers + SearcherTester::new(CODE, "fn main|let mut rdr") + .after_context(2) + .before_context(2) + .line_number(true) + .expected_no_line_number(exp) + .expected_with_line_number(exp_lines) + .test(); + + // after + let exp = "\ +34:fn main() { +46- let stdin = io::stdin(); +75- let stdout = io::stdout(); +-- +156: let mut rdr = snap::Reader::new(stdin.lock()); +207- let mut wtr = stdout.lock(); +240- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); + +byte count:307 +"; + SearcherTester::new(CODE, "fn main|let mut rdr") + .after_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + + // before + let exp = "\ +20-use std::io; +33- +34:fn main() { +-- +106- +107- // Wrap the stdin reader in a Snappy reader. +156: let mut rdr = snap::Reader::new(stdin.lock()); + +byte count:307 +"; + SearcherTester::new(CODE, "fn main|let mut rdr") + .before_context(2) + .line_number(false) + .expected_no_line_number(exp) + .test(); + } + + #[test] + fn scratch() { + use sinks; + use testutil::RegexMatcher; + + const SHERLOCK: &'static [u8] = b"\ +For the Doctor Wat\xFFsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. 
Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached.\ + "; + + let haystack = SHERLOCK; + let matcher = RegexMatcher::new("Sherlock"); + let mut searcher = SearcherBuilder::new() + .line_number(true) + .build(); + searcher.search_reader(&matcher, haystack, sinks::Lossy(|n, line| { + print!("{}:{}", n, line); + Ok(true) + })).unwrap(); + } +} diff -Nru ripgrep-0.6.0/grep-searcher/src/searcher/mmap.rs ripgrep-0.10.0.3/grep-searcher/src/searcher/mmap.rs --- ripgrep-0.6.0/grep-searcher/src/searcher/mmap.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/searcher/mmap.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,106 @@ +use std::fs::File; +use std::path::Path; + +use memmap::Mmap; + +/// Controls the strategy used for determining when to use memory maps. +/// +/// If a searcher is called in circumstances where it is possible to use memory +/// maps, and memory maps are enabled, then it will attempt to do so if it +/// believes it will make the search faster. +/// +/// By default, memory maps are disabled. +#[derive(Clone, Debug)] +pub struct MmapChoice(MmapChoiceImpl); + +#[derive(Clone, Debug)] +enum MmapChoiceImpl { + Auto, + Never, +} + +impl Default for MmapChoice { + fn default() -> MmapChoice { + MmapChoice(MmapChoiceImpl::Never) + } +} + +impl MmapChoice { + /// Use memory maps when they are believed to be advantageous. + /// + /// The heuristics used to determine whether to use a memory map or not + /// may depend on many things, including but not limited to, file size + /// and platform. + /// + /// If memory maps are unavailable or cannot be used for a specific input, + /// then normal OS read calls are used instead. 
+ /// + /// # Safety + /// + /// This constructor is not safe because there is no obvious way to + /// encapsulate the safety of file backed memory maps on all platforms + /// without simultaneously negating some or all of their benefits. + /// + /// The specific contract the caller is required to uphold isn't precise, + /// but it basically amounts to something like, "the caller guarantees that + /// the underlying file won't be mutated." This, of course, isn't feasible + /// in many environments. However, command line tools may still decide to + /// take the risk of, say, a `SIGBUS` occurring while attempting to read a + /// memory map. + pub unsafe fn auto() -> MmapChoice { + MmapChoice(MmapChoiceImpl::Auto) + } + + /// Never use memory maps, no matter what. This is the default. + pub fn never() -> MmapChoice { + MmapChoice(MmapChoiceImpl::Never) + } + + /// Return a memory map if memory maps are enabled and if creating a + /// memory map from the given file succeeded and if memory maps are believed + /// to be advantageous for performance. + /// + /// If this does attempt to open a memory map and it fails, then `None` + /// is returned and the corresponding error (along with the file path, if + /// present) is logged at the debug level. + pub(crate) fn open( + &self, + file: &File, + path: Option<&Path>, + ) -> Option { + if !self.is_enabled() { + return None; + } + if cfg!(target_os = "macos") { + // I guess memory maps on macOS aren't great. Should re-evaluate. + return None; + } + // SAFETY: This is acceptable because the only way `MmapChoiceImpl` can + // be `Auto` is if the caller invoked the `auto` constructor, which + // is itself not safe. Thus, this is a propagation of the caller's + // assertion that using memory maps is safe.
+ match unsafe { Mmap::map(file) } { + Ok(mmap) => Some(mmap), + Err(err) => { + if let Some(path) = path { + debug!( + "{}: failed to open memory map: {}", + path.display(), + err + ); + } else { + debug!("failed to open memory map: {}", err); + } + None + } + } + } + + /// Whether this strategy may employ memory maps or not. + pub(crate) fn is_enabled(&self) -> bool { + match self.0 { + MmapChoiceImpl::Auto => true, + MmapChoiceImpl::Never => false, + } + } +} diff -Nru ripgrep-0.6.0/grep-searcher/src/searcher/mod.rs ripgrep-0.10.0.3/grep-searcher/src/searcher/mod.rs --- ripgrep-0.6.0/grep-searcher/src/searcher/mod.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/searcher/mod.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,956 @@ +use std::cell::RefCell; +use std::cmp; +use std::fmt; +use std::fs::File; +use std::io::{self, Read}; +use std::path::Path; + +use encoding_rs; +use encoding_rs_io::DecodeReaderBytesBuilder; +use grep_matcher::{LineTerminator, Match, Matcher}; +use line_buffer::{ + self, BufferAllocation, LineBuffer, LineBufferBuilder, LineBufferReader, + DEFAULT_BUFFER_CAPACITY, alloc_error, +}; +use searcher::glue::{ReadByLine, SliceByLine, MultiLine}; +use sink::{Sink, SinkError}; + +pub use self::mmap::MmapChoice; + +mod core; +mod glue; +mod mmap; + +/// We use this type alias since we want the ergonomics of a matcher's `Match` +/// type, but in practice, we use it for arbitrary ranges, so give it a more +/// accurate name. This is only used in the searcher's internals. +type Range = Match; + +/// The behavior of binary detection while searching. +/// +/// Binary detection is the process of _heuristically_ identifying whether a +/// given chunk of data is binary or not, and then taking an action based on +/// the result of that heuristic. The motivation behind detecting binary data +/// is that binary data often indicates data that is undesirable to search +/// using textual patterns. 
Of course, there are many cases in which this isn't +/// true, which is why binary detection is disabled by default. +/// +/// Unfortunately, binary detection works differently depending on the type of +/// search being executed: +/// +/// 1. When performing a search using a fixed size buffer, binary detection is +/// applied to the buffer's contents as it is filled. Binary detection must +/// be applied to the buffer directly because binary files may not contain +/// line terminators, which could result in exorbitant memory usage. +/// 2. When performing a search using memory maps or by reading data off the +/// heap, then binary detection is only guaranteed to be applied to the +/// parts corresponding to a match. When `Quit` is enabled, then the first +/// few KB of the data are searched for binary data. +#[derive(Clone, Debug, Default)] +pub struct BinaryDetection(line_buffer::BinaryDetection); + +impl BinaryDetection { + /// No binary detection is performed. Data reported by the searcher may + /// contain arbitrary bytes. + /// + /// This is the default. + pub fn none() -> BinaryDetection { + BinaryDetection(line_buffer::BinaryDetection::None) + } + + /// Binary detection is performed by looking for the given byte. + /// + /// When searching is performed using a fixed size buffer, then the + /// contents of that buffer are always searched for the presence of this + /// byte. If it is found, then the underlying data is considered binary + /// and the search stops as if it reached EOF. + /// + /// When searching is performed with the entire contents mapped into + /// memory, then binary detection is more conservative. Namely, only a + /// fixed sized region at the beginning of the contents are detected for + /// binary data. As a compromise, any subsequent matching (or context) + /// lines are also searched for binary data. If binary data is detected at + /// any point, then the search stops as if it reached EOF. 
+ pub fn quit(binary_byte: u8) -> BinaryDetection { + BinaryDetection(line_buffer::BinaryDetection::Quit(binary_byte)) + } + + // TODO(burntsushi): Figure out how to make binary conversion work. This + // permits implementing GNU grep's default behavior, which is to zap NUL + // bytes but still execute a search (if a match is detected, then GNU grep + // stops and reports that a match was found but doesn't print the matching + // line itself). + // + // This behavior is pretty simple to implement using the line buffer (and + // in fact, it is already implemented and tested), since there's a fixed + // size buffer that we can easily write to. The issue arises when searching + // a `&[u8]` (whether on the heap or via a memory map), since this isn't + // something we can easily write to. + + /// The given byte is searched in all contents read by the line buffer. If + /// it occurs, then it is replaced by the line terminator. The line buffer + /// guarantees that this byte will never be observable by callers. + #[allow(dead_code)] + fn convert(binary_byte: u8) -> BinaryDetection { + BinaryDetection(line_buffer::BinaryDetection::Convert(binary_byte)) + } +} + +/// An encoding to use when searching. +/// +/// An encoding can be used to configure a +/// [`SearcherBuilder`](struct.SearcherBuilder.html) +/// to transcode source data from an encoding to UTF-8 before searching. +/// +/// An `Encoding` will always be cheap to clone. +#[derive(Clone, Debug)] +pub struct Encoding(&'static encoding_rs::Encoding); + +impl Encoding { + /// Create a new encoding for the specified label. + /// + /// The encoding label provided is mapped to an encoding via the set of + /// available choices specified in the + /// [Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get). + /// If the given label does not correspond to a valid encoding, then this + /// returns an error.
+ pub fn new(label: &str) -> Result { + let label = label.as_bytes(); + match encoding_rs::Encoding::for_label_no_replacement(label) { + Some(encoding) => Ok(Encoding(encoding)), + None => { + Err(ConfigError::UnknownEncoding { label: label.to_vec() }) + } + } + } +} + +/// The internal configuration of a searcher. This is shared among several +/// search related types, but is only ever written to by the SearcherBuilder. +#[derive(Clone, Debug)] +pub struct Config { + /// The line terminator to use. + line_term: LineTerminator, + /// Whether to invert matching. + invert_match: bool, + /// The number of lines after a match to include. + after_context: usize, + /// The number of lines before a match to include. + before_context: usize, + /// Whether to enable unbounded context or not. + passthru: bool, + /// Whether to count line numbers. + line_number: bool, + /// The maximum amount of heap memory to use. + /// + /// When not given, no explicit limit is enforced. When set to `0`, then + /// only the memory map search strategy is available. + heap_limit: Option, + /// The memory map strategy. + mmap: MmapChoice, + /// The binary data detection strategy. + binary: BinaryDetection, + /// Whether to enable matching across multiple lines. + multi_line: bool, + /// An encoding that, when present, causes the searcher to transcode all + /// input from the encoding to UTF-8. + encoding: Option, +} + +impl Default for Config { + fn default() -> Config { + Config { + line_term: LineTerminator::default(), + invert_match: false, + after_context: 0, + before_context: 0, + passthru: false, + line_number: true, + heap_limit: None, + mmap: MmapChoice::default(), + binary: BinaryDetection::default(), + multi_line: false, + encoding: None, + } + } +} + +impl Config { + /// Return the maximal amount of lines needed to fulfill this + /// configuration's context. + /// + /// If this returns `0`, then no context is ever needed. 
+ fn max_context(&self) -> usize { + cmp::max(self.before_context, self.after_context) + } + + /// Build a line buffer from this configuration. + fn line_buffer(&self) -> LineBuffer { + let mut builder = LineBufferBuilder::new(); + builder + .line_terminator(self.line_term.as_byte()) + .binary_detection(self.binary.0); + + if let Some(limit) = self.heap_limit { + let (capacity, additional) = + if limit <= DEFAULT_BUFFER_CAPACITY { + (limit, 0) + } else { + (DEFAULT_BUFFER_CAPACITY, limit - DEFAULT_BUFFER_CAPACITY) + }; + builder + .capacity(capacity) + .buffer_alloc(BufferAllocation::Error(additional)); + } + builder.build() + } +} + +/// An error that can occur when building a searcher. +/// +/// This error occurs when a non-sensical configuration is present when trying +/// to construct a `Searcher` from a `SearcherBuilder`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ConfigError { + /// Indicates that the heap limit configuration prevents all possible + /// search strategies from being used. For example, if the heap limit is + /// set to 0 and memory map searching is disabled or unavailable. + SearchUnavailable, + /// Occurs when a matcher reports a line terminator that is different than + /// the one configured in the searcher. + MismatchedLineTerminators { + /// The matcher's line terminator. + matcher: LineTerminator, + /// The searcher's line terminator. + searcher: LineTerminator, + }, + /// Occurs when no encoding could be found for a particular label. + UnknownEncoding { + /// The provided encoding label that could not be found. + label: Vec, + }, + /// Hints that destructuring should not be exhaustive. + /// + /// This enum may grow additional variants, so this makes sure clients + /// don't count on exhaustive matching. (Otherwise, adding a new variant + /// could break existing code.) 
+ #[doc(hidden)] + __Nonexhaustive, +} + +impl ::std::error::Error for ConfigError { + fn description(&self) -> &str { "grep-searcher configuration error" } +} + +impl fmt::Display for ConfigError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ConfigError::SearchUnavailable => { + write!(f, "grep config error: no available searchers") + } + ConfigError::MismatchedLineTerminators { matcher, searcher } => { + write!( + f, + "grep config error: mismatched line terminators, \ + matcher has {:?} but searcher has {:?}", + matcher, + searcher + ) + } + ConfigError::UnknownEncoding { ref label } => { + write!( + f, + "grep config error: unknown encoding: {}", + String::from_utf8_lossy(label), + ) + } + _ => panic!("BUG: unexpected variant found"), + } + } +} + +/// A builder for configuring a searcher. +/// +/// A search builder permits specifying the configuration of a searcher, +/// including options like whether to invert the search or to enable multi +/// line search. +/// +/// Once a searcher has been built, it is beneficial to reuse that searcher +/// for multiple searches, if possible. +#[derive(Clone, Debug)] +pub struct SearcherBuilder { + config: Config, +} + +impl Default for SearcherBuilder { + fn default() -> SearcherBuilder { + SearcherBuilder::new() + } +} + +impl SearcherBuilder { + /// Create a new searcher builder with a default configuration. + pub fn new() -> SearcherBuilder { + SearcherBuilder { + config: Config::default(), + } + } + + /// Build a searcher from this builder's configuration.
+ pub fn build(&self) -> Searcher { + let mut config = self.config.clone(); + if config.passthru { + config.before_context = 0; + config.after_context = 0; + } + let mut decode_builder = DecodeReaderBytesBuilder::new(); + decode_builder + .encoding(self.config.encoding.as_ref().map(|e| e.0)) + .utf8_passthru(true) + .bom_override(true); + Searcher { + config: config, + decode_builder: decode_builder, + decode_buffer: RefCell::new(vec![0; 8 * (1<<10)]), + line_buffer: RefCell::new(self.config.line_buffer()), + multi_line_buffer: RefCell::new(vec![]), + } + } + + /// Set the line terminator that is used by the searcher. + /// + /// When using a searcher, if the matcher provided has a line terminator + /// set, then it must be the same as this one. If they aren't, building + /// a searcher will return an error. + /// + /// By default, this is set to `b'\n'`. + pub fn line_terminator( + &mut self, + line_term: LineTerminator, + ) -> &mut SearcherBuilder { + self.config.line_term = line_term; + self + } + + /// Whether to invert matching, whereby lines that don't match are reported + /// instead of reporting lines that do match. + /// + /// By default, this is disabled. + pub fn invert_match(&mut self, yes: bool) -> &mut SearcherBuilder { + self.config.invert_match = yes; + self + } + + /// Whether to count and include line numbers with matching lines. + /// + /// This is enabled by default. There is a small performance penalty + /// associated with computing line numbers, so this can be disabled when + /// this isn't desirable. + pub fn line_number(&mut self, yes: bool) -> &mut SearcherBuilder { + self.config.line_number = yes; + self + } + + /// Whether to enable multi line search or not. + /// + /// When multi line search is enabled, matches *may* match across multiple + /// lines. Conversely, when multi line search is disabled, it is impossible + /// for any match to span more than one line. 
+ /// + /// **Warning:** multi line search requires having the entire contents to + /// search mapped in memory at once. When searching files, memory maps + /// will be used if possible and if they are enabled, which avoids using + /// your program's heap. However, if memory maps cannot be used (e.g., + /// for searching streams like `stdin` or if transcoding is necessary), + /// then the entire contents of the stream are read on to the heap before + /// starting the search. + /// + /// This is disabled by default. + pub fn multi_line(&mut self, yes: bool) -> &mut SearcherBuilder { + self.config.multi_line = yes; + self + } + + /// Whether to include a fixed number of lines after every match. + /// + /// When this is set to a non-zero number, then the searcher will report + /// `line_count` contextual lines after every match. + /// + /// This is set to `0` by default. + pub fn after_context( + &mut self, + line_count: usize, + ) -> &mut SearcherBuilder { + self.config.after_context = line_count; + self + } + + /// Whether to include a fixed number of lines before every match. + /// + /// When this is set to a non-zero number, then the searcher will report + /// `line_count` contextual lines before every match. + /// + /// This is set to `0` by default. + pub fn before_context( + &mut self, + line_count: usize, + ) -> &mut SearcherBuilder { + self.config.before_context = line_count; + self + } + + /// Whether to enable the "passthru" feature or not. + /// + /// When passthru is enabled, it effectively treats all non-matching lines + /// as contextual lines. In other words, enabling this is akin to + /// requesting an unbounded number of before and after contextual lines. + /// + /// When passthru mode is enabled, any `before_context` or `after_context` + /// settings are ignored by setting them to `0`. + /// + /// This is disabled by default. 
+ pub fn passthru(&mut self, yes: bool) -> &mut SearcherBuilder { + self.config.passthru = yes; + self + } + + /// Set an approximate limit on the amount of heap space used by a + /// searcher. + /// + /// The heap limit is enforced in two scenarios: + /// + /// * When searching using a fixed size buffer, the heap limit controls + /// how big this buffer is allowed to be. Assuming contexts are disabled, + /// the minimum size of this buffer is the length (in bytes) of the + /// largest single line in the contents being searched. If any line + /// exceeds the heap limit, then an error will be returned. + /// * When performing a multi line search, a fixed size buffer cannot be + /// used. Thus, the only choices are to read the entire contents on to + /// the heap, or use memory maps. In the former case, the heap limit set + /// here is enforced. + /// + /// If a heap limit is set to `0`, then no heap space is used. If there are + /// no alternative strategies available for searching without heap space + /// (e.g., memory maps are disabled), then the searcher wil return an error + /// immediately. + /// + /// By default, no limit is set. + pub fn heap_limit( + &mut self, + bytes: Option, + ) -> &mut SearcherBuilder { + self.config.heap_limit = bytes; + self + } + + /// Set the strategy to employ use of memory maps. + /// + /// Currently, there are only two strategies that can be employed: + /// + /// * **Automatic** - A searcher will use heuristics, including but not + /// limited to file size and platform, to determine whether to use memory + /// maps or not. + /// * **Never** - Memory maps will never be used. If multi line search is + /// enabled, then the entire contents will be read on to the heap before + /// searching begins. + /// + /// The default behavior is **never**. Generally speaking, and perhaps + /// against conventional wisdom, memory maps don't necessarily enable + /// faster searching. 
For example, depending on the platform, using memory + /// maps while searching a large directory can actually be quite a bit + /// slower than using normal read calls because of the overhead of managing + /// the memory maps. + /// + /// Memory maps can be faster in some cases however. On some platforms, + /// when searching a very large file that *is already in memory*, it can + /// be slightly faster to search it as a memory map instead of using + /// normal read calls. + /// + /// Finally, memory maps have a somewhat complicated safety story in Rust. + /// If you aren't sure whether enabling memory maps is worth it, then just + /// don't bother with it. + /// + /// **WARNING**: If your process is searching a file backed memory map + /// at the same time that file is truncated, then it's possible for the + /// process to terminate with a bus error. + pub fn memory_map( + &mut self, + strategy: MmapChoice, + ) -> &mut SearcherBuilder { + self.config.mmap = strategy; + self + } + + /// Set the binary detection strategy. + /// + /// The binary detection strategy determines not only how the searcher + /// detects binary data, but how it responds to the presence of binary + /// data. See the [`BinaryDetection`](struct.BinaryDetection.html) type + /// for more information. + /// + /// By default, binary detection is disabled. + pub fn binary_detection( + &mut self, + detection: BinaryDetection, + ) -> &mut SearcherBuilder { + self.config.binary = detection; + self + } + + /// Set the encoding used to read the source data before searching. + /// + /// When an encoding is provided, then the source data is _unconditionally_ + /// transcoded using the encoding, unless a BOM is present. If a BOM is + /// present, then the encoding indicated by the BOM is used instead. If the + /// transcoding process encounters an error, then bytes are replaced with + /// the Unicode replacement codepoint. 
+ /// + /// When no encoding is specified (the default), then BOM sniffing is used + /// to determine whether the source data is UTF-8 or UTF-16, and + /// transcoding will be performed automatically. If no BOM could be found, + /// then the source data is searched _as if_ it were UTF-8. However, so + /// long as the source data is at least ASCII compatible, then it is + /// possible for a search to produce useful results. + pub fn encoding( + &mut self, + encoding: Option, + ) -> &mut SearcherBuilder { + self.config.encoding = encoding; + self + } +} + +/// A searcher executes searches over a haystack and writes results to a caller +/// provided sink. +/// +/// Matches are detected via implementations of the `Matcher` trait, which must +/// be provided by the caller when executing a search. +/// +/// When possible, a searcher should be reused. +#[derive(Clone, Debug)] +pub struct Searcher { + /// The configuration for this searcher. + /// + /// We make most of these settings available to users of `Searcher` via + /// public API methods, which can be queried in implementations of `Sink` + /// if necessary. + config: Config, + /// A builder for constructing a streaming reader that transcodes source + /// data according to either an explicitly specified encoding or via an + /// automatically detected encoding via BOM sniffing. + /// + /// When no transcoding is needed, then the transcoder built will pass + /// through the underlying bytes with no additional overhead. + decode_builder: DecodeReaderBytesBuilder, + /// A buffer that is used for transcoding scratch space. + decode_buffer: RefCell>, + /// A line buffer for use in line oriented searching. + /// + /// We wrap it in a RefCell to permit lending out borrows of `Searcher` + /// to sinks. We still require a mutable borrow to execute a search, so + /// we statically prevent callers from causing RefCell to panic at runtime + /// due to a borrowing violation. 
+ line_buffer: RefCell, + /// A buffer in which to store the contents of a reader when performing a + /// multi line search. In particular, multi line searches cannot be + /// performed incrementally, and need the entire haystack in memory at + /// once. + multi_line_buffer: RefCell>, +} + +impl Searcher { + /// Create a new searcher with a default configuration. + /// + /// To configure the searcher (e.g., invert matching, enable memory maps, + /// enable contexts, etc.), use the + /// [`SearcherBuilder`](struct.SearcherBuilder.html). + pub fn new() -> Searcher { + SearcherBuilder::new().build() + } + + /// Execute a search over the file with the given path and write the + /// results to the given sink. + /// + /// If memory maps are enabled and the searcher heuristically believes + /// memory maps will help the search run faster, then this will use + /// memory maps. For this reason, callers should prefer using this method + /// or `search_file` over the more generic `search_reader` when possible. + pub fn search_path( + &mut self, + matcher: M, + path: P, + write_to: S, + ) -> Result<(), S::Error> + where P: AsRef, + M: Matcher, + S: Sink, + { + let path = path.as_ref(); + let file = File::open(path).map_err(S::Error::error_io)?; + self.search_file_maybe_path(matcher, Some(path), &file, write_to) + } + + /// Execute a search over a file and write the results to the given sink. + /// + /// If memory maps are enabled and the searcher heuristically believes + /// memory maps will help the search run faster, then this will use + /// memory maps. For this reason, callers should prefer using this method + /// or `search_path` over the more generic `search_reader` when possible. 
+ pub fn search_file( + &mut self, + matcher: M, + file: &File, + write_to: S, + ) -> Result<(), S::Error> + where M: Matcher, + S: Sink, + { + self.search_file_maybe_path(matcher, None, file, write_to) + } + + fn search_file_maybe_path( + &mut self, + matcher: M, + path: Option<&Path>, + file: &File, + write_to: S, + ) -> Result<(), S::Error> + where M: Matcher, + S: Sink, + { + if let Some(mmap) = self.config.mmap.open(file, path) { + trace!("{:?}: searching via memory map", path); + return self.search_slice(matcher, &mmap, write_to); + } + // Fast path for multi-line searches of files when memory maps are + // not enabled. This pre-allocates a buffer roughly the size of the + // file, which isn't possible when searching an arbitrary io::Read. + if self.multi_line_with_matcher(&matcher) { + trace!("{:?}: reading entire file on to heap for mulitline", path); + self.fill_multi_line_buffer_from_file::(file)?; + trace!("{:?}: searching via multiline strategy", path); + MultiLine::new( + self, + matcher, + &*self.multi_line_buffer.borrow(), + write_to, + ).run() + } else { + trace!("{:?}: searching using generic reader", path); + self.search_reader(matcher, file, write_to) + } + } + + /// Execute a search over any implementation of `io::Read` and write the + /// results to the given sink. + /// + /// When possible, this implementation will search the reader incrementally + /// without reading it into memory. In some cases---for example, if multi + /// line search is enabled---an incremental search isn't possible and the + /// given reader is consumed completely and placed on the heap before + /// searching begins. For this reason, when multi line search is enabled, + /// one should try to use higher level APIs (e.g., searching by file or + /// file path) so that memory maps can be used if they are available and + /// enabled. 
+ pub fn search_reader( + &mut self, + matcher: M, + read_from: R, + write_to: S, + ) -> Result<(), S::Error> + where M: Matcher, + R: io::Read, + S: Sink, + { + self.check_config(&matcher).map_err(S::Error::error_config)?; + + let mut decode_buffer = self.decode_buffer.borrow_mut(); + let read_from = self.decode_builder + .build_with_buffer(read_from, &mut *decode_buffer) + .map_err(S::Error::error_io)?; + + if self.multi_line_with_matcher(&matcher) { + trace!("generic reader: reading everything to heap for multiline"); + self.fill_multi_line_buffer_from_reader::<_, S>(read_from)?; + trace!("generic reader: searching via multiline strategy"); + MultiLine::new( + self, + matcher, + &*self.multi_line_buffer.borrow(), + write_to, + ).run() + } else { + let mut line_buffer = self.line_buffer.borrow_mut(); + let rdr = LineBufferReader::new(read_from, &mut *line_buffer); + trace!("generic reader: searching via roll buffer strategy"); + ReadByLine::new(self, matcher, rdr, write_to).run() + } + } + + /// Execute a search over the given slice and write the results to the + /// given sink. + pub fn search_slice( + &mut self, + matcher: M, + slice: &[u8], + write_to: S, + ) -> Result<(), S::Error> + where M: Matcher, + S: Sink, + { + self.check_config(&matcher).map_err(S::Error::error_config)?; + + // We can search the slice directly, unless we need to do transcoding. + if self.slice_needs_transcoding(slice) { + trace!("slice reader: needs transcoding, using generic reader"); + return self.search_reader(matcher, slice, write_to); + } + if self.multi_line_with_matcher(&matcher) { + trace!("slice reader: searching via multiline strategy"); + MultiLine::new(self, matcher, slice, write_to).run() + } else { + trace!("slice reader: searching via slice-by-line strategy"); + SliceByLine::new(self, matcher, slice, write_to).run() + } + } + + /// Check that the searcher's configuration and the matcher are consistent + /// with each other. 
+ fn check_config(&self, matcher: M) -> Result<(), ConfigError> { + if self.config.heap_limit == Some(0) + && !self.config.mmap.is_enabled() + { + return Err(ConfigError::SearchUnavailable); + } + let matcher_line_term = match matcher.line_terminator() { + None => return Ok(()), + Some(line_term) => line_term, + }; + if matcher_line_term != self.config.line_term { + return Err(ConfigError::MismatchedLineTerminators { + matcher: matcher_line_term, + searcher: self.config.line_term, + }); + } + Ok(()) + } + + /// Returns true if and only if the given slice needs to be transcoded. + fn slice_needs_transcoding(&self, slice: &[u8]) -> bool { + self.config.encoding.is_some() || slice_has_utf16_bom(slice) + } +} + +/// The following methods permit querying the configuration of a searcher. +/// These can be useful in generic implementations of +/// [`Sink`](trait.Sink.html), +/// where the output may be tailored based on how the searcher is configured. +impl Searcher { + /// Returns the line terminator used by this searcher. + #[inline] + pub fn line_terminator(&self) -> LineTerminator { + self.config.line_term + } + + /// Returns true if and only if this searcher is configured to invert its + /// search results. That is, matching lines are lines that do **not** match + /// the searcher's matcher. + #[inline] + pub fn invert_match(&self) -> bool { + self.config.invert_match + } + + /// Returns true if and only if this searcher is configured to count line + /// numbers. + #[inline] + pub fn line_number(&self) -> bool { + self.config.line_number + } + + /// Returns true if and only if this searcher is configured to perform + /// multi line search. + #[inline] + pub fn multi_line(&self) -> bool { + self.config.multi_line + } + + /// Returns true if and only if this searcher will choose a multi-line + /// strategy given the provided matcher. 
+ /// + /// This may diverge from the result of `multi_line` in cases where the + /// searcher has been configured to execute a search that can report + /// matches over multiple lines, but where the matcher guarantees that it + /// will never produce a match over multiple lines. + pub fn multi_line_with_matcher(&self, matcher: M) -> bool { + if !self.multi_line() { + return false; + } + if let Some(line_term) = matcher.line_terminator() { + if line_term == self.line_terminator() { + return false; + } + } + if let Some(non_matching) = matcher.non_matching_bytes() { + // If the line terminator is CRLF, we don't actually need to care + // whether the regex can match `\r` or not. Namely, a `\r` is + // neither necessary nor sufficient to terminate a line. A `\n` is + // always required. + if non_matching.contains(self.line_terminator().as_byte()) { + return false; + } + } + true + } + + /// Returns the number of "after" context lines to report. When context + /// reporting is not enabled, this returns `0`. + #[inline] + pub fn after_context(&self) -> usize { + self.config.after_context + } + + /// Returns the number of "before" context lines to report. When context + /// reporting is not enabled, this returns `0`. + #[inline] + pub fn before_context(&self) -> usize { + self.config.before_context + } + + /// Returns true if and only if the searcher has "passthru" mode enabled. + #[inline] + pub fn passthru(&self) -> bool { + self.config.passthru + } + + /// Fill the buffer for use with multi-line searching from the given file. + /// This reads from the file until EOF or until an error occurs. If the + /// contents exceed the configured heap limit, then an error is returned. 
+ fn fill_multi_line_buffer_from_file( + &self, + file: &File, + ) -> Result<(), S::Error> { + assert!(self.config.multi_line); + + let mut decode_buffer = self.decode_buffer.borrow_mut(); + let mut read_from = self.decode_builder + .build_with_buffer(file, &mut *decode_buffer) + .map_err(S::Error::error_io)?; + + // If we don't have a heap limit, then we can defer to std's + // read_to_end implementation. fill_multi_line_buffer_from_reader will + // do this too, but since we have a File, we can be a bit smarter about + // pre-allocating here. + // + // If we're transcoding, then our pre-allocation might not be exact, + // but is probably still better than nothing. + if self.config.heap_limit.is_none() { + let mut buf = self.multi_line_buffer.borrow_mut(); + buf.clear(); + let cap = file + .metadata() + .map(|m| m.len() as usize + 1) + .unwrap_or(0); + buf.reserve(cap); + read_from.read_to_end(&mut *buf).map_err(S::Error::error_io)?; + return Ok(()); + } + self.fill_multi_line_buffer_from_reader::<_, S>(read_from) + } + + /// Fill the buffer for use with multi-line searching from the given + /// reader. This reads from the reader until EOF or until an error occurs. + /// If the contents exceed the configured heap limit, then an error is + /// returned. + fn fill_multi_line_buffer_from_reader( + &self, + mut read_from: R, + ) -> Result<(), S::Error> { + assert!(self.config.multi_line); + + let mut buf = self.multi_line_buffer.borrow_mut(); + buf.clear(); + + // If we don't have a heap limit, then we can defer to std's + // read_to_end implementation... + let heap_limit = match self.config.heap_limit { + Some(heap_limit) => heap_limit, + None => { + read_from.read_to_end(&mut *buf).map_err(S::Error::error_io)?; + return Ok(()); + } + }; + if heap_limit == 0 { + return Err(S::Error::error_io(alloc_error(heap_limit))); + } + + // ... otherwise we need to roll our own. 
This is likely quite a bit + // slower than what is optimal, but we avoid worry about memory safety + // until there's a compelling reason to speed this up. + buf.resize(cmp::min(DEFAULT_BUFFER_CAPACITY, heap_limit), 0); + let mut pos = 0; + loop { + let nread = match read_from.read(&mut buf[pos..]) { + Ok(nread) => nread, + Err(ref err) if err.kind() == io::ErrorKind::Interrupted => { + continue; + } + Err(err) => return Err(S::Error::error_io(err)), + }; + if nread == 0 { + buf.resize(pos, 0); + return Ok(()); + } + + pos += nread; + if buf[pos..].is_empty() { + let additional = heap_limit - buf.len(); + if additional == 0 { + return Err(S::Error::error_io(alloc_error(heap_limit))); + } + let limit = buf.len() + additional; + let doubled = 2 * buf.len(); + buf.resize(cmp::min(doubled, limit), 0); + } + } + } +} + +/// Returns true if and only if the given slice begins with a UTF-16 BOM. +/// +/// This is used by the searcher to determine if a transcoder is necessary. +/// Otherwise, it is advantageous to search the slice directly. 
+fn slice_has_utf16_bom(slice: &[u8]) -> bool { + let enc = match encoding_rs::Encoding::for_bom(slice) { + None => return false, + Some((enc, _)) => enc, + }; + [encoding_rs::UTF_16LE, encoding_rs::UTF_16BE].contains(&enc) +} + +#[cfg(test)] +mod tests { + use testutil::{KitchenSink, RegexMatcher}; + use super::*; + + #[test] + fn config_error_heap_limit() { + let matcher = RegexMatcher::new(""); + let sink = KitchenSink::new(); + let mut searcher = SearcherBuilder::new() + .heap_limit(Some(0)) + .build(); + let res = searcher.search_slice(matcher, &[], sink); + assert!(res.is_err()); + } + + #[test] + fn config_error_line_terminator() { + let mut matcher = RegexMatcher::new(""); + matcher.set_line_term(Some(LineTerminator::byte(b'z'))); + + let sink = KitchenSink::new(); + let mut searcher = Searcher::new(); + let res = searcher.search_slice(matcher, &[], sink); + assert!(res.is_err()); + } +} diff -Nru ripgrep-0.6.0/grep-searcher/src/sink.rs ripgrep-0.10.0.3/grep-searcher/src/sink.rs --- ripgrep-0.6.0/grep-searcher/src/sink.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/sink.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,606 @@ +use std::fmt; +use std::io; + +use grep_matcher::LineTerminator; + +use lines::LineIter; +use searcher::{ConfigError, Searcher}; + +/// A trait that describes errors that can be reported by searchers and +/// implementations of `Sink`. +/// +/// Unless you have a specialized use case, you probably don't need to +/// implement this trait explicitly. It's likely that using `io::Error` (which +/// implements this trait) for your error type is good enough, largely because +/// most errors that occur during search will likely be an `io::Error`. +pub trait SinkError: Sized { + /// A constructor for converting any value that satisfies the + /// `fmt::Display` trait into an error. 
+ fn error_message(message: T) -> Self; + + /// A constructor for converting I/O errors that occur while searching into + /// an error of this type. + /// + /// By default, this is implemented via the `error_message` constructor. + fn error_io(err: io::Error) -> Self { + Self::error_message(err) + } + + /// A constructor for converting configuration errors that occur while + /// building a searcher into an error of this type. + /// + /// By default, this is implemented via the `error_message` constructor. + fn error_config(err: ConfigError) -> Self { + Self::error_message(err) + } +} + +/// An `io::Error` can be used as an error for `Sink` implementations out of +/// the box. +impl SinkError for io::Error { + fn error_message(message: T) -> io::Error { + io::Error::new(io::ErrorKind::Other, message.to_string()) + } + + fn error_io(err: io::Error) -> io::Error { + err + } +} + +/// A `Box` can be used as an error for `Sink` +/// implementations out of the box. +impl SinkError for Box<::std::error::Error> { + fn error_message(message: T) -> Box<::std::error::Error> { + Box::<::std::error::Error>::from(message.to_string()) + } +} + +/// A trait that defines how results from searchers are handled. +/// +/// In this crate, a searcher follows the "push" model. What that means is that +/// the searcher drives execution, and pushes results back to the caller. This +/// is in contrast to a "pull" model where the caller drives execution and +/// takes results as they need them. These are also known as "internal" and +/// "external" iteration strategies, respectively. +/// +/// For a variety of reasons, including the complexity of the searcher +/// implementation, this crate chooses the "push" or "internal" model of +/// execution. Thus, in order to act on search results, callers must provide +/// an implementation of this trait to a searcher, and the searcher is then +/// responsible for calling the methods on this trait. 
+/// +/// This trait defines several behaviors: +/// +/// * What to do when a match is found. Callers must provide this. +/// * What to do when an error occurs. Callers must provide this via the +/// [`SinkError`](trait.SinkError.html) trait. Generally, callers can just +/// use `io::Error` for this, which already implements `SinkError`. +/// * What to do when a contextual line is found. By default, these are +/// ignored. +/// * What to do when a gap between contextual lines has been found. By +/// default, this is ignored. +/// * What to do when a search has started. By default, this does nothing. +/// * What to do when a search has finished successfully. By default, this does +/// nothing. +/// +/// Callers must, at minimum, specify the behavior when an error occurs and +/// the behavior when a match occurs. The rest is optional. For each behavior, +/// callers may report an error (say, if writing the result to another +/// location failed) or simply return `false` if they want the search to stop +/// (e.g., when implementing a cap on the number of search results to show). +/// +/// When errors are reported (whether in the searcher or in the implementation +/// of `Sink`), then searchers quit immediately without calling `finish`. +/// +/// For simpler uses of `Sink`, callers may elect to use one of +/// the more convenient but less flexible implementations in the +/// [`sinks`](sinks/index.html) module. +pub trait Sink { + /// The type of an error that should be reported by a searcher. + /// + /// Errors of this type are not only returned by the methods on this + /// trait, but the constructors defined in `SinkError` are also used in + /// the searcher implementation itself. e.g., When a I/O error occurs when + /// reading data from a file. + type Error: SinkError; + + /// This method is called whenever a match is found. 
+ /// + /// If multi line is enabled on the searcher, then the match reported here + /// may span multiple lines and it may include multiple matches. When multi + /// line is disabled, then the match is guaranteed to span exactly one + /// non-empty line (where a single line is, at minimum, a line terminator). + /// + /// If this returns `true`, then searching continues. If this returns + /// `false`, then searching is stopped immediately and `finish` is called. + /// + /// If this returns an error, then searching is stopped immediately, + /// `finish` is not called and the error is bubbled back up to the caller + /// of the searcher. + fn matched( + &mut self, + _searcher: &Searcher, + _mat: &SinkMatch, + ) -> Result; + + /// This method is called whenever a context line is found, and is optional + /// to implement. By default, it does nothing and returns `true`. + /// + /// In all cases, the context given is guaranteed to span exactly one + /// non-empty line (where a single line is, at minimum, a line terminator). + /// + /// If this returns `true`, then searching continues. If this returns + /// `false`, then searching is stopped immediately and `finish` is called. + /// + /// If this returns an error, then searching is stopped immediately, + /// `finish` is not called and the error is bubbled back up to the caller + /// of the searcher. + #[inline] + fn context( + &mut self, + _searcher: &Searcher, + _context: &SinkContext, + ) -> Result { + Ok(true) + } + + /// This method is called whenever a break in contextual lines is found, + /// and is optional to implement. By default, it does nothing and returns + /// `true`. + /// + /// A break can only occur when context reporting is enabled (that is, + /// either or both of `before_context` or `after_context` are greater than + /// `0`). More precisely, a break occurs between non-contiguous groups of + /// lines. + /// + /// If this returns `true`, then searching continues. 
If this returns + /// `false`, then searching is stopped immediately and `finish` is called. + /// + /// If this returns an error, then searching is stopped immediately, + /// `finish` is not called and the error is bubbled back up to the caller + /// of the searcher. + #[inline] + fn context_break( + &mut self, + _searcher: &Searcher, + ) -> Result { + Ok(true) + } + + /// This method is called when a search has begun, before any search is + /// executed. By default, this does nothing. + /// + /// If this returns `true`, then searching continues. If this returns + /// `false`, then searching is stopped immediately and `finish` is called. + /// + /// If this returns an error, then searching is stopped immediately, + /// `finish` is not called and the error is bubbled back up to the caller + /// of the searcher. + #[inline] + fn begin( + &mut self, + _searcher: &Searcher, + ) -> Result { + Ok(true) + } + + /// This method is called when a search has completed. By default, this + /// does nothing. + /// + /// If this returns an error, the error is bubbled back up to the caller of + /// the searcher. 
+ #[inline] + fn finish( + &mut self, + _searcher: &Searcher, + _: &SinkFinish, + ) -> Result<(), Self::Error> { + Ok(()) + } +} + +impl<'a, S: Sink> Sink for &'a mut S { + type Error = S::Error; + + #[inline] + fn matched( + &mut self, + searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + (**self).matched(searcher, mat) + } + + #[inline] + fn context( + &mut self, + searcher: &Searcher, + context: &SinkContext, + ) -> Result { + (**self).context(searcher, context) + } + + #[inline] + fn context_break( + &mut self, + searcher: &Searcher, + ) -> Result { + (**self).context_break(searcher) + } + + #[inline] + fn begin( + &mut self, + searcher: &Searcher, + ) -> Result { + (**self).begin(searcher) + } + + #[inline] + fn finish( + &mut self, + searcher: &Searcher, + sink_finish: &SinkFinish, + ) -> Result<(), S::Error> { + (**self).finish(searcher, sink_finish) + } +} + +impl Sink for Box { + type Error = S::Error; + + #[inline] + fn matched( + &mut self, + searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + (**self).matched(searcher, mat) + } + + #[inline] + fn context( + &mut self, + searcher: &Searcher, + context: &SinkContext, + ) -> Result { + (**self).context(searcher, context) + } + + #[inline] + fn context_break( + &mut self, + searcher: &Searcher, + ) -> Result { + (**self).context_break(searcher) + } + + #[inline] + fn begin( + &mut self, + searcher: &Searcher, + ) -> Result { + (**self).begin(searcher) + } + + #[inline] + fn finish( + &mut self, + searcher: &Searcher, + sink_finish: &SinkFinish, + ) -> Result<(), S::Error> { + (**self).finish(searcher, sink_finish) + } +} + +/// Summary data reported at the end of a search. +/// +/// This reports data such as the total number of bytes searched and the +/// absolute offset of the first occurrence of binary data, if any were found. +/// +/// A searcher that stops early because of an error does not call `finish`. 
+/// A searcher that stops early because the `Sink` implementor instructed it +/// to will still call `finish`. +#[derive(Clone, Debug)] +pub struct SinkFinish { + pub(crate) byte_count: u64, + pub(crate) binary_byte_offset: Option, +} + +impl SinkFinish { + /// Return the total number of bytes searched. + #[inline] + pub fn byte_count(&self) -> u64 { + self.byte_count + } + + /// If binary detection is enabled and if binary data was found, then this + /// returns the absolute byte offset of the first detected byte of binary + /// data. + /// + /// Note that since this is an absolute byte offset, it cannot be relied + /// upon to index into any addressable memory. + #[inline] + pub fn binary_byte_offset(&self) -> Option { + self.binary_byte_offset + } +} + +/// A type that describes a match reported by a searcher. +#[derive(Clone, Debug)] +pub struct SinkMatch<'b> { + pub(crate) line_term: LineTerminator, + pub(crate) bytes: &'b [u8], + pub(crate) absolute_byte_offset: u64, + pub(crate) line_number: Option, +} + +impl<'b> SinkMatch<'b> { + /// Returns the bytes for all matching lines, including the line + /// terminators, if they exist. + #[inline] + pub fn bytes(&self) -> &'b [u8] { + self.bytes + } + + /// Return an iterator over the lines in this match. + /// + /// If multi line search is enabled, then this may yield more than one + /// line (but always at least one line). If multi line search is disabled, + /// then this always reports exactly one line (but may consist of just + /// the line terminator). + /// + /// Lines yielded by this iterator include their terminators. + #[inline] + pub fn lines(&self) -> LineIter<'b> { + LineIter::new(self.line_term.as_byte(), self.bytes) + } + + /// Returns the absolute byte offset of the start of this match. This + /// offset is absolute in that it is relative to the very beginning of the + /// input in a search, and can never be relied upon to be a valid index + /// into an in-memory slice. 
+ #[inline] + pub fn absolute_byte_offset(&self) -> u64 { + self.absolute_byte_offset + } + + /// Returns the line number of the first line in this match, if available. + /// + /// Line numbers are only available when the search builder is instructed + /// to compute them. + #[inline] + pub fn line_number(&self) -> Option { + self.line_number + } +} + +/// The type of context reported by a searcher. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum SinkContextKind { + /// The line reported occurred before a match. + Before, + /// The line reported occurred after a match. + After, + /// Any other type of context reported, e.g., as a result of a searcher's + /// "passthru" mode. + Other, +} + +/// A type that describes a contextual line reported by a searcher. +#[derive(Clone, Debug)] +pub struct SinkContext<'b> { + pub(crate) line_term: LineTerminator, + pub(crate) bytes: &'b [u8], + pub(crate) kind: SinkContextKind, + pub(crate) absolute_byte_offset: u64, + pub(crate) line_number: Option, +} + +impl<'b> SinkContext<'b> { + /// Returns the context bytes, including line terminators. + #[inline] + pub fn bytes(&self) -> &'b [u8] { + self.bytes + } + + /// Returns the type of context. + #[inline] + pub fn kind(&self) -> &SinkContextKind { + &self.kind + } + + /// Return an iterator over the lines in this match. + /// + /// This always yields exactly one line (and that one line may contain just + /// the line terminator). + /// + /// Lines yielded by this iterator include their terminators. + #[cfg(test)] + pub(crate) fn lines(&self) -> LineIter<'b> { + LineIter::new(self.line_term.as_byte(), self.bytes) + } + + /// Returns the absolute byte offset of the start of this context. This + /// offset is absolute in that it is relative to the very beginning of the + /// input in a search, and can never be relied upon to be a valid index + /// into an in-memory slice. 
+ #[inline] + pub fn absolute_byte_offset(&self) -> u64 { + self.absolute_byte_offset + } + + /// Returns the line number of the first line in this context, if + /// available. + /// + /// Line numbers are only available when the search builder is instructed + /// to compute them. + #[inline] + pub fn line_number(&self) -> Option { + self.line_number + } +} + +/// A collection of convenience implementations of `Sink`. +/// +/// Each implementation in this module makes some kind of sacrifice in the name +/// of making common cases easier to use. Most frequently, each type is a +/// wrapper around a closure specified by the caller that provides limited +/// access to the full suite of information available to implementors of +/// `Sink`. +/// +/// For example, the `UTF8` sink makes the following sacrifices: +/// +/// * All matches must be UTF-8. An arbitrary `Sink` does not have this +/// restriction and can deal with arbitrary data. If this sink sees invalid +/// UTF-8, then an error is returned and searching stops. (Use the `Lossy` +/// sink instead to suppress this error.) +/// * The searcher must be configured to report line numbers. If it isn't, +/// an error is reported at the first match and searching stops. +/// * Context lines, context breaks and summary data reported at the end of +/// a search are all ignored. +/// * Implementors are forced to use `io::Error` as their error type. +/// +/// If you need more flexibility, then you're advised to implement the `Sink` +/// trait directly. +pub mod sinks { + use std::io; + use std::str; + + use searcher::Searcher; + use super::{Sink, SinkError, SinkMatch}; + + /// A sink that provides line numbers and matches as strings while ignoring + /// everything else. + /// + /// This implementation will return an error if a match contains invalid + /// UTF-8 or if the searcher was not configured to count lines. Errors + /// on invalid UTF-8 can be suppressed by using the `Lossy` sink instead + /// of this one. 
+ /// + /// The closure accepts two parameters: a line number and a UTF-8 string + /// containing the matched data. The closure returns a + /// `Result`. If the `bool` is `false`, then the search + /// stops immediately. Otherwise, searching continues. + /// + /// If multi line mode was enabled, the line number refers to the line + /// number of the first line in the match. + #[derive(Clone, Debug)] + pub struct UTF8(pub F) + where F: FnMut(u64, &str) -> Result; + + impl Sink for UTF8 + where F: FnMut(u64, &str) -> Result + { + type Error = io::Error; + + fn matched( + &mut self, + _searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + let matched = match str::from_utf8(mat.bytes()) { + Ok(matched) => matched, + Err(err) => return Err(io::Error::error_message(err)), + }; + let line_number = match mat.line_number() { + Some(line_number) => line_number, + None => { + let msg = "line numbers not enabled"; + return Err(io::Error::error_message(msg)); + } + }; + (self.0)(line_number, &matched) + } + } + + /// A sink that provides line numbers and matches as (lossily converted) + /// strings while ignoring everything else. + /// + /// This is like `UTF8`, except that if a match contains invalid UTF-8, + /// then it will be lossily converted to valid UTF-8 by substituting + /// invalid UTF-8 with Unicode replacement characters. + /// + /// This implementation will return an error on the first match if the + /// searcher was not configured to count lines. + /// + /// The closure accepts two parameters: a line number and a UTF-8 string + /// containing the matched data. The closure returns a + /// `Result`. If the `bool` is `false`, then the search + /// stops immediately. Otherwise, searching continues. + /// + /// If multi line mode was enabled, the line number refers to the line + /// number of the first line in the match. 
+ #[derive(Clone, Debug)] + pub struct Lossy(pub F) + where F: FnMut(u64, &str) -> Result; + + impl Sink for Lossy + where F: FnMut(u64, &str) -> Result + { + type Error = io::Error; + + fn matched( + &mut self, + _searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + use std::borrow::Cow; + + let matched = match str::from_utf8(mat.bytes()) { + Ok(matched) => Cow::Borrowed(matched), + // TODO: In theory, it should be possible to amortize + // allocation here, but `std` doesn't provide such an API. + // Regardless, this only happens on matches with invalid UTF-8, + // which should be pretty rare. + Err(_) => String::from_utf8_lossy(mat.bytes()), + }; + let line_number = match mat.line_number() { + Some(line_number) => line_number, + None => { + let msg = "line numbers not enabled"; + return Err(io::Error::error_message(msg)); + } + }; + (self.0)(line_number, &matched) + } + } + + /// A sink that provides line numbers and matches as raw bytes while + /// ignoring everything else. + /// + /// This implementation will return an error on the first match if the + /// searcher was not configured to count lines. + /// + /// The closure accepts two parameters: a line number and a raw byte string + /// containing the matched data. The closure returns a `Result`. If the `bool` is `false`, then the search stops + /// immediately. Otherwise, searching continues. + /// + /// If multi line mode was enabled, the line number refers to the line + /// number of the first line in the match. 
+ #[derive(Clone, Debug)] + pub struct Bytes(pub F) + where F: FnMut(u64, &[u8]) -> Result; + + impl Sink for Bytes + where F: FnMut(u64, &[u8]) -> Result + { + type Error = io::Error; + + fn matched( + &mut self, + _searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + let line_number = match mat.line_number() { + Some(line_number) => line_number, + None => { + let msg = "line numbers not enabled"; + return Err(io::Error::error_message(msg)); + } + }; + (self.0)(line_number, mat.bytes()) + } + } +} diff -Nru ripgrep-0.6.0/grep-searcher/src/testutil.rs ripgrep-0.10.0.3/grep-searcher/src/testutil.rs --- ripgrep-0.6.0/grep-searcher/src/testutil.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/src/testutil.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,787 @@ +use std::io::{self, Write}; +use std::str; + +use grep_matcher::{ + LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError, +}; +use memchr::memchr; +use regex::bytes::{Regex, RegexBuilder}; + +use searcher::{BinaryDetection, Searcher, SearcherBuilder}; +use sink::{Sink, SinkContext, SinkFinish, SinkMatch}; + +/// A simple regex matcher. +/// +/// This supports setting the matcher's line terminator configuration directly, +/// which we use for testing purposes. That is, the caller explicitly +/// determines whether the line terminator optimization is enabled. (In reality +/// this optimization is detected automatically by inspecting and possibly +/// modifying the regex itself.) +#[derive(Clone, Debug)] +pub struct RegexMatcher { + regex: Regex, + line_term: Option, + every_line_is_candidate: bool, +} + +impl RegexMatcher { + /// Create a new regex matcher. 
+ pub fn new(pattern: &str) -> RegexMatcher { + let regex = RegexBuilder::new(pattern) + .multi_line(true) // permits ^ and $ to match at \n boundaries + .build() + .unwrap(); + RegexMatcher { + regex: regex, + line_term: None, + every_line_is_candidate: false, + } + } + + /// Forcefully set the line terminator of this matcher. + /// + /// By default, this matcher has no line terminator set. + pub fn set_line_term( + &mut self, + line_term: Option, + ) -> &mut RegexMatcher { + self.line_term = line_term; + self + } + + /// Whether to return every line as a candidate or not. + /// + /// This forces searchers to handle the case of reporting a false positive. + pub fn every_line_is_candidate( + &mut self, + yes: bool, + ) -> &mut RegexMatcher { + self.every_line_is_candidate = yes; + self + } +} + +impl Matcher for RegexMatcher { + type Captures = NoCaptures; + type Error = NoError; + + fn find_at( + &self, + haystack: &[u8], + at: usize, + ) -> Result, NoError> { + Ok(self.regex + .find_at(haystack, at) + .map(|m| Match::new(m.start(), m.end()))) + } + + fn new_captures(&self) -> Result { + Ok(NoCaptures::new()) + } + + fn line_terminator(&self) -> Option { + self.line_term + } + + fn find_candidate_line( + &self, + haystack: &[u8], + ) -> Result, NoError> { + if self.every_line_is_candidate { + assert!(self.line_term.is_some()); + if haystack.is_empty() { + return Ok(None); + } + // Make it interesting and return the last byte in the current + // line. + let i = memchr(self.line_term.unwrap().as_byte(), haystack) + .map(|i| i) + .unwrap_or(haystack.len() - 1); + Ok(Some(LineMatchKind::Candidate(i))) + } else { + Ok(self.shortest_match(haystack)?.map(LineMatchKind::Confirmed)) + } + } +} + +/// An implementation of Sink that prints all available information. +/// +/// This is useful for tests because it lets us easily confirm whether data +/// is being passed to Sink correctly. 
+#[derive(Clone, Debug)] +pub struct KitchenSink(Vec); + +impl KitchenSink { + /// Create a new implementation of Sink that includes everything in the + /// kitchen. + pub fn new() -> KitchenSink { + KitchenSink(vec![]) + } + + /// Return the data written to this sink. + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } +} + +impl Sink for KitchenSink { + type Error = io::Error; + + fn matched( + &mut self, + _searcher: &Searcher, + mat: &SinkMatch, + ) -> Result { + assert!(!mat.bytes().is_empty()); + assert!(mat.lines().count() >= 1); + + let mut line_number = mat.line_number(); + let mut byte_offset = mat.absolute_byte_offset(); + for line in mat.lines() { + if let Some(ref mut n) = line_number { + write!(self.0, "{}:", n)?; + *n += 1; + } + + write!(self.0, "{}:", byte_offset)?; + byte_offset += line.len() as u64; + self.0.write_all(line)?; + } + Ok(true) + } + + fn context( + &mut self, + _searcher: &Searcher, + context: &SinkContext, + ) -> Result { + assert!(!context.bytes().is_empty()); + assert!(context.lines().count() == 1); + + if let Some(line_number) = context.line_number() { + write!(self.0, "{}-", line_number)?; + } + write!(self.0, "{}-", context.absolute_byte_offset)?; + self.0.write_all(context.bytes())?; + Ok(true) + } + + fn context_break( + &mut self, + _searcher: &Searcher, + ) -> Result { + self.0.write_all(b"--\n")?; + Ok(true) + } + + fn finish( + &mut self, + _searcher: &Searcher, + sink_finish: &SinkFinish, + ) -> Result<(), io::Error> { + writeln!(self.0, "")?; + writeln!(self.0, "byte count:{}", sink_finish.byte_count())?; + if let Some(offset) = sink_finish.binary_byte_offset() { + writeln!(self.0, "binary offset:{}", offset)?; + } + Ok(()) + } +} + +/// A type for expressing tests on a searcher. +/// +/// The searcher code has a lot of different code paths, mostly for the +/// purposes of optimizing a bunch of different use cases. 
The intent of the +/// searcher is to pick the best code path based on the configuration, which +/// means there is no obviously direct way to ask that a specific code path +/// be exercised. Thus, the purpose of this tester is to explicitly check as +/// many code paths that make sense. +/// +/// The tester works by assuming you want to test all pertinent code paths. +/// These can be trimmed down as necessary via the various builder methods. +#[derive(Debug)] +pub struct SearcherTester { + haystack: String, + pattern: String, + filter: Option<::regex::Regex>, + print_labels: bool, + expected_no_line_number: Option, + expected_with_line_number: Option, + expected_slice_no_line_number: Option, + expected_slice_with_line_number: Option, + by_line: bool, + multi_line: bool, + invert_match: bool, + line_number: bool, + binary: BinaryDetection, + auto_heap_limit: bool, + after_context: usize, + before_context: usize, + passthru: bool, +} + +impl SearcherTester { + /// Create a new tester for testing searchers. + pub fn new(haystack: &str, pattern: &str) -> SearcherTester { + SearcherTester { + haystack: haystack.to_string(), + pattern: pattern.to_string(), + filter: None, + print_labels: false, + expected_no_line_number: None, + expected_with_line_number: None, + expected_slice_no_line_number: None, + expected_slice_with_line_number: None, + by_line: true, + multi_line: true, + invert_match: false, + line_number: true, + binary: BinaryDetection::none(), + auto_heap_limit: true, + after_context: 0, + before_context: 0, + passthru: false, + } + } + + /// Execute the test. If the test succeeds, then this returns successfully. + /// If the test fails, then it panics with an informative message. + pub fn test(&self) { + // Check for configuration errors. 
+ if self.expected_no_line_number.is_none() { + panic!("an 'expected' string with NO line numbers must be given"); + } + if self.line_number && self.expected_with_line_number.is_none() { + panic!("an 'expected' string with line numbers must be given, \ + or disable testing with line numbers"); + } + + let configs = self.configs(); + if configs.is_empty() { + panic!("test configuration resulted in nothing being tested"); + } + if self.print_labels { + for config in &configs { + let labels = vec![ + format!("reader-{}", config.label), + format!("slice-{}", config.label), + ]; + for label in &labels { + if self.include(label) { + println!("{}", label); + } else { + println!("{} (ignored)", label); + } + } + } + } + for config in &configs { + let label = format!("reader-{}", config.label); + if self.include(&label) { + let got = config.search_reader(&self.haystack); + assert_eq_printed!(config.expected_reader, got, "{}", label); + } + + let label = format!("slice-{}", config.label); + if self.include(&label) { + let got = config.search_slice(&self.haystack); + assert_eq_printed!(config.expected_slice, got, "{}", label); + } + } + } + + /// Set a regex pattern to filter the tests that are run. + /// + /// By default, no filter is present. When a filter is set, only test + /// configurations with a label matching the given pattern will be run. + /// + /// This is often useful when debugging tests, e.g., when you want to do + /// printf debugging and only want one particular test configuration to + /// execute. + #[allow(dead_code)] + pub fn filter(&mut self, pattern: &str) -> &mut SearcherTester { + self.filter = Some(::regex::Regex::new(pattern).unwrap()); + self + } + + /// When set, the labels for all test configurations are printed before + /// executing any test. + /// + /// Note that in order to see these in tests that aren't failing, you'll + /// want to use `cargo test -- --nocapture`. 
+ #[allow(dead_code)] + pub fn print_labels(&mut self, yes: bool) -> &mut SearcherTester { + self.print_labels = yes; + self + } + + /// Set the expected search results, without line numbers. + pub fn expected_no_line_number( + &mut self, + exp: &str, + ) -> &mut SearcherTester { + self.expected_no_line_number = Some(exp.to_string()); + self + } + + /// Set the expected search results, with line numbers. + pub fn expected_with_line_number( + &mut self, + exp: &str, + ) -> &mut SearcherTester { + self.expected_with_line_number = Some(exp.to_string()); + self + } + + /// Set the expected search results, without line numbers, when performing + /// a search on a slice. When not present, `expected_no_line_number` is + /// used instead. + pub fn expected_slice_no_line_number( + &mut self, + exp: &str, + ) -> &mut SearcherTester { + self.expected_slice_no_line_number = Some(exp.to_string()); + self + } + + /// Set the expected search results, with line numbers, when performing a + /// search on a slice. When not present, `expected_with_line_number` is + /// used instead. + #[allow(dead_code)] + pub fn expected_slice_with_line_number( + &mut self, + exp: &str, + ) -> &mut SearcherTester { + self.expected_slice_with_line_number = Some(exp.to_string()); + self + } + + /// Whether to test search with line numbers or not. + /// + /// This is enabled by default. When enabled, the string that is expected + /// when line numbers are present must be provided. Otherwise, the expected + /// string isn't required. + pub fn line_number(&mut self, yes: bool) -> &mut SearcherTester { + self.line_number = yes; + self + } + + /// Whether to test search using the line-by-line searcher or not. + /// + /// By default, this is enabled. + pub fn by_line(&mut self, yes: bool) -> &mut SearcherTester { + self.by_line = yes; + self + } + + /// Whether to test search using the multi line searcher or not. + /// + /// By default, this is enabled. 
+ #[allow(dead_code)] + pub fn multi_line(&mut self, yes: bool) -> &mut SearcherTester { + self.multi_line = yes; + self + } + + /// Whether to perform an inverted search or not. + /// + /// By default, this is disabled. + pub fn invert_match(&mut self, yes: bool) -> &mut SearcherTester { + self.invert_match = yes; + self + } + + /// Whether to enable binary detection on all searches. + /// + /// By default, this is disabled. + pub fn binary_detection( + &mut self, + detection: BinaryDetection, + ) -> &mut SearcherTester { + self.binary = detection; + self + } + + /// Whether to automatically attempt to test the heap limit setting or not. + /// + /// By default, one of the test configurations includes setting the heap + /// limit to its minimal value for normal operation, which checks that + /// everything works even at the extremes. However, in some cases, the heap + /// limit can (expectedly) alter the output slightly. For example, it can + /// impact the number of bytes searched when performing binary detection. + /// For convenience, it can be useful to disable the automatic heap limit + /// test. + pub fn auto_heap_limit(&mut self, yes: bool) -> &mut SearcherTester { + self.auto_heap_limit = yes; + self + } + + /// Set the number of lines to include in the "after" context. + /// + /// The default is `0`, which is equivalent to not printing any context. + pub fn after_context(&mut self, lines: usize) -> &mut SearcherTester { + self.after_context = lines; + self + } + + /// Set the number of lines to include in the "before" context. + /// + /// The default is `0`, which is equivalent to not printing any context. + pub fn before_context(&mut self, lines: usize) -> &mut SearcherTester { + self.before_context = lines; + self + } + + /// Whether to enable the "passthru" feature or not. + /// + /// When passthru is enabled, it effectively treats all non-matching lines + /// as contextual lines. 
In other words, enabling this is akin to + /// requesting an unbounded number of before and after contextual lines. + /// + /// This is disabled by default. + pub fn passthru(&mut self, yes: bool) -> &mut SearcherTester { + self.passthru = yes; + self + } + + /// Return the minimum size of a buffer required for a successful search. + /// + /// Generally, this corresponds to the maximum length of a line (including + /// its terminator), but if context settings are enabled, then this must + /// include the sum of the longest N lines. + /// + /// Note that this must account for whether the test is using multi line + /// search or not, since multi line search requires being able to fit the + /// entire haystack into memory. + fn minimal_heap_limit(&self, multi_line: bool) -> usize { + if multi_line { + 1 + self.haystack.len() + } else if self.before_context == 0 && self.after_context == 0 { + 1 + self.haystack.lines().map(|s| s.len()).max().unwrap_or(0) + } else { + let mut lens: Vec = + self.haystack.lines().map(|s| s.len()).collect(); + lens.sort(); + lens.reverse(); + + let context_count = + if self.passthru { + self.haystack.lines().count() + } else { + // Why do we add 2 here? Well, we need to add 1 in order to + // have room to search at least one line. We add another + // because the implementation will occasionally include + // an additional line when handling the context. There's + // no particularly good reason, other than keeping the + // implementation simple. + 2 + self.before_context + self.after_context + }; + + // We add 1 to each line since `str::lines` doesn't include the + // line terminator. + lens.into_iter() + .take(context_count) + .map(|len| len + 1) + .sum::() + } + } + + /// Returns true if and only if the given label should be included as part + /// of executing `test`. + /// + /// Inclusion is determined by the filter specified. If no filter has been + /// given, then this always returns `true`. 
+ fn include(&self, label: &str) -> bool { + let re = match self.filter { + None => return true, + Some(ref re) => re, + }; + re.is_match(label) + } + + /// Configs generates a set of all search configurations that should be + /// tested. The configs generated are based on the configuration in this + /// builder. + fn configs(&self) -> Vec { + let mut configs = vec![]; + + let matcher = RegexMatcher::new(&self.pattern); + let mut builder = SearcherBuilder::new(); + builder + .line_number(false) + .invert_match(self.invert_match) + .binary_detection(self.binary.clone()) + .after_context(self.after_context) + .before_context(self.before_context) + .passthru(self.passthru); + + if self.by_line { + let mut matcher = matcher.clone(); + let mut builder = builder.clone(); + + let expected_reader = + self.expected_no_line_number.as_ref().unwrap().to_string(); + let expected_slice = match self.expected_slice_no_line_number { + None => expected_reader.clone(), + Some(ref e) => e.to_string(), + }; + configs.push(TesterConfig { + label: "byline-noterm-nonumber".to_string(), + expected_reader: expected_reader.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + + if self.auto_heap_limit { + builder.heap_limit(Some(self.minimal_heap_limit(false))); + configs.push(TesterConfig { + label: "byline-noterm-nonumber-heaplimit".to_string(), + expected_reader: expected_reader.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + builder.heap_limit(None); + } + + matcher.set_line_term(Some(LineTerminator::byte(b'\n'))); + configs.push(TesterConfig { + label: "byline-term-nonumber".to_string(), + expected_reader: expected_reader.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + + matcher.every_line_is_candidate(true); + configs.push(TesterConfig { + label: "byline-term-nonumber-candidates".to_string(), + 
expected_reader: expected_reader.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + } + if self.by_line && self.line_number { + let mut matcher = matcher.clone(); + let mut builder = builder.clone(); + + let expected_reader = + self.expected_with_line_number.as_ref().unwrap().to_string(); + let expected_slice = match self.expected_slice_with_line_number { + None => expected_reader.clone(), + Some(ref e) => e.to_string(), + }; + + builder.line_number(true); + configs.push(TesterConfig { + label: "byline-noterm-number".to_string(), + expected_reader: expected_reader.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + + matcher.set_line_term(Some(LineTerminator::byte(b'\n'))); + configs.push(TesterConfig { + label: "byline-term-number".to_string(), + expected_reader: expected_reader.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + + matcher.every_line_is_candidate(true); + configs.push(TesterConfig { + label: "byline-term-number-candidates".to_string(), + expected_reader: expected_reader.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + } + if self.multi_line { + let mut builder = builder.clone(); + let expected_slice = match self.expected_slice_no_line_number { + None => { + self.expected_no_line_number.as_ref().unwrap().to_string() + } + Some(ref e) => e.to_string(), + }; + + builder.multi_line(true); + configs.push(TesterConfig { + label: "multiline-nonumber".to_string(), + expected_reader: expected_slice.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + + if self.auto_heap_limit { + builder.heap_limit(Some(self.minimal_heap_limit(true))); + configs.push(TesterConfig { + label: "multiline-nonumber-heaplimit".to_string(), + expected_reader: 
expected_slice.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + builder.heap_limit(None); + } + } + if self.multi_line && self.line_number { + let mut builder = builder.clone(); + let expected_slice = match self.expected_slice_with_line_number { + None => { + self.expected_with_line_number + .as_ref().unwrap().to_string() + } + Some(ref e) => e.to_string(), + }; + + builder.multi_line(true); + builder.line_number(true); + configs.push(TesterConfig { + label: "multiline-number".to_string(), + expected_reader: expected_slice.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + + builder.heap_limit(Some(self.minimal_heap_limit(true))); + configs.push(TesterConfig { + label: "multiline-number-heaplimit".to_string(), + expected_reader: expected_slice.clone(), + expected_slice: expected_slice.clone(), + builder: builder.clone(), + matcher: matcher.clone(), + }); + builder.heap_limit(None); + } + configs + } +} + +#[derive(Debug)] +struct TesterConfig { + label: String, + expected_reader: String, + expected_slice: String, + builder: SearcherBuilder, + matcher: RegexMatcher, +} + +impl TesterConfig { + /// Execute a search using a reader. This exercises the incremental search + /// strategy, where the entire contents of the corpus aren't necessarily + /// in memory at once. + fn search_reader(&self, haystack: &str) -> String { + let mut sink = KitchenSink::new(); + let mut searcher = self.builder.build(); + let result = searcher.search_reader( + &self.matcher, + haystack.as_bytes(), + &mut sink, + ); + if let Err(err) = result { + let label = format!("reader-{}", self.label); + panic!("error running '{}': {}", label, err); + } + String::from_utf8(sink.as_bytes().to_vec()).unwrap() + } + + /// Execute a search using a slice. This exercises the search routines that + /// have the entire contents of the corpus in memory at one time. 
+ fn search_slice(&self, haystack: &str) -> String { + let mut sink = KitchenSink::new(); + let mut searcher = self.builder.build(); + let result = searcher.search_slice( + &self.matcher, + haystack.as_bytes(), + &mut sink, + ); + if let Err(err) = result { + let label = format!("slice-{}", self.label); + panic!("error running '{}': {}", label, err); + } + String::from_utf8(sink.as_bytes().to_vec()).unwrap() + } +} + +#[cfg(test)] +mod tests { + use grep_matcher::{Match, Matcher}; + + use super::*; + + fn m(start: usize, end: usize) -> Match { + Match::new(start, end) + } + + #[test] + fn empty_line1() { + let haystack = b""; + let matcher = RegexMatcher::new(r"^$"); + + assert_eq!(matcher.find_at(haystack, 0), Ok(Some(m(0, 0)))); + } + + #[test] + fn empty_line2() { + let haystack = b"\n"; + let matcher = RegexMatcher::new(r"^$"); + + assert_eq!(matcher.find_at(haystack, 0), Ok(Some(m(0, 0)))); + assert_eq!(matcher.find_at(haystack, 1), Ok(Some(m(1, 1)))); + } + + #[test] + fn empty_line3() { + let haystack = b"\n\n"; + let matcher = RegexMatcher::new(r"^$"); + + assert_eq!(matcher.find_at(haystack, 0), Ok(Some(m(0, 0)))); + assert_eq!(matcher.find_at(haystack, 1), Ok(Some(m(1, 1)))); + assert_eq!(matcher.find_at(haystack, 2), Ok(Some(m(2, 2)))); + } + + #[test] + fn empty_line4() { + let haystack = b"a\n\nb\n"; + let matcher = RegexMatcher::new(r"^$"); + + assert_eq!(matcher.find_at(haystack, 0), Ok(Some(m(2, 2)))); + assert_eq!(matcher.find_at(haystack, 1), Ok(Some(m(2, 2)))); + assert_eq!(matcher.find_at(haystack, 2), Ok(Some(m(2, 2)))); + assert_eq!(matcher.find_at(haystack, 3), Ok(Some(m(5, 5)))); + assert_eq!(matcher.find_at(haystack, 4), Ok(Some(m(5, 5)))); + assert_eq!(matcher.find_at(haystack, 5), Ok(Some(m(5, 5)))); + } + + #[test] + fn empty_line5() { + let haystack = b"a\n\nb\nc"; + let matcher = RegexMatcher::new(r"^$"); + + assert_eq!(matcher.find_at(haystack, 0), Ok(Some(m(2, 2)))); + assert_eq!(matcher.find_at(haystack, 1), Ok(Some(m(2, 2)))); + 
assert_eq!(matcher.find_at(haystack, 2), Ok(Some(m(2, 2)))); + assert_eq!(matcher.find_at(haystack, 3), Ok(None)); + assert_eq!(matcher.find_at(haystack, 4), Ok(None)); + assert_eq!(matcher.find_at(haystack, 5), Ok(None)); + assert_eq!(matcher.find_at(haystack, 6), Ok(None)); + } + + #[test] + fn empty_line6() { + let haystack = b"a\n"; + let matcher = RegexMatcher::new(r"^$"); + + assert_eq!(matcher.find_at(haystack, 0), Ok(Some(m(2, 2)))); + assert_eq!(matcher.find_at(haystack, 1), Ok(Some(m(2, 2)))); + assert_eq!(matcher.find_at(haystack, 2), Ok(Some(m(2, 2)))); + } +} diff -Nru ripgrep-0.6.0/grep-searcher/UNLICENSE ripgrep-0.10.0.3/grep-searcher/UNLICENSE --- ripgrep-0.6.0/grep-searcher/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/grep-searcher/UNLICENSE 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> diff -Nru ripgrep-0.6.0/GUIDE.md ripgrep-0.10.0.3/GUIDE.md --- ripgrep-0.6.0/GUIDE.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/GUIDE.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,687 @@ +## User Guide + +This guide is intended to give an elementary description of ripgrep and an +overview of its capabilities. This guide assumes that ripgrep is +[installed](README.md#installation) +and that readers have passing familiarity with using command line tools. This +also assumes a Unix-like system, although most commands are probably easily +translatable to any command line shell environment. + + +### Table of Contents + +* [Basics](#basics) +* [Recursive search](#recursive-search) +* [Automatic filtering](#automatic-filtering) +* [Manual filtering: globs](#manual-filtering-globs) +* [Manual filtering: file types](#manual-filtering-file-types) +* [Replacements](#replacements) +* [Configuration file](#configuration-file) +* [File encoding](#file-encoding) +* [Common options](#common-options) + + +### Basics + +ripgrep is a command line tool that searches your files for patterns that +you give it. ripgrep behaves as if reading each file line by line. If a line +matches the pattern provided to ripgrep, then that line will be printed. If a +line does not match the pattern, then the line is not printed. + +The best way to see how this works is with an example. To show an example, we +need something to search. Let's try searching ripgrep's source code.
First +grab a ripgrep source archive from +https://github.com/BurntSushi/ripgrep/archive/0.7.1.zip +and extract it: + +``` +$ curl -LO https://github.com/BurntSushi/ripgrep/archive/0.7.1.zip +$ unzip 0.7.1.zip +$ cd ripgrep-0.7.1 +$ ls +benchsuite grep tests Cargo.toml LICENSE-MIT +ci ignore wincolor CHANGELOG.md README.md +complete pkg appveyor.yml compile snapcraft.yaml +doc src build.rs COPYING UNLICENSE +globset termcolor Cargo.lock HomebrewFormula +``` + +Let's try our first search by looking for all occurrences of the word `fast` +in `README.md`: + +``` +$ rg fast README.md +75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while +119:### Is it really faster than everything else? +124:Summarizing, `ripgrep` is fast because: +129: optimizations to make searching very fast. +``` + +(**Note:** If you see an error message from ripgrep saying that it didn't +search any files, then re-run ripgrep with the `--debug` flag. One likely cause +of this is that you have a `*` rule in a `$HOME/.gitignore` file.) + +So what happened here? ripgrep read the contents of `README.md`, and for each +line that contained `fast`, ripgrep printed it to your terminal. ripgrep also +included the line number for each line by default. If your terminal supports +colors, then your output might actually look something like this screenshot: + +[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep-guide-sample.png)](https://burntsushi.net/stuff/ripgrep-guide-sample.png) + +In this example, we searched for something called a "literal" string. This +means that our pattern was just some normal text that we asked ripgrep to +find. But ripgrep supports the ability to specify patterns via [regular +expressions](https://en.wikipedia.org/wiki/Regular_expression).
As an example, +what if we wanted to find all lines that have a word that contains `fast` followed +by some number of other letters? + +``` +$ rg 'fast\w+' README.md +75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +119:### Is it really faster than everything else? +``` + +In this example, we used the pattern `fast\w+`. This pattern tells ripgrep to +look for any lines containing the letters `fast` followed by *one or more* +word-like characters. Namely, `\w` matches characters that compose words (like +`a` and `L` but unlike `.` and ` `). The `+` after the `\w` means, "match the +previous pattern one or more times." This means that the word `fast` won't +match because there are no word characters following the final `t`. But a word +like `faster` will. `faste` would also match! + +Here's a different variation on this same theme: + +``` +$ rg 'fast\w*' README.md +75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while +119:### Is it really faster than everything else? +124:Summarizing, `ripgrep` is fast because: +129: optimizations to make searching very fast. +``` + +In this case, we used `fast\w*` for our pattern instead of `fast\w+`. The `*` +means that it should match *zero* or more times. In this case, ripgrep will +print the same lines as the pattern `fast`, but if your terminal supports +colors, you'll notice that `faster` will be highlighted instead of just the +`fast` prefix. + +It is beyond the scope of this guide to provide a full tutorial on regular +expressions, but ripgrep's specific syntax is documented here: +https://docs.rs/regex/0.2.5/regex/#syntax + + +### Recursive search + +In the previous section, we showed how to use ripgrep to search a single file. +In this section, we'll show how to use ripgrep to search an entire directory +of files.
In fact, *recursively* searching your current working directory is +the default mode of operation for ripgrep, which means doing this is very +simple. + +Using our unzipped archive of ripgrep source code, here's how to find all +function definitions whose name is `write`: + +``` +$ rg 'fn write\(' +src/printer.rs +469: fn write(&mut self, buf: &[u8]) { + +termcolor/src/lib.rs +227: fn write(&mut self, b: &[u8]) -> io::Result<usize> { +250: fn write(&mut self, b: &[u8]) -> io::Result<usize> { +428: fn write(&mut self, b: &[u8]) -> io::Result<usize> { self.wtr.write(b) } +441: fn write(&mut self, b: &[u8]) -> io::Result<usize> { self.wtr.write(b) } +454: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +511: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +848: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +915: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +949: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +1114: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +1348: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +1353: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +``` + +(**Note:** We escape the `(` here because `(` has special significance inside +regular expressions. You could also use `rg -F 'fn write('` to achieve the +same thing, where `-F` interprets your pattern as a literal string instead of +a regular expression.) + +In this example, we didn't specify a file at all. Instead, ripgrep defaulted +to searching your current directory in the absence of a path. In general, +`rg foo` is equivalent to `rg foo ./`. + +This particular search showed us results in both the `src` and `termcolor` +directories. The `src` directory is the core ripgrep code whereas `termcolor` +is a dependency of ripgrep (and is used by other tools). What if we only wanted +to search core ripgrep code? Well, that's easy, just specify the directory you +want: + +``` +$ rg 'fn write\(' src +src/printer.rs +469: fn write(&mut self, buf: &[u8]) { +``` + +Here, ripgrep limited its search to the `src` directory.
Another way of doing +this search would be to `cd` into the `src` directory and simply use `rg 'fn +write\('` again. + + +### Automatic filtering + +After recursive search, ripgrep's most important feature is what it *doesn't* +search. By default, when you search a directory, ripgrep will ignore all of +the following: + +1. Files and directories that match the rules in your `.gitignore` glob + pattern. +2. Hidden files and directories. +3. Binary files. (ripgrep considers any file with a `NUL` byte to be binary.) +4. Symbolic links aren't followed. + +All of these things can be toggled using various flags provided by ripgrep: + +1. You can disable `.gitignore` handling with the `--no-ignore` flag. +2. Hidden files and directories can be searched with the `--hidden` flag. +3. Binary files can be searched via the `--text` (`-a` for short) flag. + Be careful with this flag! Binary files may emit control characters to your + terminal, which might cause strange behavior. +4. ripgrep can follow symlinks with the `--follow` (`-L` for short) flag. + +As a special convenience, ripgrep also provides a flag called `--unrestricted` +(`-u` for short). Repeated uses of this flag will cause ripgrep to disable +more and more of its filtering. That is, `-u` will disable `.gitignore` +handling, `-uu` will search hidden files and directories and `-uuu` will search +binary files. This is useful when you're using ripgrep and you aren't sure +whether its filtering is hiding results from you. Tacking on a couple `-u` +flags is a quick way to find out. (Use the `--debug` flag if you're still +perplexed, and if that doesn't help, +[file an issue](https://github.com/BurntSushi/ripgrep/issues/new).) + +ripgrep's `.gitignore` handling actually goes a bit beyond just `.gitignore` +files. 
ripgrep will also respect repository specific rules found in +`$GIT_DIR/info/exclude`, as well as any global ignore rules in your +`core.excludesFile` (which is usually `$XDG_CONFIG_HOME/git/ignore` on +Unix-like systems). + +Sometimes you want to search files that are in your `.gitignore`, so it is +possible to specify additional ignore rules or overrides in a `.ignore` +(application agnostic) or `.rgignore` (ripgrep specific) file. + +For example, let's say you have a `.gitignore` file that looks like this: + +``` +log/ +``` + +This generally means that any `log` directory won't be tracked by `git`. +However, perhaps it contains useful output that you'd like to include in your +searches, but you still don't want to track it in `git`. You can achieve this +by creating a `.ignore` file in the same directory as the `.gitignore` file +with the following contents: + +``` +!log/ +``` + +ripgrep treats `.ignore` files with higher precedence than `.gitignore` files +(and treats `.rgignore` files with higher precedence than `.ignore` files). +This means ripgrep will see the `!log/` whitelist rule first and search that +directory. + +Like `.gitignore`, a `.ignore` file can be placed in any directory. Its rules +will be processed with respect to the directory it resides in, just like +`.gitignore`. + +For a more in depth description of how glob patterns in a `.gitignore` file +are interpreted, please see `man gitignore`. + + +### Manual filtering: globs + +In the previous section, we talked about ripgrep's filtering that it does by +default. It is "automatic" because it reacts to your environment. That is, it +uses already existing `.gitignore` files to produce more relevant search +results. + +In addition to automatic filtering, ripgrep also provides more manual or ad hoc +filtering. This comes in two varieties: additional glob patterns specified in +your ripgrep commands and file type filtering. 
This section covers glob +patterns while the next section covers file type filtering. + +In our ripgrep source code (see [Basics](#basics) for instructions on how to +get a source archive to search), let's say we wanted to see which things depend +on `clap`, our argument parser. + +We could do this: + +``` +$ rg clap +[lots of results] +``` + +But this shows us many things, and we're only interested in where we wrote +`clap` as a dependency. Instead, we could limit ourselves to TOML files, which +is how dependencies are communicated to Rust's build tool, Cargo: + +``` +$ rg clap -g '*.toml' +Cargo.toml +35:clap = "2.26" +51:clap = "2.26" +``` + +The `-g '*.toml'` syntax says, "make sure every file searched matches this +glob pattern." Note that we put `'*.toml'` in single quotes to prevent our +shell from expanding the `*`. + +If we wanted, we could tell ripgrep to search anything *but* `*.toml` files: + +``` +$ rg clap -g '!*.toml' +[lots of results] +``` + +This will give you a lot of results again as above, but they won't include +files ending with `.toml`. Note that the use of a `!` here to mean "negation" +is a bit non-standard, but it was chosen to be consistent with how globs in +`.gitignore` files are written. (Although, the meaning is reversed. In +`.gitignore` files, a `!` prefix means whitelist, and on the command line, a +`!` means blacklist.) + +Globs are interpreted in exactly the same way as `.gitignore` patterns. That +is, later globs will override earlier globs. For example, the following command +will search only `*.toml` files: + +``` +$ rg clap -g '!*.toml' -g '*.toml' +``` + +Interestingly, reversing the order of the globs in this case will match +nothing, since the presence of at least one non-blacklist glob will institute a +requirement that every file searched must match at least one glob. In this +case, the blacklist glob takes precedence over the previous glob and prevents +any file from being searched at all! 
+ + +### Manual filtering: file types + +Over time, you might notice that you use the same glob patterns over and over. +For example, you might find yourself doing a lot of searches where you only +want to see results for Rust files: + +``` +$ rg 'fn run' -g '*.rs' +``` + +Instead of writing out the glob every time, you can use ripgrep's support for +file types: + +``` +$ rg 'fn run' --type rust +``` + +or, more succinctly, + +``` +$ rg 'fn run' -trust +``` + +The way the `--type` flag functions is simple. It acts as a name that is +assigned to one or more globs that match the relevant files. This lets you +write a single type that might encompass a broad range of file extensions. For +example, if you wanted to search C files, you'd have to check both C source +files and C header files: + +``` +$ rg 'int main' -g '*.{c,h}' +``` + +or you could just use the C file type: + +``` +$ rg 'int main' -tc +``` + +Just as you can write blacklist globs, you can blacklist file types too: + +``` +$ rg clap --type-not rust +``` + +or, more succinctly, + +``` +$ rg clap -Trust +``` + +That is, `-t` means "include files of this type" whereas `-T` means "exclude +files of this type." + +To see the globs that make up a type, run `rg --type-list`: + +``` +$ rg --type-list | rg '^make:' +make: *.mak, *.mk, GNUmakefile, Gnumakefile, Makefile, gnumakefile, makefile +``` + +By default, ripgrep comes with a bunch of pre-defined types. Generally, these +types correspond to well known public formats. But you can define your own +types as well. For example, perhaps you frequently search "web" files, which +consist of Javascript, HTML and CSS: + +``` +$ rg --type-add 'web:*.html' --type-add 'web:*.css' --type-add 'web:*.js' -tweb title +``` + +or, more succinctly, + +``` +$ rg --type-add 'web:*.{html,css,js}' -tweb title +``` + +The above command defines a new type, `web`, corresponding to the glob +`*.{html,css,js}`.
It then applies the new filter with `-tweb` and searches for +the pattern `title`. If you ran + +``` +$ rg --type-add 'web:*.{html,css,js}' --type-list +``` + +Then you would see your `web` type show up in the list, even though it is not +part of ripgrep's built-in types. + +It is important to stress here that the `--type-add` flag only applies to the +current command. It does not add a new file type and save it somewhere in a +persistent form. If you want a type to be available in every ripgrep command, +then you should either create a shell alias: + +``` +alias rg="rg --type-add 'web:*.{html,css,js}'" +``` + +or add `--type-add=web:*.{html,css,js}` to your ripgrep configuration file. +([Configuration files](#configuration-file) are covered in more detail later.) + + +### Replacements + +ripgrep provides a limited ability to modify its output by replacing matched +text with some other text. This is easiest to explain with an example. Remember +when we searched for the word `fast` in ripgrep's README? + +``` +$ rg fast README.md +75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while +119:### Is it really faster than everything else? +124:Summarizing, `ripgrep` is fast because: +129: optimizations to make searching very fast. +``` + +What if we wanted to *replace* all occurrences of `fast` with `FAST`? That's +easy with ripgrep's `--replace` flag: + +``` +$ rg fast README.md --replace FAST +75: FASTer than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays FAST while +119:### Is it really FASTer than everything else? +124:Summarizing, `ripgrep` is FAST because: +129: optimizations to make searching very FAST. 
+``` + +or, more succinctly, + +``` +$ rg fast README.md -r FAST +[snip] +``` + +In essence, the `--replace` flag applies *only* to the matching portion of text +in the output. If you instead wanted to replace an entire line of text, then +you need to include the entire line in your match. For example: + +``` +$ rg '^.*fast.*$' README.md -r FAST +75:FAST +88:FAST +119:FAST +124:FAST +129:FAST +``` + +Alternatively, you can combine the `--only-matching` (or `-o` for short) with +the `--replace` flag to achieve the same result: + +``` +$ rg fast README.md --only-matching --replace FAST +75:FAST +88:FAST +119:FAST +124:FAST +129:FAST +``` + +or, more succinctly, + +``` +$ rg fast README.md -or FAST +[snip] +``` + +Finally, replacements can include capturing groups. For example, let's say +we wanted to find all occurrences of `fast` followed by another word and +join them together with a dash. The pattern we might use for that is +`fast\s+(\w+)`, which matches `fast`, followed by any amount of whitespace, +followed by any number of "word" characters. We put the `\w+` in a "capturing +group" (indicated by parentheses) so that we can reference it later in our +replacement string. For example: + +``` +$ rg 'fast\s+(\w+)' README.md -r 'fast-$1' +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while +124:Summarizing, `ripgrep` is fast-because: +``` + +Our replacement string here, `fast-$1`, consists of `fast-` followed by the +contents of the capturing group at index `1`. (Capturing groups actually start +at index 0, but the `0`th capturing group always corresponds to the entire +match. The capturing group at index `1` always corresponds to the first +explicit capturing group found in the regex pattern.) + +Capturing groups can also be named, which is sometimes more convenient than +using the indices. 
For example, the following command is equivalent to the +above command: + +``` +$ rg 'fast\s+(?P<word>\w+)' README.md -r 'fast-$word' +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while +124:Summarizing, `ripgrep` is fast-because: +``` + +It is important to note that ripgrep **will never modify your files**. The +`--replace` flag only controls ripgrep's output. (And there is no flag to let +you do a replacement in a file.) + + +### Configuration file + +It is possible that ripgrep's default options aren't suitable in every case. +For that reason, and because shell aliases aren't always convenient, ripgrep +supports configuration files. + +Setting up a configuration file is simple. ripgrep will not look in any +predetermined directory for a config file automatically. Instead, you need to +set the `RIPGREP_CONFIG_PATH` environment variable to the file path of your +config file. Once the environment variable is set, open the file and just type +in the flags you want set automatically. There are only two rules for +describing the format of the config file: + +1. Every line is a shell argument, after trimming ASCII whitespace. +2. Lines starting with `#` (optionally preceded by any amount of + ASCII whitespace) are ignored. + +In particular, there is no escaping. Each line is given to ripgrep as a single +command line argument verbatim. + +Here's an example of a configuration file, which demonstrates some of the +formatting peculiarities: + +``` +$ cat $HOME/.ripgreprc +# Don't let ripgrep vomit really long lines to my terminal. +--max-columns=150 + +# Add my 'web' type. +--type-add +web:*.{html,css,js}* + +# Using glob patterns to include/exclude files or folders +--glob=!git/* + +# or +--glob +!git/* + +# Set the colors. +--colors=line:none +--colors=line:style:bold + +# Because who cares about case!?
+--smart-case +``` + +When we use a flag that has a value, we either put the flag and the value on +the same line but delimited by an `=` sign (e.g., `--max-columns=150`), or we +put the flag and the value on two different lines. This is because ripgrep's +argument parser knows to treat the single argument `--max-columns=150` as a +flag with a value, but if we had written `--max-columns 150` in our +configuration file, then ripgrep's argument parser wouldn't know what to do +with it. + +Putting the flag and value on different lines is exactly equivalent and is a +matter of style. + +Comments are encouraged so that you remember what the config is doing. Empty +lines are OK too. + +So let's say you're using the above configuration file, but while you're at a +terminal, you really want to be able to see lines longer than 150 columns. What +do you do? Thankfully, all you need to do is pass `--max-columns 0` (or `-M0` +for short) on the command line, which will override your configuration file's +setting. This works because ripgrep's configuration file is *prepended* to the +explicit arguments you give it on the command line. Since flags given later +override flags given earlier, everything works as expected. This works for most +other flags as well, and each flag's documentation states which other flags +override it. + +If you're confused about what configuration file ripgrep is reading arguments +from, then running ripgrep with the `--debug` flag should help clarify things. +The debug output should note what config file is being loaded and the arguments +that have been read from the configuration. + +Finally, if you want to make absolutely sure that ripgrep *isn't* reading a +configuration file, then you can pass the `--no-config` flag, which will always +prevent ripgrep from reading extraneous configuration from the environment, +regardless of what other methods of configuration are added to ripgrep in the +future. 
+ + +### File encoding + +[Text encoding](https://en.wikipedia.org/wiki/Character_encoding) is a complex +topic, but we can try to summarize its relevancy to ripgrep: + +* Files are generally just a bundle of bytes. There is no reliable way to know + their encoding. +* Either the encoding of the pattern must match the encoding of the files being + searched, or a form of transcoding must be performed that converts either the + pattern or the file to the same encoding as the other. +* ripgrep tends to work best on plain text files, and among plain text files, + the most popular encodings likely consist of ASCII, latin1 or UTF-8. As + a special exception, UTF-16 is prevalent in Windows environments. + +In light of the above, here is how ripgrep behaves: + +* All input is assumed to be ASCII compatible (which means every byte that + corresponds to an ASCII codepoint actually is an ASCII codepoint). This + includes ASCII itself, latin1 and UTF-8. +* ripgrep works best with UTF-8. For example, ripgrep's regular expression + engine supports Unicode features. Namely, character classes like `\w` will + match all word characters by Unicode's definition and `.` will match any + Unicode codepoint instead of any byte. These constructions assume UTF-8, + so they simply won't match when they come across bytes in a file that aren't + UTF-8. +* To handle the UTF-16 case, ripgrep will do something called "BOM sniffing" + by default. That is, the first three bytes of a file will be read, and if + they correspond to a UTF-16 BOM, then ripgrep will transcode the contents of + the file from UTF-16 to UTF-8, and then execute the search on the transcoded + version of the file. (This incurs a performance penalty since transcoding + is slower than regex searching.) +* To handle other cases, ripgrep provides a `-E/--encoding` flag, which permits + you to specify an encoding from the + [Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get).
+ ripgrep will assume *all* files searched are the encoding specified and + will perform a transcoding step just like in the UTF-16 case described above. + +By default, ripgrep will not require its input be valid UTF-8. That is, ripgrep +can and will search arbitrary bytes. The key here is that if you're searching +content that isn't UTF-8, then the usefulness of your pattern will degrade. If +you're searching bytes that aren't ASCII compatible, then it's likely the +pattern won't find anything. With all that said, this mode of operation is +important, because it lets you find ASCII or UTF-8 *within* files that are +otherwise arbitrary bytes. + +Finally, it is possible to disable ripgrep's Unicode support from within the +pattern regular expression. For example, let's say you wanted `.` to match any +byte rather than any Unicode codepoint. (You might want this while searching a +binary file, since `.` by default will not match invalid UTF-8.) You could do +this by disabling Unicode via a regular expression flag: + +``` +$ rg '(?-u:.)' +``` + +This works for any part of the pattern. For example, the following will find +any Unicode word character followed by any ASCII word character followed by +another Unicode word character: + +``` +$ rg '\w(?-u:\w)\w' +``` + + +### Common options + +ripgrep has a lot of flags. Too many to keep in your head at once. This section +is intended to give you a sampling of some of the most important and frequently +used options that will likely impact how you use ripgrep on a regular basis. + +* `-h`: Show ripgrep's condensed help output. +* `--help`: Show ripgrep's longer form help output. (Nearly what you'd find in + ripgrep's man page, so pipe it into a pager!) +* `-i/--ignore-case`: When searching for a pattern, ignore case differences. + That is `rg -i fast` matches `fast`, `fASt`, `FAST`, etc. +* `-S/--smart-case`: This is similar to `--ignore-case`, but disables itself + if the pattern contains any uppercase letters. 
Usually this flag is put into an + alias or a config file. +* `-w/--word-regexp`: Require that all matches of the pattern be surrounded + by word boundaries. That is, given `pattern`, the `--word-regexp` flag will + cause ripgrep to behave as if `pattern` were actually `\b(?:pattern)\b`. +* `-c/--count`: Report a count of total matched lines. +* `--files`: Print the files that ripgrep *would* search, but don't actually + search them. +* `-a/--text`: Search binary files as if they were plain text. +* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz). This is + disabled by default. +* `-C/--context`: Show the lines surrounding a match. +* `--sort-files`: Force ripgrep to sort its output by file name. (This disables + parallelism, so it might be slower.) +* `-L/--follow`: Follow symbolic links while recursively searching. +* `-M/--max-columns`: Limit the length of lines printed by ripgrep. +* `--debug`: Shows ripgrep's debug output. This is useful for understanding + why a particular file might be ignored from search, or what kinds of + configuration ripgrep is loading from the environment. diff -Nru ripgrep-0.6.0/HomebrewFormula/ripgrep-bin.rb ripgrep-0.10.0.3/HomebrewFormula/ripgrep-bin.rb --- ripgrep-0.6.0/HomebrewFormula/ripgrep-bin.rb 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/HomebrewFormula/ripgrep-bin.rb 2018-09-10 21:10:55.000000000 +0000 @@ -1,17 +1,23 @@ class RipgrepBin < Formula - version '0.5.2' - desc "Search tool like grep and The Silver Searcher." + version '0.10.0' + desc "Recursively search directories for a regex pattern." homepage "https://github.com/BurntSushi/ripgrep" - url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz" - sha256 "a0326a84af8517ad707d8c7cccba6e112de27822c391cc0937e4727fbb6c48f4" + + if OS.mac?
+ url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz" + sha256 "32754b4173ac87a7bfffd436d601a49362676eb1841ab33440f2f49c002c8967" + elsif OS.linux? + url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-unknown-linux-musl.tar.gz" + sha256 "c76080aa807a339b44139885d77d15ad60ab8cdd2c2fdaf345d0985625bc0f97" + end conflicts_with "ripgrep" def install bin.install "rg" - man1.install "rg.1" + man1.install "doc/rg.1" - bash_completion.install "complete/rg.bash-completion" + bash_completion.install "complete/rg.bash" fish_completion.install "complete/rg.fish" zsh_completion.install "complete/_rg" end diff -Nru ripgrep-0.6.0/ignore/Cargo.toml ripgrep-0.10.0.3/ignore/Cargo.toml --- ripgrep-0.6.0/ignore/Cargo.toml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/Cargo.toml 2018-09-10 21:10:55.000000000 +0000 @@ -1,6 +1,6 @@ [package] name = "ignore" -version = "0.2.2" #:version +version = "0.4.4" #:version authors = ["Andrew Gallant "] description = """ A fast library for efficiently matching ignore files such as `.gitignore` @@ -18,20 +18,21 @@ bench = false [dependencies] -crossbeam = "0.2" -globset = { version = "0.2.0", path = "../globset" } -lazy_static = "0.2" -log = "0.3" -memchr = "1" -regex = "0.2.1" -thread_local = "0.3.2" -walkdir = "1.0.7" +crossbeam-channel = "0.2.4" +globset = { version = "0.4.2", path = "../globset" } +lazy_static = "1.1.0" +log = "0.4.5" +memchr = "2.0.2" +regex = "1.0.5" +same-file = "1.0.3" +thread_local = "0.3.6" +walkdir = "2.2.5" + +[target.'cfg(windows)'.dependencies.winapi-util] +version = "0.1.1" [dev-dependencies] -tempdir = "0.3.5" +tempdir = "0.3.7" [features] simd-accel = ["globset/simd-accel"] - -[profile.release] -debug = true diff -Nru ripgrep-0.6.0/ignore/examples/walk.rs ripgrep-0.10.0.3/ignore/examples/walk.rs --- ripgrep-0.6.0/ignore/examples/walk.rs 2017-08-24 00:05:27.000000000 +0000 +++ 
ripgrep-0.10.0.3/ignore/examples/walk.rs 2018-09-10 21:10:55.000000000 +0000 @@ -1,17 +1,12 @@ -#![allow(dead_code, unused_imports, unused_mut, unused_variables)] - -extern crate crossbeam; +extern crate crossbeam_channel as channel; extern crate ignore; extern crate walkdir; use std::env; use std::io::{self, Write}; use std::path::Path; -use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; use std::thread; -use crossbeam::sync::MsQueue; use ignore::WalkBuilder; use walkdir::WalkDir; @@ -19,7 +14,7 @@ let mut path = env::args().nth(1).unwrap(); let mut parallel = false; let mut simple = false; - let queue: Arc>> = Arc::new(MsQueue::new()); + let (tx, rx) = channel::bounded::(100); if path == "parallel" { path = env::args().nth(2).unwrap(); parallel = true; @@ -28,10 +23,9 @@ simple = true; } - let stdout_queue = queue.clone(); let stdout_thread = thread::spawn(move || { let mut stdout = io::BufWriter::new(io::stdout()); - while let Some(dent) = stdout_queue.pop() { + for dent in rx { write_path(&mut stdout, dent.path()); } }); @@ -39,28 +33,26 @@ if parallel { let walker = WalkBuilder::new(path).threads(6).build_parallel(); walker.run(|| { - let queue = queue.clone(); + let tx = tx.clone(); Box::new(move |result| { use ignore::WalkState::*; - queue.push(Some(DirEntry::Y(result.unwrap()))); + tx.send(DirEntry::Y(result.unwrap())); Continue }) }); } else if simple { - let mut stdout = io::BufWriter::new(io::stdout()); let walker = WalkDir::new(path); for result in walker { - queue.push(Some(DirEntry::X(result.unwrap()))); + tx.send(DirEntry::X(result.unwrap())); } } else { - let mut stdout = io::BufWriter::new(io::stdout()); let walker = WalkBuilder::new(path).build(); for result in walker { - queue.push(Some(DirEntry::Y(result.unwrap()))); + tx.send(DirEntry::Y(result.unwrap())); } } - queue.push(None); + drop(tx); stdout_thread.join().unwrap(); } diff -Nru ripgrep-0.6.0/ignore/README.md ripgrep-0.10.0.3/ignore/README.md --- 
ripgrep-0.6.0/ignore/README.md 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -4,7 +4,7 @@ various filters such as globs, file types and `.gitignore` files. This crate also provides lower level direct access to gitignore and file type matchers. -[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![](https://img.shields.io/crates/v/ignore.svg)](https://crates.io/crates/ignore) @@ -20,7 +20,7 @@ ```toml [dependencies] -ignore = "0.2" +ignore = "0.4" ``` and this to your crate root: diff -Nru ripgrep-0.6.0/ignore/src/dir.rs ripgrep-0.10.0.3/ignore/src/dir.rs --- ripgrep-0.6.0/ignore/src/dir.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/src/dir.rs 2018-09-10 21:10:55.000000000 +0000 @@ -14,7 +14,7 @@ // well. use std::collections::HashMap; -use std::ffi::OsString; +use std::ffi::{OsString, OsStr}; use std::path::{Path, PathBuf}; use std::sync::{Arc, RwLock}; @@ -65,6 +65,8 @@ hidden: bool, /// Whether to read .ignore files. ignore: bool, + /// Whether to respect any ignore files in parent directories. + parents: bool, /// Whether to read git's global gitignore file. git_global: bool, /// Whether to read .gitignore files. @@ -73,13 +75,6 @@ git_exclude: bool, } -impl IgnoreOptions { - /// Returns true if at least one type of ignore rules should be matched. - fn has_any_ignore_options(&self) -> bool { - self.ignore || self.git_global || self.git_ignore || self.git_exclude - } -} - /// Ignore is a matcher useful for recursively walking one or more directories. 
#[derive(Clone, Debug)] pub struct Ignore(Arc); @@ -109,8 +104,12 @@ /// The absolute base path of this matcher. Populated only if parent /// directories are added. absolute_base: Option>, - /// Explicit ignore matchers specified by the caller. + /// Explicit global ignore matchers specified by the caller. explicit_ignores: Arc>, + /// Ignore files used in addition to `.ignore` + custom_ignore_filenames: Arc>, + /// The matcher for custom ignore files + custom_ignore_matcher: Gitignore, /// The matcher for .ignore files. ignore_matcher: Gitignore, /// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore. @@ -127,7 +126,6 @@ impl Ignore { /// Return the directory path of this matcher. - #[allow(dead_code)] pub fn path(&self) -> &Path { &self.0.dir } @@ -155,6 +153,15 @@ &self, path: P, ) -> (Ignore, Option) { + if !self.0.opts.parents + && !self.0.opts.git_ignore + && !self.0.opts.git_exclude + && !self.0.opts.git_global + { + // If we never need info from parent directories, then don't do + // anything. + return (self.clone(), None); + } if !self.is_root() { panic!("Ignore::add_parents called on non-root matcher"); } @@ -187,6 +194,7 @@ errs.maybe_push(err); igtmp.is_absolute_parent = true; igtmp.absolute_base = Some(absolute_base.clone()); + igtmp.has_git = parent.join(".git").exists(); ig = Ignore(Arc::new(igtmp)); compiled.insert(parent.as_os_str().to_os_string(), ig.clone()); } @@ -211,14 +219,21 @@ /// Like add_child, but takes a full path and returns an IgnoreInner. 
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option) { - static IG_NAMES: &'static [&'static str] = &[".rgignore", ".ignore"]; - let mut errs = PartialErrorBuilder::default(); + let custom_ig_matcher = + if self.0.custom_ignore_filenames.is_empty() { + Gitignore::empty() + } else { + let (m, err) = + create_gitignore(&dir, &self.0.custom_ignore_filenames); + errs.maybe_push(err); + m + }; let ig_matcher = if !self.0.opts.ignore { Gitignore::empty() } else { - let (m, err) = create_gitignore(&dir, IG_NAMES); + let (m, err) = create_gitignore(&dir, &[".ignore"]); errs.maybe_push(err); m }; @@ -247,16 +262,29 @@ is_absolute_parent: false, absolute_base: self.0.absolute_base.clone(), explicit_ignores: self.0.explicit_ignores.clone(), + custom_ignore_filenames: self.0.custom_ignore_filenames.clone(), + custom_ignore_matcher: custom_ig_matcher, ignore_matcher: ig_matcher, git_global_matcher: self.0.git_global_matcher.clone(), git_ignore_matcher: gi_matcher, git_exclude_matcher: gi_exclude_matcher, - has_git: dir.join(".git").is_dir(), + has_git: dir.join(".git").exists(), opts: self.0.opts, }; (ig, errs.into_error_option()) } + /// Returns true if at least one type of ignore rule should be matched. + fn has_any_ignore_rules(&self) -> bool { + let opts = self.0.opts; + let has_custom_ignore_files = !self.0.custom_ignore_filenames.is_empty(); + let has_explicit_ignores = !self.0.explicit_ignores.is_empty(); + + opts.ignore || opts.git_global || opts.git_ignore + || opts.git_exclude || has_custom_ignore_files + || has_explicit_ignores + } + /// Returns a match indicating whether the given file path should be /// ignored or not. 
/// @@ -285,7 +313,7 @@ } } let mut whitelisted = Match::None; - if self.0.opts.has_any_ignore_options() { + if self.has_any_ignore_rules() { let mat = self.matched_ignore(path, is_dir); if mat.is_ignore() { return mat; @@ -315,46 +343,59 @@ path: &Path, is_dir: bool, ) -> Match> { - let (mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) = - (Match::None, Match::None, Match::None, Match::None); + let (mut m_custom_ignore, mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) = + (Match::None, Match::None, Match::None, Match::None, Match::None); + let any_git = self.parents().any(|ig| ig.0.has_git); let mut saw_git = false; for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) { + if m_custom_ignore.is_none() { + m_custom_ignore = + ig.0.custom_ignore_matcher.matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } if m_ignore.is_none() { m_ignore = ig.0.ignore_matcher.matched(path, is_dir) .map(IgnoreMatch::gitignore); } - if !saw_git && m_gi.is_none() { + if any_git && !saw_git && m_gi.is_none() { m_gi = ig.0.git_ignore_matcher.matched(path, is_dir) .map(IgnoreMatch::gitignore); } - if !saw_git && m_gi_exclude.is_none() { + if any_git && !saw_git && m_gi_exclude.is_none() { m_gi_exclude = ig.0.git_exclude_matcher.matched(path, is_dir) .map(IgnoreMatch::gitignore); } saw_git = saw_git || ig.0.has_git; } - if let Some(abs_parent_path) = self.absolute_base() { - let path = abs_parent_path.join(path); - for ig in self.parents().skip_while(|ig|!ig.0.is_absolute_parent) { - if m_ignore.is_none() { - m_ignore = - ig.0.ignore_matcher.matched(&path, is_dir) - .map(IgnoreMatch::gitignore); - } - if !saw_git && m_gi.is_none() { - m_gi = - ig.0.git_ignore_matcher.matched(&path, is_dir) - .map(IgnoreMatch::gitignore); + if self.0.opts.parents { + if let Some(abs_parent_path) = self.absolute_base() { + let path = abs_parent_path.join(path); + for ig in self.parents().skip_while(|ig|!ig.0.is_absolute_parent) { + if m_custom_ignore.is_none() { + 
m_custom_ignore = + ig.0.custom_ignore_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if m_ignore.is_none() { + m_ignore = + ig.0.ignore_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if any_git && !saw_git && m_gi.is_none() { + m_gi = + ig.0.git_ignore_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if any_git && !saw_git && m_gi_exclude.is_none() { + m_gi_exclude = + ig.0.git_exclude_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + saw_git = saw_git || ig.0.has_git; } - if !saw_git && m_gi_exclude.is_none() { - m_gi_exclude = - ig.0.git_exclude_matcher.matched(&path, is_dir) - .map(IgnoreMatch::gitignore); - } - saw_git = saw_git || ig.0.has_git; } } for gi in self.0.explicit_ignores.iter().rev() { @@ -363,10 +404,16 @@ } m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore); } - let m_global = self.0.git_global_matcher.matched(&path, is_dir) - .map(IgnoreMatch::gitignore); + let m_global = + if any_git { + self.0.git_global_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore) + } else { + Match::None + }; - m_ignore.or(m_gi).or(m_gi_exclude).or(m_global).or(m_explicit) + m_custom_ignore.or(m_ignore).or(m_gi).or(m_gi_exclude).or(m_global).or(m_explicit) } /// Returns an iterator over parent ignore matchers, including this one. @@ -409,8 +456,10 @@ overrides: Arc, /// A type matcher (default is empty). types: Arc, - /// Explicit ignore matchers. + /// Explicit global ignore matchers. explicit_ignores: Vec, + /// Ignore files in addition to .ignore. + custom_ignore_filenames: Vec, /// Ignore config. 
opts: IgnoreOptions, } @@ -426,9 +475,11 @@ overrides: Arc::new(Override::empty()), types: Arc::new(Types::empty()), explicit_ignores: vec![], + custom_ignore_filenames: vec![], opts: IgnoreOptions { hidden: true, ignore: true, + parents: true, git_global: true, git_ignore: true, git_exclude: true, @@ -451,6 +502,7 @@ } gi }; + Ignore(Arc::new(IgnoreInner { compiled: Arc::new(RwLock::new(HashMap::new())), dir: self.dir.clone(), @@ -460,6 +512,8 @@ is_absolute_parent: true, absolute_base: None, explicit_ignores: Arc::new(self.explicit_ignores.clone()), + custom_ignore_filenames: Arc::new(self.custom_ignore_filenames.clone()), + custom_ignore_matcher: Gitignore::empty(), ignore_matcher: Gitignore::empty(), git_global_matcher: Arc::new(git_global_matcher), git_ignore_matcher: Gitignore::empty(), @@ -495,6 +549,20 @@ self } + /// Add a custom ignore file name + /// + /// These ignore files have higher precedence than all other ignore files. + /// + /// When specifying multiple names, earlier names have lower precedence than + /// later names. + pub fn add_custom_ignore_filename>( + &mut self, + file_name: S + ) -> &mut IgnoreBuilder { + self.custom_ignore_filenames.push(file_name.as_ref().to_os_string()); + self + } + /// Enables ignoring hidden files. /// /// This is enabled by default. @@ -514,6 +582,17 @@ self } + /// Enables reading ignore files from parent directories. + /// + /// If this is enabled, then .gitignore files in parent directories of each + /// file path given are respected. Otherwise, they are ignored. + /// + /// This is enabled by default. + pub fn parents(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.parents = yes; + self + } + /// Add a global gitignore matcher. /// /// Its precedence is lower than both normal `.gitignore` files and @@ -556,14 +635,14 @@ /// order given (earlier names have lower precedence than later names). /// /// I/O errors are ignored. 
-pub fn create_gitignore( +pub fn create_gitignore>( dir: &Path, - names: &[&str], + names: &[T], ) -> (Gitignore, Option) { let mut builder = GitignoreBuilder::new(dir); let mut errs = PartialErrorBuilder::default(); for name in names { - let gipath = dir.join(name); + let gipath = dir.join(name.as_ref()); errs.maybe_push_ignore_io(builder.add(gipath)); } let gi = match builder.build() { @@ -635,6 +714,7 @@ #[test] fn gitignore() { let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git")); wfile(td.path().join(".gitignore"), "foo\n!bar"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); @@ -645,6 +725,18 @@ } #[test] + fn gitignore_no_git() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_none()); + assert!(ig.matched("bar", false).is_none()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] fn ignore() { let td = TempDir::new("ignore-test-").unwrap(); wfile(td.path().join(".ignore"), "foo\n!bar"); @@ -656,6 +748,53 @@ assert!(ig.matched("baz", false).is_none()); } + #[test] + fn custom_ignore() { + let td = TempDir::new("ignore-test-").unwrap(); + let custom_ignore = ".customignore"; + wfile(td.path().join(custom_ignore), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore) + .build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + // Tests that a custom ignore file will override an .ignore. 
+ #[test] + fn custom_ignore_over_ignore() { + let td = TempDir::new("ignore-test-").unwrap(); + let custom_ignore = ".customignore"; + wfile(td.path().join(".ignore"), "foo"); + wfile(td.path().join(custom_ignore), "!foo"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore) + .build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that earlier custom ignore files have lower precedence than later. + #[test] + fn custom_ignore_precedence() { + let td = TempDir::new("ignore-test-").unwrap(); + let custom_ignore1 = ".customignore1"; + let custom_ignore2 = ".customignore2"; + wfile(td.path().join(custom_ignore1), "foo"); + wfile(td.path().join(custom_ignore2), "!foo"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore1) + .add_custom_ignore_filename(custom_ignore2) + .build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + // Tests that an .ignore will override a .gitignore. #[test] fn ignore_over_gitignore() { @@ -706,6 +845,7 @@ #[test] fn errored_partial() { let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git")); wfile(td.path().join(".gitignore"), "f**oo\nbar"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); diff -Nru ripgrep-0.6.0/ignore/src/gitignore.rs ripgrep-0.10.0.3/ignore/src/gitignore.rs --- ripgrep-0.6.0/ignore/src/gitignore.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/src/gitignore.rs 2018-09-10 21:10:55.000000000 +0000 @@ -66,6 +66,12 @@ pub fn is_only_dir(&self) -> bool { self.is_only_dir } + + /// Returns true if and only if this glob has a `**/` prefix. 
+ fn has_doublestar_prefix(&self) -> bool { + self.actual.starts_with("**/") + || (self.actual == "**" && self.is_only_dir) + } } /// Gitignore is a matcher for the globs in one or more gitignore files @@ -77,7 +83,7 @@ globs: Vec, num_ignores: u64, num_whitelists: u64, - matches: Arc>>>, + matches: Option>>>>, } impl Gitignore { @@ -137,7 +143,14 @@ /// /// Its path is empty. pub fn empty() -> Gitignore { - GitignoreBuilder::new("").build().unwrap() + Gitignore { + set: GlobSet::empty(), + root: PathBuf::from(""), + globs: vec![], + num_ignores: 0, + num_whitelists: 0, + matches: None, + } } /// Returns the directory containing this gitignore matcher. @@ -207,6 +220,11 @@ /// determined by a common suffix of the directory containing this /// gitignore) is stripped. If there is no common suffix/prefix overlap, /// then `path` is assumed to be relative to this matcher. + /// + /// # Panics + /// + /// This method panics if the given file path is not under the root path + /// of this matcher. pub fn matched_path_or_any_parents>( &self, path: P, @@ -216,10 +234,8 @@ return Match::None; } let mut path = self.strip(path.as_ref()); - debug_assert!( - !path.has_root(), - "path is expect to be under the root" - ); + assert!(!path.has_root(), "path is expected to be under the root"); + match self.matched_stripped(path, is_dir) { Match::None => (), // walk up a_match => return a_match, @@ -243,7 +259,7 @@ return Match::None; } let path = path.as_ref(); - let _matches = self.matches.get_default(); + let _matches = self.matches.as_ref().unwrap().get_default(); let mut matches = _matches.borrow_mut(); let candidate = Candidate::new(path); self.set.matches_candidate_into(&candidate, &mut *matches); @@ -278,7 +294,10 @@ // BUT, a file name might not have any directory components to it, // in which case, we don't want to accidentally strip any part of the // file name. 
- if !is_file_name(path) { + // + // As an additional special case, if the root is just `.`, then we + // shouldn't try to strip anything, e.g., when path begins with a `.`. + if self.root != Path::new(".") && !is_file_name(path) { if let Some(p) = strip_prefix(&self.root, path) { path = p; // If we're left with a leading slash, get rid of it. @@ -292,6 +311,7 @@ } /// Builds a matcher for a single set of globs from a .gitignore file. +#[derive(Clone, Debug)] pub struct GitignoreBuilder { builder: GlobSetBuilder, root: PathBuf, @@ -322,20 +342,20 @@ pub fn build(&self) -> Result { let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count(); let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count(); - let set = try!( + let set = self.builder.build().map_err(|err| { Error::Glob { glob: None, err: err.to_string(), } - })); + })?; Ok(Gitignore { set: set, root: self.root.clone(), globs: self.globs.clone(), num_ignores: nignore as u64, num_whitelists: nwhite as u64, - matches: Arc::new(ThreadLocal::default()), + matches: Some(Arc::new(ThreadLocal::default())), }) } @@ -383,7 +403,7 @@ gitignore: &str, ) -> Result<&mut GitignoreBuilder, Error> { for line in gitignore.lines() { - try!(self.add_line(from.clone(), line)); + self.add_line(from.clone(), line)?; } Ok(self) } @@ -416,7 +436,6 @@ is_only_dir: false, }; let mut literal_separator = false; - let has_slash = line.chars().any(|c| c == '/'); let mut is_absolute = false; if line.starts_with("\\!") || line.starts_with("\\#") { line = &line[1..]; @@ -447,15 +466,15 @@ // If there is a literal slash, then we note that so that globbing // doesn't let wildcards match slashes. glob.actual = line.to_string(); - if has_slash { + if is_absolute || line.chars().any(|c| c == '/') { literal_separator = true; } - // If there was a leading slash, then this is a glob that must - // match the entire path name. Otherwise, we should let it match - // anywhere, so use a **/ prefix. 
- if !is_absolute { + // If there was a slash, then this is a glob that must match the entire + // path name. Otherwise, we should let it match anywhere, so use a **/ + // prefix. + if !literal_separator { // ... but only if we don't already have a **/ prefix. - if !glob.actual.starts_with("**/") { + if !glob.has_doublestar_prefix() { glob.actual = format!("**/{}", glob.actual); } } @@ -465,17 +484,18 @@ if glob.actual.ends_with("/**") { glob.actual = format!("{}/*", glob.actual); } - let parsed = try!( + let parsed = GlobBuilder::new(&glob.actual) .literal_separator(literal_separator) .case_insensitive(self.case_insensitive) + .backslash_escape(true) .build() .map_err(|err| { Error::Glob { glob: Some(glob.original.clone()), err: err.kind().to_string(), } - })); + })?; self.builder.add(parsed); self.globs.push(glob); Ok(self) @@ -483,6 +503,8 @@ /// Toggle whether the globs should be matched case insensitively or not. /// + /// When this option is changed, only globs added after the change will be affected. + /// /// This is disabled by default. pub fn case_insensitive( &mut self, yes: bool @@ -496,16 +518,27 @@ /// /// Note that the file path returned may not exist. fn gitconfig_excludes_path() -> Option { - gitconfig_contents() - .and_then(|data| parse_excludes_file(&data)) - .or_else(excludes_file_default) + // git supports $HOME/.gitconfig and $XDG_CONFIG_DIR/git/config. Notably, + // both can be active at the same time, where $HOME/.gitconfig takes + // precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then + // we're done. + match gitconfig_home_contents().and_then(|x| parse_excludes_file(&x)) { + Some(path) => return Some(path), + None => {} + } + match gitconfig_xdg_contents().and_then(|x| parse_excludes_file(&x)) { + Some(path) => return Some(path), + None => {} + } + excludes_file_default() } -/// Returns the file contents of git's global config file, if one exists. 
-fn gitconfig_contents() -> Option> { - let home = match env::var_os("HOME") { +/// Returns the file contents of git's global config file, if one exists, in +/// the user's home directory. +fn gitconfig_home_contents() -> Option> { + let home = match home_dir() { None => return None, - Some(home) => PathBuf::from(home), + Some(home) => home, }; let mut file = match File::open(home.join(".gitconfig")) { Err(_) => return None, @@ -515,13 +548,28 @@ file.read_to_end(&mut contents).ok().map(|_| contents) } +/// Returns the file contents of git's global config file, if one exists, in +/// the user's XDG_CONFIG_DIR directory. +fn gitconfig_xdg_contents() -> Option> { + let path = env::var_os("XDG_CONFIG_HOME") + .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) }) + .or_else(|| home_dir().map(|p| p.join(".config"))) + .map(|x| x.join("git/config")); + let mut file = match path.and_then(|p| File::open(p).ok()) { + None => return None, + Some(file) => io::BufReader::new(file), + }; + let mut contents = vec![]; + file.read_to_end(&mut contents).ok().map(|_| contents) +} + /// Returns the default file path for a global .gitignore file. /// /// Specifically, this respects XDG_CONFIG_HOME. fn excludes_file_default() -> Option { env::var_os("XDG_CONFIG_HOME") .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) }) - .or_else(|| env::home_dir().map(|p| p.join(".config"))) + .or_else(|| home_dir().map(|p| p.join(".config"))) .map(|x| x.join("git/ignore")) } @@ -533,7 +581,8 @@ // a full INI parser. Yuck. lazy_static! { static ref RE: Regex = Regex::new( - r"(?ium)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap(); + r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$" + ).unwrap(); }; let caps = match RE.captures(data) { None => return None, @@ -544,13 +593,22 @@ /// Expands ~ in file paths to the value of $HOME. 
fn expand_tilde(path: &str) -> String { - let home = match env::var("HOME") { - Err(_) => return path.to_string(), - Ok(home) => home, + let home = match home_dir() { + None => return path.to_string(), + Some(home) => home.to_string_lossy().into_owned(), }; path.replace("~", &home) } +/// Returns the location of the user's home directory. +fn home_dir() -> Option { + // We're fine with using env::home_dir for now. Its bugs are, IMO, pretty + // minor corner cases. We should still probably eventually migrate to + // the `dirs` crate to get a proper implementation. + #![allow(deprecated)] + env::home_dir() +} + #[cfg(test)] mod tests { use std::path::Path; @@ -617,9 +675,20 @@ ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz"); ignored!(ig27, ROOT, "foo/", "xyz/foo", true); - ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs"); - ignored!(ig29, "./src", "/llvm/", "./src/llvm", true); - ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true); + ignored!(ig28, "./src", "/llvm/", "./src/llvm", true); + ignored!(ig29, ROOT, "node_modules/ ", "node_modules", true); + ignored!(ig30, ROOT, "**/", "foo/bar", true); + ignored!(ig31, ROOT, "path1/*", "path1/foo"); + ignored!(ig32, ROOT, ".a/b", ".a/b"); + ignored!(ig33, "./", ".a/b", ".a/b"); + ignored!(ig34, ".", ".a/b", ".a/b"); + ignored!(ig35, "./.", ".a/b", ".a/b"); + ignored!(ig36, "././", ".a/b", ".a/b"); + ignored!(ig37, "././.", ".a/b", ".a/b"); + ignored!(ig38, ROOT, "\\[", "["); + ignored!(ig39, ROOT, "\\?", "?"); + ignored!(ig40, ROOT, "\\*", "*"); + ignored!(ig41, ROOT, "\\a", "a"); not_ignored!(ignot1, ROOT, "amonths", "months"); not_ignored!(ignot2, ROOT, "monthsa", "months"); @@ -638,6 +707,9 @@ ignot14, "./third_party/protobuf", "m4/ltoptions.m4", "./third_party/protobuf/csharp/src/packages/repositories.config"); not_ignored!(ignot15, ROOT, "!/bar", "foo/bar"); + not_ignored!(ignot16, ROOT, "*\n!**/", "foo", true); + 
not_ignored!(ignot17, ROOT, "src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot18, ROOT, "path1/*", "path2/path1/foo"); fn bytes(s: &str) -> Vec { s.to_string().into_bytes() diff -Nru ripgrep-0.6.0/ignore/src/lib.rs ripgrep-0.10.0.3/ignore/src/lib.rs --- ripgrep-0.6.0/ignore/src/lib.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/src/lib.rs 2018-09-10 21:10:55.000000000 +0000 @@ -46,7 +46,7 @@ #![deny(missing_docs)] -extern crate crossbeam; +extern crate crossbeam_channel as channel; extern crate globset; #[macro_use] extern crate lazy_static; @@ -54,10 +54,13 @@ extern crate log; extern crate memchr; extern crate regex; +extern crate same_file; #[cfg(test)] extern crate tempdir; extern crate thread_local; extern crate walkdir; +#[cfg(windows)] +extern crate winapi_util; use std::error; use std::fmt; @@ -115,7 +118,7 @@ Glob { /// The original glob that caused this error. This glob, when /// available, always corresponds to the glob provided by an end user. - /// e.g., It is the glob as writtein in a `.gitignore` file. + /// e.g., It is the glob as written in a `.gitignore` file. /// /// (This glob may be distinct from the glob that is actually /// compiled, after accounting for `gitignore` semantics.) 
@@ -129,6 +132,44 @@ InvalidDefinition, } +impl Clone for Error { + fn clone(&self) -> Error { + match *self { + Error::Partial(ref errs) => Error::Partial(errs.clone()), + Error::WithLineNumber { line, ref err } => { + Error::WithLineNumber { line: line, err: err.clone() } + } + Error::WithPath { ref path, ref err } => { + Error::WithPath { path: path.clone(), err: err.clone() } + } + Error::WithDepth { depth, ref err } => { + Error::WithDepth { depth: depth, err: err.clone() } + } + Error::Loop { ref ancestor, ref child } => { + Error::Loop { + ancestor: ancestor.clone(), + child: child.clone() + } + } + Error::Io(ref err) => { + match err.raw_os_error() { + Some(e) => Error::Io(io::Error::from_raw_os_error(e)), + None => { + Error::Io(io::Error::new(err.kind(), err.to_string())) + } + } + } + Error::Glob { ref glob, ref err } => { + Error::Glob { glob: glob.clone(), err: err.clone() } + } + Error::UnrecognizedFileType(ref err) => { + Error::UnrecognizedFileType(err.clone()) + } + Error::InvalidDefinition => Error::InvalidDefinition, + } + } +} + impl Error { /// Returns true if this is a partial error. /// @@ -198,6 +239,29 @@ } errline.with_path(path) } + + /// Build an error from a walkdir error. 
+ fn from_walkdir(err: walkdir::Error) -> Error { + let depth = err.depth(); + if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) { + return Error::WithDepth { + depth: depth, + err: Box::new(Error::Loop { + ancestor: anc.to_path_buf(), + child: child.to_path_buf(), + }), + }; + } + let path = err.path().map(|p| p.to_path_buf()); + let mut ig_err = Error::Io(io::Error::from(err)); + if let Some(path) = path { + ig_err = Error::WithPath { + path: path, + err: Box::new(ig_err), + }; + } + ig_err + } } impl error::Error for Error { @@ -258,30 +322,6 @@ } } -impl From for Error { - fn from(err: walkdir::Error) -> Error { - let depth = err.depth(); - if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) { - return Error::WithDepth { - depth: depth, - err: Box::new(Error::Loop { - ancestor: anc.to_path_buf(), - child: child.to_path_buf(), - }), - }; - } - let path = err.path().map(|p| p.to_path_buf()); - let mut ig_err = Error::Io(io::Error::from(err)); - if let Some(path) = path { - ig_err = Error::WithPath { - path: path, - err: Box::new(ig_err), - }; - } - ig_err - } -} - #[derive(Debug, Default)] struct PartialErrorBuilder(Vec); diff -Nru ripgrep-0.6.0/ignore/src/overrides.rs ripgrep-0.10.0.3/ignore/src/overrides.rs --- ripgrep-0.6.0/ignore/src/overrides.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/src/overrides.rs 2018-09-10 21:10:55.000000000 +0000 @@ -124,7 +124,7 @@ /// /// Once a matcher is built, no new globs can be added to it. pub fn build(&self) -> Result { - Ok(Override(try!(self.builder.build()))) + Ok(Override(self.builder.build()?)) } /// Add a glob to the set of overrides. @@ -134,17 +134,19 @@ /// namely, `!` at the beginning of a glob will ignore a file. Without `!`, /// all matches of the glob provided are treated as whitelist matches. 
pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> { - try!(self.builder.add_line(None, glob)); + self.builder.add_line(None, glob)?; Ok(self) } /// Toggle whether the globs should be matched case insensitively or not. /// + /// When this option is changed, only globs added after the change will be affected. + /// /// This is disabled by default. pub fn case_insensitive( &mut self, yes: bool ) -> Result<&mut OverrideBuilder, Error> { - try!(self.builder.case_insensitive(yes)); + self.builder.case_insensitive(yes)?; Ok(self) } } @@ -202,8 +204,9 @@ #[test] fn gitignore() { let ov = ov(&["/foo", "bar/*.rs", "baz/**"]); + assert!(ov.matched("bar/lib.rs", false).is_whitelist()); assert!(ov.matched("bar/wat/lib.rs", false).is_ignore()); - assert!(ov.matched("wat/bar/lib.rs", false).is_whitelist()); + assert!(ov.matched("wat/bar/lib.rs", false).is_ignore()); assert!(ov.matched("foo", false).is_whitelist()); assert!(ov.matched("wat/foo", false).is_ignore()); assert!(ov.matched("baz", false).is_ignore()); diff -Nru ripgrep-0.6.0/ignore/src/types.rs ripgrep-0.10.0.3/ignore/src/types.rs --- ripgrep-0.6.0/ignore/src/types.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/src/types.rs 2018-09-10 21:10:55.000000000 +0000 @@ -98,11 +98,17 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[ ("agda", &["*.agda", "*.lagda"]), + ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]), + ("aidl", &["*.aidl"]), + ("amake", &["*.mk", "*.bp"]), ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]), ("asm", &["*.asm", "*.s", "*.S"]), + ("avro", &["*.avdl", "*.avpr", "*.avsc"]), ("awk", &["*.awk"]), + ("bazel", &["*.bzl", "WORKSPACE", "BUILD"]), ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]), - ("c", &["*.c", "*.h", "*.H"]), + ("bzip2", &["*.bz2"]), + ("c", &["*.c", "*.h", "*.H", "*.cats"]), ("cabal", &["*.cabal"]), ("cbor", &["*.cbor"]), ("ceylon", &["*.ceylon"]), @@ -110,67 +116,118 @@ ("cmake", &["*.cmake", 
"CMakeLists.txt"]), ("coffeescript", &["*.coffee"]), ("creole", &["*.creole"]), - ("config", &["*.config"]), + ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]), ("cpp", &[ "*.C", "*.cc", "*.cpp", "*.cxx", - "*.h", "*.H", "*.hh", "*.hpp", "*.inl", + "*.h", "*.H", "*.hh", "*.hpp", "*.hxx", "*.inl", ]), ("crystal", &["Projectfile", "*.cr"]), ("cs", &["*.cs"]), ("csharp", &["*.cs"]), ("cshtml", &["*.cshtml"]), ("css", &["*.css", "*.scss"]), + ("csv", &["*.csv"]), ("cython", &["*.pyx"]), ("dart", &["*.dart"]), ("d", &["*.d"]), + ("dhall", &["*.dhall"]), + ("docker", &["*Dockerfile*"]), ("elisp", &["*.el"]), ("elixir", &["*.ex", "*.eex", "*.exs"]), + ("elm", &["*.elm"]), ("erlang", &["*.erl", "*.hrl"]), + ("fidl", &["*.fidl"]), ("fish", &["*.fish"]), ("fortran", &[ "*.f", "*.F", "*.f77", "*.F77", "*.pfo", "*.f90", "*.F90", "*.f95", "*.F95", ]), ("fsharp", &["*.fs", "*.fsx", "*.fsi"]), + ("gn", &["*.gn", "*.gni"]), ("go", &["*.go"]), + ("gzip", &["*.gz"]), ("groovy", &["*.groovy", "*.gradle"]), ("h", &["*.h", "*.hpp"]), ("hbs", &["*.hbs"]), - ("haskell", &["*.hs", "*.lhs"]), + ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]), + ("hs", &["*.hs", "*.lhs"]), ("html", &["*.htm", "*.html", "*.ejs"]), - ("java", &["*.java"]), - ("jinja", &["*.jinja", "*.jinja2"]), + ("idris", &["*.idr", "*.lidr"]), + ("java", &["*.java", "*.jsp"]), + ("jinja", &["*.j2", "*.jinja", "*.jinja2"]), ("js", &[ "*.js", "*.jsx", "*.vue", ]), - ("json", &["*.json"]), + ("json", &["*.json", "composer.lock"]), ("jsonl", &["*.jsonl"]), ("julia", &["*.jl"]), + ("jupyter", &["*.ipynb", "*.jpynb"]), ("jl", &["*.jl"]), ("kotlin", &["*.kt", "*.kts"]), ("less", &["*.less"]), + ("license", &[ + // General + "COPYING", "COPYING[.-]*", + "COPYRIGHT", "COPYRIGHT[.-]*", + "EULA", "EULA[.-]*", + "licen[cs]e", "licen[cs]e.*", + "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*", + "NOTICE", "NOTICE[.-]*", + "PATENTS", "PATENTS[.-]*", + "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*", + // GPL (gpl.txt, etc.) 
+ "agpl[.-]*", + "gpl[.-]*", + "lgpl[.-]*", + // Other license-specific (APACHE-2.0.txt, etc.) + "AGPL-*[0-9]*", + "APACHE-*[0-9]*", + "BSD-*[0-9]*", + "CC-BY-*", + "GFDL-*[0-9]*", + "GNU-*[0-9]*", + "GPL-*[0-9]*", + "LGPL-*[0-9]*", + "MIT-*[0-9]*", + "MPL-*[0-9]*", + "OFL-*[0-9]*", + ]), ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), ("log", &["*.log"]), ("lua", &["*.lua"]), + ("lzma", &["*.lzma"]), + ("lz4", &["*.lz4"]), ("m4", &["*.ac", "*.m4"]), - ("make", &["gnumakefile", "Gnumakefile", "GNUmakefile", "makefile", "Makefile", "*.mk", "*.mak"]), + ("make", &[ + "gnumakefile", "Gnumakefile", "GNUmakefile", + "makefile", "Makefile", + "*.mk", "*.mak" + ]), + ("mako", &["*.mako", "*.mao"]), ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]), ("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]), + ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]), ("matlab", &["*.m"]), ("mk", &["mkfile"]), ("ml", &["*.ml"]), - ("msbuild", &["*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets"]), + ("msbuild", &[ + "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets" + ]), ("nim", &["*.nim"]), ("nix", &["*.nix"]), ("objc", &["*.h", "*.m"]), ("objcpp", &["*.h", "*.mm"]), ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]), ("org", &["*.org"]), + ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]), ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]), ("pdf", &["*.pdf"]), ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), ("pod", &["*.pod"]), + ("protobuf", &["*.proto"]), ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]), + ("puppet", &["*.erb", "*.pp", "*.rb"]), + ("purs", &["*.purs"]), ("py", &["*.py"]), ("qmake", &["*.pro", "*.pri", "*.prf"]), ("readme", &["README*", "*README"]), @@ -180,32 +237,74 @@ ("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]), ("rust", &["*.rs"]), ("sass", &["*.sass", "*.scss"]), - ("scala", &["*.scala"]), - ("sh", &["*.bash", "*.csh", "*.ksh", 
"*.sh", "*.tcsh"]), + ("scala", &["*.scala", "*.sbt"]), + ("sh", &[ + // Portable/misc. init files + ".login", ".logout", ".profile", "profile", + // bash-specific init files + ".bash_login", "bash_login", + ".bash_logout", "bash_logout", + ".bash_profile", "bash_profile", + ".bashrc", "bashrc", "*.bashrc", + // csh-specific init files + ".cshrc", "*.cshrc", + // ksh-specific init files + ".kshrc", "*.kshrc", + // tcsh-specific init files + ".tcshrc", + // zsh-specific init files + ".zshenv", "zshenv", + ".zlogin", "zlogin", + ".zlogout", "zlogout", + ".zprofile", "zprofile", + ".zshrc", "zshrc", + // Extensions + "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh", + ]), + ("smarty", &["*.tpl"]), + ("sml", &["*.sml", "*.sig"]), + ("soy", &["*.soy"]), ("spark", &["*.spark"]), + ("sql", &["*.sql", "*.psql"]), ("stylus", &["*.styl"]), - ("sql", &["*.sql"]), ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), ("svg", &["*.svg"]), ("swift", &["*.swift"]), ("swig", &["*.def", "*.i"]), + ("systemd", &[ + "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path", + "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target", + "*.timer", + ]), ("taskpaper", &["*.taskpaper"]), ("tcl", &["*.tcl"]), ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib"]), ("textile", &["*.textile"]), + ("tf", &["*.tf"]), ("ts", &["*.ts", "*.tsx"]), ("txt", &["*.txt"]), ("toml", &["*.toml", "Cargo.lock"]), ("twig", &["*.twig"]), ("vala", &["*.vala"]), ("vb", &["*.vb"]), + ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]), + ("vhdl", &["*.vhd", "*.vhdl"]), ("vim", &["*.vim"]), ("vimscript", &["*.vim"]), ("wiki", &["*.mediawiki", "*.wiki"]), - ("xml", &["*.xml"]), + ("webidl", &["*.idl", "*.webidl", "*.widl"]), + ("xml", &["*.xml", "*.xml.dist"]), + ("xz", &["*.xz"]), ("yacc", &["*.y"]), ("yaml", &["*.yaml", "*.yml"]), - ("zsh", &["zshenv", ".zshenv", "zprofile", ".zprofile", "zshrc", ".zshrc", "zlogin", ".zlogin", "zlogout", ".zlogout", "*.zsh"]), + ("zsh", &[ + ".zshenv", 
"zshenv", + ".zlogin", "zlogin", + ".zlogout", "zlogout", + ".zprofile", "zprofile", + ".zshrc", "zshrc", + "*.zsh", + ]), ]; /// Glob represents a single glob in a set of file type definitions. @@ -451,7 +550,7 @@ } }; for (iglob, glob) in def.globs.iter().enumerate() { - build_set.add(try!( + build_set.add( GlobBuilder::new(glob) .literal_separator(true) .build() @@ -460,14 +559,14 @@ glob: Some(glob.to_string()), err: err.kind().to_string(), } - }))); + })?); glob_to_selection.push((isel, iglob)); } selections.push(selection.clone().map(move |_| def)); } - let set = try!(build_set.build().map_err(|err| { + let set = build_set.build().map_err(|err| { Error::Glob { glob: None, err: err.to_string() } - })); + })?; Ok(Types { defs: defs, selections: selections, @@ -580,7 +679,7 @@ for type_name in types { let globs = self.types.get(type_name).unwrap().globs.clone(); for glob in globs { - try!(self.add(name, &glob)); + self.add(name, &glob)?; } } Ok(()) diff -Nru ripgrep-0.6.0/ignore/src/walk.rs ripgrep-0.10.0.3/ignore/src/walk.rs --- ripgrep-0.6.0/ignore/src/walk.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/src/walk.rs 2018-09-10 21:10:55.000000000 +0000 @@ -1,5 +1,5 @@ use std::cmp; -use std::ffi::{OsStr, OsString}; +use std::ffi::OsStr; use std::fmt; use std::fs::{self, FileType, Metadata}; use std::io; @@ -10,8 +10,9 @@ use std::time::Duration; use std::vec; -use crossbeam::sync::MsQueue; -use walkdir::{self, WalkDir, WalkDirIterator, is_same_file}; +use channel; +use same_file::Handle; +use walkdir::{self, WalkDir}; use dir::{Ignore, IgnoreBuilder}; use gitignore::GitignoreBuilder; @@ -23,7 +24,7 @@ /// /// The error typically refers to a problem parsing ignore files in a /// particular directory. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct DirEntry { dent: DirEntryInner, err: Option, @@ -35,9 +36,17 @@ self.dent.path() } + /// The full path that this entry represents. + /// Analogous to [`path`], but moves ownership of the path. 
+ /// + /// [`path`]: struct.DirEntry.html#method.path + pub fn into_path(self) -> PathBuf { + self.dent.into_path() + } + /// Whether this entry corresponds to a symbolic link or not. - pub fn path_is_symbolic_link(&self) -> bool { - self.dent.path_is_symbolic_link() + pub fn path_is_symlink(&self) -> bool { + self.dent.path_is_symlink() } /// Returns true if and only if this entry corresponds to stdin. @@ -83,11 +92,17 @@ /// Returns an error, if one exists, associated with processing this entry. /// /// An example of an error is one that occurred while parsing an ignore - /// file. + /// file. Errors related to traversing a directory tree itself are reported + /// as part of yielding the directory entry, and not with this method. pub fn error(&self) -> Option<&Error> { self.err.as_ref() } + /// Returns true if and only if this entry points to a directory. + fn is_dir(&self) -> bool { + self.dent.is_dir() + } + fn new_stdin() -> DirEntry { DirEntry { dent: DirEntryInner::Stdin, @@ -120,7 +135,7 @@ /// /// Specifically, (3) has to essentially re-create the DirEntry implementation /// from WalkDir. -#[derive(Debug)] +#[derive(Clone, Debug)] enum DirEntryInner { Stdin, Walkdir(walkdir::DirEntry), @@ -137,12 +152,21 @@ } } - fn path_is_symbolic_link(&self) -> bool { + fn into_path(self) -> PathBuf { + use self::DirEntryInner::*; + match self { + Stdin => PathBuf::from(""), + Walkdir(x) => x.into_path(), + Raw(x) => x.into_path(), + } + } + + fn path_is_symlink(&self) -> bool { use self::DirEntryInner::*; match *self { Stdin => false, - Walkdir(ref x) => x.path_is_symbolic_link(), - Raw(ref x) => x.path_is_symbolic_link(), + Walkdir(ref x) => x.path_is_symlink(), + Raw(ref x) => x.path_is_symlink(), } } @@ -199,6 +223,7 @@ #[cfg(unix)] fn ino(&self) -> Option { + use walkdir::DirEntryExt; use self::DirEntryInner::*; match *self { Stdin => None, @@ -206,10 +231,16 @@ Raw(ref x) => Some(x.ino()), } } + + /// Returns true if and only if this entry points to a directory. 
+ fn is_dir(&self) -> bool { + self.file_type().map(|ft| ft.is_dir()).unwrap_or(false) + } } /// DirEntryRaw is essentially copied from the walkdir crate so that we can /// build `DirEntry`s from whole cloth in the parallel iterator. +#[derive(Clone)] struct DirEntryRaw { /// The path as reported by the `fs::ReadDir` iterator (even if it's a /// symbolic link). @@ -224,6 +255,10 @@ /// The underlying inode number (Unix only). #[cfg(unix)] ino: u64, + /// The underlying metadata (Windows only). We store this on Windows + /// because this comes for free while reading a directory. + #[cfg(windows)] + metadata: fs::Metadata, } impl fmt::Debug for DirEntryRaw { @@ -244,11 +279,29 @@ &self.path } - fn path_is_symbolic_link(&self) -> bool { + fn into_path(self) -> PathBuf { + self.path + } + + fn path_is_symlink(&self) -> bool { self.ty.is_symlink() || self.follow_link } fn metadata(&self) -> Result { + self.metadata_internal() + } + + #[cfg(windows)] + fn metadata_internal(&self) -> Result { + if self.follow_link { + fs::metadata(&self.path) + } else { + Ok(self.metadata.clone()) + }.map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) + } + + #[cfg(not(windows))] + fn metadata_internal(&self) -> Result { if self.follow_link { fs::metadata(&self.path) } else { @@ -277,28 +330,36 @@ depth: usize, ent: &fs::DirEntry, ) -> Result { - let ty = try!(ent.file_type().map_err(|err| { + let ty = ent.file_type().map_err(|err| { let err = Error::Io(io::Error::from(err)).with_path(ent.path()); Error::WithDepth { depth: depth, err: Box::new(err), } - })); - Ok(DirEntryRaw::from_entry_os(depth, ent, ty)) + })?; + DirEntryRaw::from_entry_os(depth, ent, ty) } - #[cfg(not(unix))] + #[cfg(windows)] fn from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, - ) -> DirEntryRaw { - DirEntryRaw { + ) -> Result { + let md = ent.metadata().map_err(|err| { + let err = Error::Io(io::Error::from(err)).with_path(ent.path()); + Error::WithDepth { + depth: depth, + err: 
Box::new(err), + } + })?; + Ok(DirEntryRaw { path: ent.path(), ty: ty, follow_link: false, depth: depth, - } + metadata: md, + }) } #[cfg(unix)] @@ -306,42 +367,51 @@ depth: usize, ent: &fs::DirEntry, ty: fs::FileType, - ) -> DirEntryRaw { + ) -> Result { use std::os::unix::fs::DirEntryExt; - DirEntryRaw { + Ok(DirEntryRaw { path: ent.path(), ty: ty, follow_link: false, depth: depth, ino: ent.ino(), - } + }) } #[cfg(not(unix))] - fn from_link(depth: usize, pb: PathBuf) -> Result { - let md = try!(fs::metadata(&pb).map_err(|err| { + fn from_path( + depth: usize, + pb: PathBuf, + link: bool, + ) -> Result { + let md = fs::metadata(&pb).map_err(|err| { Error::Io(err).with_path(&pb) - })); + })?; Ok(DirEntryRaw { path: pb, ty: md.file_type(), - follow_link: true, + follow_link: link, depth: depth, + metadata: md, }) } #[cfg(unix)] - fn from_link(depth: usize, pb: PathBuf) -> Result { + fn from_path( + depth: usize, + pb: PathBuf, + link: bool, + ) -> Result { use std::os::unix::fs::MetadataExt; - let md = try!(fs::metadata(&pb).map_err(|err| { + let md = fs::metadata(&pb).map_err(|err| { Error::Io(err).with_path(&pb) - })); + })?; Ok(DirEntryRaw { path: pb, ty: md.file_type(), - follow_link: true, + follow_link: link, depth: depth, ino: md.ino(), }) @@ -400,12 +470,19 @@ pub struct WalkBuilder { paths: Vec, ig_builder: IgnoreBuilder, - parents: bool, max_depth: Option, max_filesize: Option, follow_links: bool, - sorter: Option cmp::Ordering + 'static>>, + same_file_system: bool, + sorter: Option, threads: usize, + skip: Option>, +} + +#[derive(Clone)] +enum Sorter { + ByName(Arc cmp::Ordering + Send + Sync + 'static>), + ByPath(Arc cmp::Ordering + Send + Sync + 'static>), } impl fmt::Debug for WalkBuilder { @@ -413,11 +490,11 @@ f.debug_struct("WalkBuilder") .field("paths", &self.paths) .field("ig_builder", &self.ig_builder) - .field("parents", &self.parents) .field("max_depth", &self.max_depth) .field("max_filesize", &self.max_filesize) .field("follow_links", 
&self.follow_links) .field("threads", &self.threads) + .field("skip", &self.skip) .finish() } } @@ -433,12 +510,13 @@ WalkBuilder { paths: vec![path.as_ref().to_path_buf()], ig_builder: IgnoreBuilder::new(), - parents: true, max_depth: None, max_filesize: None, follow_links: false, + same_file_system: false, sorter: None, threads: 0, + skip: None, } } @@ -446,19 +524,30 @@ pub fn build(&self) -> Walk { let follow_links = self.follow_links; let max_depth = self.max_depth; - let cmp = self.sorter.clone(); + let sorter = self.sorter.clone(); let its = self.paths.iter().map(move |p| { if p == Path::new("-") { (p.to_path_buf(), None) } else { let mut wd = WalkDir::new(p); wd = wd.follow_links(follow_links || p.is_file()); + wd = wd.same_file_system(self.same_file_system); if let Some(max_depth) = max_depth { wd = wd.max_depth(max_depth); } - if let Some(ref cmp) = cmp { - let cmp = cmp.clone(); - wd = wd.sort_by(move |a, b| cmp(a, b)); + if let Some(ref sorter) = sorter { + match sorter.clone() { + Sorter::ByName(cmp) => { + wd = wd.sort_by(move |a, b| { + cmp(a.file_name(), b.file_name()) + }); + } + Sorter::ByPath(cmp) => { + wd = wd.sort_by(move |a, b| { + cmp(a.path(), b.path()) + }); + } + } } (p.to_path_buf(), Some(WalkEventIter::from(wd))) } @@ -470,7 +559,7 @@ ig_root: ig_root.clone(), ig: ig_root.clone(), max_filesize: self.max_filesize, - parents: self.parents, + skip: self.skip.clone(), } } @@ -486,8 +575,9 @@ max_depth: self.max_depth, max_filesize: self.max_filesize, follow_links: self.follow_links, - parents: self.parents, + same_file_system: self.same_file_system, threads: self.threads, + skip: self.skip.clone(), } } @@ -532,7 +622,7 @@ self } - /// Add an ignore file to the matcher. + /// Add a global ignore file to the matcher. /// /// This has lower precedence than all other sources of ignore rules. 
/// @@ -551,6 +641,20 @@ errs.into_error_option() } + /// Add a custom ignore file name + /// + /// These ignore files have higher precedence than all other ignore files. + /// + /// When specifying multiple names, earlier names have lower precedence than + /// later names. + pub fn add_custom_ignore_filename>( + &mut self, + file_name: S + ) -> &mut WalkBuilder { + self.ig_builder.add_custom_ignore_filename(file_name); + self + } + /// Add an override matcher. /// /// By default, no override matcher is used. @@ -571,6 +675,29 @@ self } + /// Enables all the standard ignore filters. + /// + /// This toggles, as a group, all the filters that are enabled by default: + /// + /// - [hidden()](#method.hidden) + /// - [parents()](#method.parents) + /// - [ignore()](#method.ignore) + /// - [git_ignore()](#method.git_ignore) + /// - [git_global()](#method.git_global) + /// - [git_exclude()](#method.git_exclude) + /// + /// They may still be toggled individually after calling this function. + /// + /// This is (by definition) enabled by default. + pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder { + self.hidden(yes) + .parents(yes) + .ignore(yes) + .git_ignore(yes) + .git_global(yes) + .git_exclude(yes) + } + /// Enables ignoring hidden files. /// /// This is enabled by default. @@ -581,14 +708,12 @@ /// Enables reading ignore files from parent directories. /// - /// If this is enabled, then the parent directories of each file path given - /// are traversed for ignore files (subject to the ignore settings on - /// this builder). Note that file paths are canonicalized with respect to - /// the current working directory in order to determine parent directories. + /// If this is enabled, then .gitignore files in parent directories of each + /// file path given are respected. Otherwise, they are ignored. /// /// This is enabled by default. 
pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder { - self.parents = yes; + self.ig_builder.parents(yes); self } @@ -610,6 +735,8 @@ /// does not exist or does not specify `core.excludesFile`, then /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + /// + /// This is enabled by default. pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.git_global(yes); self @@ -637,17 +764,78 @@ self } - /// Set a function for sorting directory entries. + /// Set a function for sorting directory entries by their path. + /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// entries from the same directory. + /// + /// This is like `sort_by_file_name`, except the comparator accepts + /// a `&Path` instead of the base file name, which permits it to sort by + /// more criteria. + /// + /// This method will override any previous sorter set by this method or + /// by `sort_by_file_name`. + /// + /// Note that this is not used in the parallel iterator. + pub fn sort_by_file_path( + &mut self, + cmp: F, + ) -> &mut WalkBuilder + where F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static + { + self.sorter = Some(Sorter::ByPath(Arc::new(cmp))); + self + } + + /// Set a function for sorting directory entries by file name. /// /// If a compare function is set, the resulting iterator will return all /// paths in sorted order. The compare function will be called to compare /// names from entries from the same directory using only the name of the /// entry. /// + /// This method will override any previous sorter set by this method or + /// by `sort_by_file_path`. + /// /// Note that this is not used in the parallel iterator. 
- pub fn sort_by(&mut self, cmp: F) -> &mut WalkBuilder - where F: Fn(&OsString, &OsString) -> cmp::Ordering + 'static { - self.sorter = Some(Arc::new(cmp)); + pub fn sort_by_file_name(&mut self, cmp: F) -> &mut WalkBuilder + where F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static + { + self.sorter = Some(Sorter::ByName(Arc::new(cmp))); + self + } + + /// Do not cross file system boundaries. + /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the root path. + /// + /// Currently, this option is only supported on Unix and Windows. If this + /// option is used on an unsupported platform, then directory traversal + /// will immediately return an error and will not yield any entries. + pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder { + self.same_file_system = yes; + self + } + + /// Do not yield directory entries that are believed to correspond to + /// stdout. + /// + /// This is useful when a command is invoked via shell redirection to a + /// file that is also being read. For example, `grep -r foo ./ > results` + /// might end up trying to search `results` even though it is also writing + /// to it, which could cause an unbounded feedback loop. Setting this + /// option prevents this from happening by skipping over the `results` + /// file. + /// + /// This is disabled by default. 
+ pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder { + if yes { + self.skip = stdout_handle().map(Arc::new); + } else { + self.skip = None; + } self } } @@ -664,7 +852,7 @@ ig_root: Ignore, ig: Ignore, max_filesize: Option, - parents: bool, + skip: Option>, } impl Walk { @@ -677,12 +865,17 @@ WalkBuilder::new(path).build() } - fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool { + fn skip_entry(&self, ent: &DirEntry) -> Result { if ent.depth() == 0 { - return false; + return Ok(false); } - let is_dir = ent.file_type().is_dir(); + if let Some(ref stdout) = self.skip { + if path_equals(ent, stdout)? { + return Ok(true); + } + } + let is_dir = ent.file_type().map_or(false, |ft| ft.is_dir()); let max_size = self.max_filesize; let should_skip_path = skip_path(&self.ig, ent.path(), is_dir); let should_skip_filesize = if !is_dir && max_size.is_some() { @@ -691,7 +884,7 @@ false }; - should_skip_path || should_skip_filesize + Ok(should_skip_path || should_skip_filesize) } } @@ -711,7 +904,7 @@ } Some((path, Some(it))) => { self.it = Some(it); - if self.parents && path.is_dir() { + if path.is_dir() { let (ig, err) = self.ig_root.add_parents(path); self.ig = ig; if let Some(err) = err { @@ -727,13 +920,18 @@ }; match ev { Err(err) => { - return Some(Err(Error::from(err))); + return Some(Err(Error::from_walkdir(err))); } Ok(WalkEvent::Exit) => { self.ig = self.ig.parent().unwrap(); } Ok(WalkEvent::Dir(ent)) => { - if self.skip_entry(&ent) { + let mut ent = DirEntry::new_walkdir(ent, None); + let should_skip = match self.skip_entry(&ent) { + Err(err) => return Some(Err(err)), + Ok(should_skip) => should_skip, + }; + if should_skip { self.it.as_mut().unwrap().it.skip_current_dir(); // Still need to push this on the stack because // we'll get a WalkEvent::Exit event for this dir. 
@@ -744,13 +942,19 @@ } let (igtmp, err) = self.ig.add_child(ent.path()); self.ig = igtmp; - return Some(Ok(DirEntry::new_walkdir(ent, err))); + ent.err = err; + return Some(Ok(ent)); } Ok(WalkEvent::File(ent)) => { - if self.skip_entry(&ent) { + let ent = DirEntry::new_walkdir(ent, None); + let should_skip = match self.skip_entry(&ent) { + Err(err) => return Some(Err(err)), + Ok(should_skip) => should_skip, + }; + if should_skip { continue; } - return Some(Ok(DirEntry::new_walkdir(ent, None))); + return Some(Ok(ent)); } } } @@ -763,7 +967,7 @@ /// the entire contents of a directory have been enumerated. struct WalkEventIter { depth: usize, - it: walkdir::Iter, + it: walkdir::IntoIter, next: Option>, } @@ -847,11 +1051,12 @@ pub struct WalkParallel { paths: vec::IntoIter, ig_root: Ignore, - parents: bool, max_filesize: Option, max_depth: Option, follow_links: bool, + same_file_system: bool, threads: usize, + skip: Option>, } impl WalkParallel { @@ -864,18 +1069,43 @@ ) where F: FnMut() -> Box) -> WalkState + Send + 'static> { let mut f = mkf(); let threads = self.threads(); - let queue = Arc::new(MsQueue::new()); + // TODO: Figure out how to use a bounded channel here. With an + // unbounded channel, the workers can run away and fill up memory + // with all of the file paths. But a bounded channel doesn't work since + // our producers are also are consumers, so they end up getting stuck. + // + // We probably need to rethink parallel traversal completely to fix + // this. The best case scenario would be finding a way to use rayon + // to do this. + let (tx, rx) = channel::unbounded(); let mut any_work = false; // Send the initial set of root paths to the pool of workers. // Note that we only send directories. For files, we send to them the // callback directly. 
for path in self.paths { - let dent = + let (dent, root_device) = if path == Path::new("-") { - DirEntry::new_stdin() + (DirEntry::new_stdin(), None) } else { - match DirEntryRaw::from_link(0, path) { - Ok(dent) => DirEntry::new_raw(dent, None), + let root_device = + if !self.same_file_system { + None + } else { + match device_num(&path) { + Ok(root_device) => Some(root_device), + Err(err) => { + let err = Error::Io(err).with_path(path); + if f(Err(err)).is_quit() { + return; + } + continue; + } + } + }; + match DirEntryRaw::from_path(0, path, false) { + Ok(dent) => { + (DirEntry::new_raw(dent, None), root_device) + } Err(err) => { if f(Err(err)).is_quit() { return; @@ -884,9 +1114,10 @@ } } }; - queue.push(Message::Work(Work { + tx.send(Message::Work(Work { dent: dent, ignore: self.ig_root.clone(), + root_device: root_device, })); any_work = true; } @@ -902,20 +1133,23 @@ for _ in 0..threads { let worker = Worker { f: mkf(), - queue: queue.clone(), + tx: tx.clone(), + rx: rx.clone(), quit_now: quit_now.clone(), is_waiting: false, is_quitting: false, num_waiting: num_waiting.clone(), num_quitting: num_quitting.clone(), threads: threads, - parents: self.parents, max_depth: self.max_depth, max_filesize: self.max_filesize, follow_links: self.follow_links, + skip: self.skip.clone(), }; handles.push(thread::spawn(|| worker.run())); } + drop(tx); + drop(rx); for handle in handles { handle.join().unwrap(); } @@ -949,12 +1183,20 @@ dent: DirEntry, /// Any ignore matchers that have been built for this directory's parents. ignore: Ignore, + /// The root device number. When present, only files with the same device + /// number should be considered. + root_device: Option, } impl Work { /// Returns true if and only if this work item is a directory. fn is_dir(&self) -> bool { - self.dent.file_type().map_or(false, |t| t.is_dir()) + self.dent.is_dir() + } + + /// Returns true if and only if this work item is a symlink. 
+ fn is_symlink(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_symlink()) } /// Adds ignore rules for parent directories. @@ -1003,8 +1245,10 @@ struct Worker { /// The caller's callback. f: Box) -> WalkState + Send + 'static>, - /// A queue of work items. This is multi-producer and multi-consumer. - queue: Arc>, + /// The push side of our mpmc queue. + tx: channel::Sender, + /// The receive side of our mpmc queue. + rx: channel::Receiver, /// Whether all workers should quit at the next opportunity. Note that /// this is distinct from quitting because of exhausting the contents of /// a directory. Instead, this is used when the caller's callback indicates @@ -1020,9 +1264,6 @@ num_quitting: Arc, /// The total number of workers. threads: usize, - /// Whether to create ignore matchers for parents of caller specified - /// directories. - parents: bool, /// The maximum depth of directories to descend. A value of `0` means no /// descension at all. max_depth: Option, @@ -1032,6 +1273,9 @@ /// Whether to follow symbolic links or not. When this is enabled, loop /// detection is performed. follow_links: bool, + /// A file handle to skip, currently is either `None` or stdout, if it's + /// a file and it has been requested to skip files identical to stdout. + skip: Option>, } impl Worker { @@ -1043,19 +1287,17 @@ while let Some(mut work) = self.get_work() { // If the work is not a directory, then we can just execute the // caller's callback immediately and move on. 
- if !work.is_dir() { + if work.is_symlink() || !work.is_dir() { if (self.f)(Ok(work.dent)).is_quit() { self.quit_now(); return; } continue; } - if self.parents { - if let Some(err) = work.add_parents() { - if (self.f)(Err(err)).is_quit() { - self.quit_now(); - return; - } + if let Some(err) = work.add_parents() { + if (self.f)(Err(err)).is_quit() { + self.quit_now(); + return; } } let readdir = match work.read_dir() { @@ -1068,6 +1310,23 @@ continue; } }; + let descend = + if let Some(root_device) = work.root_device { + match is_same_file_system(root_device, work.dent.path()) { + Ok(true) => true, + Ok(false) => false, + Err(err) => { + if (self.f)(Err(err)).is_quit() { + self.quit_now(); + return; + } + false + } + } + } else { + true + }; + let depth = work.dent.depth(); match (self.f)(Ok(work.dent)) { WalkState::Continue => {} @@ -1077,11 +1336,20 @@ return; } } + if !descend { + continue; + } if self.max_depth.map_or(false, |max| depth >= max) { continue; } for result in readdir { - if self.run_one(&work.ignore, depth + 1, result).is_quit() { + let state = self.run_one( + &work.ignore, + depth + 1, + work.root_device, + result, + ); + if state.is_quit() { self.quit_now(); return; } @@ -1105,6 +1373,7 @@ &mut self, ig: &Ignore, depth: usize, + root_device: Option, result: Result, ) -> WalkState { let fs_dent = match result { @@ -1122,31 +1391,46 @@ let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink()); if self.follow_links && is_symlink { let path = dent.path().to_path_buf(); - dent = match DirEntryRaw::from_link(depth, path) { + dent = match DirEntryRaw::from_path(depth, path, true) { Ok(dent) => DirEntry::new_raw(dent, None), Err(err) => { return (self.f)(Err(err)); } }; - if dent.file_type().map_or(false, |ft| ft.is_dir()) { + if dent.is_dir() { if let Err(err) = check_symlink_loop(ig, dent.path(), depth) { return (self.f)(Err(err)); } } } - let is_dir = dent.file_type().map_or(false, |ft| ft.is_dir()); + if let Some(ref stdout) = self.skip { 
+ let is_stdout = match path_equals(&dent, stdout) { + Ok(is_stdout) => is_stdout, + Err(err) => return (self.f)(Err(err)), + }; + if is_stdout { + return WalkState::Continue; + } + } + let is_dir = dent.is_dir(); let max_size = self.max_filesize; let should_skip_path = skip_path(ig, dent.path(), is_dir); - let should_skip_filesize = if !is_dir && max_size.is_some() { - skip_filesize(max_size.unwrap(), dent.path(), &dent.metadata().ok()) - } else { - false - }; + let should_skip_filesize = + if !is_dir && max_size.is_some() { + skip_filesize( + max_size.unwrap(), + dent.path(), + &dent.metadata().ok(), + ) + } else { + false + }; if !should_skip_path && !should_skip_filesize { - self.queue.push(Message::Work(Work { + self.tx.send(Message::Work(Work { dent: dent, ignore: ig.clone(), + root_device: root_device, })); } WalkState::Continue @@ -1161,7 +1445,7 @@ if self.is_quit_now() { return None; } - match self.queue.try_pop() { + match self.rx.try_recv() { Some(Message::Work(work)) => { self.waiting(false); self.quitting(false); @@ -1203,7 +1487,7 @@ self.quitting(false); if self.num_waiting() == self.threads { for _ in 0..self.threads { - self.queue.push(Message::Quit); + self.tx.send(Message::Quit); } } else { // You're right to consider this suspicious, but it's @@ -1276,11 +1560,14 @@ child_path: &Path, child_depth: usize, ) -> Result<(), Error> { + let hchild = Handle::from_path(child_path).map_err(|err| { + Error::from(err).with_path(child_path).with_depth(child_depth) + })?; for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) { - let same = try!(is_same_file(ig.path(), child_path).map_err(|err| { + let h = Handle::from_path(ig.path()).map_err(|err| { Error::from(err).with_path(child_path).with_depth(child_depth) - })); - if same { + })?; + if hchild == h { return Err(Error::Loop { ancestor: ig.path().to_path_buf(), child: child_path.to_path_buf(), @@ -1314,7 +1601,11 @@ } } -fn skip_path(ig: &Ignore, path: &Path, is_dir: bool) -> bool { +fn 
skip_path( + ig: &Ignore, + path: &Path, + is_dir: bool, +) -> bool { let m = ig.matched(path, is_dir); if m.is_ignore() { debug!("ignoring {}: {:?}", path.display(), m); @@ -1327,6 +1618,83 @@ } } +/// Returns a handle to stdout for filtering search. +/// +/// A handle is returned if and only if stdout is being redirected to a file. +/// The handle returned corresponds to that file. +/// +/// This can be used to ensure that we do not attempt to search a file that we +/// may also be writing to. +fn stdout_handle() -> Option { + let h = match Handle::stdout() { + Err(_) => return None, + Ok(h) => h, + }; + let md = match h.as_file().metadata() { + Err(_) => return None, + Ok(md) => md, + }; + if !md.is_file() { + return None; + } + Some(h) +} + +/// Returns true if and only if the given directory entry is believed to be +/// equivalent to the given handle. If there was a problem querying the path +/// for information to determine equality, then that error is returned. +fn path_equals(dent: &DirEntry, handle: &Handle) -> Result { + #[cfg(unix)] + fn never_equal(dent: &DirEntry, handle: &Handle) -> bool { + dent.ino() != Some(handle.ino()) + } + + #[cfg(not(unix))] + fn never_equal(_: &DirEntry, _: &Handle) -> bool { + false + } + + // If we know for sure that these two things aren't equal, then avoid + // the costly extra stat call to determine equality. + if dent.is_stdin() || never_equal(dent, handle) { + return Ok(false); + } + Handle::from_path(dent.path()) + .map(|h| &h == handle) + .map_err(|err| Error::Io(err).with_path(dent.path())) +} + +/// Returns true if and only if the given path is on the same device as the +/// given root device. 
+fn is_same_file_system(root_device: u64, path: &Path) -> Result { + let dent_device = device_num(path) + .map_err(|err| Error::Io(err).with_path(path))?; + Ok(root_device == dent_device) +} + +#[cfg(unix)] +fn device_num>(path: P)-> io::Result { + use std::os::unix::fs::MetadataExt; + + path.as_ref().metadata().map(|md| md.dev()) +} + + #[cfg(windows)] +fn device_num>(path: P) -> io::Result { + use winapi_util::{Handle, file}; + + let h = Handle::from_path_any(path)?; + file::information(h).map(|info| info.volume_serial_number()) +} + +#[cfg(not(any(unix, windows)))] +fn device_num>(_: P)-> io::Result { + Err(io::Error::new( + io::ErrorKind::Other, + "walkdir: same_file_system option not supported on this platform", + )) +} + #[cfg(test)] mod tests { use std::fs::{self, File}; @@ -1336,7 +1704,7 @@ use tempdir::TempDir; - use super::{WalkBuilder, WalkState}; + use super::{DirEntry, WalkBuilder, WalkState}; fn wfile>(path: P, contents: &str) { let mut file = File::create(path).unwrap(); @@ -1387,28 +1755,32 @@ prefix: &Path, builder: &WalkBuilder, ) -> Vec { - let paths = Arc::new(Mutex::new(vec![])); - let prefix = Arc::new(prefix.to_path_buf()); + let mut paths = vec![]; + for dent in walk_collect_entries_parallel(builder) { + let path = dent.path().strip_prefix(prefix).unwrap(); + if path.as_os_str().is_empty() { + continue; + } + paths.push(normal_path(path.to_str().unwrap())); + } + paths.sort(); + paths + } + + fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec { + let dents = Arc::new(Mutex::new(vec![])); builder.build_parallel().run(|| { - let paths = paths.clone(); - let prefix = prefix.clone(); + let dents = dents.clone(); Box::new(move |result| { - let dent = match result { - Err(_) => return WalkState::Continue, - Ok(dent) => dent, - }; - let path = dent.path().strip_prefix(&**prefix).unwrap(); - if path.as_os_str().is_empty() { - return WalkState::Continue; + if let Ok(dent) = result { + dents.lock().unwrap().push(dent); } - let mut paths = 
paths.lock().unwrap(); - paths.push(normal_path(path.to_str().unwrap())); WalkState::Continue }) }); - let mut paths = paths.lock().unwrap(); - paths.sort(); - paths.to_vec() + + let dents = dents.lock().unwrap(); + dents.to_vec() } fn mkpaths(paths: &[&str]) -> Vec { @@ -1423,9 +1795,9 @@ expected: &[&str], ) { let got = walk_collect(prefix, builder); - assert_eq!(got, mkpaths(expected)); + assert_eq!(got, mkpaths(expected), "single threaded"); let got = walk_collect_parallel(prefix, builder); - assert_eq!(got, mkpaths(expected)); + assert_eq!(got, mkpaths(expected), "parallel"); } #[test] @@ -1442,8 +1814,45 @@ } #[test] + fn custom_ignore() { + let td = TempDir::new("walk-test-").unwrap(); + let custom_ignore = ".customignore"; + mkdirp(td.path().join("a")); + wfile(td.path().join(custom_ignore), "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + builder.add_custom_ignore_filename(&custom_ignore); + assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]); + } + + #[test] + fn custom_ignore_exclusive_use() { + let td = TempDir::new("walk-test-").unwrap(); + let custom_ignore = ".customignore"; + mkdirp(td.path().join("a")); + wfile(td.path().join(custom_ignore), "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + builder.ignore(false); + builder.git_ignore(false); + builder.git_global(false); + builder.git_exclude(false); + builder.add_custom_ignore_filename(&custom_ignore); + assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]); + } + + #[test] fn gitignore() { let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join(".git")); mkdirp(td.path().join("a")); wfile(td.path().join(".gitignore"), "foo"); wfile(td.path().join("foo"), ""); 
@@ -1473,8 +1882,27 @@ } #[test] + fn explicit_ignore_exclusive_use() { + let td = TempDir::new("walk-test-").unwrap(); + let igpath = td.path().join(".not-an-ignore"); + mkdirp(td.path().join("a")); + wfile(&igpath, "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + builder.standard_filters(false); + assert!(builder.add_ignore(&igpath).is_none()); + assert_paths(td.path(), &builder, + &[".not-an-ignore", "bar", "a", "a/bar"]); + } + + #[test] fn gitignore_parent() { let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join(".git")); mkdirp(td.path().join("a")); wfile(td.path().join(".gitignore"), "foo"); wfile(td.path().join("a/foo"), ""); @@ -1549,6 +1977,27 @@ #[cfg(unix)] // because symlinks on windows are weird #[test] + fn first_path_not_symlink() { + let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("foo")); + + let dents = WalkBuilder::new(td.path().join("foo")) + .build() + .into_iter() + .collect::, _>>() + .unwrap(); + assert_eq!(1, dents.len()); + assert!(!dents[0].path_is_symlink()); + + let dents = walk_collect_entries_parallel( + &WalkBuilder::new(td.path().join("foo")), + ); + assert_eq!(1, dents.len()); + assert!(!dents[0].path_is_symlink()); + } + + #[cfg(unix)] // because symlinks on windows are weird + #[test] fn symlink_loop() { let td = TempDir::new("walk-test-").unwrap(); mkdirp(td.path().join("a/b")); @@ -1562,4 +2011,40 @@ "a", "a/b", ]); } + + // It's a little tricky to test the 'same_file_system' option since + // we need an environment with more than one file system. We adopt a + // heuristic where /sys is typically a distinct volume on Linux and roll + // with that. + #[test] + #[cfg(target_os = "linux")] + fn same_file_system() { + use super::device_num; + + // If for some reason /sys doesn't exist or isn't a directory, just + // skip this test. 
+ if !Path::new("/sys").is_dir() { + return; + } + + // If our test directory actually isn't a different volume from /sys, + // then this test is meaningless and we shouldn't run it. + let td = TempDir::new("walk-test-").unwrap(); + if device_num(td.path()).unwrap() == device_num("/sys").unwrap() { + return; + } + + mkdirp(td.path().join("same_file")); + symlink("/sys", td.path().join("same_file").join("alink")); + + // Create a symlink to sys and enable following symlinks. If the + // same_file_system option doesn't work, then this probably will hit a + // permission error. Otherwise, it should just skip over the symlink + // completely. + let mut builder = WalkBuilder::new(td.path()); + builder.follow_links(true).same_file_system(true); + assert_paths(td.path(), &builder, &[ + "same_file", "same_file/alink", + ]); + } } diff -Nru ripgrep-0.6.0/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs ripgrep-0.10.0.3/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs --- ripgrep-0.6.0/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs 2018-09-10 21:10:55.000000000 +0000 @@ -1,13 +1,11 @@ extern crate ignore; - use std::path::Path; use ignore::gitignore::{Gitignore, GitignoreBuilder}; - -const IGNORE_FILE: &'static str = "tests/gitignore_matched_path_or_any_parents_tests.gitignore"; - +const IGNORE_FILE: &'static str = + "tests/gitignore_matched_path_or_any_parents_tests.gitignore"; fn get_gitignore() -> Gitignore { let mut builder = GitignoreBuilder::new("ROOT"); @@ -16,9 +14,8 @@ builder.build().unwrap() } - #[test] -#[should_panic(expected = "path is expect to be under the root")] +#[should_panic(expected = "path is expected to be under the root")] fn test_path_should_be_under_root() { let gitignore = get_gitignore(); let path = "/tmp/some_file"; @@ -26,11 +23,12 @@ assert!(false); } - #[test] fn test_files_in_root() { let 
gitignore = get_gitignore(); - let m = |path: &str| gitignore.matched_path_or_any_parents(Path::new(path), false); + let m = |path: &str| { + gitignore.matched_path_or_any_parents(Path::new(path), false) + }; // 0x assert!(m("ROOT/file_root_00").is_ignore()); @@ -61,7 +59,9 @@ #[test] fn test_files_in_deep() { let gitignore = get_gitignore(); - let m = |path: &str| gitignore.matched_path_or_any_parents(Path::new(path), false); + let m = |path: &str| { + gitignore.matched_path_or_any_parents(Path::new(path), false) + }; // 0x assert!(m("ROOT/parent_dir/file_deep_00").is_ignore()); @@ -92,8 +92,9 @@ #[test] fn test_dirs_in_root() { let gitignore = get_gitignore(); - let m = - |path: &str, is_dir: bool| gitignore.matched_path_or_any_parents(Path::new(path), is_dir); + let m = |path: &str, is_dir: bool| { + gitignore.matched_path_or_any_parents(Path::new(path), is_dir) + }; // 00 assert!(m("ROOT/dir_root_00", true).is_ignore()); @@ -196,32 +197,37 @@ #[test] fn test_dirs_in_deep() { let gitignore = get_gitignore(); - let m = - |path: &str, is_dir: bool| gitignore.matched_path_or_any_parents(Path::new(path), is_dir); + let m = |path: &str, is_dir: bool| { + gitignore.matched_path_or_any_parents(Path::new(path), is_dir) + }; // 00 assert!(m("ROOT/parent_dir/dir_deep_00", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_00/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_00/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_00/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_00/child_dir/file", false).is_ignore() + ); // 01 assert!(m("ROOT/parent_dir/dir_deep_01", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_01/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_01/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_01/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_01/child_dir/file", false).is_ignore() + ); // 02 - 
assert!(m("ROOT/parent_dir/dir_deep_02", true).is_none()); // dir itself doesn't match - assert!(m("ROOT/parent_dir/dir_deep_02/file", false).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_02/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_02/child_dir/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_02", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_02/file", false).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_02/child_dir", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_02/child_dir/file", false).is_none()); // 03 - assert!(m("ROOT/parent_dir/dir_deep_03", true).is_none()); // dir itself doesn't match - assert!(m("ROOT/parent_dir/dir_deep_03/file", false).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_03/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_03/child_dir/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_03", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_03/file", false).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_03/child_dir", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_03/child_dir/file", false).is_none()); // 10 assert!(m("ROOT/parent_dir/dir_deep_10", true).is_none()); @@ -251,47 +257,67 @@ assert!(m("ROOT/parent_dir/dir_deep_20", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_20/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_20/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_20/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_20/child_dir/file", false).is_ignore() + ); // 21 assert!(m("ROOT/parent_dir/dir_deep_21", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_21/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_21/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_21/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_21/child_dir/file", false).is_ignore() + ); // 
22 - assert!(m("ROOT/parent_dir/dir_deep_22", true).is_none()); // dir itself doesn't match + // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_22", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_22/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_22/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_22/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_22/child_dir/file", false).is_ignore() + ); // 23 - assert!(m("ROOT/parent_dir/dir_deep_23", true).is_none()); // dir itself doesn't match + // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_23", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_23/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_23/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_23/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_23/child_dir/file", false).is_ignore() + ); // 30 assert!(m("ROOT/parent_dir/dir_deep_30", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_30/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_30/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_30/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_30/child_dir/file", false).is_ignore() + ); // 31 assert!(m("ROOT/parent_dir/dir_deep_31", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_31/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_31/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_31/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_31/child_dir/file", false).is_ignore() + ); // 32 - assert!(m("ROOT/parent_dir/dir_deep_32", true).is_none()); // dir itself doesn't match + // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_32", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_32/file", false).is_ignore()); 
assert!(m("ROOT/parent_dir/dir_deep_32/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_32/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_32/child_dir/file", false).is_ignore() + ); // 33 - assert!(m("ROOT/parent_dir/dir_deep_33", true).is_none()); // dir itself doesn't match + // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_33", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_33/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_33/child_dir", true).is_ignore()); - assert!(m("ROOT/parent_dir/dir_deep_33/child_dir/file", false).is_ignore()); + assert!( + m("ROOT/parent_dir/dir_deep_33/child_dir/file", false).is_ignore() + ); } diff -Nru ripgrep-0.6.0/ISSUE_TEMPLATE.md ripgrep-0.10.0.3/ISSUE_TEMPLATE.md --- ripgrep-0.6.0/ISSUE_TEMPLATE.md 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/ISSUE_TEMPLATE.md 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,53 @@ +#### What version of ripgrep are you using? + +Replace this text with the output of `rg --version`. + +#### How did you install ripgrep? + +If you installed ripgrep with snap and are getting strange file permission or +file not found errors, then please do not file a bug. Instead, use one of the +Github binary releases. + +#### What operating system are you using ripgrep on? + +Replace this text with your operating system and version. + +#### Describe your question, feature request, or bug. + +If a question, please describe the problem you're trying to solve and give +as much context as possible. + +If a feature request, please describe the behavior you want and the motivation. +Please also provide an example of how ripgrep would be used if your feature +request were added. + +If a bug, please see below. + +#### If this is a bug, what are the steps to reproduce the behavior? + +If possible, please include both your search patterns and the corpus on which +you are searching. 
Unless the bug is very obvious, then it is unlikely that it +will be fixed if the ripgrep maintainers cannot reproduce it. + +If the corpus is too big and you cannot decrease its size, file the bug anyway +and the ripgrep maintainers will help figure out next steps. + +#### If this is a bug, what is the actual behavior? + +Show the command you ran and the actual output. Include the `--debug` flag in +your invocation of ripgrep. + +If the output is large, put it in a gist: https://gist.github.com/ + +If the output is small, put it in code fences: + +``` +your +output +goes +here +``` + +#### If this is a bug, what is the expected behavior? + +What do you think ripgrep should have done? diff -Nru ripgrep-0.6.0/pkg/archlinux/.gitignore ripgrep-0.10.0.3/pkg/archlinux/.gitignore --- ripgrep-0.6.0/pkg/archlinux/.gitignore 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/pkg/archlinux/.gitignore 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -*.xz -src -pkg -*.gz diff -Nru ripgrep-0.6.0/pkg/archlinux/PKGBUILD ripgrep-0.10.0.3/pkg/archlinux/PKGBUILD --- ripgrep-0.6.0/pkg/archlinux/PKGBUILD 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/pkg/archlinux/PKGBUILD 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -# Contributor: Andrew Gallant -# Maintainer: Andrew Gallant -pkgname=ripgrep -pkgver=0.2.3 -pkgrel=1 -pkgdesc="A search tool that combines the usability of The Silver Searcher with the raw speed of grep." 
-arch=('i686' 'x86_64') -url="https://github.com/BurntSushi/ripgrep" -license=('UNLICENSE') -makedepends=('cargo') -source=("https://github.com/BurntSushi/$pkgname/archive/$pkgver.tar.gz") -sha256sums=('a88531558d2023df76190ea2e52bee50d739eabece8a57df29abbad0c6bdb917') - -build() { - cd "$pkgname-$pkgver" - if command -v rustup > /dev/null 2>&1; then - RUSTFLAGS="-C target-cpu=native" rustup run nightly \ - cargo build --release --features simd-accel - elif rustc --version | grep -q nightly; then - RUSTFLAGS="-C target-cpu=native" \ - cargo build --release --features simd-accel - else - cargo build --release - fi -} - -package() { - cd "$pkgname-$pkgver" - - install -Dm755 "target/release/rg" "$pkgdir/usr/bin/rg" - install -Dm644 "doc/rg.1" "$pkgdir/usr/share/man/man1/rg.1" - install -Dm644 "README.md" "$pkgdir/usr/share/doc/ripgrep/README.md" - install -Dm644 "COPYING" "$pkgdir/usr/share/doc/ripgrep/COPYING" - install -Dm644 "LICENSE-MIT" "$pkgdir/usr/share/doc/ripgrep/LICENSE-MIT" - install -Dm644 "UNLICENSE" "$pkgdir/usr/share/doc/ripgrep/UNLICENSE" - install -Dm644 "CHANGELOG.md" "$pkgdir/usr/share/doc/ripgrep/CHANGELOG.md" -} diff -Nru ripgrep-0.6.0/pkg/brew/ripgrep-bin.rb ripgrep-0.10.0.3/pkg/brew/ripgrep-bin.rb --- ripgrep-0.6.0/pkg/brew/ripgrep-bin.rb 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/pkg/brew/ripgrep-bin.rb 2018-09-10 21:10:55.000000000 +0000 @@ -1,17 +1,23 @@ class RipgrepBin < Formula - version '0.5.2' - desc "Search tool like grep and The Silver Searcher." + version '0.10.0' + desc "Recursively search directories for a regex pattern." homepage "https://github.com/BurntSushi/ripgrep" - url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz" - sha256 "a0326a84af8517ad707d8c7cccba6e112de27822c391cc0937e4727fbb6c48f4" + + if OS.mac? 
+ url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz" + sha256 "32754b4173ac87a7bfffd436d601a49362676eb1841ab33440f2f49c002c8967" + elsif OS.linux? + url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-unknown-linux-musl.tar.gz" + sha256 "c76080aa807a339b44139885d77d15ad60ab8cdd2c2fdaf345d0985625bc0f97" + end conflicts_with "ripgrep" def install bin.install "rg" - man1.install "rg.1" + man1.install "doc/rg.1" - bash_completion.install "complete/rg.bash-completion" + bash_completion.install "complete/rg.bash" fish_completion.install "complete/rg.fish" zsh_completion.install "complete/_rg" end diff -Nru ripgrep-0.6.0/README.md ripgrep-0.10.0.3/README.md --- ripgrep-0.6.0/README.md 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -1,160 +1,205 @@ ripgrep (rg) ------------ -`ripgrep` is a line oriented search tool that combines the usability of The -Silver Searcher (similar to `ack`) with the raw speed of GNU grep. `ripgrep` -works by recursively searching your current directory for a regex pattern. -`ripgrep` has first class support on Windows, Mac and Linux, with binary -downloads available for -[every release](https://github.com/BurntSushi/ripgrep/releases). +ripgrep is a line-oriented search tool that recursively searches your current +directory for a regex pattern while respecting your gitignore rules. ripgrep +has first class support on Windows, macOS and Linux, with binary downloads +available for [every release](https://github.com/BurntSushi/ripgrep/releases). +ripgrep is similar to other popular search tools like The Silver Searcher, +ack and grep. 
-[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg?branch=master)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) -[![](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep) +[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep) Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + ### CHANGELOG Please see the [CHANGELOG](CHANGELOG.md) for a release history. +### Documentation quick links + +* [Installation](#installation) +* [User Guide](GUIDE.md) +* [Frequently Asked Questions](FAQ.md) +* [Regex syntax](https://docs.rs/regex/1/regex/#syntax) +* [Configuration files](GUIDE.md#configuration-file) +* [Shell completions](FAQ.md#complete) +* [Building](#building) + + ### Screenshot of search results [![A screenshot of a sample search with ripgrep](http://burntsushi.net/stuff/ripgrep1.png)](http://burntsushi.net/stuff/ripgrep1.png) + ### Quick examples comparing tools This example searches the entire Linux kernel source tree (after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where all matches must be words. Timings were collected on a system with an Intel i7-6900K 3.2 GHz, and -ripgrep was compiled using the `compile` script in this repo. +ripgrep was compiled with SIMD enabled. Please remember that a single benchmark is never enough! See my -[blog post on `ripgrep`](http://blog.burntsushi.net/ripgrep/) +[blog post on ripgrep](http://blog.burntsushi.net/ripgrep/) for a very detailed comparison with more benchmarks and analysis. 
| Tool | Command | Line count | Time | | ---- | ------- | ---------- | ---- | -| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.134s** | -| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.753s | -| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.823s | -| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.880s | -| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.656s | -| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 12.369s | -| [ack](https://github.com/petdance/ack2) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 16.952s | +| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.106s** | +| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.553s | +| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.589s | +| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.266s | +| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.505s | +| [ack](https://github.com/petdance/ack2) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 6.823s | +| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 14.208s | (Yes, `ack` [has](https://github.com/petdance/ack2/issues/445) a [bug](https://github.com/petdance/ack2/issues/14).) Here's another benchmark that disregards gitignore files and searches with a whitelist instead. 
The corpus is the same as in the previous benchmark, and the -flags passed to each command ensures that they are doing equivalent work: +flags passed to each command ensure that they are doing equivalent work: | Tool | Command | Line count | Time | | ---- | ------- | ---------- | ---- | -| ripgrep | `rg -L -u -tc -n -w '[A-Z]+_SUSPEND'` | 404 | **0.108s** | -| [ucg](https://github.com/gvansickle/ucg) | `ucg --type=cc -w '[A-Z]+_SUSPEND'` | 392 | 0.219s | -| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -R -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 404 | 0.733s | +| ripgrep | `rg -L -u -tc -n -w '[A-Z]+_SUSPEND'` | 404 | **0.079s** | +| [ucg](https://github.com/gvansickle/ucg) | `ucg --type=cc -w '[A-Z]+_SUSPEND'` | 390 | 0.163s | +| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -R -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 404 | 0.611s | (`ucg` [has slightly different behavior in the presence of symbolic links](https://github.com/gvansickle/ucg/issues/106).) -And finally, a straight up comparison between ripgrep and GNU grep on a single +And finally, a straight-up comparison between ripgrep and GNU grep on a single large file (~9.3GB, [`OpenSubtitles2016.raw.en.gz`](http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz)): | Tool | Command | Line count | Time | | ---- | ------- | ---------- | ---- | -| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 5268 | **2.520s** | -| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C egrep -w 'Sherlock [A-Z]\w+'` | 5268 | 7.143s | +| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 5268 | **2.108s** | +| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C egrep -w 'Sherlock [A-Z]\w+'` | 5268 | 7.014s | In the above benchmark, passing the `-n` flag (for showing line numbers) -increases the times to `3.081s` for ripgrep and `11.403s` for GNU grep. +increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep. -### Why should I use `ripgrep`? 
-* It can replace both The Silver Searcher and GNU grep because it is faster - than both. (N.B. It is not, strictly speaking, a "drop-in" replacement for - both, but the feature sets are far more similar than different.) -* Like The Silver Searcher, `ripgrep` defaults to recursive directory search - and won't search files ignored by your `.gitignore` files. It also ignores - hidden and binary files by default. `ripgrep` also implements full support - for `.gitignore`, where as there are many bugs related to that functionality - in The Silver Searcher. -* `ripgrep` can search specific types of files. For example, `rg -tpy foo` +### Why should I use ripgrep? + +* It can replace many use cases served by other search tools + because it contains most of their features and is generally faster. (See + [the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly + replace grep.) +* Like other tools specialized to code search, ripgrep defaults to recursive + directory search and won't search files ignored by your `.gitignore` files. + It also ignores hidden and binary files by default. ripgrep also implements + full support for `.gitignore`, whereas there are many bugs related to that + functionality in other code search tools claiming to provide the same + functionality. +* ripgrep can search specific types of files. For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs foo` excludes Javascript - files from your search. `ripgrep` can be taught about new file types with + files from your search. ripgrep can be taught about new file types with custom matching rules. -* `ripgrep` supports many features found in `grep`, such as showing the context +* ripgrep supports many features found in `grep`, such as showing the context of search results, searching multiple patterns, highlighting matches with - color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while + color and full Unicode support. 
Unlike GNU grep, ripgrep stays fast while supporting Unicode (which is always on). -* `ripgrep` supports searching files in text encodings other than UTF-8, such +* ripgrep has optional support for switching its regex engine to use PCRE2. + Among other things, this makes it possible to use look-around and + backreferences in your patterns, which are not supported in ripgrep's default + regex engine. PCRE2 support is enabled with `-P`. +* ripgrep supports searching files in text encodings other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for automatically detecting UTF-16 is provided. Other text encodings must be specifically specified with the `-E/--encoding` flag.) +* ripgrep supports searching files compressed in a common format (gzip, xz, + lzma, bzip2 or lz4) with the `-z/--search-zip` flag. +* ripgrep supports arbitrary input preprocessing filters which could be PDF + text extraction, less supported decompression, decrypting, automatic encoding + detection and so on. -In other words, use `ripgrep` if you like speed, filtering by default, fewer +In other words, use ripgrep if you like speed, filtering by default, fewer bugs and Unicode support. -### Why shouldn't I use `ripgrep`? -I'd like to try to convince you why you *shouldn't* use `ripgrep`. This should -give you a glimpse at some important downsides or missing features of -`ripgrep`. - -* `ripgrep` uses a regex engine based on finite automata, so if you want fancy - regex features such as backreferences or look around, `ripgrep` won't give - them to you. `ripgrep` does support lots of things though, including, but not - limited to: lazy quantification (e.g., `a+?`), repetitions (e.g., `a{2,5}`), - begin/end assertions (e.g., `^\w+$`), word boundaries (e.g., `\bfoo\b`), and - support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or - `\p{Lu}` to match any uppercase letter). (Fancier regexes will never be - supported.) 
-* `ripgrep` doesn't yet support searching compressed files. (Likely to be - supported in the future.) -* `ripgrep` doesn't have multiline search. (Unlikely to ever be supported.) +### Why shouldn't I use ripgrep? + +Despite initially not wanting to add every feature under the sun to ripgrep, +over time, ripgrep has grown support for most features found in other file +searching tools. This includes searching for results spanning across multiple +lines, and opt-in support for PCRE2, which provides look-around and +backreference support. + +At this point, the primary reasons not to use ripgrep probably consist of one +or more of the following: + +* You need a portable and ubiquitous tool. While ripgrep works on Windows, + macOS and Linux, it is not ubiquitous and it does not conform to any + standard such as POSIX. The best tool for this job is good old grep. +* There still exists some other feature (or bug) not listed in this README that + you rely on that's in another tool that isn't in ripgrep. +* There is a performance edge case where ripgrep doesn't do well where another + tool does do well. (Please file a bug report!) +* ripgrep isn't possible to install on your machine or isn't available for your + platform. (Please file a bug report!) -In other words, if you like fancy regexes, searching compressed files or -multiline search, then `ripgrep` may not quite meet your needs (yet). ### Is it really faster than everything else? -Yes. A large number of benchmarks with detailed analysis for each is +Generally, yes. A large number of benchmarks with detailed analysis for each is [available on my blog](http://blog.burntsushi.net/ripgrep/). -Summarizing, `ripgrep` is fast because: +Summarizing, ripgrep is fast because: * It is built on top of [Rust's regex engine](https://github.com/rust-lang-nursery/regex). Rust's regex engine uses finite automata, SIMD and aggressive literal - optimizations to make searching very fast. + optimizations to make searching very fast. 
(PCRE2 support can be opted into + with the `-P/--pcre2` flag.) * Rust's regex library maintains performance with full Unicode support by building UTF-8 decoding directly into its deterministic finite automaton engine. * It supports searching with either memory maps or by searching incrementally with an intermediate buffer. The former is better for single files and the - latter is better for large directories. `ripgrep` chooses the best searching + latter is better for large directories. ripgrep chooses the best searching strategy for you automatically. * Applies your ignore patterns in `.gitignore` files using a - [`RegexSet`](https://doc.rust-lang.org/regex/regex/struct.RegexSet.html). + [`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html). That means a single file path can be matched against multiple glob patterns simultaneously. * It uses a lock-free parallel recursive directory iterator, courtesy of [`crossbeam`](https://docs.rs/crossbeam) and [`ignore`](https://docs.rs/ignore). + +### Feature comparison + +Andy Lester, author of [ack](https://beyondgrep.com/), has published an +excellent table comparing the features of ack, ag, git-grep, GNU grep and +ripgrep: https://beyondgrep.com/feature-comparison/ + +Note that ripgrep has grown a few significant new features recently that +are not yet present in Andy's table. This includes, but is not limited to, +configuration files, passthru, support for searching compressed files, +multiline search and opt-in fancy regex support via PCRE2. + + ### Installation -The binary name for `ripgrep` is `rg`. +The binary name for ripgrep is `rg`. -[Binaries for `ripgrep` are available for Windows, Mac and -Linux.](https://github.com/BurntSushi/ripgrep/releases) Linux binaries are -static executables. Windows binaries are available either as built with MinGW -(GNU) or with Microsoft Visual C++ (MSVC). 
When possible, prefer MSVC over GNU, -but you'll need to have the -[Microsoft VC++ 2015 redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145) +**[Archives of precompiled binaries for ripgrep are available for Windows, +macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Users of +platforms not explicitly mentioned below are advised to download one of these +archives. + +Linux binaries are static executables. Windows binaries are available either as +built with MinGW (GNU) or with Microsoft Visual C++ (MSVC). When possible, +prefer MSVC over GNU, but you'll need to have the [Microsoft VC++ 2015 +redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145) installed. -If you're a **Mac OS X Homebrew** user, then you can install ripgrep either +If you're a **macOS Homebrew** or a **Linuxbrew** user, +then you can install ripgrep either from homebrew-core, (compiled with rust stable, no SIMD): ``` @@ -166,195 +211,150 @@ ``` $ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git -$ brew install burntsushi/ripgrep/ripgrep-bin +$ brew install ripgrep-bin ``` -If you're a **Windows Chocolatey** user, then you can install `ripgrep` from the [official repo](https://chocolatey.org/packages/ripgrep): +If you're a **MacPorts** user, then you can install ripgrep from the +[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep): ``` -$ choco install ripgrep +$ sudo port install ripgrep ``` -If you're an **Arch Linux** user, then you can install `ripgrep` from the official repos: +If you're a **Windows Chocolatey** user, then you can install ripgrep from the +[official repo](https://chocolatey.org/packages/ripgrep): ``` -$ pacman -S ripgrep -``` - -If you're a **Gentoo** user, you can install `ripgrep` from the [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): - -``` -$ emerge ripgrep +$ choco install ripgrep ``` -If you're a **Fedora 24+** user, you can 
install `ripgrep` from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): +If you're a **Windows Scoop** user, then you can install ripgrep from the +[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json): ``` -$ dnf copr enable carlwgeorge/ripgrep -$ dnf install ripgrep +$ scoop install ripgrep ``` -If you're a **RHEL/CentOS 7** user, you can install `ripgrep` from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): +If you're an **Arch Linux** user, then you can install ripgrep from the official repos: ``` -$ yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo -$ yum install ripgrep +$ pacman -S ripgrep ``` -If you're a **Nix** user, you can install `ripgrep` from -[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/text/ripgrep/default.nix): +If you're a **Gentoo** user, you can install ripgrep from the +[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): ``` -$ nix-env --install ripgrep -$ # (Or using the attribute name, which is also `ripgrep`.) +$ emerge sys-apps/ripgrep ``` -If you're a **Rust programmer**, `ripgrep` can be installed with `cargo`. Note -that this requires you to have **Rust 1.12 or newer** installed. +If you're a **Fedora 27+** user, you can install ripgrep from official +repositories. ``` -$ cargo install ripgrep +$ sudo dnf install ripgrep ``` -`ripgrep` isn't currently in any other package repositories. -[I'd like to change that](https://github.com/BurntSushi/ripgrep/issues/10). - -### Whirlwind tour - -The command line usage of `ripgrep` doesn't differ much from other tools that -perform a similar function, so you probably already know how to use `ripgrep`. -The full details can be found in `rg --help`, but let's go on a whirlwind tour. 
- -`ripgrep` detects when its printing to a terminal, and will automatically -colorize your output and show line numbers, just like The Silver Searcher. -Coloring works on Windows too! Colors can be controlled more granularly with -the `--color` flag. - -One last thing before we get started: generally speaking, `ripgrep` assumes the -input is reading is UTF-8. However, if ripgrep notices a file is encoded as -UTF-16, then it will know how to search it. For other encodings, you'll need to -explicitly specify them with the `-E/--encoding` flag. - -To recursively search the current directory, while respecting all `.gitignore` -files, ignore hidden files and directories and skip binary files: +If you're a **Fedora 24+** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): ``` -$ rg foobar +$ sudo dnf copr enable carlwgeorge/ripgrep +$ sudo dnf install ripgrep ``` -The above command also respects all `.ignore` files, including in parent -directories. `.ignore` files can be used when `.gitignore` files are -insufficient. In all cases, `.ignore` patterns take precedence over -`.gitignore`. - -To ignore all ignore files, use `-u`. To additionally search hidden files -and directories, use `-uu`. To additionally search binary files, use `-uuu`. -(In other words, "search everything, dammit!") In particular, `rg -uuu` is -similar to `grep -a -r`. +If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the +[official repo](http://software.opensuse.org/package/ripgrep): ``` -$ rg -uu foobar # similar to `grep -r` -$ rg -uuu foobar # similar to `grep -a -r` +$ sudo zypper install ripgrep ``` -(Tip: If your ignore files aren't being adhered to like you expect, run your -search with the `--debug` flag.) - -Make the search case insensitive with `-i`, invert the search with `-v` or -show the 2 lines before and after every search result with `-C2`. - -Force all matches to be surrounded by word boundaries with `-w`. 
- -Search and replace (find first and last names and swap them): +If you're a **RHEL/CentOS 7** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): ``` -$ rg '([A-Z][a-z]+)\s+([A-Z][a-z]+)' --replace '$2, $1' +$ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo +$ sudo yum install ripgrep ``` -Named groups are supported: +If you're a **Nix** user, you can install ripgrep from +[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/text/ripgrep/default.nix): ``` -$ rg '(?P[A-Z][a-z]+)\s+(?P[A-Z][a-z]+)' --replace '$last, $first' +$ nix-env --install ripgrep +$ # (Or using the attribute name, which is also ripgrep.) ``` -Up the ante with full Unicode support, by matching any uppercase Unicode letter -followed by any sequence of lowercase Unicode letters (good luck doing this -with other search tools!): +If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**), +then ripgrep can be installed using a binary `.deb` file provided in each +[ripgrep release](https://github.com/BurntSushi/ripgrep/releases). Note that +ripgrep is not in the official Debian or Ubuntu repositories. ``` -$ rg '(\p{Lu}\p{Ll}+)\s+(\p{Lu}\p{Ll}+)' --replace '$2, $1' +$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/0.10.0/ripgrep_0.10.0_amd64.deb +$ sudo dpkg -i ripgrep_0.10.0_amd64.deb ``` -Search only files matching a particular glob: - +If you run Debian Buster (currently Debian testing) or Debian sid, ripgrep is +[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep). ``` -$ rg foo -g 'README.*' +$ sudo apt-get install ripgrep ``` - +(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them +seem to work right and generate a number of very strange bug reports that I +don't know how to fix and don't have the time to fix. 
Therefore, it is no +longer a recommended installation option.) -Or exclude files matching a particular glob: +If you're a **FreeBSD** user, then you can install ripgrep from the +[official ports](https://www.freshports.org/textproc/ripgrep/): ``` -$ rg foo -g '!*.min.js' +# pkg install ripgrep ``` -Search and return paths matching a particular glob (i.e., `-g` flag in ag/ack): +If you're an **OpenBSD** user, then you can install ripgrep from the +[official ports](http://openports.se/textproc/ripgrep): ``` -$ rg -g 'doc*' --files +$ doas pkg_add ripgrep ``` -Search only HTML and CSS files: +If you're a **NetBSD** user, then you can install ripgrep from +[pkgsrc](http://pkgsrc.se/textproc/ripgrep): ``` -$ rg -thtml -tcss foobar +# pkgin install ripgrep ``` -Search everything except for Javascript files: - -``` -$ rg -Tjs foobar -``` +If you're a **Rust programmer**, ripgrep can be installed with `cargo`. -To see a list of types supported, run `rg --type-list`. To add a new type, use -`--type-add`, which must be accompanied by a pattern for searching (`rg` won't -persist your type settings): +* Note that the minimum supported version of Rust for ripgrep is **1.28.0**, + although ripgrep may work with older versions. +* Note that the binary may be bigger than expected because it contains debug + symbols. This is intentional. To remove debug symbols and therefore reduce + the file size, run `strip` on the binary. ``` -$ rg --type-add 'foo:*.{foo,foobar}' -tfoo bar +$ cargo install ripgrep ``` -The type `foo` will now match any file ending with the `.foo` or `.foobar` -extensions. - -### Regex syntax - -The syntax supported is -[documented as part of Rust's regex library](https://doc.rust-lang.org/regex/regex/index.html#syntax). - -### Shell completions - -Shell completion files are included in the release tarball for Bash, Fish, Zsh -and PowerShell. - -For **bash**, move `complete/rg.bash-completion` to `$XDG_CONFIG_HOME/bash_completion` -or `/etc/bash_completion.d/`. 
- -For **fish**, move `complete/rg.fish` to `$HOME/.config/fish/completions/`. +When compiling with Rust 1.27 or newer, this will automatically enable SIMD +optimizations for search. -For **PowerShell**, add `. _rg.ps1` to your PowerShell -[profile](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx) -(note the leading period). If the `_rg.ps1` file is not on your `PATH`, do -`. /path/to/_rg.ps1` instead. +ripgrep isn't currently in any other package repositories. +[I'd like to change that](https://github.com/BurntSushi/ripgrep/issues/10). -For **zsh**, move `complete/_rg` to one of your `$fpath` directories. ### Building -`ripgrep` is written in Rust, so you'll need to grab a +ripgrep is written in Rust, so you'll need to grab a [Rust installation](https://www.rust-lang.org/) in order to compile it. -`ripgrep` compiles with Rust 1.12 (stable) or newer. Building is easy: +ripgrep compiles with Rust 1.28.0 (stable) or newer. In general, ripgrep tracks +the latest stable release of the Rust compiler. + +To build ripgrep: ``` $ git clone https://github.com/BurntSushi/ripgrep @@ -364,118 +364,64 @@ 0.1.3 ``` -If you have a Rust nightly compiler, then you can enable optional SIMD -acceleration like so: +If you have a Rust nightly compiler and a recent Intel CPU, then you can enable +additional optional SIMD acceleration like so: ``` RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel avx-accel' ``` If your machine doesn't support AVX instructions, then simply remove -`avx-accel` from the features list. Similarly for SIMD. +`avx-accel` from the features list. Similarly for SIMD (which corresponds +roughly to SSE instructions). -### Running tests +The `simd-accel` and `avx-accel` features enable SIMD support in certain +ripgrep dependencies (responsible for counting lines and transcoding). They +are not necessary to get SIMD optimizations for search; those are enabled +automatically. 
Hopefully, some day, the `simd-accel` and `avx-accel` features +will similarly become unnecessary. -`ripgrep` is relatively well tested, including both unit tests and integration -tests. To run the full test suite, use: +Finally, optional PCRE2 support can be built with ripgrep by enabling the +`pcre2` feature: ``` -$ cargo test +$ cargo build --release --features 'pcre2' ``` -from the repository root. - -### Tips - -#### Windows Powershell - -##### Powershell Profile - -To customize powershell on start-up there is a special powershell script that has to be created. -In order to find its location type `$profile` -See [more](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx) for profile details. +(Tip: use `--features 'pcre2 simd-accel avx-accel'` to also include compile +time SIMD optimizations, which will only work with a nightly compiler.) -Any powershell code in this file gets evaluated at the start of console. -This way you can have own aliases to be created at start. +Enabling the PCRE2 feature works with a stable Rust compiler and will +attempt to automatically find and link with your system's PCRE2 library via +`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source +using your system's C compiler and then statically link it into the final +executable. Static linking can be forced even when there is an available PCRE2 +system library by either building ripgrep with the MUSL target or by setting +`PCRE2_SYS_STATIC=1`. -##### Setup function alias +ripgrep can be built with the MUSL target on Linux by first installing the MUSL +library on your system (consult your friendly neighborhood package manager). +Then you just need to add MUSL support to your Rust toolchain and rebuild +ripgrep, which yields a fully static executable: -Often you can find a need to make alias for the favourite utility. - -But powershell function aliases do not behave like your typical linux shell alias. 
- -You always need to propagate arguments and **Stdin** input. -But it cannot be done simply as `function grep() { $input | rg.exe --hidden $args }` - -Use below example as reference to how setup alias in powershell. - -```powershell -function grep { - $count = @($input).Count - $input.Reset() - - if ($count) { - $input | rg.exe --hidden $args - } - else { - rg.exe --hidden $args - } -} +``` +$ rustup target add x86_64-unknown-linux-musl +$ cargo build --release --target x86_64-unknown-linux-musl ``` -Powershell special variables: -* input - is powershell **Stdin** object that allows you to access its content. -* args - is array of arguments passed to this function. - -This alias checks whether there is **Stdin** input and propagates only if there is some lines. -Otherwise empty `$input` will make powershell to trigger `rg` to search empty **Stdin** - -##### Piping non-ASCII content to ripgrep - -When piping input into native executables in PowerShell, the encoding of the -input is controlled by the `$OutputEncoding` variable. By default, this is set -to US-ASCII, and any characters in the pipeline that don't have encodings in -US-ASCII are converted to `?` (question mark) characters. - -To change this setting, set `$OutputEncoding` to a different encoding, as -represented by a .NET encoding object. Some common examples are below. The -value of this variable is reset when PowerShell restarts, so to make this -change take effect every time PowerShell is started add a line setting the -variable into your PowerShell profile. - -Example `$OutputEncoding` settings: -* UTF-8 without BOM: `$OutputEncoding = [System.Text.UTF8Encoding]::new()` -* The console's output encoding: -`$OutputEncoding = [System.Console]::OutputEncoding` - -If you continue to have encoding problems, you can also force the encoding -that the console will use for printing to UTF-8 with -`[System.Console]::OutputEncoding = [System.Text.Encoding]::UTF8`. 
This -will also reset when PowerShell is restarted, so you can add that line -to your profile as well if you want to make the setting permanent. - -### Known issues +Applying the `--features` flag from above works as expected. If you want to +build a static executable with MUSL and with PCRE2, then you will need to have +`musl-gcc` installed, which might be in a separate package from the actual +MUSL library, depending on your Linux distribution. -#### I just hit Ctrl+C in the middle of ripgrep's output and now my terminal's foreground color is wrong! -Type in `color` in cmd.exe (Command Prompt) and `echo -ne "\033[0m"` on Unix -to restore your original foreground color. +### Running tests -In PowerShell, you can add the following code to your profile which will -restore the original foreground color when `Reset-ForegroundColor` is called. -Including the `Set-Alias` line will allow you to call it with simply `color`. +ripgrep is relatively well-tested, including both unit tests and integration +tests. To run the full test suite, use: -```powershell -$OrigFgColor = $Host.UI.RawUI.ForegroundColor -function Reset-ForegroundColor { - $Host.UI.RawUI.ForegroundColor = $OrigFgColor -} -Set-Alias -Name color -Value Reset-ForegroundColor +``` +$ cargo test --all ``` -PR [#187](https://github.com/BurntSushi/ripgrep/pull/187) fixed this, and it -was later deprecated in -[#281](https://github.com/BurntSushi/ripgrep/issues/281). A full explanation is -available [here][msys issue explanation]. - -[msys issue explanation]: https://github.com/BurntSushi/ripgrep/issues/281#issuecomment-269093893 +from the repository root. 
diff -Nru ripgrep-0.6.0/scripts/copy-examples ripgrep-0.10.0.3/scripts/copy-examples --- ripgrep-0.6.0/scripts/copy-examples 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/scripts/copy-examples 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,33 @@ +#!/usr/bin/env python + +from __future__ import absolute_import, division, print_function +import argparse +import codecs +import os.path +import re + +RE_EACH_CODE_BLOCK = re.compile( + r'(?s)(?:```|\{\{< high rust[^>]+>\}\})[^\n]*\n(.*?)(?:```|\{\{< /high >\}\})' # noqa +) +RE_MARKER = re.compile(r'^(?:# )?//([^/].*)$') +RE_STRIP_COMMENT = re.compile(r'^# ?') + +if __name__ == '__main__': + p = argparse.ArgumentParser() + p.add_argument('--rust-file', default='src/cookbook.rs') + p.add_argument('--example-dir', default='grep/examples') + args = p.parse_args() + + with codecs.open(args.rust_file, encoding='utf-8') as f: + rustcode = f.read() + for m in RE_EACH_CODE_BLOCK.finditer(rustcode): + lines = m.group(1).splitlines() + marker, codelines = lines[0], lines[1:] + m = RE_MARKER.search(marker) + if m is None: + continue + + code = '\n'.join(RE_STRIP_COMMENT.sub('', line) for line in codelines) + fpath = os.path.join(args.example_dir, m.group(1)) + with codecs.open(fpath, mode='w+', encoding='utf-8') as f: + print(code, file=f) diff -Nru ripgrep-0.6.0/snapcraft.yaml ripgrep-0.10.0.3/snapcraft.yaml --- ripgrep-0.6.0/snapcraft.yaml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/snapcraft.yaml 2018-09-10 21:10:55.000000000 +0000 @@ -1,5 +1,5 @@ name: ripgrep # you probably want to 'snapcraft register ' -version: '0.5.1' # just for humans, typically '1.2+git' or '1.3.2' +version: '0.8.1' # just for humans, typically '1.2+git' or '1.3.2' summary: Fast file searcher # 79 char long summary description: | ripgrep combines the usability of The Silver Searcher with the raw speed of grep. @@ -11,5 +11,5 @@ source: . 
apps: rg: - command: env PATH=$SNAP/bin:$PATH rg - aliases: [rg] \ No newline at end of file + adapter: none + command: ./bin/rg diff -Nru ripgrep-0.6.0/src/app.rs ripgrep-0.10.0.3/src/app.rs --- ripgrep-0.6.0/src/app.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/app.rs 2018-09-10 21:10:55.000000000 +0000 @@ -1,29 +1,43 @@ -use std::collections::HashMap; +// This module defines the set of command line arguments that ripgrep supports, +// including some light validation. +// +// This module is purposely written in a bare-bones way, since it is included +// in ripgrep's build.rs file as a way to generate a man page and completion +// files for common shells. +// +// The only other place that ripgrep deals with clap is in src/args.rs, which +// is where we read clap's configuration from the end user's arguments and turn +// it into a ripgrep-specific configuration type that is not coupled with clap. -use clap::{App, AppSettings, Arg, ArgSettings}; +use clap::{self, App, AppSettings}; -const ABOUT: &'static str = " +const ABOUT: &str = " ripgrep (rg) recursively searches your current directory for a regex pattern. +By default, ripgrep will respect your .gitignore and automatically skip hidden +files/directories and binary files. -ripgrep's regex engine uses finite automata and guarantees linear time -searching. Because of this, features like backreferences and arbitrary -lookaround are not supported. - -Note that ripgrep may abort unexpectedly when using default settings if it -searches a file that is simultaneously truncated. This behavior can be avoided -by passing the --no-mmap flag. +ripgrep's default regex engine uses finite automata and guarantees linear +time searching. Because of this, features like backreferences and arbitrary +look-around are not supported. However, if ripgrep is built with PCRE2, then +the --pcre2 flag can be used to enable backreferences and look-around. + +ripgrep supports configuration files. 
Set RIPGREP_CONFIG_PATH to a +configuration file. The file can specify one shell argument per line. Lines +starting with '#' are ignored. For more details, see the man page or the +README. Project home page: https://github.com/BurntSushi/ripgrep Use -h for short descriptions and --help for more details."; -const USAGE: &'static str = " - rg [options] PATTERN [path ...] - rg [options] [-e PATTERN ...] [-f FILE ...] [path ...] - rg [options] --files [path ...] - rg [options] --type-list"; +const USAGE: &str = " + rg [OPTIONS] PATTERN [PATH ...] + rg [OPTIONS] [-e PATTERN ...] [-f PATTERNFILE ...] [PATH ...] + rg [OPTIONS] --files [PATH ...] + rg [OPTIONS] --type-list + command | rg [OPTIONS] PATTERN"; -const TEMPLATE: &'static str = "\ +const TEMPLATE: &str = "\ {bin} {version} {author} {about} @@ -37,539 +51,2316 @@ {unified}"; /// Build a clap application parameterized by usage strings. -/// -/// The function given should take a clap argument name and return a help -/// string. `app` will panic if a usage string is not defined. -/// -/// This is an intentionally stand-alone module so that it can be used easily -/// in a `build.rs` script to build shell completion files. pub fn app() -> App<'static, 'static> { - let arg = |name| { - Arg::with_name(name) - .help(USAGES[name].short) - .long_help(USAGES[name].long) - }; - let flag = |name| arg(name).long(name); + // We need to specify our version in a static because we've painted clap + // into a corner. We've told it that every string we give it will be + // 'static, but we need to build the version string dynamically. We can + // fake the 'static lifetime with lazy_static. + lazy_static! 
{ + static ref LONG_VERSION: String = long_version(None); + } - App::new("ripgrep") + let mut app = App::new("ripgrep") .author(crate_authors!()) .version(crate_version!()) .long_version(LONG_VERSION.as_str()) .about(ABOUT) .max_term_width(100) .setting(AppSettings::UnifiedHelpMessage) + .setting(AppSettings::AllArgsOverrideSelf) .usage(USAGE) .template(TEMPLATE) - .help_message("Prints help information. Use --help for more details.") - // First, set up primary positional/flag arguments. - .arg(arg("PATTERN") - .required_unless_one(&[ - "file", "files", "help-short", "help", "regexp", "type-list", - "ripgrep-version", - ])) - .arg(arg("path").multiple(true)) - .arg(flag("regexp").short("e") - .takes_value(true).multiple(true).number_of_values(1) - .set(ArgSettings::AllowLeadingHyphen) - .value_name("PATTERN")) - .arg(flag("files") - // This should also conflict with `PATTERN`, but the first file - // path will actually be in `PATTERN`. - .conflicts_with_all(&["file", "regexp", "type-list"])) - .arg(flag("type-list") - .conflicts_with_all(&["file", "files", "PATTERN", "regexp"])) - // Second, set up common flags. 
- .arg(flag("text").short("a")) - .arg(flag("count").short("c")) - .arg(flag("color") - .value_name("WHEN") - .takes_value(true) - .hide_possible_values(true) - .possible_values(&["never", "auto", "always", "ansi"]) - .default_value_if("vimgrep", None, "never")) - .arg(flag("colors").value_name("SPEC") - .takes_value(true).multiple(true).number_of_values(1)) - .arg(flag("encoding").short("E").value_name("ENCODING") - .takes_value(true).number_of_values(1)) - .arg(flag("fixed-strings").short("F")) - .arg(flag("glob").short("g") - .takes_value(true).multiple(true).number_of_values(1) - .set(ArgSettings::AllowLeadingHyphen) - .value_name("GLOB")) - .arg(flag("iglob") - .takes_value(true).multiple(true).number_of_values(1) - .set(ArgSettings::AllowLeadingHyphen) - .value_name("GLOB")) - .arg(flag("ignore-case").short("i")) - .arg(flag("line-number").short("n")) - .arg(flag("no-line-number").short("N").overrides_with("line-number")) - .arg(flag("quiet").short("q")) - .arg(flag("type").short("t") - .takes_value(true).multiple(true).number_of_values(1) - .value_name("TYPE")) - .arg(flag("type-not").short("T") - .takes_value(true).multiple(true).number_of_values(1) - .value_name("TYPE")) - .arg(flag("unrestricted").short("u") - .multiple(true)) - .arg(flag("invert-match").short("v")) - .arg(flag("word-regexp").short("w").overrides_with("line-regexp")) - .arg(flag("line-regexp").short("x")) - // Third, set up less common flags. 
- .arg(flag("after-context").short("A") - .value_name("NUM").takes_value(true) - .validator(validate_number)) - .arg(flag("before-context").short("B") - .value_name("NUM").takes_value(true) - .validator(validate_number)) - .arg(flag("context").short("C") - .value_name("NUM").takes_value(true) - .validator(validate_number)) - .arg(flag("column")) - .arg(flag("context-separator") - .value_name("SEPARATOR").takes_value(true)) - .arg(flag("dfa-size-limit") - .value_name("NUM+SUFFIX?").takes_value(true)) - .arg(flag("debug")) - .arg(flag("file").short("f") - .value_name("FILE").takes_value(true) - .set(ArgSettings::AllowLeadingHyphen) - .multiple(true).number_of_values(1)) - .arg(flag("files-with-matches").short("l")) - .arg(flag("files-without-match")) - .arg(flag("with-filename").short("H")) - .arg(flag("no-filename").overrides_with("with-filename")) - .arg(flag("heading")) - .arg(flag("no-heading").overrides_with("heading")) - .arg(flag("hidden")) - .arg(flag("ignore-file") - .value_name("FILE").takes_value(true) - .set(ArgSettings::AllowLeadingHyphen) - .multiple(true).number_of_values(1)) - .arg(flag("follow").short("L")) - .arg(flag("max-count") - .short("m").value_name("NUM").takes_value(true) - .validator(validate_number)) - .arg(flag("max-filesize") - .value_name("NUM+SUFFIX?").takes_value(true)) - .arg(flag("maxdepth") - .value_name("NUM").takes_value(true) - .validator(validate_number)) - .arg(flag("mmap")) - .arg(flag("no-messages")) - .arg(flag("no-mmap")) - .arg(flag("no-ignore")) - .arg(flag("no-ignore-parent")) - .arg(flag("no-ignore-vcs")) - .arg(flag("null").short("0")) - .arg(flag("only-matching").short("o").conflicts_with("replace")) - .arg(flag("path-separator").value_name("SEPARATOR").takes_value(true)) - .arg(flag("pretty").short("p")) - .arg(flag("replace").short("r") - .set(ArgSettings::AllowLeadingHyphen) - .value_name("ARG").takes_value(true)) - .arg(flag("regex-size-limit") - .value_name("NUM+SUFFIX?").takes_value(true)) - 
.arg(flag("case-sensitive").short("s")) - .arg(flag("smart-case").short("S")) - .arg(flag("sort-files")) - .arg(flag("threads") - .short("j").value_name("ARG").takes_value(true) - .validator(validate_number)) - .arg(flag("vimgrep").overrides_with("count")) - .arg(flag("max-columns").short("M") - .value_name("NUM").takes_value(true) - .validator(validate_number)) - .arg(flag("type-add") - .value_name("TYPE").takes_value(true) - .multiple(true).number_of_values(1)) - .arg(flag("type-clear") - .value_name("TYPE").takes_value(true) - .multiple(true).number_of_values(1)) -} - -struct Usage { - short: &'static str, - long: &'static str, -} - -macro_rules! doc { - ($map:expr, $name:expr, $short:expr) => { - doc!($map, $name, $short, $short) - }; - ($map:expr, $name:expr, $short:expr, $long:expr) => { - $map.insert($name, Usage { - short: $short, - long: concat!($long, "\n "), - }); + .help_message("Prints help information. Use --help for more details."); + for arg in all_args_and_flags() { + app = app.arg(arg.claparg); + } + app +} + +/// Return the "long" format of ripgrep's version string. +/// +/// If a revision hash is given, then it is used. If one isn't given, then +/// the RIPGREP_BUILD_GIT_HASH env var is inspected for it. If that isn't set, +/// then a revision hash is not included in the version string returned. +pub fn long_version(revision_hash: Option<&str>) -> String { + // Do we have a git hash? + // (Yes, if ripgrep was built on a machine with `git` installed.) + let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) { + None => String::new(), + Some(githash) => format!(" (rev {})", githash), }; + // Put everything together. 
+ let runtime = runtime_cpu_features(); + if runtime.is_empty() { + format!( + "{}{}\n{} (compiled)", + crate_version!(), + hash, + compile_cpu_features().join(" ") + ) + } else { + format!( + "{}{}\n{} (compiled)\n{} (runtime)", + crate_version!(), + hash, + compile_cpu_features().join(" "), + runtime.join(" ") + ) + } +} + +/// Returns the relevant CPU features enabled at compile time. +fn compile_cpu_features() -> Vec<&'static str> { + let mut features = vec![]; + if cfg!(feature = "simd-accel") { + features.push("+SIMD"); + } else { + features.push("-SIMD"); + } + if cfg!(feature = "avx-accel") { + features.push("+AVX"); + } else { + features.push("-AVX"); + } + features } -lazy_static! { - static ref LONG_VERSION: String = { - let mut features: Vec<&str> = vec![]; - - if cfg!(feature = "avx-accel") { - features.push("+AVX"); - } else { - features.push("-AVX"); +/// Returns the relevant CPU features enabled at runtime. +#[cfg(target_arch = "x86_64")] +fn runtime_cpu_features() -> Vec<&'static str> { + // This is kind of a dirty violation of abstraction, since it assumes + // knowledge about what specific SIMD features are being used. + + let mut features = vec![]; + if is_x86_feature_detected!("ssse3") { + features.push("+SIMD"); + } else { + features.push("-SIMD"); + } + if is_x86_feature_detected!("avx2") { + features.push("+AVX"); + } else { + features.push("-AVX"); + } + features +} + +/// Returns the relevant CPU features enabled at runtime. +#[cfg(not(target_arch = "x86_64"))] +fn runtime_cpu_features() -> Vec<&'static str> { + vec![] +} + +/// Arg is a light alias for a clap::Arg that is specialized to compile time +/// string literals. +type Arg = clap::Arg<'static, 'static>; + +/// RGArg is a light wrapper around a clap::Arg and also contains some metadata +/// about the underlying Arg so that it can be inspected for other purposes +/// (e.g., hopefully generating a man page). 
+/// +/// Note that this type is purposely overly constrained to ripgrep's particular +/// use of clap. +#[allow(dead_code)] +#[derive(Clone)] +pub struct RGArg { + /// The underlying clap argument. + claparg: Arg, + /// The name of this argument. This is always present and is the name + /// used in the code to find the value of an argument at runtime. + pub name: &'static str, + /// A short documentation string describing this argument. This string + /// should fit on a single line and be a complete sentence. + /// + /// This is shown in the `-h` output. + pub doc_short: &'static str, + /// A longer documentation string describing this argument. This usually + /// starts with the contents of `doc_short`. This is also usually many + /// lines, potentially paragraphs, and may contain examples and additional + /// prose. + /// + /// This is shown in the `--help` output. + pub doc_long: &'static str, + /// Whether this flag is hidden or not. + /// + /// This is typically used for uncommon flags that only serve to override + /// other flags. For example, --no-ignore is a prominent flag that disables + /// ripgrep's gitignore functionality, but --ignore re-enables it. Since + /// gitignore support is enabled by default, use of the --ignore flag is + /// somewhat niche and relegated to special cases when users make use of + /// configuration files to set defaults. + /// + /// Generally, these flags should be documented in the documentation for + /// the flag they override. + pub hidden: bool, + /// The type of this argument. + pub kind: RGArgKind, +} + +/// The kind of a ripgrep argument. +/// +/// This can be one of three possibilities: a positional argument, a boolean +/// switch flag or a flag that accepts exactly one argument. Each variant +/// stores argument type specific data. +/// +/// Note that clap supports more types of arguments than this, but we don't +/// (and probably shouldn't) use them in ripgrep. 
+/// +/// Finally, note that we don't capture *all* state about an argument in this +/// type. Some state is only known to clap. There isn't any particular reason +/// why; the state we do capture is motivated by use cases (like generating +/// documentation). +#[derive(Clone)] +pub enum RGArgKind { + /// A positional argument. + Positional { + /// The name of the value used in the `-h/--help` output. By + /// convention, this is an all-uppercase string. e.g., `PATH` or + /// `PATTERN`. + value_name: &'static str, + /// Whether an argument can be repeated multiple times or not. + /// + /// The only argument this applies to is PATH, where an end user can + /// specify multiple paths for ripgrep to search. + /// + /// If this is disabled, then an argument can only be provided once. + /// For example, PATTERN is one such argument. (Note that the + /// -e/--regexp flag is distinct from the positional PATTERN argument, + /// and it can be provided multiple times.) + multiple: bool, + }, + /// A boolean switch. + Switch { + /// The long name of a flag. This is always non-empty. + long: &'static str, + /// The short name of a flag. This is empty if a flag only has a long + /// name. + short: Option<&'static str>, + /// Whether this switch can be provided multiple times where meaning + /// is attached to the number of times this flag is given. + /// + /// Note that every switch can be provided multiple times. This + /// particular state indicates whether all instances of a switch are + /// relevant or not. + /// + /// For example, the -u/--unrestricted flag can be provided multiple + /// times where each repeated use of it indicates more relaxing of + /// ripgrep's filtering. Conversely, the -i/--ignore-case flag can + /// also be provided multiple times, but it is simply considered either + /// present or not. In these cases, -u/--unrestricted has `multiple` + /// set to `true` while -i/--ignore-case has `multiple` set to `false`. 
+ multiple: bool, + }, + /// A flag the accepts a single value. + Flag { + /// The long name of a flag. This is always non-empty. + long: &'static str, + /// The short name of a flag. This is empty if a flag only has a long + /// name. + short: Option<&'static str>, + /// The name of the value used in the `-h/--help` output. By + /// convention, this is an all-uppercase string. e.g., `PATH` or + /// `PATTERN`. + value_name: &'static str, + /// Whether this flag can be provided multiple times with multiple + /// distinct values. + /// + /// Note that every flag can be provided multiple times. This + /// particular state indicates whether all instances of a flag are + /// relevant or not. + /// + /// For example, the -g/--glob flag can be provided multiple times and + /// all of its values should be interpreted by ripgrep. Conversely, + /// while the -C/--context flag can also be provided multiple times, + /// only its last instance is used while all previous instances are + /// ignored. In these cases, -g/--glob has `multiple` set to `true` + /// while -C/--context has `multiple` set to `false`. + multiple: bool, + /// A set of possible values for this flag. If an end user provides + /// any value other than what's in this set, then clap will report an + /// error. + possible_values: Vec<&'static str>, + } +} + +impl RGArg { + /// Create a positional argument. + /// + /// The `long_name` parameter is the name of the argument, e.g., `pattern`. + /// The `value_name` parameter is a name that describes the type of + /// argument this flag accepts. It should be in uppercase, e.g., PATH or + /// PATTERN. 
+ fn positional(name: &'static str, value_name: &'static str) -> RGArg { + RGArg { + claparg: Arg::with_name(name).value_name(value_name), + name: name, + doc_short: "", + doc_long: "", + hidden: false, + kind: RGArgKind::Positional { + value_name: value_name, + multiple: false, + }, } + } - if cfg!(feature = "simd-accel") { - features.push("+SIMD"); - } else { - features.push("-SIMD"); + /// Create a boolean switch. + /// + /// The `long_name` parameter is the name of the flag, e.g., `--long-name`. + /// + /// All switches may be repeated an arbitrary number of times. If a switch + /// is truly boolean, that consumers of clap's configuration should only + /// check whether the flag is present or not. Otherwise, consumers may + /// inspect the number of times the switch is used. + fn switch(long_name: &'static str) -> RGArg { + let claparg = Arg::with_name(long_name) + .long(long_name); + RGArg { + claparg: claparg, + name: long_name, + doc_short: "", + doc_long: "", + hidden: false, + kind: RGArgKind::Switch { + long: long_name, + short: None, + multiple: false, + }, } + } - format!("{}\n{}", crate_version!(), features.join(" ")) - }; + /// Create a flag. A flag always accepts exactly one argument. + /// + /// The `long_name` parameter is the name of the flag, e.g., `--long-name`. + /// The `value_name` parameter is a name that describes the type of + /// argument this flag accepts. It should be in uppercase, e.g., PATH or + /// PATTERN. + /// + /// All flags may be repeated an arbitrary number of times. If a flag has + /// only one logical value, that consumers of clap's configuration should + /// only use the last value. 
+ fn flag(long_name: &'static str, value_name: &'static str) -> RGArg { + let claparg = Arg::with_name(long_name) + .long(long_name) + .value_name(value_name) + .takes_value(true) + .number_of_values(1); + RGArg { + claparg: claparg, + name: long_name, + doc_short: "", + doc_long: "", + hidden: false, + kind: RGArgKind::Flag { + long: long_name, + short: None, + value_name: value_name, + multiple: false, + possible_values: vec![], + } + } + } + + /// Set the short flag name. + /// + /// This panics if this arg isn't a switch or a flag. + fn short(mut self, name: &'static str) -> RGArg { + match self.kind { + RGArgKind::Positional{..} => panic!("expected switch or flag"), + RGArgKind::Switch { ref mut short, .. } => { + *short = Some(name); + } + RGArgKind::Flag { ref mut short, .. } => { + *short = Some(name); + } + } + self.claparg = self.claparg.short(name); + self + } + + /// Set the "short" help text. + /// + /// This should be a single line. It is shown in the `-h` output. + fn help(mut self, text: &'static str) -> RGArg { + self.doc_short = text; + self.claparg = self.claparg.help(text); + self + } + + /// Set the "long" help text. + /// + /// This should be at least a single line, usually longer. It is shown in + /// the `--help` output. + fn long_help(mut self, text: &'static str) -> RGArg { + self.doc_long = text; + self.claparg = self.claparg.long_help(text); + self + } + + /// Enable this argument to accept multiple values. + /// + /// Note that while switches and flags can always be repeated an arbitrary + /// number of times, this particular method enables the flag to be + /// logically repeated where each occurrence of the flag may have + /// significance. That is, when this is disabled, then a switch is either + /// present or not and a flag has exactly one value (the last one given). + /// When this is enabled, then a switch has a count corresponding to the + /// number of times it is used and a flag's value is a list of all values + /// given. 
+ /// + /// For the most part, this distinction is resolved by consumers of clap's + /// configuration. + fn multiple(mut self) -> RGArg { + // Why not put `multiple` on RGArg proper? Because it's useful to + // document it distinct for each different kind. See RGArgKind docs. + match self.kind { + RGArgKind::Positional { ref mut multiple, .. } => { + *multiple = true; + } + RGArgKind::Switch { ref mut multiple, .. } => { + *multiple = true; + } + RGArgKind::Flag { ref mut multiple, .. } => { + *multiple = true; + } + } + self.claparg = self.claparg.multiple(true); + self + } + + /// Hide this flag from all documentation. + fn hidden(mut self) -> RGArg { + self.hidden = true; + self.claparg = self.claparg.hidden(true); + self + } + + /// Set the possible values for this argument. If this argument is not + /// a flag, then this panics. + /// + /// If the end user provides any value other than what is given here, then + /// clap will report an error to the user. + /// + /// Note that this will suppress clap's automatic output of possible values + /// when using -h/--help, so users of this method should provide + /// appropriate documentation for the choices in the "long" help text. + fn possible_values(mut self, values: &[&'static str]) -> RGArg { + match self.kind { + RGArgKind::Positional{..} => panic!("expected flag"), + RGArgKind::Switch{..} => panic!("expected flag"), + RGArgKind::Flag { ref mut possible_values, .. } => { + *possible_values = values.to_vec(); + self.claparg = self.claparg + .possible_values(values) + .hide_possible_values(true); + } + } + self + } - static ref USAGES: HashMap<&'static str, Usage> = { - let mut h = HashMap::new(); - doc!(h, "help-short", - "Show short help output.", - "Show short help output. 
Use --help to show more details."); - doc!(h, "help", - "Show verbose help output.", - "When given, more details about flags are provided."); - doc!(h, "ripgrep-version", - "Prints version information."); - - doc!(h, "PATTERN", - "A regular expression used for searching.", - "A regular expression used for searching. To match a pattern \ - beginning with a dash, use the -e/--regexp option."); - doc!(h, "regexp", - "Use pattern to search.", - "Use pattern to search. This option can be provided multiple \ - times, where all patterns given are searched. This is also \ - useful when searching for patterns that start with a dash."); - doc!(h, "path", - "A file or directory to search.", - "A file or directory to search. Directories are searched \ - recursively."); - doc!(h, "files", - "Print each file that would be searched.", - "Print each file that would be searched without actually \ - performing the search. This is useful to determine whether a \ - particular file is being searched or not."); - doc!(h, "type-list", - "Show all supported file types.", - "Show all supported file types and their corresponding globs."); - - doc!(h, "text", - "Search binary files as if they were text."); - doc!(h, "count", - "Only show count of matches for each file."); - doc!(h, "color", - "When to use color. [default: auto]", - "When to use color in the output. The possible values are never, \ - auto, always or ansi. The default is auto. When always is used, \ - coloring is attempted based on your environment. When ansi is \ - used, coloring is forcefully done using ANSI escape color \ - codes."); - doc!(h, "colors", - "Configure color settings and styles.", - "This flag specifies color settings for use in the output. \ - This flag may be provided multiple times. Settings are applied \ - iteratively. Colors are limited to one of eight choices: \ - red, blue, green, cyan, magenta, yellow, white and black. 
\ - Styles are limited to nobold, bold, nointense or intense.\n\n\ - The format of the flag is {type}:{attribute}:{value}. {type} \ - should be one of path, line, column or match. {attribute} can \ - be fg, bg or style. {value} is either a color (for fg and bg) \ - or a text style. A special format, {type}:none, will clear all \ - color settings for {type}.\n\nFor example, the following \ - command will change the match color to magenta and the \ - background color for line numbers to yellow:\n\n\ - rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo."); - doc!(h, "encoding", - "Specify the text encoding of files to search.", - "Specify the text encoding that ripgrep will use on all files \ - searched. The default value is 'auto', which will cause ripgrep \ - to do a best effort automatic detection of encoding on a \ - per-file basis. Other supported values can be found in the list \ - of labels here: \ - https://encoding.spec.whatwg.org/#concept-encoding-get"); - doc!(h, "fixed-strings", - "Treat the pattern as a literal string.", - "Treat the pattern as a literal string instead of a regular \ - expression. When this flag is used, special regular expression \ - meta characters such as (){}*+. do not need to be escaped."); - doc!(h, "glob", - "Include or exclude files/directories.", - "Include or exclude files/directories for searching that \ - match the given glob. This always overrides any other \ - ignore logic. Multiple glob flags may be used. Globbing \ - rules match .gitignore globs. Precede a glob with a ! \ - to exclude it."); - doc!(h, "iglob", - "Include or exclude files/directories case insensitively.", - "Include or exclude files/directories for searching that \ - match the given glob. This always overrides any other \ - ignore logic. Multiple glob flags may be used. Globbing \ - rules match .gitignore globs. Precede a glob with a ! \ - to exclude it. 
Globs are matched case insensitively."); - doc!(h, "ignore-case", - "Case insensitive search.", - "Case insensitive search. This is overridden by \ - --case-sensitive."); - doc!(h, "line-number", - "Show line numbers.", - "Show line numbers (1-based). This is enabled by default when \ - searching in a tty."); - doc!(h, "no-line-number", - "Suppress line numbers.", - "Suppress line numbers. This is enabled by default when NOT \ - searching in a tty."); - doc!(h, "quiet", - "Do not print anything to stdout.", - "Do not print anything to stdout. If a match is found in a file, \ - stop searching. This is useful when ripgrep is used only for \ - its exit code."); - doc!(h, "type", - "Only search files matching TYPE.", - "Only search files matching TYPE. Multiple type flags may be \ - provided. Use the --type-list flag to list all available \ - types."); - doc!(h, "type-not", - "Do not search files matching TYPE.", - "Do not search files matching TYPE. Multiple type-not flags may \ - be provided. Use the --type-list flag to list all available \ - types."); - doc!(h, "unrestricted", - "Reduce the level of \"smart\" searching.", - "Reduce the level of \"smart\" searching. A single -u \ - won't respect .gitignore (etc.) files. Two -u flags will \ - additionally search hidden files and directories. Three \ - -u flags will additionally search binary files. -uu is \ - roughly equivalent to grep -r and -uuu is roughly \ - equivalent to grep -a -r."); - doc!(h, "invert-match", - "Invert matching.", - "Invert matching. Show lines that don't match given patterns."); - doc!(h, "word-regexp", - "Only show matches surrounded by word boundaries.", - "Only show matches surrounded by word boundaries. This is \ - equivalent to putting \\b before and after all of the search \ - patterns."); - doc!(h, "line-regexp", - "Only show matches surrounded by line boundaries.", - "Only show matches surrounded by line boundaries. 
This is \ - equivalent to putting ^...$ around all of the search patterns."); - - doc!(h, "after-context", - "Show NUM lines after each match."); - doc!(h, "before-context", - "Show NUM lines before each match."); - doc!(h, "context", - "Show NUM lines before and after each match."); - doc!(h, "column", - "Show column numbers", - "Show column numbers (1-based). This only shows the column \ - numbers for the first match on each line. This does not try \ - to account for Unicode. One byte is equal to one column. This \ - implies --line-number."); - doc!(h, "context-separator", - "Set the context separator string. [default: --]", - "The string used to separate non-contiguous context lines in the \ - output. Escape sequences like \\x7F or \\t may be used. The \ - default value is --."); - doc!(h, "debug", - "Show debug messages.", - "Show debug messages. Please use this when filing a bug report."); - doc!(h, "dfa-size-limit", - "The upper size limit of the generated dfa.", - "The upper size limit of the generated dfa. The default limit is \ - 10M. This should only be changed on very large regex inputs \ - where the (slower) fallback regex engine may otherwise be used. \ - \n\nThe argument accepts the same size suffixes as allowed in \ - the 'max-filesize' argument."); - doc!(h, "file", - "Search for patterns from the given file.", - "Search for patterns from the given file, with one pattern per \ - line. When this flag is used or multiple times or in \ - combination with the -e/--regexp flag, then all patterns \ - provided are searched. Empty pattern lines will match all input \ - lines, and the newline is not counted as part of the pattern."); - doc!(h, "files-with-matches", - "Only show the paths with at least one match."); - doc!(h, "files-without-match", - "Only show the paths that contains zero matches."); - doc!(h, "with-filename", - "Show file name for each match.", - "Prefix each match with the file name that contains it. 
This is \ - the default when more than one file is searched."); - doc!(h, "no-filename", - "Never show the file name for a match.", - "Never show the file name for a match. This is the default when \ - one file is searched."); - doc!(h, "heading", - "Show matches grouped by each file.", - "This shows the file name above clusters of matches from each \ - file instead of showing the file name for every match. This is \ - the default mode at a tty."); - doc!(h, "no-heading", - "Don't group matches by each file.", - "Don't group matches by each file. If -H/--with-filename is \ - enabled, then file names will be shown for every line matched. \ - This is the default mode when not at a tty."); - doc!(h, "hidden", - "Search hidden files and directories.", - "Search hidden files and directories. By default, hidden files \ - and directories are skipped."); - doc!(h, "ignore-file", - "Specify additional ignore files.", - "Specify additional ignore files for filtering file paths. \ - Ignore files should be in the gitignore format and are matched \ - relative to the current working directory. These ignore files \ - have lower precedence than all other ignore files. When \ - specifying multiple ignore files, earlier files have lower \ - precedence than later files."); - doc!(h, "follow", - "Follow symbolic links."); - doc!(h, "max-count", - "Limit the number of matches.", - "Limit the number of matching lines per file searched to NUM."); - doc!(h, "max-filesize", - "Ignore files larger than NUM in size.", - "Ignore files larger than NUM in size. Does not ignore \ - directories. \ - \n\nThe input format accepts suffixes of K, M or G which \ - correspond to kilobytes, megabytes and gigabytes. If no suffix \ - is provided the input is treated as bytes. \ - \n\nExample: --max-filesize 50K or --max-filesize 80M"); - doc!(h, "maxdepth", - "Descend at most NUM directories.", - "Limit the depth of directory traversal to NUM levels beyond \ - the paths given. 
A value of zero only searches the \ - starting-points themselves.\n\nFor example, \ - 'rg --maxdepth 0 dir/' is a no-op because dir/ will not be \ - descended into. 'rg --maxdepth 1 dir/' will search only the \ - direct children of dir/."); - doc!(h, "mmap", - "Searching using memory maps when possible.", - "Search using memory maps when possible. This is enabled by \ - default when ripgrep thinks it will be faster. Note that memory \ - map searching doesn't currently support all options, so if an \ - incompatible option (e.g., --context) is given with --mmap, \ - then memory maps will not be used."); - doc!(h, "no-messages", - "Suppress all error messages.", - "Suppress all error messages. This is equivalent to redirecting \ - stderr to /dev/null."); - doc!(h, "no-mmap", - "Never use memory maps.", - "Never use memory maps, even when they might be faster."); - doc!(h, "no-ignore", - "Don't respect ignore files.", - "Don't respect ignore files (.gitignore, .ignore, etc.). This \ - implies --no-ignore-parent and --no-ignore-vcs."); - doc!(h, "no-ignore-parent", - "Don't respect ignore files in parent directories.", - "Don't respect ignore files (.gitignore, .ignore, etc.) in \ - parent directories."); - doc!(h, "no-ignore-vcs", - "Don't respect VCS ignore files", - "Don't respect version control ignore files (.gitignore, etc.). \ - This implies --no-ignore-parent. Note that .ignore files will \ - continue to be respected."); - doc!(h, "null", - "Print NUL byte after file names", - "Whenever a file name is printed, follow it with a NUL byte. \ - This includes printing file names before matches, and when \ - printing a list of matching files such as with --count, \ - --files-with-matches and --files. 
This option is useful for use \ - with xargs."); - doc!(h, "only-matching", - "Print only matched parts of a line.", - "Print only the matched (non-empty) parts of a matching line, \ - with each such part on a separate output line."); - doc!(h, "path-separator", - "Path separator to use when printing file paths.", - "The path separator to use when printing file paths. This \ - defaults to your platform's path separator, which is / on Unix \ - and \\ on Windows. This flag is intended for overriding the \ - default when the environment demands it (e.g., cygwin). A path \ - separator is limited to a single byte."); - doc!(h, "pretty", - "Alias for --color always --heading --line-number."); - doc!(h, "replace", - "Replace matches with string given.", - "Replace every match with the string given when printing \ - results. Neither this flag nor any other flag will modify your \ - files.\n\nCapture group indices (e.g., $5) and names \ - (e.g., $foo) are supported in the replacement string.\n\n\ - Note that the replacement by default replaces each match, and \ - NOT the entire line. To replace the entire line, you should \ - match the entire line."); - doc!(h, "regex-size-limit", - "The upper size limit of the compiled regex.", - "The upper size limit of the compiled regex. The default limit \ - is 10M. \n\nThe argument accepts the same size suffixes as \ - allowed in the 'max-filesize' argument."); - doc!(h, "case-sensitive", - "Search case sensitively.", - "Search case sensitively. This overrides -i/--ignore-case and \ - -S/--smart-case."); - doc!(h, "smart-case", - "Smart case search.", - "Searches case insensitively if the pattern is all lowercase. \ - Search case sensitively otherwise. This is overridden by \ - either -s/--case-sensitive or -i/--ignore-case."); - doc!(h, "sort-files", - "Sort results by file path. Implies --threads=1.", - "Sort results by file path. 
Note that this currently \ - disables all parallelism and runs search in a single thread."); - doc!(h, "threads", - "The approximate number of threads to use.", - "The approximate number of threads to use. A value of 0 (which \ - is the default) causes ripgrep to choose the thread count \ - using heuristics."); - doc!(h, "vimgrep", - "Show results in vim compatible format.", - "Show results with every match on its own line, including \ - line numbers and column numbers. With this option, a line with \ - more than one match will be printed more than once."); - doc!(h, "max-columns", - "Don't print lines longer than this limit in bytes.", - "Don't print lines longer than this limit in bytes. Longer lines \ - are omitted, and only the number of matches in that line is \ - printed."); - - doc!(h, "type-add", - "Add a new glob for a file type.", - "Add a new glob for a particular file type. Only one glob can be \ - added at a time. Multiple --type-add flags can be provided. \ - Unless --type-clear is used, globs are added to any existing \ - globs defined inside of ripgrep.\n\nNote that this MUST be \ - passed to every invocation of ripgrep. Type settings are NOT \ - persisted.\n\nExample: \ - rg --type-add 'foo:*.foo' -tfoo PATTERN.\n\n\ - --type-add can also be used to include rules from other types \ - with the special include directive. The include directive \ - permits specifying one or more other type names (separated by a \ - comma) that have been defined and its rules will automatically \ - be imported into the type specified. For example, to create a \ - type called src that matches C++, Python and Markdown files, \ - one can use:\n\n\ - --type-add 'src:include:cpp,py,md'\n\n\ - Additional glob rules can still be added to the src type by \ - using the --type-add flag again:\n\n\ - --type-add 'src:include:cpp,py,md' --type-add 'src:*.foo'\n\n\ - Note that type names must consist only of Unicode letters or \ - numbers. 
Punctuation characters are not allowed."); - doc!(h, "type-clear", - "Clear globs for given file type.", - "Clear the file type globs previously defined for TYPE. This \ - only clears the default type definitions that are found inside \ - of ripgrep.\n\nNote that this MUST be passed to every \ - invocation of ripgrep. Type settings are NOT persisted."); + /// Add an alias to this argument. + /// + /// Aliases are not show in the output of -h/--help. + fn alias(mut self, name: &'static str) -> RGArg { + self.claparg = self.claparg.alias(name); + self + } + + /// Permit this flag to have values that begin with a hypen. + /// + /// This panics if this arg is not a flag. + fn allow_leading_hyphen(mut self) -> RGArg { + match self.kind { + RGArgKind::Positional{..} => panic!("expected flag"), + RGArgKind::Switch{..} => panic!("expected flag"), + RGArgKind::Flag {..} => { + self.claparg = self.claparg.allow_hyphen_values(true); + } + } + self + } - h - }; + /// Sets this argument to a required argument, unless one of the given + /// arguments is provided. + fn required_unless(mut self, names: &[&'static str]) -> RGArg { + self.claparg = self.claparg.required_unless_one(names); + self + } + + /// Sets conflicting arguments. That is, if this argument is used whenever + /// any of the other arguments given here are used, then clap will report + /// an error. + fn conflicts(mut self, names: &[&'static str]) -> RGArg { + self.claparg = self.claparg.conflicts_with_all(names); + self + } + + /// Sets an overriding argument. That is, if this argument and the given + /// argument are both provided by an end user, then the "last" one will + /// win. ripgrep will behave as if any previous instantiations did not + /// happen. + fn overrides(mut self, name: &'static str) -> RGArg { + self.claparg = self.claparg.overrides_with(name); + self + } + + /// Sets the default value of this argument if and only if the argument + /// given is present. 
+ fn default_value_if( + mut self, + value: &'static str, + arg_name: &'static str, + ) -> RGArg { + self.claparg = self.claparg.default_value_if(arg_name, None, value); + self + } + + /// Indicate that any value given to this argument should be a number. If + /// it's not a number, then clap will report an error to the end user. + fn number(mut self) -> RGArg { + self.claparg = self.claparg.validator(|val| { + val.parse::().map(|_| ()).map_err(|err| err.to_string()) + }); + self + } +} + +// We add an extra space to long descriptions so that a blank line is inserted +// between flag descriptions in --help output. +macro_rules! long { + ($lit:expr) => { concat!($lit, " ") } +} + +/// Generate a sequence of all positional and flag arguments. +pub fn all_args_and_flags() -> Vec { + let mut args = vec![]; + // The positional arguments must be defined first and in order. + arg_pattern(&mut args); + arg_path(&mut args); + // Flags can be defined in any order, but we do it alphabetically. Note + // that each function may define multiple flags. For example, + // `flag_encoding` defines `--encoding` and `--no-encoding`. Most `--no` + // flags are hidden and merely mentioned in the docs of the corresponding + // "positive" flag. 
+ flag_after_context(&mut args); + flag_before_context(&mut args); + flag_block_buffered(&mut args); + flag_byte_offset(&mut args); + flag_case_sensitive(&mut args); + flag_color(&mut args); + flag_colors(&mut args); + flag_column(&mut args); + flag_context(&mut args); + flag_context_separator(&mut args); + flag_count(&mut args); + flag_count_matches(&mut args); + flag_crlf(&mut args); + flag_debug(&mut args); + flag_dfa_size_limit(&mut args); + flag_encoding(&mut args); + flag_file(&mut args); + flag_files(&mut args); + flag_files_with_matches(&mut args); + flag_files_without_match(&mut args); + flag_fixed_strings(&mut args); + flag_follow(&mut args); + flag_glob(&mut args); + flag_heading(&mut args); + flag_hidden(&mut args); + flag_iglob(&mut args); + flag_ignore_case(&mut args); + flag_ignore_file(&mut args); + flag_invert_match(&mut args); + flag_json(&mut args); + flag_line_buffered(&mut args); + flag_line_number(&mut args); + flag_line_regexp(&mut args); + flag_max_columns(&mut args); + flag_max_count(&mut args); + flag_max_depth(&mut args); + flag_max_filesize(&mut args); + flag_mmap(&mut args); + flag_multiline(&mut args); + flag_multiline_dotall(&mut args); + flag_no_config(&mut args); + flag_no_ignore(&mut args); + flag_no_ignore_global(&mut args); + flag_no_ignore_messages(&mut args); + flag_no_ignore_parent(&mut args); + flag_no_ignore_vcs(&mut args); + flag_no_messages(&mut args); + flag_no_pcre2_unicode(&mut args); + flag_null(&mut args); + flag_null_data(&mut args); + flag_one_file_system(&mut args); + flag_only_matching(&mut args); + flag_path_separator(&mut args); + flag_passthru(&mut args); + flag_pcre2(&mut args); + flag_pre(&mut args); + flag_pre_glob(&mut args); + flag_pretty(&mut args); + flag_quiet(&mut args); + flag_regex_size_limit(&mut args); + flag_regexp(&mut args); + flag_replace(&mut args); + flag_search_zip(&mut args); + flag_smart_case(&mut args); + flag_sort_files(&mut args); + flag_sort(&mut args); + flag_sortr(&mut args); + 
flag_stats(&mut args); + flag_text(&mut args); + flag_threads(&mut args); + flag_trim(&mut args); + flag_type(&mut args); + flag_type_add(&mut args); + flag_type_clear(&mut args); + flag_type_list(&mut args); + flag_type_not(&mut args); + flag_unrestricted(&mut args); + flag_vimgrep(&mut args); + flag_with_filename(&mut args); + flag_word_regexp(&mut args); + args +} + +fn arg_pattern(args: &mut Vec) { + const SHORT: &str = "A regular expression used for searching."; + const LONG: &str = long!("\ +A regular expression used for searching. To match a pattern beginning with a +dash, use the -e/--regexp flag. + +For example, to search for the literal '-foo', you can use this flag: + + rg -e -foo + +You can also use the special '--' delimiter to indicate that no more flags +will be provided. Namely, the following is equivalent to the above: + + rg -- -foo +"); + let arg = RGArg::positional("pattern", "PATTERN") + .help(SHORT).long_help(LONG) + .required_unless(&[ + "file", "files", "regexp", "type-list", + ]); + args.push(arg); +} + +fn arg_path(args: &mut Vec) { + const SHORT: &str = "A file or directory to search."; + const LONG: &str = long!("\ +A file or directory to search. Directories are searched recursively. Paths \ +specified on the command line override glob and ignore rules. \ +"); + let arg = RGArg::positional("path", "PATH") + .help(SHORT).long_help(LONG) + .multiple(); + args.push(arg); +} + +fn flag_after_context(args: &mut Vec) { + const SHORT: &str = "Show NUM lines after each match."; + const LONG: &str = long!("\ +Show NUM lines after each match. + +This overrides the --context flag. +"); + let arg = RGArg::flag("after-context", "NUM").short("A") + .help(SHORT).long_help(LONG) + .number() + .overrides("context"); + args.push(arg); +} + +fn flag_before_context(args: &mut Vec) { + const SHORT: &str = "Show NUM lines before each match."; + const LONG: &str = long!("\ +Show NUM lines before each match. + +This overrides the --context flag. 
+"); + let arg = RGArg::flag("before-context", "NUM").short("B") + .help(SHORT).long_help(LONG) + .number() + .overrides("context"); + args.push(arg); +} + +fn flag_block_buffered(args: &mut Vec) { + const SHORT: &str = "Force block buffering."; + const LONG: &str = long!("\ +When enabled, ripgrep will use block buffering. That is, whenever a matching +line is found, it will be written to an in-memory buffer and will not be +written to stdout until the buffer reaches a certain size. This is the default +when ripgrep's stdout is redirected to a pipeline or a file. When ripgrep's +stdout is connected to a terminal, line buffering will be used. Forcing block +buffering can be useful when dumping a large amount of contents to a terminal. + +Forceful block buffering can be disabled with --no-block-buffered. Note that +using --no-block-buffered causes ripgrep to revert to its default behavior of +automatically detecting the buffering strategy. To force line buffering, use +the --line-buffered flag. +"); + let arg = RGArg::switch("block-buffered") + .help(SHORT).long_help(LONG) + .overrides("no-block-buffered") + .overrides("line-buffered") + .overrides("no-line-buffered"); + args.push(arg); + + let arg = RGArg::switch("no-block-buffered") + .hidden() + .overrides("block-buffered") + .overrides("line-buffered") + .overrides("no-line-buffered"); + args.push(arg); +} + +fn flag_byte_offset(args: &mut Vec) { + const SHORT: &str = + "Print the 0-based byte offset for each matching line."; + const LONG: &str = long!("\ +Print the 0-based byte offset within the input file before each line of output. +If -o (--only-matching) is specified, print the offset of the matching part +itself. + +If ripgrep does transcoding, then the byte offset is in terms of the the result +of transcoding and not the original data. This applies similarly to another +transformation on the source, such as decompression or a --pre filter. 
Note +that when the PCRE2 regex engine is used, then UTF-8 transcoding is done by +default. +"); + let arg = RGArg::switch("byte-offset").short("b") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_case_sensitive(args: &mut Vec) { + const SHORT: &str = "Search case sensitively (default)."; + const LONG: &str = long!("\ +Search case sensitively. + +This overrides the -i/--ignore-case and -S/--smart-case flags. +"); + let arg = RGArg::switch("case-sensitive").short("s") + .help(SHORT).long_help(LONG) + .overrides("ignore-case") + .overrides("smart-case"); + args.push(arg); +} + +fn flag_color(args: &mut Vec) { + const SHORT: &str = "Controls when to use color."; + const LONG: &str = long!("\ +This flag controls when to use colors. The default setting is 'auto', which +means ripgrep will try to guess when to use colors. For example, if ripgrep is +printing to a terminal, then it will use colors, but if it is redirected to a +file or a pipe, then it will suppress color output. ripgrep will suppress color +output in some other circumstances as well. For example, if the TERM +environment variable is not set or set to 'dumb', then ripgrep will not use +colors. + +The possible values for this flag are: + + never Colors will never be used. + auto The default. ripgrep tries to be smart. + always Colors will always be used regardless of where output is sent. + ansi Like 'always', but emits ANSI escapes (even in a Windows console). + +When the --vimgrep flag is given to ripgrep, then the default value for the +--color flag changes to 'never'. +"); + let arg = RGArg::flag("color", "WHEN") + .help(SHORT).long_help(LONG) + .possible_values(&["never", "auto", "always", "ansi"]) + .default_value_if("never", "vimgrep"); + args.push(arg); +} + +fn flag_colors(args: &mut Vec) { + const SHORT: &str = "Configure color settings and styles."; + const LONG: &str = long!("\ +This flag specifies color settings for use in the output. This flag may be +provided multiple times. 
Settings are applied iteratively. Colors are limited +to one of eight choices: red, blue, green, cyan, magenta, yellow, white and +black. Styles are limited to nobold, bold, nointense, intense, nounderline +or underline. + +The format of the flag is `{type}:{attribute}:{value}`. `{type}` should be +one of path, line, column or match. `{attribute}` can be fg, bg or style. +`{value}` is either a color (for fg and bg) or a text style. A special format, +`{type}:none`, will clear all color settings for `{type}`. + +For example, the following command will change the match color to magenta and +the background color for line numbers to yellow: + + rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo. + +Extended colors can be used for `{value}` when the terminal supports ANSI color +sequences. These are specified as either 'x' (256-color) or 'x,x,x' (24-bit +truecolor) where x is a number between 0 and 255 inclusive. x may be given as +a normal decimal number or a hexadecimal number, which is prefixed by `0x`. + +For example, the following command will change the match background color to +that represented by the rgb value (0,128,255): + + rg --colors 'match:bg:0,128,255' + +or, equivalently, + + rg --colors 'match:bg:0x0,0x80,0xFF' + +Note that the the intense and nointense style flags will have no effect when +used alongside these extended color codes. +"); + let arg = RGArg::flag("colors", "COLOR_SPEC") + .help(SHORT).long_help(LONG) + .multiple(); + args.push(arg); +} + +fn flag_column(args: &mut Vec) { + const SHORT: &str = "Show column numbers."; + const LONG: &str = long!("\ +Show column numbers (1-based). This only shows the column numbers for the first +match on each line. This does not try to account for Unicode. One byte is equal +to one column. This implies --line-number. + +This flag can be disabled with --no-column. 
+"); + let arg = RGArg::switch("column") + .help(SHORT).long_help(LONG) + .overrides("no-column"); + args.push(arg); + + let arg = RGArg::switch("no-column") + .hidden() + .overrides("column"); + args.push(arg); +} + +fn flag_context(args: &mut Vec) { + const SHORT: &str = "Show NUM lines before and after each match."; + const LONG: &str = long!("\ +Show NUM lines before and after each match. This is equivalent to providing +both the -B/--before-context and -A/--after-context flags with the same value. + +This overrides both the -B/--before-context and -A/--after-context flags. +"); + let arg = RGArg::flag("context", "NUM").short("C") + .help(SHORT).long_help(LONG) + .number() + .overrides("before-context") + .overrides("after-context"); + args.push(arg); +} + +fn flag_context_separator(args: &mut Vec) { + const SHORT: &str = "Set the context separator string."; + const LONG: &str = long!("\ +The string used to separate non-contiguous context lines in the output. Escape +sequences like \\x7F or \\t may be used. The default value is --. +"); + let arg = RGArg::flag("context-separator", "SEPARATOR") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_count(args: &mut Vec) { + const SHORT: &str = "Only show the count of matching lines for each file."; + const LONG: &str = long!("\ +This flag suppresses normal output and shows the number of lines that match +the given patterns for each file searched. Each file containing a match has its +path and count printed on each line. Note that this reports the number of lines +that match and not the total number of matches. + +If only one file is given to ripgrep, then only the count is printed if there +is a match. The --with-filename flag can be used to force printing the file +path in this case. + +This overrides the --count-matches flag. Note that when --count is combined +with --only-matching, then ripgrep behaves as if --count-matches was given. 
+"); + let arg = RGArg::switch("count").short("c") + .help(SHORT).long_help(LONG).overrides("count-matches"); + args.push(arg); +} + +fn flag_count_matches(args: &mut Vec) { + const SHORT: &str = + "Only show the count of individual matches for each file."; + const LONG: &str = long!("\ +This flag suppresses normal output and shows the number of individual +matches of the given patterns for each file searched. Each file +containing matches has its path and match count printed on each line. +Note that this reports the total number of individual matches and not +the number of lines that match. + +If only one file is given to ripgrep, then only the count is printed if there +is a match. The --with-filename flag can be used to force printing the file +path in this case. + +This overrides the --count flag. Note that when --count is combined with +--only-matching, then ripgrep behaves as if --count-matches was given. +"); + let arg = RGArg::switch("count-matches") + .help(SHORT).long_help(LONG).overrides("count"); + args.push(arg); +} + +fn flag_crlf(args: &mut Vec) { + const SHORT: &str = "Support CRLF line terminators (useful on Windows)."; + const LONG: &str = long!("\ +When enabled, ripgrep will treat CRLF ('\\r\\n') as a line terminator instead +of just '\\n'. + +Principally, this permits '$' in regex patterns to match just before CRLF +instead of just before LF. The underlying regex engine may not support this +natively, so ripgrep will translate all instances of '$' to '(?:\\r??$)'. This +may produce slightly different than desired match offsets. It is intended as a +work-around until the regex engine supports this natively. + +CRLF support can be disabled with --no-crlf. 
+"); + let arg = RGArg::switch("crlf") + .help(SHORT).long_help(LONG) + .overrides("no-crlf") + .overrides("null-data"); + args.push(arg); + + let arg = RGArg::switch("no-crlf") + .hidden() + .overrides("crlf"); + args.push(arg); +} + +fn flag_debug(args: &mut Vec) { + const SHORT: &str = "Show debug messages."; + const LONG: &str = long!("\ +Show debug messages. Please use this when filing a bug report. + +The --debug flag is generally useful for figuring out why ripgrep skipped +searching a particular file. The debug messages should mention all files +skipped and why they were skipped. + +To get even more debug output, use the --trace flag, which implies --debug +along with additional trace data. With --trace, the output could be quite +large and is generally more useful for development. +"); + let arg = RGArg::switch("debug") + .help(SHORT).long_help(LONG); + args.push(arg); + + let arg = RGArg::switch("trace") + .hidden() + .overrides("debug"); + args.push(arg); +} + +fn flag_dfa_size_limit(args: &mut Vec) { + const SHORT: &str = "The upper size limit of the regex DFA."; + const LONG: &str = long!("\ +The upper size limit of the regex DFA. The default limit is 10M. This should +only be changed on very large regex inputs where the (slower) fallback regex +engine may otherwise be used if the limit is reached. + +The argument accepts the same size suffixes as allowed in with the +--max-filesize flag. +"); + let arg = RGArg::flag("dfa-size-limit", "NUM+SUFFIX?") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_encoding(args: &mut Vec) { + const SHORT: &str = "Specify the text encoding of files to search."; + const LONG: &str = long!("\ +Specify the text encoding that ripgrep will use on all files searched. The +default value is 'auto', which will cause ripgrep to do a best effort automatic +detection of encoding on a per-file basis. 
Other supported values can be found +in the list of labels here: +https://encoding.spec.whatwg.org/#concept-encoding-get + +This flag can be disabled with --no-encoding. +"); + let arg = RGArg::flag("encoding", "ENCODING").short("E") + .help(SHORT).long_help(LONG); + args.push(arg); + + let arg = RGArg::switch("no-encoding") + .hidden() + .overrides("encoding"); + args.push(arg); +} + +fn flag_file(args: &mut Vec) { + const SHORT: &str = "Search for patterns from the given file."; + const LONG: &str = long!("\ +Search for patterns from the given file, with one pattern per line. When this +flag is used multiple times or in combination with the -e/--regexp flag, +then all patterns provided are searched. Empty pattern lines will match all +input lines, and the newline is not counted as part of the pattern. + +A line is printed if and only if it matches at least one of the patterns. +"); + let arg = RGArg::flag("file", "PATTERNFILE").short("f") + .help(SHORT).long_help(LONG) + .multiple() + .allow_leading_hyphen(); + args.push(arg); +} + +fn flag_files(args: &mut Vec) { + const SHORT: &str = "Print each file that would be searched."; + const LONG: &str = long!("\ +Print each file that would be searched without actually performing the search. +This is useful to determine whether a particular file is being search or not. +"); + let arg = RGArg::switch("files") + .help(SHORT).long_help(LONG) + // This also technically conflicts with pattern, but the first file + // path will actually be in pattern. + .conflicts(&["file", "regexp", "type-list"]); + args.push(arg); +} + +fn flag_files_with_matches(args: &mut Vec) { + const SHORT: &str = "Only print the paths with at least one match."; + const LONG: &str = long!("\ +Only print the paths with at least one match. + +This overrides --files-without-match. 
+"); + let arg = RGArg::switch("files-with-matches").short("l") + .help(SHORT).long_help(LONG) + .overrides("files-without-match"); + args.push(arg); +} + +fn flag_files_without_match(args: &mut Vec) { + const SHORT: &str = "Only print the paths that contain zero matches."; + const LONG: &str = long!("\ +Only print the paths that contain zero matches. This inverts/negates the +--files-with-matches flag. + +This overrides --files-with-matches. +"); + let arg = RGArg::switch("files-without-match") + .help(SHORT).long_help(LONG) + .overrides("files-with-matches"); + args.push(arg); +} + +fn flag_fixed_strings(args: &mut Vec) { + const SHORT: &str = "Treat the pattern as a literal string."; + const LONG: &str = long!("\ +Treat the pattern as a literal string instead of a regular expression. When +this flag is used, special regular expression meta characters such as .(){}*+ +do not need to be escaped. + +This flag can be disabled with --no-fixed-strings. +"); + let arg = RGArg::switch("fixed-strings").short("F") + .help(SHORT).long_help(LONG) + .overrides("no-fixed-strings"); + args.push(arg); + + let arg = RGArg::switch("no-fixed-strings") + .hidden() + .overrides("fixed-strings"); + args.push(arg); +} + +fn flag_follow(args: &mut Vec) { + const SHORT: &str = "Follow symbolic links."; + const LONG: &str = long!("\ +When this flag is enabled, ripgrep will follow symbolic links while traversing +directories. This is disabled by default. Note that ripgrep will check for +symbolic link loops and report errors if it finds one. + +This flag can be disabled with --no-follow. 
+"); + let arg = RGArg::switch("follow").short("L") + .help(SHORT).long_help(LONG) + .overrides("no-follow"); + args.push(arg); + + let arg = RGArg::switch("no-follow") + .hidden() + .overrides("follow"); + args.push(arg); +} + +fn flag_glob(args: &mut Vec) { + const SHORT: &str = "Include or exclude files."; + const LONG: &str = long!("\ +Include or exclude files and directories for searching that match the given +glob. This always overrides any other ignore logic. Multiple glob flags may be +used. Globbing rules match .gitignore globs. Precede a glob with a ! to exclude +it. +"); + let arg = RGArg::flag("glob", "GLOB").short("g") + .help(SHORT).long_help(LONG) + .multiple() + .allow_leading_hyphen(); + args.push(arg); +} + +fn flag_heading(args: &mut Vec) { + const SHORT: &str = "Print matches grouped by each file."; + const LONG: &str = long!("\ +This flag prints the file path above clusters of matches from each file instead +of printing the file path as a prefix for each matched line. This is the +default mode when printing to a terminal. + +This overrides the --no-heading flag. +"); + let arg = RGArg::switch("heading") + .help(SHORT).long_help(LONG) + .overrides("no-heading"); + args.push(arg); + + const NO_SHORT: &str = "Don't group matches by each file."; + const NO_LONG: &str = long!("\ +Don't group matches by each file. If --no-heading is provided in addition to +the -H/--with-filename flag, then file paths will be printed as a prefix for +every matched line. This is the default mode when not printing to a terminal. + +This overrides the --heading flag. +"); + let arg = RGArg::switch("no-heading") + .help(NO_SHORT).long_help(NO_LONG) + .overrides("heading"); + args.push(arg); +} + +fn flag_hidden(args: &mut Vec) { + const SHORT: &str = "Search hidden files and directories."; + const LONG: &str = long!("\ +Search hidden files and directories. By default, hidden files and directories +are skipped. 
Note that if a hidden file or a directory is whitelisted in an +ignore file, then it will be searched even if this flag isn't provided. + +This flag can be disabled with --no-hidden. +"); + let arg = RGArg::switch("hidden") + .help(SHORT).long_help(LONG) + .overrides("no-hidden"); + args.push(arg); + + let arg = RGArg::switch("no-hidden") + .hidden() + .overrides("hidden"); + args.push(arg); +} + +fn flag_iglob(args: &mut Vec) { + const SHORT: &str = + "Include or exclude files case insensitively."; + const LONG: &str = long!("\ +Include or exclude files and directories for searching that match the given +glob. This always overrides any other ignore logic. Multiple glob flags may be +used. Globbing rules match .gitignore globs. Precede a glob with a ! to exclude +it. Globs are matched case insensitively. +"); + let arg = RGArg::flag("iglob", "GLOB") + .help(SHORT).long_help(LONG) + .multiple() + .allow_leading_hyphen(); + args.push(arg); +} + +fn flag_ignore_case(args: &mut Vec) { + const SHORT: &str = "Case insensitive search."; + const LONG: &str = long!("\ +When this flag is provided, the given patterns will be searched case +insensitively. The case insensitivity rules used by ripgrep conform to +Unicode's \"simple\" case folding rules. + +This flag overrides -s/--case-sensitive and -S/--smart-case. +"); + let arg = RGArg::switch("ignore-case").short("i") + .help(SHORT).long_help(LONG) + .overrides("case-sensitive") + .overrides("smart-case"); + args.push(arg); +} + +fn flag_ignore_file(args: &mut Vec) { + const SHORT: &str = "Specify additional ignore files."; + const LONG: &str = long!("\ +Specifies a path to one or more .gitignore format rules files. These patterns +are applied after the patterns found in .gitignore and .ignore are applied +and are matched relative to the current working directory. Multiple additional +ignore files can be specified by using the --ignore-file flag several times. 
+When specifying multiple ignore files, earlier files have lower precedence +than later files. + +If you are looking for a way to include or exclude files and directories +directly on the command line, then used -g instead. +"); + let arg = RGArg::flag("ignore-file", "PATH") + .help(SHORT).long_help(LONG) + .multiple() + .allow_leading_hyphen(); + args.push(arg); +} + +fn flag_invert_match(args: &mut Vec) { + const SHORT: &str = "Invert matching."; + const LONG: &str = long!("\ +Invert matching. Show lines that do not match the given patterns. +"); + let arg = RGArg::switch("invert-match").short("v") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_json(args: &mut Vec) { + const SHORT: &str = "Show search results in a JSON Lines format."; + const LONG: &str = long!("\ +Enable printing results in a JSON Lines format. + +When this flag is provided, ripgrep will emit a sequence of messages, each +encoded as a JSON object, where there are five different message types: + +**begin** - A message that indicates a file is being searched and contains at +least one match. + +**end** - A message the indicates a file is done being searched. This message +also include summary statistics about the search for a particular file. + +**match** - A message that indicates a match was found. This includes the text +and offsets of the match. + +**context** - A message that indicates a contextual line was found. This +includes the text of the line, along with any match information if the search +was inverted. + +**summary** - The final message emitted by ripgrep that contains summary +statistics about the search across all files. + +Since file paths or the contents of files are not guaranteed to be valid UTF-8 +and JSON itself must be representable by a Unicode encoding, ripgrep will emit +all data elements as objects with one of two keys: 'text' or 'bytes'. 
'text' is +a normal JSON string when the data is valid UTF-8 while 'bytes' is the base64 +encoded contents of the data. + +The JSON Lines format is only supported for showing search results. It cannot +be used with other flags that emit other types of output, such as --files, +--files-with-matches, --files-without-match, --count or --count-matches. +ripgrep will report an error if any of the aforementioned flags are used in +concert with --json. + +Other flags that control aspects of the standard output such as +--only-matching, --heading, --replace, --max-columns, etc., have no effect +when --json is set. + +A more complete description of the JSON format used can be found here: +https://docs.rs/grep-printer/*/grep_printer/struct.JSON.html + +The JSON Lines format can be disabled with --no-json. +"); + let arg = RGArg::switch("json") + .help(SHORT).long_help(LONG) + .overrides("no-json") + .conflicts(&[ + "count", "count-matches", + "files", "files-with-matches", "files-without-match", + ]); + args.push(arg); + + let arg = RGArg::switch("no-json") + .hidden() + .overrides("json"); + args.push(arg); +} + +fn flag_line_buffered(args: &mut Vec) { + const SHORT: &str = "Force line buffering."; + const LONG: &str = long!("\ +When enabled, ripgrep will use line buffering. That is, whenever a matching +line is found, it will be flushed to stdout immediately. This is the default +when ripgrep's stdout is connected to a terminal, but otherwise, ripgrep will +use block buffering, which is typically faster. This flag forces ripgrep to +use line buffering even if it would otherwise use block buffering. This is +typically useful in shell pipelines, e.g., +'tail -f something.log | rg foo --line-buffered | rg bar'. + +Forceful line buffering can be disabled with --no-line-buffered. Note that +using --no-line-buffered causes ripgrep to revert to its default behavior of +automatically detecting the buffering strategy. To force block buffering, use +the --block-buffered flag. 
+"); + let arg = RGArg::switch("line-buffered") + .help(SHORT).long_help(LONG) + .overrides("no-line-buffered") + .overrides("block-buffered") + .overrides("no-block-buffered"); + args.push(arg); + + let arg = RGArg::switch("no-line-buffered") + .hidden() + .overrides("line-buffered") + .overrides("block-buffered") + .overrides("no-block-buffered"); + args.push(arg); +} + +fn flag_line_number(args: &mut Vec) { + const SHORT: &str = "Show line numbers."; + const LONG: &str = long!("\ +Show line numbers (1-based). This is enabled by default when searching in a +terminal. +"); + let arg = RGArg::switch("line-number").short("n") + .help(SHORT).long_help(LONG) + .overrides("no-line-number"); + args.push(arg); + + const NO_SHORT: &str = "Suppress line numbers."; + const NO_LONG: &str = long!("\ +Suppress line numbers. This is enabled by default when not searching in a +terminal. +"); + let arg = RGArg::switch("no-line-number").short("N") + .help(NO_SHORT).long_help(NO_LONG) + .overrides("line-number"); + args.push(arg); +} + +fn flag_line_regexp(args: &mut Vec) { + const SHORT: &str = "Only show matches surrounded by line boundaries."; + const LONG: &str = long!("\ +Only show matches surrounded by line boundaries. This is equivalent to putting +^...$ around all of the search patterns. In other words, this only prints lines +where the entire line participates in a match. + +This overrides the --word-regexp flag. +"); + let arg = RGArg::switch("line-regexp").short("x") + .help(SHORT).long_help(LONG) + .overrides("word-regexp"); + args.push(arg); +} + +fn flag_max_columns(args: &mut Vec) { + const SHORT: &str = "Don't print lines longer than this limit."; + const LONG: &str = long!("\ +Don't print lines longer than this limit in bytes. Longer lines are omitted, +and only the number of matches in that line is printed. + +When this flag is omitted or is set to 0, then it has no effect. 
+"); + let arg = RGArg::flag("max-columns", "NUM").short("M") + .help(SHORT).long_help(LONG) + .number(); + args.push(arg); +} + +fn flag_max_count(args: &mut Vec) { + const SHORT: &str = "Limit the number of matches."; + const LONG: &str = long!("\ +Limit the number of matching lines per file searched to NUM. +"); + let arg = RGArg::flag("max-count", "NUM").short("m") + .help(SHORT).long_help(LONG) + .number(); + args.push(arg); +} + +fn flag_max_depth(args: &mut Vec) { + const SHORT: &str = "Descend at most NUM directories."; + const LONG: &str = long!("\ +Limit the depth of directory traversal to NUM levels beyond the paths given. A +value of zero only searches the explicitly given paths themselves. + +For example, 'rg --max-depth 0 dir/' is a no-op because dir/ will not be +descended into. 'rg --max-depth 1 dir/' will search only the direct children of +'dir'. +"); + let arg = RGArg::flag("max-depth", "NUM") + .help(SHORT).long_help(LONG) + .alias("maxdepth") + .number(); + args.push(arg); +} + +fn flag_max_filesize(args: &mut Vec) { + const SHORT: &str = "Ignore files larger than NUM in size."; + const LONG: &str = long!("\ +Ignore files larger than NUM in size. This does not apply to directories. + +The input format accepts suffixes of K, M or G which correspond to kilobytes, +megabytes and gigabytes, respectively. If no suffix is provided the input is +treated as bytes. + +Examples: --max-filesize 50K or --max-filesize 80M +"); + let arg = RGArg::flag("max-filesize", "NUM+SUFFIX?") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_mmap(args: &mut Vec) { + const SHORT: &str = "Search using memory maps when possible."; + const LONG: &str = long!("\ +Search using memory maps when possible. This is enabled by default when ripgrep +thinks it will be faster. + +Memory map searching doesn't currently support all options, so if an +incompatible option (e.g., --context) is given with --mmap, then memory maps +will not be used. 
+ +Note that ripgrep may abort unexpectedly when --mmap if it searches a file that +is simultaneously truncated. + +This flag overrides --no-mmap. +"); + let arg = RGArg::switch("mmap") + .help(SHORT).long_help(LONG) + .overrides("no-mmap"); + args.push(arg); + + const NO_SHORT: &str = "Never use memory maps."; + const NO_LONG: &str = long!("\ +Never use memory maps, even when they might be faster. + +This flag overrides --mmap. +"); + let arg = RGArg::switch("no-mmap") + .help(NO_SHORT).long_help(NO_LONG) + .overrides("mmap"); + args.push(arg); +} + +fn flag_multiline(args: &mut Vec) { + const SHORT: &str = "Enable matching across multiple lines."; + const LONG: &str = long!("\ +Enable matching across multiple lines. + +When multiline mode is enabled, ripgrep will lift the restriction that a match +cannot include a line terminator. For example, when multiline mode is not +enabled (the default), then the regex '\\p{any}' will match any Unicode +codepoint other than '\\n'. Similarly, the regex '\\n' is explicitly forbidden, +and if you try to use it, ripgrep will return an error. However, when multiline +mode is enabled, '\\p{any}' will match any Unicode codepoint, including '\\n', +and regexes like '\\n' are permitted. + +An important caveat is that multiline mode does not change the match semantics +of '.'. Namely, in most regex matchers, a '.' will by default match any +character other than '\\n', and this is true in ripgrep as well. In order to +make '.' match '\\n', you must enable the \"dot all\" flag inside the regex. +For example, both '(?s).' and '(?s:.)' have the same semantics, where '.' will +match any character, including '\\n'. Alternatively, the '--multiline-dotall' +flag may be passed to make the \"dot all\" behavior the default. This flag only +applies when multiline search is enabled. + +There is no limit on the number of the lines that a single match can span. 
+ +**WARNING**: Because of how the underlying regex engine works, multiline +searches may be slower than normal line-oriented searches, and they may also +use more memory. In particular, when multiline mode is enabled, ripgrep +requires that each file it searches is laid out contiguously in memory +(either by reading it onto the heap or by memory-mapping it). Things that +cannot be memory-mapped (such as stdin) will be consumed until EOF before +searching can begin. In general, ripgrep will only do these things when +necessary. Specifically, if the --multiline flag is provided but the regex +does not contain patterns that would match '\\n' characters, then ripgrep +will automatically avoid reading each file into memory before searching it. +Nevertheless, if you only care about matches spanning at most one line, then it +is always better to disable multiline mode. + +This flag can be disabled with --no-multiline. +"); + let arg = RGArg::switch("multiline").short("U") + .help(SHORT).long_help(LONG) + .overrides("no-multiline"); + args.push(arg); + + let arg = RGArg::switch("no-multiline") + .hidden() + .overrides("multiline"); + args.push(arg); +} + +fn flag_multiline_dotall(args: &mut Vec) { + const SHORT: &str = "Make '.' match new lines when multiline is enabled."; + const LONG: &str = long!("\ +This flag enables \"dot all\" in your regex pattern, which causes '.' to match +newlines when multiline searching is enabled. This flag has no effect if +multiline searching isn't enabled with the --multiline flag. + +Normally, a '.' will match any character except newlines. While this behavior +typically isn't relevant for line-oriented matching (since matches can span at +most one line), this can be useful when searching with the -U/--multiline flag. +By default, the multiline mode runs without this flag. + +This flag is generally intended to be used in an alias or your ripgrep config +file if you prefer \"dot all\" semantics by default. 
Note that regardless of +whether this flag is used, \"dot all\" semantics can still be controlled via +inline flags in the regex pattern itself, e.g., '(?s:.)' always enables \"dot +all\" whereas '(?-s:.)' always disables \"dot all\". + +This flag can be disabled with --no-multiline-dotall. +"); + let arg = RGArg::switch("multiline-dotall") + .help(SHORT).long_help(LONG) + .overrides("no-multiline-dotall"); + args.push(arg); + + let arg = RGArg::switch("no-multiline-dotall") + .hidden() + .overrides("multiline-dotall"); + args.push(arg); +} + +fn flag_no_config(args: &mut Vec) { + const SHORT: &str = "Never read configuration files."; + const LONG: &str = long!("\ +Never read configuration files. When this flag is present, ripgrep will not +respect the RIPGREP_CONFIG_PATH environment variable. + +If ripgrep ever grows a feature to automatically read configuration files in +pre-defined locations, then this flag will also disable that behavior as well. +"); + let arg = RGArg::switch("no-config") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_no_ignore(args: &mut Vec) { + const SHORT: &str = "Don't respect ignore files."; + const LONG: &str = long!("\ +Don't respect ignore files (.gitignore, .ignore, etc.). This implies +--no-ignore-parent and --no-ignore-vcs. + +This flag can be disabled with the --ignore flag. +"); + let arg = RGArg::switch("no-ignore") + .help(SHORT).long_help(LONG) + .overrides("ignore"); + args.push(arg); + + let arg = RGArg::switch("ignore") + .hidden() + .overrides("no-ignore"); + args.push(arg); +} + +fn flag_no_ignore_global(args: &mut Vec) { + const SHORT: &str = "Don't respect global ignore files."; + const LONG: &str = long!("\ +Don't respect ignore files that come from \"global\" sources such as git's +`core.excludesFile` configuration option (which defaults to +`$HOME/.config/git/ignore`). + +This flag can be disabled with the --ignore-global flag. 
+"); + let arg = RGArg::switch("no-ignore-global") + .help(SHORT).long_help(LONG) + .overrides("ignore-global"); + args.push(arg); + + let arg = RGArg::switch("ignore-global") + .hidden() + .overrides("no-ignore-global"); + args.push(arg); +} + +fn flag_no_ignore_messages(args: &mut Vec) { + const SHORT: &str = "Suppress gitignore parse error messages."; + const LONG: &str = long!("\ +Suppresses all error messages related to parsing ignore files such as .ignore +or .gitignore. + +This flag can be disabled with the --ignore-messages flag. +"); + let arg = RGArg::switch("no-ignore-messages") + .help(SHORT).long_help(LONG) + .overrides("ignore-messages"); + args.push(arg); + + let arg = RGArg::switch("ignore-messages") + .hidden() + .overrides("no-ignore-messages"); + args.push(arg); +} + +fn flag_no_ignore_parent(args: &mut Vec) { + const SHORT: &str = "Don't respect ignore files in parent directories."; + const LONG: &str = long!("\ +Don't respect ignore files (.gitignore, .ignore, etc.) in parent directories. + +This flag can be disabled with the --ignore-parent flag. +"); + let arg = RGArg::switch("no-ignore-parent") + .help(SHORT).long_help(LONG) + .overrides("ignore-parent"); + args.push(arg); + + let arg = RGArg::switch("ignore-parent") + .hidden() + .overrides("no-ignore-parent"); + args.push(arg); +} + +fn flag_no_ignore_vcs(args: &mut Vec) { + const SHORT: &str = "Don't respect VCS ignore files."; + const LONG: &str = long!("\ +Don't respect version control ignore files (.gitignore, etc.). This implies +--no-ignore-parent for VCS files. Note that .ignore files will continue to be +respected. + +This flag can be disabled with the --ignore-vcs flag. 
+"); + let arg = RGArg::switch("no-ignore-vcs") + .help(SHORT).long_help(LONG) + .overrides("ignore-vcs"); + args.push(arg); + + let arg = RGArg::switch("ignore-vcs") + .hidden() + .overrides("no-ignore-vcs"); + args.push(arg); +} + +fn flag_no_messages(args: &mut Vec) { + const SHORT: &str = "Suppress some error messages."; + const LONG: &str = long!("\ +Suppress all error messages related to opening and reading files. Error +messages related to the syntax of the pattern given are still shown. + +This flag can be disabled with the --messages flag. +"); + let arg = RGArg::switch("no-messages") + .help(SHORT).long_help(LONG) + .overrides("messages"); + args.push(arg); + + let arg = RGArg::switch("messages") + .hidden() + .overrides("no-messages"); + args.push(arg); +} + +fn flag_no_pcre2_unicode(args: &mut Vec) { + const SHORT: &str = "Disable Unicode mode for PCRE2 matching."; + const LONG: &str = long!("\ +When PCRE2 matching is enabled, this flag will disable Unicode mode, which is +otherwise enabled by default. If PCRE2 matching is not enabled, then this flag +has no effect. + +When PCRE2's Unicode mode is enabled, several different types of patterns +become Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', +'\\s' and '\\S'. Similarly, the '.' meta character will match any Unicode +codepoint instead of any byte. Caseless matching will also use Unicode simple +case folding instead of ASCII-only case insensitivity. + +Unicode mode in PCRE2 represents a critical trade off in the user experience +of ripgrep. In particular, unlike the default regex engine, PCRE2 does not +support the ability to search possibly invalid UTF-8 with Unicode features +enabled. Instead, PCRE2 *requires* that everything it searches when Unicode +mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes +of ripgrep, we only discuss UTF-8.) 
This means that if you have PCRE2's Unicode +mode enabled and you attempt to search invalid UTF-8, then the search for that +file will halt and print an error. For this reason, when PCRE2's Unicode mode +is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by +replacing them with the Unicode replacement codepoint. + +If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode +is enabled, then pass the --no-encoding flag to disable all transcoding. + +Related flags: --pcre2 + +This flag can be disabled with --pcre2-unicode. +"); + let arg = RGArg::switch("no-pcre2-unicode") + .help(SHORT).long_help(LONG) + .overrides("pcre2-unicode"); + args.push(arg); + + let arg = RGArg::switch("pcre2-unicode") + .hidden() + .overrides("no-pcre2-unicode"); + args.push(arg); +} + +fn flag_null(args: &mut Vec) { + const SHORT: &str = "Print a NUL byte after file paths."; + const LONG: &str = long!("\ +Whenever a file path is printed, follow it with a NUL byte. This includes +printing file paths before matches, and when printing a list of matching files +such as with --count, --files-with-matches and --files. This option is useful +for use with xargs. +"); + let arg = RGArg::switch("null").short("0") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_null_data(args: &mut Vec) { + const SHORT: &str = "Use NUL as a line terminator instead of \\n."; + const LONG: &str = long!("\ +Enabling this option causes ripgrep to use NUL as a line terminator instead of +the default of '\\n'. + +This is useful when searching large binary files that would otherwise have very +long lines if '\\n' were used as the line terminator. In particular, ripgrep +requires that, at a minimum, each line must fit into memory. Using NUL instead +can be a useful stopgap to keep memory requirements low and avoid OOM (out of +memory) conditions. 
+ +This is also useful for processing NUL delimited data, such as that emitted +when using ripgrep's -0/--null flag or find's --print0 flag. + +Using this flag implies -a/--text. +"); + let arg = RGArg::switch("null-data") + .help(SHORT).long_help(LONG) + .overrides("crlf"); + args.push(arg); +} + +fn flag_one_file_system(args: &mut Vec) { + const SHORT: &str = + "Do not descend into directories on other file systems."; + const LONG: &str = long!("\ +When enabled, ripgrep will not cross file system boundaries relative to where +the search started from. + +Note that this applies to each path argument given to ripgrep. For example, in +the command 'rg --one-file-system /foo/bar /quux/baz', ripgrep will search both +'/foo/bar' and '/quux/baz' even if they are on different file systems, but will +not cross a file system boundary when traversing each path's directory tree. + +This is similar to find's '-xdev' or '-mount' flag. + +This flag can be disabled with --no-one-file-system. +"); + let arg = RGArg::switch("one-file-system") + .help(SHORT).long_help(LONG) + .overrides("no-one-file-system"); + args.push(arg); + + let arg = RGArg::switch("no-one-file-system") + .hidden() + .overrides("one-file-system"); + args.push(arg); +} + +fn flag_only_matching(args: &mut Vec) { + const SHORT: &str = "Print only matches parts of a line."; + const LONG: &str = long!("\ +Print only the matched (non-empty) parts of a matching line, with each such +part on a separate output line. +"); + let arg = RGArg::switch("only-matching").short("o") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_path_separator(args: &mut Vec) { + const SHORT: &str = "Set the path separator."; + const LONG: &str = long!("\ +Set the path separator to use when printing file paths. This defaults to your +platform's path separator, which is / on Unix and \\ on Windows. This flag is +intended for overriding the default when the environment demands it (e.g., +cygwin). 
A path separator is limited to a single byte. +"); + let arg = RGArg::flag("path-separator", "SEPARATOR") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_passthru(args: &mut Vec) { + const SHORT: &str = "Print both matching and non-matching lines."; + const LONG: &str = long!("\ +Print both matching and non-matching lines. + +Another way to achieve a similar effect is by modifying your pattern to match +the empty string. For example, if you are searching using 'rg foo' then using +'rg \"^|foo\"' instead will emit every line in every file searched, but only +occurrences of 'foo' will be highlighted. This flag enables the same behavior +without needing to modify the pattern. +"); + let arg = RGArg::switch("passthru") + .help(SHORT).long_help(LONG) + .alias("passthrough"); + args.push(arg); +} + +fn flag_pcre2(args: &mut Vec) { + const SHORT: &str = "Enable PCRE2 matching."; + const LONG: &str = long!("\ +When this flag is present, ripgrep will use the PCRE2 regex engine instead of +its default regex engine. + +This is generally useful when you want to use features such as look-around +or backreferences. + +Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in +your build of ripgrep, then using this flag will result in ripgrep printing +an error message and exiting. + +Related flags: --no-pcre2-unicode + +This flag can be disabled with --no-pcre2. +"); + let arg = RGArg::switch("pcre2").short("P") + .help(SHORT).long_help(LONG) + .overrides("no-pcre2"); + args.push(arg); + + let arg = RGArg::switch("no-pcre2") + .hidden() + .overrides("pcre2"); + args.push(arg); +} + +fn flag_pre(args: &mut Vec) { + const SHORT: &str = "search outputs of COMMAND FILE for each FILE"; + const LONG: &str = long!("\ +For each input FILE, search the standard output of COMMAND FILE rather than the +contents of FILE. This option expects the COMMAND program to either be an +absolute path or to be available in your PATH. 
Either an empty string COMMAND +or the `--no-pre` flag will disable this behavior. + + WARNING: When this flag is set, ripgrep will unconditionally spawn a + process for every file that is searched. Therefore, this can incur an + unnecessarily large performance penalty if you don't otherwise need the + flexibility offered by this flag. + +A preprocessor is not run when ripgrep is searching stdin. + +When searching over sets of files that may require one of several decoders +as preprocessors, COMMAND should be a wrapper program or script which first +classifies FILE based on magic numbers/content or based on the FILE name and +then dispatches to an appropriate preprocessor. Each COMMAND also has its +standard input connected to FILE for convenience. + +For example, a shell script for COMMAND might look like: + + case \"$1\" in + *.pdf) + exec pdftotext \"$1\" - + ;; + *) + case $(file \"$1\") in + *Zstandard*) + exec pzstd -cdq + ;; + *) + exec cat + ;; + esac + ;; + esac + +The above script uses `pdftotext` to convert a PDF file to plain text. For +all other files, the script uses the `file` utility to sniff the type of the +file based on its contents. If it is a compressed file in the Zstandard format, +then `pzstd` is used to decompress the contents to stdout. + +This overrides the -z/--search-zip flag. +"); + let arg = RGArg::flag("pre", "COMMAND") + .help(SHORT).long_help(LONG) + .overrides("no-pre") + .overrides("search-zip"); + args.push(arg); + + let arg = RGArg::switch("no-pre") + .hidden() + .overrides("pre"); + args.push(arg); +} + +fn flag_pre_glob(args: &mut Vec) { + const SHORT: &str = + "Include or exclude files from a preprocessing command."; + const LONG: &str = long!("\ +This flag works in conjunction with the --pre flag. Namely, when one or more +--pre-glob flags are given, then only files that match the given set of globs +will be handed to the command specified by the --pre flag. 
Any non-matching +files will be searched without using the preprocessor command. + +This flag is useful when searching many files with the --pre flag. Namely, +it permits the ability to avoid process overhead for files that don't need +preprocessing. For example, given the following shell script, 'pre-pdftotext': + + #!/bin/sh + + pdftotext \"$1\" - + +then it is possible to use '--pre pre-pdftotext --pre-glob \'*.pdf\'' to make +it so ripgrep only executes the 'pre-pdftotext' command on files with a '.pdf' +extension. + +Multiple --pre-glob flags may be used. Globbing rules match .gitignore globs. +Precede a glob with a ! to exclude it. + +This flag has no effect if the --pre flag is not used. +"); + let arg = RGArg::flag("pre-glob", "GLOB") + .help(SHORT).long_help(LONG) + .multiple() + .allow_leading_hyphen(); + args.push(arg); +} + +fn flag_pretty(args: &mut Vec) { + const SHORT: &str = "Alias for --color always --heading --line-number."; + const LONG: &str = long!("\ +This is a convenience alias for '--color always --heading --line-number'. This +flag is useful when you still want pretty output even if you're piping ripgrep +to another program or file. For example: 'rg -p foo | less -R'. +"); + let arg = RGArg::switch("pretty").short("p") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_quiet(args: &mut Vec) { + const SHORT: &str = "Do not print anything to stdout."; + const LONG: &str = long!("\ +Do not print anything to stdout. If a match is found in a file, then ripgrep +will stop searching. This is useful when ripgrep is used only for its exit +code (which will be an error if no matches are found). + +When --files is used, then ripgrep will stop finding files after finding the +first file that matches all ignore rules. 
+"); + let arg = RGArg::switch("quiet").short("q") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_regex_size_limit(args: &mut Vec) { + const SHORT: &str = "The upper size limit of the compiled regex."; + const LONG: &str = long!("\ +The upper size limit of the compiled regex. The default limit is 10M. + +The argument accepts the same size suffixes as allowed in the --max-filesize +flag. +"); + let arg = RGArg::flag("regex-size-limit", "NUM+SUFFIX?") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_regexp(args: &mut Vec) { + const SHORT: &str = "A pattern to search for."; + const LONG: &str = long!("\ +A pattern to search for. This option can be provided multiple times, where +all patterns given are searched. Lines matching at least one of the provided +patterns are printed. This flag can also be used when searching for patterns +that start with a dash. + +For example, to search for the literal '-foo', you can use this flag: + + rg -e -foo + +You can also use the special '--' delimiter to indicate that no more flags +will be provided. Namely, the following is equivalent to the above: + + rg -- -foo +"); + let arg = RGArg::flag("regexp", "PATTERN").short("e") + .help(SHORT).long_help(LONG) + .multiple() + .allow_leading_hyphen(); + args.push(arg); +} + +fn flag_replace(args: &mut Vec) { + const SHORT: &str = "Replace matches with the given text."; + const LONG: &str = long!("\ +Replace every match with the text given when printing results. Neither this +flag nor any other ripgrep flag will modify your files. + +Capture group indices (e.g., $5) and names (e.g., $foo) are supported in the +replacement string. + +Note that the replacement by default replaces each match, and NOT the entire +line. To replace the entire line, you should match the entire line. + +This flag can be used with the -o/--only-matching flag. 
+"); + let arg = RGArg::flag("replace", "REPLACEMENT_TEXT").short("r") + .help(SHORT).long_help(LONG) + .allow_leading_hyphen(); + args.push(arg); +} + +fn flag_search_zip(args: &mut Vec) { + const SHORT: &str = "Search in compressed files."; + const LONG: &str = long!("\ +Search in compressed files. Currently gz, bz2, xz, lzma and lz4 files are +supported. This option expects the decompression binaries to be available in +your PATH. + +This flag can be disabled with --no-search-zip. +"); + let arg = RGArg::switch("search-zip").short("z") + .help(SHORT).long_help(LONG) + .overrides("no-search-zip") + .overrides("pre"); + args.push(arg); + + let arg = RGArg::switch("no-search-zip") + .hidden() + .overrides("search-zip"); + args.push(arg); +} + +fn flag_smart_case(args: &mut Vec) { + const SHORT: &str = "Smart case search."; + const LONG: &str = long!("\ +Searches case insensitively if the pattern is all lowercase. Search case +sensitively otherwise. + +This overrides the -s/--case-sensitive and -i/--ignore-case flags. +"); + let arg = RGArg::switch("smart-case").short("S") + .help(SHORT).long_help(LONG) + .overrides("case-sensitive") + .overrides("ignore-case"); + args.push(arg); +} + +fn flag_sort_files(args: &mut Vec) { + const SHORT: &str = "DEPRECATED"; + const LONG: &str = long!("\ +DEPRECATED: Use --sort or --sortr instead. + +Sort results by file path. Note that this currently disables all parallelism +and runs search in a single thread. + +This flag can be disabled with --no-sort-files. +"); + let arg = RGArg::switch("sort-files") + .help(SHORT).long_help(LONG) + .hidden() + .overrides("no-sort-files") + .overrides("sort") + .overrides("sortr"); + args.push(arg); + + let arg = RGArg::switch("no-sort-files") + .hidden() + .overrides("sort-files") + .overrides("sort") + .overrides("sortr"); + args.push(arg); +} + +fn flag_sort(args: &mut Vec) { + const SHORT: &str = + "Sort results in ascending order. 
Implies --threads=1."; + const LONG: &str = long!("\ +This flag enables sorting of results in ascending order. The possible values +for this flag are: + + path Sort by file path. + modified Sort by the last modified time on a file. + accessed Sort by the last accessed time on a file. + created Sort by the creation time on a file. + none Do not sort results. + +If the sorting criteria isn't available on your system (for example, creation +time is not available on ext4 file systems), then ripgrep will attempt to +detect this and print an error without searching any results. Otherwise, the +sort order is unspecified. + +To sort results in reverse or descending order, use the --sortr flag. Also, +this flag overrides --sortr. + +Note that sorting results currently always forces ripgrep to abandon +parallelism and run in a single thread. +"); + let arg = RGArg::flag("sort", "SORTBY") + .help(SHORT).long_help(LONG) + .possible_values(&["path", "modified", "accessed", "created", "none"]) + .overrides("sortr") + .overrides("sort-files") + .overrides("no-sort-files"); + args.push(arg); +} + +fn flag_sortr(args: &mut Vec) { + const SHORT: &str = + "Sort results in descending order. Implies --threads=1."; + const LONG: &str = long!("\ +This flag enables sorting of results in descending order. The possible values +for this flag are: + + path Sort by file path. + modified Sort by the last modified time on a file. + accessed Sort by the last accessed time on a file. + created Sort by the creation time on a file. + none Do not sort results. + +If the sorting criteria isn't available on your system (for example, creation +time is not available on ext4 file systems), then ripgrep will attempt to +detect this and print an error without searching any results. Otherwise, the +sort order is unspecified. + +To sort results in ascending order, use the --sort flag. Also, this flag +overrides --sort. 
+ +Note that sorting results currently always forces ripgrep to abandon +parallelism and run in a single thread. +"); + let arg = RGArg::flag("sortr", "SORTBY") + .help(SHORT).long_help(LONG) + .possible_values(&["path", "modified", "accessed", "created", "none"]) + .overrides("sort") + .overrides("sort-files") + .overrides("no-sort-files"); + args.push(arg); +} + +fn flag_stats(args: &mut Vec) { + const SHORT: &str = "Print statistics about this ripgrep search."; + const LONG: &str = long!("\ +Print aggregate statistics about this ripgrep search. When this flag is +present, ripgrep will print the following stats to stdout at the end of the +search: number of matched lines, number of files with matches, number of files +searched, and the time taken for the entire search to complete. + +This set of aggregate statistics may expand over time. + +Note that this flag has no effect if --files, --files-with-matches or +--files-without-match is passed. + +This flag can be disabled with --no-stats. +"); + let arg = RGArg::switch("stats") + .help(SHORT).long_help(LONG) + .overrides("no-stats"); + args.push(arg); + + let arg = RGArg::switch("no-stats") + .hidden() + .overrides("stats"); + args.push(arg); +} + +fn flag_text(args: &mut Vec) { + const SHORT: &str = "Search binary files as if they were text."; + const LONG: &str = long!("\ +Search binary files as if they were text. When this flag is present, ripgrep's +binary file detection is disabled. This means that when a binary file is +searched, its contents may be printed if there is a match. This may cause +escape codes to be printed that alter the behavior of your terminal. + +When binary file detection is enabled it is imperfect. In general, it uses +a simple heuristic. If a NUL byte is seen during search, then the file is +considered binary and search stops (unless this flag is present). + +Note that when the `-u/--unrestricted` flag is provided for a third time, then +this flag is automatically enabled. 
+ +This flag can be disabled with --no-text. +"); + let arg = RGArg::switch("text").short("a") + .help(SHORT).long_help(LONG) + .overrides("no-text"); + args.push(arg); + + let arg = RGArg::switch("no-text") + .hidden() + .overrides("text"); + args.push(arg); +} + +fn flag_threads(args: &mut Vec) { + const SHORT: &str = "The approximate number of threads to use."; + const LONG: &str = long!("\ +The approximate number of threads to use. A value of 0 (which is the default) +causes ripgrep to choose the thread count using heuristics. +"); + let arg = RGArg::flag("threads", "NUM").short("j") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_trim(args: &mut Vec) { + const SHORT: &str = "Trim prefixed whitespace from matches."; + const LONG: &str = long!("\ +When set, all ASCII whitespace at the beginning of each line printed will be +trimmed. + +This flag can be disabled with --no-trim. +"); + let arg = RGArg::switch("trim") + .help(SHORT).long_help(LONG) + .overrides("no-trim"); + args.push(arg); + + let arg = RGArg::switch("no-trim") + .hidden() + .overrides("trim"); + args.push(arg); +} + +fn flag_type(args: &mut Vec) { + const SHORT: &str = "Only search files matching TYPE."; + const LONG: &str = long!("\ +Only search files matching TYPE. Multiple type flags may be provided. Use the +--type-list flag to list all available types. +"); + let arg = RGArg::flag("type", "TYPE").short("t") + .help(SHORT).long_help(LONG) + .multiple(); + args.push(arg); +} + +fn flag_type_add(args: &mut Vec) { + const SHORT: &str = "Add a new glob for a file type."; + const LONG: &str = long!("\ +Add a new glob for a particular file type. Only one glob can be added at a +time. Multiple --type-add flags can be provided. Unless --type-clear is used, +globs are added to any existing globs defined inside of ripgrep. + +Note that this MUST be passed to every invocation of ripgrep. Type settings are +NOT persisted. + +Example: + + rg --type-add 'foo:*.foo' -tfoo PATTERN. 
+ +--type-add can also be used to include rules from other types with the special +include directive. The include directive permits specifying one or more other +type names (separated by a comma) that have been defined and its rules will +automatically be imported into the type specified. For example, to create a +type called src that matches C++, Python and Markdown files, one can use: + + --type-add 'src:include:cpp,py,md' + +Additional glob rules can still be added to the src type by using the +--type-add flag again: + + --type-add 'src:include:cpp,py,md' --type-add 'src:*.foo' + +Note that type names must consist only of Unicode letters or numbers. +Punctuation characters are not allowed. +"); + let arg = RGArg::flag("type-add", "TYPE_SPEC") + .help(SHORT).long_help(LONG) + .multiple(); + args.push(arg); +} + +fn flag_type_clear(args: &mut Vec) { + const SHORT: &str = "Clear globs for a file type."; + const LONG: &str = long!("\ +Clear the file type globs previously defined for TYPE. This only clears the +default type definitions that are found inside of ripgrep. + +Note that this MUST be passed to every invocation of ripgrep. Type settings are +NOT persisted. +"); + let arg = RGArg::flag("type-clear", "TYPE") + .help(SHORT).long_help(LONG) + .multiple(); + args.push(arg); +} + +fn flag_type_not(args: &mut Vec) { + const SHORT: &str = "Do not search files matching TYPE."; + const LONG: &str = long!("\ +Do not search files matching TYPE. Multiple type-not flags may be provided. Use +the --type-list flag to list all available types. +"); + let arg = RGArg::flag("type-not", "TYPE").short("T") + .help(SHORT).long_help(LONG) + .multiple(); + args.push(arg); +} + +fn flag_type_list(args: &mut Vec) { + const SHORT: &str = "Show all supported file types."; + const LONG: &str = long!("\ +Show all supported file types and their corresponding globs. 
+"); + let arg = RGArg::switch("type-list") + .help(SHORT).long_help(LONG) + // This also technically conflicts with PATTERN, but the first file + // path will actually be in PATTERN. + .conflicts(&["file", "files", "pattern", "regexp"]); + args.push(arg); +} + +fn flag_unrestricted(args: &mut Vec) { + const SHORT: &str = "Reduce the level of \"smart\" searching."; + const LONG: &str = long!("\ +Reduce the level of \"smart\" searching. A single -u won't respect .gitignore +(etc.) files. Two -u flags will additionally search hidden files and +directories. Three -u flags will additionally search binary files. + +-uu is roughly equivalent to grep -r and -uuu is roughly equivalent to grep -a +-r. +"); + let arg = RGArg::switch("unrestricted").short("u") + .help(SHORT).long_help(LONG) + .multiple(); + args.push(arg); +} + +fn flag_vimgrep(args: &mut Vec) { + const SHORT: &str = "Show results in vim compatible format."; + const LONG: &str = long!("\ +Show results with every match on its own line, including line numbers and +column numbers. With this option, a line with more than one match will be +printed more than once. +"); + let arg = RGArg::switch("vimgrep") + .help(SHORT).long_help(LONG); + args.push(arg); +} + +fn flag_with_filename(args: &mut Vec) { + const SHORT: &str = "Print the file path with the matched lines."; + const LONG: &str = long!("\ +Display the file path for matches. This is the default when more than one +file is searched. If --heading is enabled (the default when printing to a +terminal), the file path will be shown above clusters of matches from each +file; otherwise, the file name will be shown as a prefix for each matched line. + +This flag overrides --no-filename. 
+"); + let arg = RGArg::switch("with-filename").short("H") + .help(SHORT).long_help(LONG) + .overrides("no-filename"); + args.push(arg); + + const NO_SHORT: &str = "Never print the file path with the matched lines."; + const NO_LONG: &str = long!("\ +Never print the file path with the matched lines. This is the default when +ripgrep is explicitly instructed to search one file or stdin. + +This flag overrides --with-filename. +"); + let arg = RGArg::switch("no-filename") + .help(NO_SHORT).long_help(NO_LONG) + .overrides("with-filename"); + args.push(arg); } -fn validate_number(s: String) -> Result<(), String> { - s.parse::().map(|_|()).map_err(|err| err.to_string()) +fn flag_word_regexp(args: &mut Vec) { + const SHORT: &str = "Only show matches surrounded by word boundaries."; + const LONG: &str = long!("\ +Only show matches surrounded by word boundaries. This is roughly equivalent to +putting \\b before and after all of the search patterns. + +This overrides the --line-regexp flag. +"); + let arg = RGArg::switch("word-regexp").short("w") + .help(SHORT).long_help(LONG) + .overrides("line-regexp"); + args.push(arg); } diff -Nru ripgrep-0.6.0/src/args.rs ripgrep-0.10.0.3/src/args.rs --- ripgrep-0.6.0/src/args.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/args.rs 2018-09-10 21:10:55.000000000 +0000 @@ -2,80 +2,116 @@ use std::env; use std::ffi::OsStr; use std::fs; -use std::io::{self, BufRead}; -use std::ops; +use std::io; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::time::SystemTime; use clap; -use encoding_rs::Encoding; -use env_logger; -use grep::{Grep, GrepBuilder}; +use grep::cli; +use grep::matcher::LineTerminator; +#[cfg(feature = "pcre2")] +use grep::pcre2::{ + RegexMatcher as PCRE2RegexMatcher, + RegexMatcherBuilder as PCRE2RegexMatcherBuilder, +}; +use grep::printer::{ + ColorSpecs, Stats, + JSON, JSONBuilder, + Standard, StandardBuilder, + Summary, SummaryBuilder, SummaryKind, 
+ default_color_specs, +}; +use grep::regex::{ + RegexMatcher as RustRegexMatcher, + RegexMatcherBuilder as RustRegexMatcherBuilder, +}; +use grep::searcher::{ + BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder, +}; +use ignore::overrides::{Override, OverrideBuilder}; +use ignore::types::{FileTypeDef, Types, TypesBuilder}; +use ignore::{Walk, WalkBuilder, WalkParallel}; use log; use num_cpus; +use path_printer::{PathPrinter, PathPrinterBuilder}; use regex; -use same_file; -use termcolor; +use termcolor::{ + WriteColor, + BufferWriter, ColorChoice, +}; use app; -use atty; -use ignore::overrides::{Override, OverrideBuilder}; -use ignore::types::{FileTypeDef, Types, TypesBuilder}; -use ignore; -use printer::{ColorSpecs, Printer}; -use unescape::unescape; -use worker::{Worker, WorkerBuilder}; - +use config; +use logger::Logger; +use messages::{set_messages, set_ignore_messages}; +use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder}; +use subject::SubjectBuilder; use Result; -/// `Args` are transformed/normalized from `ArgMatches`. -#[derive(Debug)] -pub struct Args { +/// The command that ripgrep should execute based on the command line +/// configuration. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Command { + /// Search using exactly one thread. + Search, + /// Search using possibly many threads. + SearchParallel, + /// The command line parameters suggest that a search should occur, but + /// ripgrep knows that a match can never be found (e.g., no given patterns + /// or --max-count=0). + SearchNever, + /// Show the files that would be searched, but don't actually search them, + /// and use exactly one thread. + Files, + /// Show the files that would be searched, but don't actually search them, + /// and perform directory traversal using possibly many threads. + FilesParallel, + /// List all file type definitions configured, including the default file + /// types and any additional file types added to the command line. 
+ Types, +} + +impl Command { + /// Returns true if and only if this command requires executing a search. + fn is_search(&self) -> bool { + use self::Command::*; + + match *self { + Search | SearchParallel => true, + SearchNever | Files | FilesParallel | Types => false, + } + } +} + +/// The primary configuration object used throughout ripgrep. It provides a +/// high-level convenient interface to the provided command line arguments. +/// +/// An `Args` object is cheap to clone and can be used from multiple threads +/// simultaneously. +#[derive(Clone, Debug)] +pub struct Args(Arc); + +#[derive(Clone, Debug)] +struct ArgsImp { + /// Mid-to-low level routines for extracting CLI arguments. + matches: ArgMatches, + /// The patterns provided at the command line and/or via the -f/--file + /// flag. This may be empty. + patterns: Vec, + /// A matcher built from the patterns. + /// + /// It's important that this is only built once, since building this goes + /// through regex compilation and various types of analyses. That is, if + /// you need many of theses (one per thread, for example), it is better to + /// build it once and then clone it. + matcher: PatternMatcher, + /// The paths provided at the command line. This is guaranteed to be + /// non-empty. (If no paths are provided, then a default path is created.) 
paths: Vec, - after_context: usize, - before_context: usize, - color_choice: termcolor::ColorChoice, - colors: ColorSpecs, - column: bool, - context_separator: Vec, - count: bool, - encoding: Option<&'static Encoding>, - files_with_matches: bool, - files_without_matches: bool, - eol: u8, - files: bool, - follow: bool, - glob_overrides: Override, - grep: Grep, - heading: bool, - hidden: bool, - ignore_files: Vec, - invert_match: bool, - line_number: bool, - line_per_match: bool, - max_columns: Option, - max_count: Option, - max_filesize: Option, - maxdepth: Option, - mmap: bool, - no_ignore: bool, - no_ignore_parent: bool, - no_ignore_vcs: bool, - no_messages: bool, - null: bool, - only_matching: bool, - path_separator: Option, - quiet: bool, - quiet_matched: QuietMatched, - replace: Option>, - sort_files: bool, - stdout_handle: Option, - text: bool, - threads: usize, - type_list: bool, - types: Types, - with_filename: bool, + /// Returns true if and only if `paths` had to be populated with a single + /// default path. + using_default_path: bool, } impl Args { @@ -87,312 +123,1055 @@ /// /// Also, initialize a global logger. pub fn parse() -> Result { - let matches = app::app().get_matches(); + // We parse the args given on CLI. This does not include args from + // the config. We use the CLI args as an initial configuration while + // trying to parse config files. If a config file exists and has + // arguments, then we re-parse argv, otherwise we just use the matches + // we have here. 
+ let early_matches = ArgMatches::new(app::app().get_matches()); + set_messages(!early_matches.is_present("no-messages")); + set_ignore_messages(!early_matches.is_present("no-ignore-messages")); + + if let Err(err) = Logger::init() { + return Err(format!("failed to initialize logger: {}", err).into()); + } + if early_matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if early_matches.is_present("debug") { + log::set_max_level(log::LevelFilter::Debug); + } else { + log::set_max_level(log::LevelFilter::Warn); + } - let mut logb = env_logger::LogBuilder::new(); - if matches.is_present("debug") { - logb.filter(None, log::LogLevelFilter::Debug); + let matches = early_matches.reconfigure(); + // The logging level may have changed if we brought in additional + // arguments from a configuration file, so recheck it and set the log + // level as appropriate. + if matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if matches.is_present("debug") { + log::set_max_level(log::LevelFilter::Debug); } else { - logb.filter(None, log::LogLevelFilter::Warn); + log::set_max_level(log::LevelFilter::Warn); } - if let Err(err) = logb.init() { - errored!("failed to initialize logger: {}", err); + set_messages(!matches.is_present("no-messages")); + set_ignore_messages(!matches.is_present("no-ignore-messages")); + matches.to_args() + } + + /// Return direct access to command line arguments. + fn matches(&self) -> &ArgMatches { + &self.0.matches + } + + /// Return the patterns found in the command line arguments. This includes + /// patterns read via the -f/--file flags. + fn patterns(&self) -> &[String] { + &self.0.patterns + } + + /// Return the matcher builder from the patterns. + fn matcher(&self) -> &PatternMatcher { + &self.0.matcher + } + + /// Return the paths found in the command line arguments. This is + /// guaranteed to be non-empty. 
In the case where no explicit arguments are + /// provided, a single default path is provided automatically. + fn paths(&self) -> &[PathBuf] { + &self.0.paths + } + + /// Returns true if and only if `paths` had to be populated with a default + /// path, which occurs only when no paths were given as command line + /// arguments. + fn using_default_path(&self) -> bool { + self.0.using_default_path + } + + /// Return the printer that should be used for formatting the output of + /// search results. + /// + /// The returned printer will write results to the given writer. + fn printer(&self, wtr: W) -> Result> { + match self.matches().output_kind() { + OutputKind::Standard => { + let separator_search = self.command()? == Command::Search; + self.matches() + .printer_standard(self.paths(), wtr, separator_search) + .map(Printer::Standard) + } + OutputKind::Summary => { + self.matches() + .printer_summary(self.paths(), wtr) + .map(Printer::Summary) + } + OutputKind::JSON => { + self.matches() + .printer_json(wtr) + .map(Printer::JSON) + } } - ArgMatches(matches).to_args() + } +} + +/// High level public routines for building data structures used by ripgrep +/// from command line arguments. +impl Args { + /// Create a new buffer writer for multi-threaded printing with color + /// support. + pub fn buffer_writer(&self) -> Result { + let mut wtr = BufferWriter::stdout(self.matches().color_choice()); + wtr.separator(self.matches().file_separator()?); + Ok(wtr) + } + + /// Return the high-level command that ripgrep should run. 
+ pub fn command(&self) -> Result { + let is_one_search = self.matches().is_one_search(self.paths()); + let threads = self.matches().threads()?; + let one_thread = is_one_search || threads == 1; + + Ok(if self.matches().is_present("type-list") { + Command::Types + } else if self.matches().is_present("files") { + if one_thread { + Command::Files + } else { + Command::FilesParallel + } + } else if self.matches().can_never_match(self.patterns()) { + Command::SearchNever + } else if one_thread { + Command::Search + } else { + Command::SearchParallel + }) } - /// Returns true if ripgrep should print the files it will search and exit - /// (but not do any actual searching). - pub fn files(&self) -> bool { - self.files + /// Builder a path printer that can be used for printing just file paths, + /// with optional color support. + /// + /// The printer will print paths to the given writer. + pub fn path_printer( + &self, + wtr: W, + ) -> Result> { + let mut builder = PathPrinterBuilder::new(); + builder + .color_specs(self.matches().color_specs()?) + .separator(self.matches().path_separator()?) + .terminator(self.matches().path_terminator().unwrap_or(b'\n')); + Ok(builder.build(wtr)) + } + + /// Returns true if and only if the search should quit after finding the + /// first match. + pub fn quit_after_match(&self) -> Result { + Ok(self.matches().is_present("quiet") && self.stats()?.is_none()) } - /// Create a new line based matcher. The matcher returned can be used - /// across multiple threads simultaneously. This matcher only supports - /// basic searching of regular expressions in a single buffer. + /// Build a worker for executing searches. /// - /// The pattern and other flags are taken from the command line. - pub fn grep(&self) -> Grep { - self.grep.clone() + /// Search results are written to the given writer. 
+ pub fn search_worker( + &self, + wtr: W, + ) -> Result> { + let matcher = self.matcher().clone(); + let printer = self.printer(wtr)?; + let searcher = self.matches().searcher(self.paths())?; + let mut builder = SearchWorkerBuilder::new(); + builder + .json_stats(self.matches().is_present("json")) + .preprocessor(self.matches().preprocessor()) + .preprocessor_globs(self.matches().preprocessor_globs()?) + .search_zip(self.matches().is_present("search-zip")); + Ok(builder.build(matcher, searcher, printer)) + } + + /// Returns a zero value for tracking statistics if and only if it has been + /// requested. + /// + /// When this returns a `Stats` value, then it is guaranteed that the + /// search worker will be configured to track statistics as well. + pub fn stats(&self) -> Result> { + Ok(if self.command()?.is_search() && self.matches().stats() { + Some(Stats::new()) + } else { + None + }) } - /// Whether ripgrep should be quiet or not. - pub fn quiet(&self) -> bool { - self.quiet + /// Return a builder for constructing subjects. A subject represents a + /// single unit of something to search. Typically, this corresponds to a + /// file or a stream such as stdin. + pub fn subject_builder(&self) -> SubjectBuilder { + let mut builder = SubjectBuilder::new(); + builder.strip_dot_prefix(self.using_default_path()); + builder + } + + /// Execute the given function with a writer to stdout that enables color + /// support based on the command line configuration. + pub fn stdout(&self) -> cli::StandardStream { + let color = self.matches().color_choice(); + if self.matches().is_present("line-buffered") { + cli::stdout_buffered_line(color) + } else if self.matches().is_present("block-buffered") { + cli::stdout_buffered_block(color) + } else { + cli::stdout(color) + } } - /// Returns a thread safe boolean for determining whether to quit a search - /// early when quiet mode is enabled. + /// Return the type definitions compiled into ripgrep. 
/// - /// If quiet mode is disabled, then QuietMatched.has_match always returns - /// false. - pub fn quiet_matched(&self) -> QuietMatched { - self.quiet_matched.clone() + /// If there was a problem reading and parsing the type definitions, then + /// this returns an error. + pub fn type_defs(&self) -> Result> { + Ok(self.matches().types()?.definitions().to_vec()) + } + + /// Return a walker that never uses additional threads. + pub fn walker(&self) -> Result { + Ok(self.matches().walker_builder(self.paths())?.build()) + } + + /// Return a walker that never uses additional threads. + pub fn walker_parallel(&self) -> Result { + Ok(self.matches().walker_builder(self.paths())?.build_parallel()) + } +} + +/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to +/// the parsed arguments. +#[derive(Clone, Debug)] +struct ArgMatches(clap::ArgMatches<'static>); + +/// The output format. Generally, this corresponds to the printer that ripgrep +/// uses to show search results. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum OutputKind { + /// Classic grep-like or ack-like format. + Standard, + /// Show matching files and possibly the number of matches in each file. + Summary, + /// Emit match information in the JSON Lines format. + JSON, +} + +/// The sort criteria, if present. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct SortBy { + /// Whether to reverse the sort criteria (i.e., descending order). + reverse: bool, + /// The actual sorting criteria. + kind: SortByKind, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum SortByKind { + /// No sorting at all. + None, + /// Sort by path. + Path, + /// Sort by last modified time. + LastModified, + /// Sort by last accessed time. + LastAccessed, + /// Sort by creation time. 
+ Created, +} + +impl SortBy { + fn asc(kind: SortByKind) -> SortBy { + SortBy { reverse: false, kind: kind } + } + + fn desc(kind: SortByKind) -> SortBy { + SortBy { reverse: true, kind: kind } + } + + fn none() -> SortBy { + SortBy::asc(SortByKind::None) } - /// Create a new printer of individual search results that writes to the - /// writer given. - pub fn printer(&self, wtr: W) -> Printer { - let mut p = Printer::new(wtr) - .colors(self.colors.clone()) - .column(self.column) - .context_separator(self.context_separator.clone()) - .eol(self.eol) - .heading(self.heading) - .line_per_match(self.line_per_match) - .null(self.null) - .only_matching(self.only_matching) - .path_separator(self.path_separator) - .with_filename(self.with_filename) - .max_columns(self.max_columns); - if let Some(ref rep) = self.replace { - p = p.replace(rep.clone()); + /// Try to check that the sorting criteria selected is actually supported. + /// If it isn't, then an error is returned. + fn check(&self) -> Result<()> { + match self.kind { + SortByKind::None | SortByKind::Path => {} + SortByKind::LastModified => { + env::current_exe()?.metadata()?.modified()?; + } + SortByKind::LastAccessed => { + env::current_exe()?.metadata()?.accessed()?; + } + SortByKind::Created => { + env::current_exe()?.metadata()?.created()?; + } } - p + Ok(()) } - /// Retrieve the configured file separator. - pub fn file_separator(&self) -> Option> { - let use_heading_sep = - self.heading - && !self.count - && !self.files_with_matches - && !self.files_without_matches; - if use_heading_sep { - Some(b"".to_vec()) - } else if self.before_context > 0 || self.after_context > 0 { - Some(self.context_separator.clone()) + fn configure_walk_builder(self, builder: &mut WalkBuilder) { + // This isn't entirely optimal. In particular, we will wind up issuing + // a stat for many files redundantly. Aside from having potentially + // inconsistent results with respect to sorting, this is also slow. 
+ // We could fix this here at the expense of memory by caching stat + // calls. A better fix would be to find a way to push this down into + // directory traversal itself, but that's a somewhat nasty change. + match self.kind { + SortByKind::None => {} + SortByKind::Path => { + if self.reverse { + builder.sort_by_file_name(|a, b| a.cmp(b).reverse()); + } else { + builder.sort_by_file_name(|a, b| a.cmp(b)); + } + } + SortByKind::LastModified => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time( + a, b, + self.reverse, + |md| md.modified(), + ) + }); + } + SortByKind::LastAccessed => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time( + a, b, + self.reverse, + |md| md.accessed(), + ) + }); + } + SortByKind::Created => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time( + a, b, + self.reverse, + |md| md.created(), + ) + }); + } + } + } +} + +impl SortByKind { + fn new(kind: &str) -> SortByKind { + match kind { + "none" => SortByKind::None, + "path" => SortByKind::Path, + "modified" => SortByKind::LastModified, + "accessed" => SortByKind::LastAccessed, + "created" => SortByKind::Created, + _ => SortByKind::None, + } + } +} + +impl ArgMatches { + /// Create an ArgMatches from clap's parse result. + fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches { + ArgMatches(clap_matches) + } + + /// Run clap and return the matches using a config file if present. If clap + /// determines a problem with the user provided arguments (or if --help or + /// --version are given), then an error/usage/version will be printed and + /// the process will exit. + /// + /// If there are no additional arguments from the environment (e.g., a + /// config file), then the given matches are returned as is. + fn reconfigure(self) -> ArgMatches { + // If the end user says no config, then respect it. 
+ if self.is_present("no-config") { + debug!("not reading config files because --no-config is present"); + return self; + } + // If the user wants ripgrep to use a config file, then parse args + // from that first. + let mut args = config::args(); + if args.is_empty() { + return self; + } + let mut cliargs = env::args_os(); + if let Some(bin) = cliargs.next() { + args.insert(0, bin); + } + args.extend(cliargs); + debug!("final argv: {:?}", args); + ArgMatches::new(app::app().get_matches_from(args)) + } + + /// Convert the result of parsing CLI arguments into ripgrep's higher level + /// configuration structure. + fn to_args(self) -> Result { + // We compute these once since they could be large. + let patterns = self.patterns()?; + let matcher = self.matcher(&patterns)?; + let mut paths = self.paths(); + let using_default_path = + if paths.is_empty() { + paths.push(self.path_default()); + true + } else { + false + }; + Ok(Args(Arc::new(ArgsImp { + matches: self, + patterns: patterns, + matcher: matcher, + paths: paths, + using_default_path: using_default_path, + }))) + } +} + +/// High level routines for converting command line arguments into various +/// data structures used by ripgrep. +/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Return the matcher that should be used for searching. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + #[cfg(feature = "pcre2")] + fn matcher(&self, patterns: &[String]) -> Result { + if self.is_present("pcre2") { + let matcher = self.matcher_pcre2(patterns)?; + Ok(PatternMatcher::PCRE2(matcher)) } else { - None + let matcher = match self.matcher_rust(patterns) { + Ok(matcher) => matcher, + Err(err) => { + return Err(From::from(suggest_pcre2(err.to_string()))); + } + }; + Ok(PatternMatcher::RustRegex(matcher)) + } + } + + /// Return the matcher that should be used for searching. 
+ /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + #[cfg(not(feature = "pcre2"))] + fn matcher(&self, patterns: &[String]) -> Result { + if self.is_present("pcre2") { + return Err(From::from( + "PCRE2 is not available in this build of ripgrep", + )); + } + let matcher = self.matcher_rust(patterns)?; + Ok(PatternMatcher::RustRegex(matcher)) + } + + /// Build a matcher using Rust's regex engine. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + fn matcher_rust(&self, patterns: &[String]) -> Result { + let mut builder = RustRegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .case_insensitive(self.case_insensitive()) + .multi_line(true) + .unicode(true) + .octal(false) + .word(self.is_present("word-regexp")); + if self.is_present("multiline") { + builder.dot_matches_new_line(self.is_present("multiline-dotall")); + if self.is_present("crlf") { + builder + .crlf(true) + .line_terminator(None); + } + } else { + builder + .line_terminator(Some(b'\n')) + .dot_matches_new_line(false); + if self.is_present("crlf") { + builder.crlf(true); + } + // We don't need to set this in multiline mode since mulitline + // matchers don't use optimizations related to line terminators. + // Moreover, a mulitline regex used with --null-data should + // be allowed to match NUL bytes explicitly, which this would + // otherwise forbid. + if self.is_present("null-data") { + builder.line_terminator(Some(b'\x00')); + } + } + if let Some(limit) = self.regex_size_limit()? { + builder.size_limit(limit); + } + if let Some(limit) = self.dfa_size_limit()? { + builder.dfa_size_limit(limit); } + Ok(builder.build(&patterns.join("|"))?) } - /// Returns true if the given arguments are known to never produce a match. - pub fn never_match(&self) -> bool { - self.max_count == Some(0) + /// Build a matcher using PCRE2. 
+ /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + #[cfg(feature = "pcre2")] + fn matcher_pcre2(&self, patterns: &[String]) -> Result { + let mut builder = PCRE2RegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .caseless(self.case_insensitive()) + .multi_line(true) + .word(self.is_present("word-regexp")); + // For whatever reason, the JIT craps out during regex compilation with + // a "no more memory" error on 32 bit systems. So don't use it there. + if !cfg!(target_pointer_width = "32") { + builder.jit_if_available(true); + } + if self.pcre2_unicode() { + builder.utf(true).ucp(true); + if self.encoding()?.is_some() { + // SAFETY: If an encoding was specified, then we're guaranteed + // to get valid UTF-8, so we can disable PCRE2's UTF checking. + // (Feeding invalid UTF-8 to PCRE2 is undefined behavior.) + unsafe { + builder.disable_utf_check(); + } + } + } + if self.is_present("multiline") { + builder.dotall(self.is_present("multiline-dotall")); + } + if self.is_present("crlf") { + builder.crlf(true); + } + Ok(builder.build(&patterns.join("|"))?) } - /// Create a new writer for single-threaded searching with color support. - pub fn stdout(&self) -> termcolor::StandardStream { - termcolor::StandardStream::stdout(self.color_choice) + /// Build a JSON printer that writes results to the given writer. + fn printer_json(&self, wtr: W) -> Result> { + let mut builder = JSONBuilder::new(); + builder + .pretty(false) + .max_matches(self.max_count()?) + .always_begin_end(false); + Ok(builder.build(wtr)) } - /// Returns a handle to stdout for filtering search. + /// Build a Standard printer that writes results to the given writer. + /// + /// The given paths are used to configure aspects of the printer. /// - /// A handle is returned if and only if ripgrep's stdout is being - /// redirected to a file. The handle returned corresponds to that file. 
+ /// If `separator_search` is true, then the returned printer will assume + /// the responsibility of printing a separator between each set of + /// search results, when appropriate (e.g., when contexts are enabled). + /// When it's set to false, the caller is responsible for handling + /// separators. /// - /// This can be used to ensure that we do not attempt to search a file - /// that ripgrep is writing to. - pub fn stdout_handle(&self) -> Option<&same_file::Handle> { - self.stdout_handle.as_ref() + /// In practice, we want the printer to handle it in the single threaded + /// case but not in the multi-threaded case. + fn printer_standard( + &self, + paths: &[PathBuf], + wtr: W, + separator_search: bool, + ) -> Result> { + let mut builder = StandardBuilder::new(); + builder + .color_specs(self.color_specs()?) + .stats(self.stats()) + .heading(self.heading()) + .path(self.with_filename(paths)) + .only_matching(self.is_present("only-matching")) + .per_match(self.is_present("vimgrep")) + .replacement(self.replacement()) + .max_columns(self.max_columns()?) + .max_matches(self.max_count()?) + .column(self.column()) + .byte_offset(self.is_present("byte-offset")) + .trim_ascii(self.is_present("trim")) + .separator_search(None) + .separator_context(Some(self.context_separator())) + .separator_field_match(b":".to_vec()) + .separator_field_context(b"-".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + if separator_search { + builder.separator_search(self.file_separator()?); + } + Ok(builder.build(wtr)) } - /// Create a new buffer writer for multi-threaded searching with color - /// support. - pub fn buffer_writer(&self) -> termcolor::BufferWriter { - let mut wtr = termcolor::BufferWriter::stdout(self.color_choice); - wtr.separator(self.file_separator()); - wtr + /// Build a Summary printer that writes results to the given writer. + /// + /// The given paths are used to configure aspects of the printer. 
+ /// + /// This panics if the output format is not `OutputKind::Summary`. + fn printer_summary( + &self, + paths: &[PathBuf], + wtr: W, + ) -> Result> { + let mut builder = SummaryBuilder::new(); + builder + .kind(self.summary_kind().expect("summary format")) + .color_specs(self.color_specs()?) + .stats(self.stats()) + .path(self.with_filename(paths)) + .max_matches(self.max_count()?) + .separator_field(b":".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + Ok(builder.build(wtr)) + } + + /// Build a searcher from the command line parameters. + fn searcher(&self, paths: &[PathBuf]) -> Result { + let (ctx_before, ctx_after) = self.contexts()?; + let line_term = + if self.is_present("crlf") { + LineTerminator::crlf() + } else if self.is_present("null-data") { + LineTerminator::byte(b'\x00') + } else { + LineTerminator::byte(b'\n') + }; + let mut builder = SearcherBuilder::new(); + builder + .line_terminator(line_term) + .invert_match(self.is_present("invert-match")) + .line_number(self.line_number(paths)) + .multi_line(self.is_present("multiline")) + .before_context(ctx_before) + .after_context(ctx_after) + .passthru(self.is_present("passthru")) + .memory_map(self.mmap_choice(paths)) + .binary_detection(self.binary_detection()) + .encoding(self.encoding()?); + Ok(builder.build()) + } + + /// Return a builder for recursively traversing a directory while + /// respecting ignore rules. + /// + /// If there was a problem parsing the CLI arguments necessary for + /// constructing the builder, then this returns an error. + fn walker_builder(&self, paths: &[PathBuf]) -> Result { + let mut builder = WalkBuilder::new(&paths[0]); + for path in &paths[1..] { + builder.add(path); + } + for path in self.ignore_paths() { + if let Some(err) = builder.add_ignore(path) { + ignore_message!("{}", err); + } + } + builder + .max_depth(self.usize_of("max-depth")?) 
+ .follow_links(self.is_present("follow")) + .max_filesize(self.max_file_size()?) + .threads(self.threads()?) + .same_file_system(self.is_present("one-file-system")) + .skip_stdout(true) + .overrides(self.overrides()?) + .types(self.types()?) + .hidden(!self.hidden()) + .parents(!self.no_ignore_parent()) + .ignore(!self.no_ignore()) + .git_global( + !self.no_ignore() + && !self.no_ignore_vcs() + && !self.no_ignore_global()) + .git_ignore(!self.no_ignore() && !self.no_ignore_vcs()) + .git_exclude(!self.no_ignore() && !self.no_ignore_vcs()); + if !self.no_ignore() { + builder.add_custom_ignore_filename(".rgignore"); + } + let sortby = self.sort_by()?; + sortby.check()?; + sortby.configure_walk_builder(&mut builder); + Ok(builder) } +} - /// Return the paths that should be searched. - pub fn paths(&self) -> &[PathBuf] { - &self.paths +/// Mid level routines for converting command line arguments into various types +/// of data structures. +/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Returns the form of binary detection to perform. + fn binary_detection(&self) -> BinaryDetection { + let none = + self.is_present("text") + || self.unrestricted_count() >= 3 + || self.is_present("null-data"); + if none { + BinaryDetection::none() + } else { + BinaryDetection::quit(b'\x00') + } } - /// Returns true if there is exactly one file path given to search. - pub fn is_one_path(&self) -> bool { - self.paths.len() == 1 - && (self.paths[0] == Path::new("-") || self.paths[0].is_file()) + /// Returns true if the command line configuration implies that a match + /// can never be shown. + fn can_never_match(&self, patterns: &[String]) -> bool { + patterns.is_empty() || self.max_count().ok() == Some(Some(0)) } - /// Create a worker whose configuration is taken from the - /// command line. 
- pub fn worker(&self) -> Worker { - WorkerBuilder::new(self.grep()) - .after_context(self.after_context) - .before_context(self.before_context) - .count(self.count) - .encoding(self.encoding) - .files_with_matches(self.files_with_matches) - .files_without_matches(self.files_without_matches) - .eol(self.eol) - .line_number(self.line_number) - .invert_match(self.invert_match) - .max_count(self.max_count) - .mmap(self.mmap) - .no_messages(self.no_messages) - .quiet(self.quiet) - .text(self.text) - .build() + /// Returns true if and only if case should be ignore. + /// + /// If --case-sensitive is present, then case is never ignored, even if + /// --ignore-case is present. + fn case_insensitive(&self) -> bool { + self.is_present("ignore-case") && !self.is_present("case-sensitive") } - /// Returns the number of worker search threads that should be used. - pub fn threads(&self) -> usize { - self.threads + /// Returns true if and only if smart case has been enabled. + /// + /// If either --ignore-case of --case-sensitive are present, then smart + /// case is disabled. + fn case_smart(&self) -> bool { + self.is_present("smart-case") + && !self.is_present("ignore-case") + && !self.is_present("case-sensitive") } - /// Returns a list of type definitions currently loaded. - pub fn type_defs(&self) -> &[FileTypeDef] { - self.types.definitions() + /// Returns the user's color choice based on command line parameters and + /// environment. + fn color_choice(&self) -> ColorChoice { + let preference = match self.value_of_lossy("color") { + None => "auto".to_string(), + Some(v) => v, + }; + if preference == "always" { + ColorChoice::Always + } else if preference == "ansi" { + ColorChoice::AlwaysAnsi + } else if preference == "auto" { + if cli::is_tty_stdout() || self.is_present("pretty") { + ColorChoice::Auto + } else { + ColorChoice::Never + } + } else { + ColorChoice::Never + } } - /// Returns true if ripgrep should print the type definitions currently - /// loaded and then exit. 
- pub fn type_list(&self) -> bool { - self.type_list + /// Returns the color specifications given by the user on the CLI. + /// + /// If the was a problem parsing any of the provided specs, then an error + /// is returned. + fn color_specs(&self) -> Result { + // Start with a default set of color specs. + let mut specs = default_color_specs(); + for spec_str in self.values_of_lossy_vec("colors") { + specs.push(spec_str.parse()?); + } + Ok(ColorSpecs::new(&specs)) } - /// Returns true if error messages should be suppressed. - pub fn no_messages(&self) -> bool { - self.no_messages + /// Returns true if and only if column numbers should be shown. + fn column(&self) -> bool { + if self.is_present("no-column") { + return false; + } + self.is_present("column") || self.is_present("vimgrep") } - /// Create a new recursive directory iterator over the paths in argv. - pub fn walker(&self) -> ignore::Walk { - self.walker_builder().build() + /// Returns the before and after contexts from the command line. + /// + /// If a context setting was absent, then `0` is returned. + /// + /// If there was a problem parsing the values from the user as an integer, + /// then an error is returned. + fn contexts(&self) -> Result<(usize, usize)> { + let after = self.usize_of("after-context")?.unwrap_or(0); + let before = self.usize_of("before-context")?.unwrap_or(0); + let both = self.usize_of("context")?.unwrap_or(0); + Ok(if both > 0 { + (both, both) + } else { + (before, after) + }) } - /// Create a new parallel recursive directory iterator over the paths - /// in argv. - pub fn walker_parallel(&self) -> ignore::WalkParallel { - self.walker_builder().build_parallel() + /// Returns the unescaped context separator in UTF-8 bytes. + /// + /// If one was not provided, the default `--` is returned. 
+ fn context_separator(&self) -> Vec { + match self.value_of_os("context-separator") { + None => b"--".to_vec(), + Some(sep) => cli::unescape_os(&sep), + } } - fn walker_builder(&self) -> ignore::WalkBuilder { - let paths = self.paths(); - let mut wd = ignore::WalkBuilder::new(&paths[0]); - for path in &paths[1..] { - wd.add(path); + /// Returns whether the -c/--count or the --count-matches flags were + /// passed from the command line. + /// + /// If --count-matches and --invert-match were passed in, behave + /// as if --count and --invert-match were passed in (i.e. rg will + /// count inverted matches as per existing behavior). + fn counts(&self) -> (bool, bool) { + let count = self.is_present("count"); + let count_matches = self.is_present("count-matches"); + let invert_matches = self.is_present("invert-match"); + let only_matching = self.is_present("only-matching"); + if count_matches && invert_matches { + // Treat `-v --count-matches` as `-v -c`. + (true, false) + } else if count && only_matching { + // Treat `-c --only-matching` as `--count-matches`. + (false, true) + } else { + (count, count_matches) } - for path in &self.ignore_files { - if let Some(err) = wd.add_ignore(path) { - if !self.no_messages { - eprintln!("{}", err); - } - } + } + + /// Parse the dfa-size-limit argument option into a byte count. + fn dfa_size_limit(&self) -> Result> { + let r = self.parse_human_readable_size("dfa-size-limit")?; + u64_to_usize("dfa-size-limit", r) + } + + /// Returns the type of encoding to use. + /// + /// This only returns an encoding if one is explicitly specified. When no + /// encoding is present, the Searcher will still do BOM sniffing for UTF-16 + /// and transcode seamlessly. 
+ fn encoding(&self) -> Result> { + if self.is_present("no-encoding") { + return Ok(None); + } + let label = match self.value_of_lossy("encoding") { + None if self.pcre2_unicode() => "utf-8".to_string(), + None => return Ok(None), + Some(label) => label, + }; + if label == "auto" { + return Ok(None); } + Ok(Some(Encoding::new(&label)?)) + } - wd.follow_links(self.follow); - wd.hidden(!self.hidden); - wd.max_depth(self.maxdepth); - wd.max_filesize(self.max_filesize); - wd.overrides(self.glob_overrides.clone()); - wd.types(self.types.clone()); - wd.git_global(!self.no_ignore && !self.no_ignore_vcs); - wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs); - wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs); - wd.ignore(!self.no_ignore); - wd.parents(!self.no_ignore_parent); - wd.threads(self.threads()); - if self.sort_files { - wd.sort_by(|a, b| a.cmp(b)); + /// Return the file separator to use based on the CLI configuration. + fn file_separator(&self) -> Result>> { + // File separators are only used for the standard grep-line format. + if self.output_kind() != OutputKind::Standard { + return Ok(None); } - wd + + let (ctx_before, ctx_after) = self.contexts()?; + Ok(if self.heading() { + Some(b"".to_vec()) + } else if ctx_before > 0 || ctx_after > 0 { + Some(self.context_separator().clone()) + } else { + None + }) } -} -/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to -/// several options/flags. -struct ArgMatches<'a>(clap::ArgMatches<'a>); + /// Returns true if and only if matches should be grouped with file name + /// headings. 
+ fn heading(&self) -> bool { + if self.is_present("no-heading") || self.is_present("vimgrep") { + false + } else { + cli::is_tty_stdout() + || self.is_present("heading") + || self.is_present("pretty") + } + } -impl<'a> ops::Deref for ArgMatches<'a> { - type Target = clap::ArgMatches<'a>; - fn deref(&self) -> &clap::ArgMatches<'a> { &self.0 } -} - -impl<'a> ArgMatches<'a> { - /// Convert the result of parsing CLI arguments into ripgrep's - /// configuration. - fn to_args(&self) -> Result { - let paths = self.paths(); - let line_number = self.line_number(&paths); - let mmap = try!(self.mmap(&paths)); - let with_filename = self.with_filename(&paths); - let (before_context, after_context) = try!(self.contexts()); - let quiet = self.is_present("quiet"); - let args = Args { - paths: paths, - after_context: after_context, - before_context: before_context, - color_choice: self.color_choice(), - colors: try!(self.color_specs()), - column: self.column(), - context_separator: self.context_separator(), - count: self.is_present("count"), - encoding: try!(self.encoding()), - files_with_matches: self.is_present("files-with-matches"), - files_without_matches: self.is_present("files-without-match"), - eol: b'\n', - files: self.is_present("files"), - follow: self.is_present("follow"), - glob_overrides: try!(self.overrides()), - grep: try!(self.grep()), - heading: self.heading(), - hidden: self.hidden(), - ignore_files: self.ignore_files(), - invert_match: self.is_present("invert-match"), - line_number: line_number, - line_per_match: self.is_present("vimgrep"), - max_columns: try!(self.usize_of("max-columns")), - max_count: try!(self.usize_of("max-count")).map(|max| max as u64), - max_filesize: try!(self.max_filesize()), - maxdepth: try!(self.usize_of("maxdepth")), - mmap: mmap, - no_ignore: self.no_ignore(), - no_ignore_parent: self.no_ignore_parent(), - no_ignore_vcs: self.no_ignore_vcs(), - no_messages: self.is_present("no-messages"), - null: self.is_present("null"), - 
only_matching: self.is_present("only-matching"), - path_separator: try!(self.path_separator()), - quiet: quiet, - quiet_matched: QuietMatched::new(quiet), - replace: self.replace(), - sort_files: self.is_present("sort-files"), - stdout_handle: self.stdout_handle(), - text: self.text(), - threads: try!(self.threads()), - type_list: self.is_present("type-list"), - types: try!(self.types()), - with_filename: with_filename, + /// Returns true if and only if hidden files/directories should be + /// searched. + fn hidden(&self) -> bool { + self.is_present("hidden") || self.unrestricted_count() >= 2 + } + + /// Return all of the ignore file paths given on the command line. + fn ignore_paths(&self) -> Vec { + let paths = match self.values_of_os("ignore-file") { + None => return vec![], + Some(paths) => paths, }; - if args.mmap { - debug!("will try to use memory maps"); + paths.map(|p| Path::new(p).to_path_buf()).collect() + } + + /// Returns true if and only if ripgrep is invoked in a way where it knows + /// it search exactly one thing. + fn is_one_search(&self, paths: &[PathBuf]) -> bool { + if paths.len() != 1 { + return false; + } + self.is_only_stdin(paths) || paths[0].is_file() + } + + /// Returns true if and only if we're only searching a single thing and + /// that thing is stdin. + fn is_only_stdin(&self, paths: &[PathBuf]) -> bool { + paths == [Path::new("-")] + } + + /// Returns true if and only if we should show line numbers. + fn line_number(&self, paths: &[PathBuf]) -> bool { + if self.output_kind() == OutputKind::Summary { + return false; } - Ok(args) + if self.is_present("no-line-number") { + return false; + } + if self.output_kind() == OutputKind::JSON { + return true; + } + + // A few things can imply counting line numbers. In particular, we + // generally want to show line numbers by default when printing to a + // tty for human consumption, except for one interesting case: when + // we're only searching stdin. This makes pipelines work as expected. 
+ (cli::is_tty_stdout() && !self.is_only_stdin(paths)) + || self.is_present("line-number") + || self.is_present("column") + || self.is_present("pretty") + || self.is_present("vimgrep") + } + + /// The maximum number of columns allowed on each line. + /// + /// If `0` is provided, then this returns `None`. + fn max_columns(&self) -> Result> { + Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64)) + } + + /// The maximum number of matches permitted. + fn max_count(&self) -> Result> { + Ok(self.usize_of("max-count")?.map(|n| n as u64)) + } + + /// Parses the max-filesize argument option into a byte count. + fn max_file_size(&self) -> Result> { + self.parse_human_readable_size("max-filesize") + } + + /// Returns whether we should attempt to use memory maps or not. + fn mmap_choice(&self, paths: &[PathBuf]) -> MmapChoice { + // SAFETY: Memory maps are difficult to impossible to encapsulate + // safely in a portable way that doesn't simultaneously negate some of + // the benfits of using memory maps. For ripgrep's use, we never mutate + // a memory map and generally never store the contents of memory map + // in a data structure that depends on immutability. Generally + // speaking, the worst thing that can happen is a SIGBUS (if the + // underlying file is truncated while reading it), which will cause + // ripgrep to abort. This reasoning should be treated as suspect. + let maybe = unsafe { MmapChoice::auto() }; + let never = MmapChoice::never(); + if self.is_present("no-mmap") { + never + } else if self.is_present("mmap") { + maybe + } else if paths.len() <= 10 && paths.iter().all(|p| p.is_file()) { + // If we're only searching a few paths and all of them are + // files, then memory maps are probably faster. + maybe + } else { + never + } + } + + /// Returns true if ignore files should be ignored. + fn no_ignore(&self) -> bool { + self.is_present("no-ignore") || self.unrestricted_count() >= 1 + } + + /// Returns true if global ignore files should be ignored. 
+ fn no_ignore_global(&self) -> bool { + self.is_present("no-ignore-global") || self.no_ignore() + } + + /// Returns true if parent ignore files should be ignored. + fn no_ignore_parent(&self) -> bool { + self.is_present("no-ignore-parent") || self.no_ignore() + } + + /// Returns true if VCS ignore files should be ignored. + fn no_ignore_vcs(&self) -> bool { + self.is_present("no-ignore-vcs") || self.no_ignore() + } + + /// Determine the type of output we should produce. + fn output_kind(&self) -> OutputKind { + if self.is_present("quiet") { + // While we don't technically print results (or aggregate results) + // in quiet mode, we still support the --stats flag, and those + // stats are computed by the Summary printer for now. + return OutputKind::Summary; + } else if self.is_present("json") { + return OutputKind::JSON; + } + + let (count, count_matches) = self.counts(); + let summary = + count + || count_matches + || self.is_present("files-with-matches") + || self.is_present("files-without-match"); + if summary { + OutputKind::Summary + } else { + OutputKind::Standard + } + } + + /// Builds the set of glob overrides from the command line flags. + fn overrides(&self) -> Result { + let mut builder = OverrideBuilder::new(env::current_dir()?); + for glob in self.values_of_lossy_vec("glob") { + builder.add(&glob)?; + } + // This only enables case insensitivity for subsequent globs. + builder.case_insensitive(true)?; + for glob in self.values_of_lossy_vec("iglob") { + builder.add(&glob)?; + } + Ok(builder.build()?) } /// Return all file paths that ripgrep should search. + /// + /// If no paths were given, then this returns an empty list. fn paths(&self) -> Vec { let mut paths: Vec = match self.values_of_os("path") { None => vec![], - Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(), + Some(paths) => paths.map(|p| Path::new(p).to_path_buf()).collect(), }; // If --file, --files or --regexp is given, then the first path is // always in `pattern`. 
if self.is_present("file") || self.is_present("files") - || self.is_present("regexp") { - if let Some(path) = self.value_of_os("PATTERN") { + || self.is_present("regexp") + { + if let Some(path) = self.value_of_os("pattern") { paths.insert(0, Path::new(path).to_path_buf()); } } - if paths.is_empty() { - paths.push(self.default_path()); - } paths } - /// Return the default path that ripgrep should search. - fn default_path(&self) -> PathBuf { - let file_is_stdin = - self.values_of_os("file").map_or(false, |mut files| { - files.any(|f| f == "-") - }); - let search_cwd = atty::is(atty::Stream::Stdin) - || !stdin_is_readable() + /// Return the default path that ripgrep should search. This should only + /// be used when ripgrep is not otherwise given at least one file path + /// as a positional argument. + fn path_default(&self) -> PathBuf { + let file_is_stdin = self.values_of_os("file") + .map_or(false, |mut files| files.any(|f| f == "-")); + let search_cwd = + !cli::is_readable_stdin() || (self.is_present("file") && file_is_stdin) || self.is_present("files") || self.is_present("type-list"); @@ -403,120 +1182,118 @@ } } - /// Return all of the ignore files given on the command line. - fn ignore_files(&self) -> Vec { - match self.values_of_os("ignore-file") { - None => return vec![], - Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(), + /// Returns the unescaped path separator as a single byte, if one exists. + /// + /// If the provided path separator is more than a single byte, then an + /// error is returned. + fn path_separator(&self) -> Result> { + let sep = match self.value_of_os("path-separator") { + None => return Ok(None), + Some(sep) => cli::unescape_os(&sep), + }; + if sep.is_empty() { + Ok(None) + } else if sep.len() > 1 { + Err(From::from(format!( + "A path separator must be exactly one byte, but \ + the given separator is {} bytes: {}\n\ + In some shells on Windows '/' is automatically \ + expanded. 
Use '//' instead.", + sep.len(), + cli::escape(&sep), + ))) + } else { + Ok(Some(sep[0])) } } - /// Return the pattern that should be used for searching. + /// Returns the byte that should be used to terminate paths. /// - /// If multiple -e/--regexp flags are given, then they are all collapsed - /// into one pattern. - /// - /// If any part of the pattern isn't valid UTF-8, then an error is - /// returned. - fn pattern(&self) -> Result { - Ok(try!(self.patterns()).join("|")) + /// Typically, this is only set to `\x00` when the --null flag is provided, + /// and `None` otherwise. + fn path_terminator(&self) -> Option { + if self.is_present("null") { + Some(b'\x00') + } else { + None + } } /// Get a sequence of all available patterns from the command line. /// This includes reading the -e/--regexp and -f/--file flags. /// /// Note that if -F/--fixed-strings is set, then all patterns will be - /// escaped. Similarly, if -w/--word-regexp is set, then all patterns - /// are surrounded by `\b`, and if -x/--line-regexp is set, then all - /// patterns are surrounded by `^...$`. + /// escaped. If -x/--line-regexp is set, then all patterns are surrounded + /// by `^...$`. Other things, such as --word-regexp, are handled by the + /// regex matcher itself. /// /// If any pattern is invalid UTF-8, then an error is returned. 
fn patterns(&self) -> Result> { if self.is_present("files") || self.is_present("type-list") { - return Ok(vec![self.empty_pattern()]); + return Ok(vec![]); } let mut pats = vec![]; match self.values_of_os("regexp") { None => { if self.values_of_os("file").is_none() { - if let Some(os_pat) = self.value_of_os("PATTERN") { - pats.push(try!(self.os_str_pattern(os_pat))); + if let Some(os_pat) = self.value_of_os("pattern") { + pats.push(self.pattern_from_os_str(os_pat)?); } } } Some(os_pats) => { for os_pat in os_pats { - pats.push(try!(self.os_str_pattern(os_pat))); + pats.push(self.pattern_from_os_str(os_pat)?); } } } - if let Some(files) = self.values_of_os("file") { - for file in files { - if file == "-" { - let stdin = io::stdin(); - for line in stdin.lock().lines() { - pats.push(self.str_pattern(&try!(line))); - } + if let Some(paths) = self.values_of_os("file") { + for path in paths { + if path == "-" { + pats.extend(cli::patterns_from_stdin()?); } else { - let f = try!(fs::File::open(file)); - for line in io::BufReader::new(f).lines() { - pats.push(self.str_pattern(&try!(line))); - } + pats.extend(cli::patterns_from_path(path)?); } } } - if pats.is_empty() { - pats.push(self.empty_pattern()) - } Ok(pats) } - /// Converts an OsStr pattern to a String pattern, including line/word - /// boundaries or escapes if applicable. + /// Returns a pattern that is guaranteed to produce an empty regular + /// expression that is valid in any position. + fn pattern_empty(&self) -> String { + // This would normally just be an empty string, which works on its + // own, but if the patterns are joined in a set of alternations, then + // you wind up with `foo|`, which is currently invalid in Rust's regex + // engine. + "(?:z{0})*".to_string() + } + + /// Converts an OsStr pattern to a String pattern. The pattern is escaped + /// if -F/--fixed-strings is set. /// /// If the pattern is not valid UTF-8, then an error is returned. 
- fn os_str_pattern(&self, pat: &OsStr) -> Result { - let s = try!(pattern_to_str(pat)); - Ok(self.str_pattern(s)) + fn pattern_from_os_str(&self, pat: &OsStr) -> Result { + let s = cli::pattern_from_os(pat)?; + Ok(self.pattern_from_str(s)) } - /// Converts a &str pattern to a String pattern, including line/word - /// boundaries or escapes if applicable. - fn str_pattern(&self, pat: &str) -> String { - let litpat = self.literal_pattern(pat.to_string()); - let s = self.line_pattern(self.word_pattern(litpat)); + /// Converts a &str pattern to a String pattern. The pattern is escaped + /// if -F/--fixed-strings is set. + fn pattern_from_str(&self, pat: &str) -> String { + let litpat = self.pattern_literal(pat.to_string()); + let s = self.pattern_line(litpat); if s.is_empty() { - self.empty_pattern() + self.pattern_empty() } else { s } } - /// Returns the given pattern as a literal pattern if the - /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned - /// unchanged. - fn literal_pattern(&self, pat: String) -> String { - if self.is_present("fixed-strings") { - regex::escape(&pat) - } else { - pat - } - } - - /// Returns the given pattern as a word pattern if the -w/--word-regexp - /// flag is set. Otherwise, the pattern is returned unchanged. - fn word_pattern(&self, pat: String) -> String { - if self.is_present("word-regexp") { - format!(r"\b(?:{})\b", pat) - } else { - pat - } - } - /// Returns the given pattern as a line pattern if the -x/--line-regexp /// flag is set. Otherwise, the pattern is returned unchanged. - fn line_pattern(&self, pat: String) -> String { + fn pattern_line(&self, pat: String) -> String { if self.is_present("line-regexp") { format!(r"^(?:{})$", pat) } else { @@ -524,230 +1301,104 @@ } } - /// Empty pattern returns a pattern that is guaranteed to produce an empty - /// regular expression that is valid in any position. 
- fn empty_pattern(&self) -> String { - // This would normally just be an empty string, which works on its - // own, but if the patterns are joined in a set of alternations, then - // you wind up with `foo|`, which is invalid. - self.word_pattern("z{0}".to_string()) - } - - /// Returns true if and only if file names containing each match should - /// be emitted. - /// - /// `paths` should be a slice of all top-level file paths that ripgrep - /// will need to search. - fn with_filename(&self, paths: &[PathBuf]) -> bool { - if self.is_present("no-filename") { - false + /// Returns the given pattern as a literal pattern if the + /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned + /// unchanged. + fn pattern_literal(&self, pat: String) -> String { + if self.is_present("fixed-strings") { + regex::escape(&pat) } else { - self.is_present("with-filename") - || self.is_present("vimgrep") - || paths.len() > 1 - || paths.get(0).map_or(false, |p| p.is_dir()) + pat } } - /// Returns a handle to stdout for filtering search. - /// - /// A handle is returned if and only if ripgrep's stdout is being - /// redirected to a file. The handle returned corresponds to that file. - /// - /// This can be used to ensure that we do not attempt to search a file - /// that ripgrep is writing to. - fn stdout_handle(&self) -> Option { - let h = match same_file::Handle::stdout() { - Err(_) => return None, - Ok(h) => h, + /// Returns the preprocessor command if one was specified. + fn preprocessor(&self) -> Option { + let path = match self.value_of_os("pre") { + None => return None, + Some(path) => path, }; - let md = match h.as_file().metadata() { - Err(_) => return None, - Ok(md) => md, - }; - if !md.is_file() { + if path.is_empty() { return None; } - Some(h) + Some(Path::new(path).to_path_buf()) } - /// Returns true if and only if memory map searching should be tried. - /// - /// `paths` should be a slice of all top-level file paths that ripgrep - /// will need to search. 
- fn mmap(&self, paths: &[PathBuf]) -> Result { - let (before, after) = try!(self.contexts()); - let enc = try!(self.encoding()); - Ok(if before > 0 || after > 0 || self.is_present("no-mmap") { - false - } else if self.is_present("mmap") { - true - } else if cfg!(target_os = "macos") { - // On Mac, memory maps appear to suck. Neat. - false - } else if enc.is_some() { - // There's no practical way to transcode a memory map that isn't - // isomorphic to searching over io::Read. - false - } else { - // If we're only searching a few paths and all of them are - // files, then memory maps are probably faster. - paths.len() <= 10 && paths.iter().all(|p| p.is_file()) - }) - } - - /// Returns true if and only if line numbers should be shown. - fn line_number(&self, paths: &[PathBuf]) -> bool { - if self.is_present("no-line-number") || self.is_present("count") { - false - } else { - let only_stdin = paths == &[Path::new("-")]; - (atty::is(atty::Stream::Stdout) && !only_stdin) - || self.is_present("line-number") - || self.is_present("column") - || self.is_present("pretty") - || self.is_present("vimgrep") + /// Builds the set of globs for filtering files to apply to the --pre + /// flag. If no --pre-globs are available, then this always returns an + /// empty set of globs. + fn preprocessor_globs(&self) -> Result { + let mut builder = OverrideBuilder::new(env::current_dir()?); + for glob in self.values_of_lossy_vec("pre-glob") { + builder.add(&glob)?; } + Ok(builder.build()?) } - /// Returns true if and only if column numbers should be shown. - fn column(&self) -> bool { - self.is_present("column") || self.is_present("vimgrep") - } - - /// Returns true if and only if matches should be grouped with file name - /// headings. 
- fn heading(&self) -> bool { - if self.is_present("no-heading") || self.is_present("vimgrep") { - false - } else { - atty::is(atty::Stream::Stdout) - || self.is_present("heading") - || self.is_present("pretty") - } + /// Parse the regex-size-limit argument option into a byte count. + fn regex_size_limit(&self) -> Result> { + let r = self.parse_human_readable_size("regex-size-limit")?; + u64_to_usize("regex-size-limit", r) } /// Returns the replacement string as UTF-8 bytes if it exists. - fn replace(&self) -> Option> { - self.value_of_lossy("replace").map(|s| s.into_owned().into_bytes()) + fn replacement(&self) -> Option> { + self.value_of_lossy("replace").map(|s| s.into_bytes()) } - /// Returns the unescaped context separator in UTF-8 bytes. - fn context_separator(&self) -> Vec { - match self.value_of_lossy("context-separator") { - None => b"--".to_vec(), - Some(sep) => unescape(&sep), + /// Returns the sorting criteria based on command line parameters. + fn sort_by(&self) -> Result { + // For backcompat, continue supporting deprecated --sort-files flag. + if self.is_present("sort-files") { + return Ok(SortBy::asc(SortByKind::Path)); } - } - - /// Returns the unescaped path separator in UTF-8 bytes. - fn path_separator(&self) -> Result> { - match self.value_of_lossy("path-separator") { - None => Ok(None), - Some(sep) => { - let sep = unescape(&sep); - if sep.is_empty() { - Ok(None) - } else if sep.len() > 1 { - Err(From::from(format!( - "A path separator must be exactly one byte, but \ - the given separator is {} bytes.", sep.len()))) - } else { - Ok(Some(sep[0])) - } + let sortby = match self.value_of_lossy("sort") { + None => match self.value_of_lossy("sortr") { + None => return Ok(SortBy::none()), + Some(choice) => SortBy::desc(SortByKind::new(&choice)), } - } - } - - /// Returns the before and after contexts from the command line. - /// - /// If a context setting was absent, then `0` is returned. 
- /// - /// If there was a problem parsing the values from the user as an integer, - /// then an error is returned. - fn contexts(&self) -> Result<(usize, usize)> { - let after = try!(self.usize_of("after-context")).unwrap_or(0); - let before = try!(self.usize_of("before-context")).unwrap_or(0); - let both = try!(self.usize_of("context")).unwrap_or(0); - Ok(if both > 0 { - (both, both) - } else { - (before, after) - }) - } - - /// Returns the user's color choice based on command line parameters and - /// environment. - fn color_choice(&self) -> termcolor::ColorChoice { - let preference = match self.0.value_of_lossy("color") { - None => "auto".to_string(), - Some(v) => v.into_owned(), + Some(choice) => SortBy::asc(SortByKind::new(&choice)), }; - if preference == "always" { - termcolor::ColorChoice::Always - } else if preference == "ansi" { - termcolor::ColorChoice::AlwaysAnsi - } else if preference == "auto" { - if atty::is(atty::Stream::Stdout) || self.is_present("pretty") { - termcolor::ColorChoice::Auto - } else { - termcolor::ColorChoice::Never - } - } else { - termcolor::ColorChoice::Never - } - } - - /// Returns the color specifications given by the user on the CLI. - /// - /// If the was a problem parsing any of the provided specs, then an error - /// is returned. - fn color_specs(&self) -> Result { - // Start with a default set of color specs. - let mut specs = vec![ - #[cfg(unix)] - "path:fg:magenta".parse().unwrap(), - #[cfg(windows)] - "path:fg:cyan".parse().unwrap(), - "line:fg:green".parse().unwrap(), - "match:fg:red".parse().unwrap(), - "match:style:bold".parse().unwrap(), - ]; - for spec_str in self.values_of_lossy_vec("colors") { - specs.push(try!(spec_str.parse())); - } - Ok(ColorSpecs::new(&specs)) + Ok(sortby) } - /// Return the text encoding specified. + /// Returns true if and only if aggregate statistics for a search should + /// be tracked. 
/// - /// If the label given by the caller doesn't correspond to a valid - /// supported encoding (and isn't `auto`), then return an error. - /// - /// A `None` encoding implies that the encoding should be automatically - /// detected on a per-file basis. - fn encoding(&self) -> Result> { - match self.0.value_of_lossy("encoding") { - None => Ok(None), - Some(label) => { - if label == "auto" { - return Ok(None); - } - match Encoding::for_label_no_replacement(label.as_bytes()) { - Some(enc) => Ok(Some(enc)), - None => Err(From::from( - format!("unsupported encoding: {}", label))), - } - } + /// Generally, this is only enabled when explicitly requested by in the + /// command line arguments via the --stats flag, but this can also be + /// enabled implicity via the output format, e.g., for JSON Lines. + fn stats(&self) -> bool { + self.output_kind() == OutputKind::JSON || self.is_present("stats") + } + + /// When the output format is `Summary`, this returns the type of summary + /// output to show. + /// + /// This returns `None` if the output format is not `Summary`. + fn summary_kind(&self) -> Option { + let (count, count_matches) = self.counts(); + if self.is_present("quiet") { + Some(SummaryKind::Quiet) + } else if count_matches { + Some(SummaryKind::CountMatches) + } else if count { + Some(SummaryKind::Count) + } else if self.is_present("files-with-matches") { + Some(SummaryKind::PathWithMatch) + } else if self.is_present("files-without-match") { + Some(SummaryKind::PathWithoutMatch) + } else { + None } } - /// Returns the approximate number of threads that ripgrep should use. + /// Return the number of threads that should be used for parallelism. 
fn threads(&self) -> Result { - if self.is_present("sort-files") { + if self.sort_by()?.kind != SortByKind::None { return Ok(1); } - let threads = try!(self.usize_of("threads")).unwrap_or(0); + let threads = self.usize_of("threads")?.unwrap_or(0); Ok(if threads == 0 { cmp::min(12, num_cpus::get()) } else { @@ -755,154 +1406,53 @@ }) } - /// Builds a grep matcher from the command line flags. - /// - /// If there was a problem extracting the pattern from the command line - /// flags, then an error is returned. - fn grep(&self) -> Result { - let smart = - self.is_present("smart-case") - && !self.is_present("ignore-case") - && !self.is_present("case-sensitive"); - let casei = - self.is_present("ignore-case") - && !self.is_present("case-sensitive"); - let mut gb = GrepBuilder::new(&try!(self.pattern())) - .case_smart(smart) - .case_insensitive(casei) - .line_terminator(b'\n'); - - if let Some(limit) = try!(self.dfa_size_limit()) { - gb = gb.dfa_size_limit(limit); - } - if let Some(limit) = try!(self.regex_size_limit()) { - gb = gb.size_limit(limit); - } - gb.build().map_err(From::from) - } - - /// Builds the set of glob overrides from the command line flags. - fn overrides(&self) -> Result { - let mut ovr = OverrideBuilder::new(try!(env::current_dir())); - for glob in self.values_of_lossy_vec("glob") { - try!(ovr.add(&glob)); - } - // this is smelly. In the long run it might make sense - // to change overridebuilder to be like globsetbuilder - // but this would be a breaking change to the ignore crate - // so it is being shelved for now... - try!(ovr.case_insensitive(true)); - for glob in self.values_of_lossy_vec("iglob") { - try!(ovr.add(&glob)); - } - ovr.build().map_err(From::from) - } - /// Builds a file type matcher from the command line flags. 
fn types(&self) -> Result { - let mut btypes = TypesBuilder::new(); - btypes.add_defaults(); + let mut builder = TypesBuilder::new(); + builder.add_defaults(); for ty in self.values_of_lossy_vec("type-clear") { - btypes.clear(&ty); + builder.clear(&ty); } for def in self.values_of_lossy_vec("type-add") { - try!(btypes.add_def(&def)); + builder.add_def(&def)?; } for ty in self.values_of_lossy_vec("type") { - btypes.select(&ty); + builder.select(&ty); } for ty in self.values_of_lossy_vec("type-not") { - btypes.negate(&ty); - } - btypes.build().map_err(From::from) - } - - /// Parses an argument of the form `[0-9]+(KMG)?`. - /// - /// This always returns the result as a type `u64`. This must be converted - /// to the appropriate type by the caller. - fn parse_human_readable_size_arg( - &self, - arg_name: &str, - ) -> Result> { - let arg_value = match self.value_of_lossy(arg_name) { - Some(x) => x, - None => return Ok(None) - }; - let re = regex::Regex::new("^([0-9]+)([KMG])?$").unwrap(); - let caps = try!( - re.captures(&arg_value).ok_or_else(|| { - format!("invalid format for {}", arg_name) - })); - - let value = try!(caps[1].parse::()); - let suffix = caps.get(2).map(|x| x.as_str()); - - let v_10 = value.checked_mul(1024); - let v_20 = v_10.and_then(|x| x.checked_mul(1024)); - let v_30 = v_20.and_then(|x| x.checked_mul(1024)); - - let try_suffix = |x: Option| { - if x.is_some() { - Ok(x) - } else { - Err(From::from(format!("number too large for {}", arg_name))) - } - }; - match suffix { - None => Ok(Some(value)), - Some("K") => try_suffix(v_10), - Some("M") => try_suffix(v_20), - Some("G") => try_suffix(v_30), - _ => Err(From::from(format!("invalid suffix for {}", arg_name))) + builder.negate(&ty); } + builder.build().map_err(From::from) } - /// Parse the dfa-size-limit argument option into a byte count. 
- fn dfa_size_limit(&self) -> Result> { - let r = try!(self.parse_human_readable_size_arg("dfa-size-limit")); - human_readable_to_usize("dfa-size-limit", r) - } - - /// Parse the regex-size-limit argument option into a byte count. - fn regex_size_limit(&self) -> Result> { - let r = try!(self.parse_human_readable_size_arg("regex-size-limit")); - human_readable_to_usize("regex-size-limit", r) - } - - /// Parses the max-filesize argument option into a byte count. - fn max_filesize(&self) -> Result> { - self.parse_human_readable_size_arg("max-filesize") - } - - /// Returns true if ignore files should be ignored. - fn no_ignore(&self) -> bool { - self.is_present("no-ignore") - || self.occurrences_of("unrestricted") >= 1 + /// Returns the number of times the `unrestricted` flag is provided. + fn unrestricted_count(&self) -> u64 { + self.occurrences_of("unrestricted") } - /// Returns true if parent ignore files should be ignored. - fn no_ignore_parent(&self) -> bool { - self.is_present("no-ignore-parent") || self.no_ignore() + /// Returns true if and only if PCRE2's Unicode mode should be enabled. + fn pcre2_unicode(&self) -> bool { + // PCRE2 Unicode is enabled by default, so only disable it when told + // to do so explicitly. + self.is_present("pcre2") && !self.is_present("no-pcre2-unicode") } - /// Returns true if VCS ignore files should be ignored. - fn no_ignore_vcs(&self) -> bool { - self.is_present("no-ignore-vcs") || self.no_ignore() - } - - /// Returns true if and only if hidden files/directories should be - /// searched. - fn hidden(&self) -> bool { - self.is_present("hidden") || self.occurrences_of("unrestricted") >= 2 - } - - /// Returns true if and only if all files should be treated as if they - /// were text, even if ripgrep would detect it as a binary file. - fn text(&self) -> bool { - self.is_present("text") || self.occurrences_of("unrestricted") >= 3 + /// Returns true if and only if file names containing each match should + /// be emitted. 
+ fn with_filename(&self, paths: &[PathBuf]) -> bool { + if self.is_present("no-filename") { + false + } else { + self.is_present("with-filename") + || self.is_present("vimgrep") + || paths.len() > 1 + || paths.get(0).map_or(false, |p| p.is_dir()) + } } +} +/// Lower level generic helper methods for teasing values out of clap. +impl ArgMatches { /// Like values_of_lossy, but returns an empty vec if the flag is not /// present. fn values_of_lossy_vec(&self, name: &str) -> Vec { @@ -911,102 +1461,134 @@ /// Safely reads an arg value with the given name, and if it's present, /// tries to parse it as a usize value. + /// + /// If the number is zero, then it is considered absent and `None` is + /// returned. + fn usize_of_nonzero(&self, name: &str) -> Result> { + let n = match self.usize_of(name)? { + None => return Ok(None), + Some(n) => n, + }; + Ok(if n == 0 { + None + } else { + Some(n) + }) + } + + /// Safely reads an arg value with the given name, and if it's present, + /// tries to parse it as a usize value. fn usize_of(&self, name: &str) -> Result> { match self.value_of_lossy(name) { None => Ok(None), Some(v) => v.parse().map(Some).map_err(From::from), } } -} -fn pattern_to_str(s: &OsStr) -> Result<&str> { - match s.to_str() { - Some(s) => Ok(s), - None => Err(From::from(format!( - "Argument '{}' is not valid UTF-8. \ - Use hex escape sequences to match arbitrary \ - bytes in a pattern (e.g., \\xFF).", - s.to_string_lossy()))), + /// Parses an argument of the form `[0-9]+(KMG)?`. + /// + /// If the aforementioned format is not recognized, then this returns an + /// error. + fn parse_human_readable_size( + &self, + arg_name: &str, + ) -> Result> { + let size = match self.value_of_lossy(arg_name) { + None => return Ok(None), + Some(size) => size, + }; + Ok(Some(cli::parse_human_readable_size(&size)?)) } } -/// A simple thread safe abstraction for determining whether a search should -/// stop if the user has requested quiet mode. 
-#[derive(Clone, Debug)] -pub struct QuietMatched(Arc>); +/// The following methods mostly dispatch to the underlying clap methods +/// directly. Methods that would otherwise get a single value will fetch all +/// values and return the last one. (Clap returns the first one.) We only +/// define the ones we need. +impl ArgMatches { + fn is_present(&self, name: &str) -> bool { + self.0.is_present(name) + } -impl QuietMatched { - /// Create a new QuietMatched value. - /// - /// If quiet is true, then set_match and has_match will reflect whether - /// a search should quit or not because it found a match. - /// - /// If quiet is false, then set_match is always a no-op and has_match - /// always returns false. - fn new(quiet: bool) -> QuietMatched { - let atomic = if quiet { Some(AtomicBool::new(false)) } else { None }; - QuietMatched(Arc::new(atomic)) + fn occurrences_of(&self, name: &str) -> u64 { + self.0.occurrences_of(name) } - /// Returns true if and only if quiet mode is enabled and a match has - /// occurred. - pub fn has_match(&self) -> bool { - match *self.0 { - None => false, - Some(ref matched) => matched.load(Ordering::SeqCst), - } + fn value_of_lossy(&self, name: &str) -> Option { + self.0.value_of_lossy(name).map(|s| s.into_owned()) } - /// Sets whether a match has occurred or not. - /// - /// If quiet mode is disabled, then this is a no-op. 
- pub fn set_match(&self, yes: bool) -> bool { - match *self.0 { - None => false, - Some(_) if !yes => false, - Some(ref m) => { m.store(true, Ordering::SeqCst); true } - } + fn values_of_lossy(&self, name: &str) -> Option> { + self.0.values_of_lossy(name) + } + + fn value_of_os(&self, name: &str) -> Option<&OsStr> { + self.0.value_of_os(name) + } + + fn values_of_os(&self, name: &str) -> Option { + self.0.values_of_os(name) + } +} + +/// Inspect an error resulting from building a Rust regex matcher, and if it's +/// believed to correspond to a syntax error that PCRE2 could handle, then +/// add a message to suggest the use of -P/--pcre2. +#[cfg(feature = "pcre2")] +fn suggest_pcre2(msg: String) -> String { + if !msg.contains("backreferences") && !msg.contains("look-around") { + msg + } else { + format!("{} + +Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences +and look-around.", msg) } } -/// Convert the result of a `parse_human_readable_size_arg` call into -/// a `usize`, failing if the type does not fit. -fn human_readable_to_usize( +/// Convert the result of parsing a human readable file size to a `usize`, +/// failing if the type does not fit. +fn u64_to_usize( arg_name: &str, value: Option, ) -> Result> { use std::usize; - match value { - None => Ok(None), - Some(v) => { - if v <= usize::MAX as u64 { - Ok(Some(v as usize)) - } else { - let msg = format!("number too large for {}", arg_name); - Err(From::from(msg)) - } - } + let value = match value { + None => return Ok(None), + Some(value) => value, + }; + if value <= usize::MAX as u64 { + Ok(Some(value as usize)) + } else { + Err(From::from(format!("number too large for {}", arg_name))) } } -/// Returns true if and only if stdin is deemed searchable. 
-#[cfg(unix)] -fn stdin_is_readable() -> bool { - use std::os::unix::fs::FileTypeExt; - use same_file::Handle; - - let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) { - Err(_) => return false, - Ok(md) => md.file_type(), +/// Builds a comparator for sorting two files according to a system time +/// extracted from the file's metadata. +/// +/// If there was a problem extracting the metadata or if the time is not +/// available, then both entries compare equal. +fn sort_by_metadata_time( + p1: &Path, + p2: &Path, + reverse: bool, + get_time: G, +) -> cmp::Ordering +where G: Fn(&fs::Metadata) -> io::Result +{ + let t1 = match p1.metadata().and_then(|md| get_time(&md)) { + Ok(t) => t, + Err(_) => return cmp::Ordering::Equal, }; - ft.is_file() || ft.is_fifo() -} - -/// Returns true if and only if stdin is deemed searchable. -#[cfg(windows)] -fn stdin_is_readable() -> bool { - // On Windows, it's not clear what the possibilities are to me, so just - // always return true. - true + let t2 = match p2.metadata().and_then(|md| get_time(&md)) { + Ok(t) => t, + Err(_) => return cmp::Ordering::Equal, + }; + if reverse { + t1.cmp(&t2).reverse() + } else { + t1.cmp(&t2) + } } diff -Nru ripgrep-0.6.0/src/config.rs ripgrep-0.10.0.3/src/config.rs --- ripgrep-0.6.0/src/config.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/src/config.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,192 @@ +// This module provides routines for reading ripgrep config "rc" files. The +// primary output of these routines is a sequence of arguments, where each +// argument corresponds precisely to one shell argument. + +use std::env; +use std::error::Error; +use std::fs::File; +use std::io::{self, BufRead}; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; + +use Result; + +/// Return a sequence of arguments derived from ripgrep rc configuration files. 
+pub fn args() -> Vec { + let config_path = match env::var_os("RIPGREP_CONFIG_PATH") { + None => return vec![], + Some(config_path) => { + if config_path.is_empty() { + return vec![]; + } + PathBuf::from(config_path) + } + }; + let (args, errs) = match parse(&config_path) { + Ok((args, errs)) => (args, errs), + Err(err) => { + message!("{}", err); + return vec![]; + } + }; + if !errs.is_empty() { + for err in errs { + message!("{}:{}", config_path.display(), err); + } + } + debug!( + "{}: arguments loaded from config file: {:?}", + config_path.display(), + args + ); + args +} + +/// Parse a single ripgrep rc file from the given path. +/// +/// On success, this returns a set of shell arguments, in order, that should +/// be pre-pended to the arguments given to ripgrep at the command line. +/// +/// If the file could not be read, then an error is returned. If there was +/// a problem parsing one or more lines in the file, then errors are returned +/// for each line in addition to successfully parsed arguments. +fn parse>( + path: P, +) -> Result<(Vec, Vec>)> { + let path = path.as_ref(); + match File::open(&path) { + Ok(file) => parse_reader(file), + Err(err) => Err(From::from(format!("{}: {}", path.display(), err))), + } +} + +/// Parse a single ripgrep rc file from the given reader. +/// +/// Callers should not provided a buffered reader, as this routine will use its +/// own buffer internally. +/// +/// On success, this returns a set of shell arguments, in order, that should +/// be pre-pended to the arguments given to ripgrep at the command line. +/// +/// If the reader could not be read, then an error is returned. If there was a +/// problem parsing one or more lines, then errors are returned for each line +/// in addition to successfully parsed arguments. 
+fn parse_reader( + rdr: R, +) -> Result<(Vec, Vec>)> { + let mut bufrdr = io::BufReader::new(rdr); + let (mut args, mut errs) = (vec![], vec![]); + let mut line = vec![]; + let mut line_number = 0; + while { + line.clear(); + line_number += 1; + bufrdr.read_until(b'\n', &mut line)? > 0 + } { + trim(&mut line); + if line.is_empty() || line[0] == b'#' { + continue; + } + match bytes_to_os_string(&line) { + Ok(osstr) => { + args.push(osstr); + } + Err(err) => { + errs.push(format!("{}: {}", line_number, err).into()); + } + } + } + Ok((args, errs)) +} + +/// Trim the given bytes of whitespace according to the ASCII definition. +fn trim(x: &mut Vec) { + let upto = x.iter().take_while(|b| is_space(**b)).count(); + x.drain(..upto); + let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count(); + x.drain(revto..); +} + +/// Returns true if and only if the given byte is an ASCII space character. +fn is_space(b: u8) -> bool { + b == b'\t' + || b == b'\n' + || b == b'\x0B' + || b == b'\x0C' + || b == b'\r' + || b == b' ' +} + +/// On Unix, get an OsString from raw bytes. +#[cfg(unix)] +fn bytes_to_os_string(bytes: &[u8]) -> Result { + use std::os::unix::ffi::OsStringExt; + Ok(OsString::from_vec(bytes.to_vec())) +} + +/// On non-Unix (like Windows), require UTF-8. +#[cfg(not(unix))] +fn bytes_to_os_string(bytes: &[u8]) -> Result { + String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from) +} + +#[cfg(test)] +mod tests { + use std::ffi::OsString; + use super::parse_reader; + + #[test] + fn basic() { + let (args, errs) = parse_reader(&b"\ +# Test +--context=0 + --smart-case +-u + + + # --bar +--foo +"[..]).unwrap(); + assert!(errs.is_empty()); + let args: Vec = + args.into_iter().map(|s| s.into_string().unwrap()).collect(); + assert_eq!(args, vec![ + "--context=0", "--smart-case", "-u", "--foo", + ]); + } + + // We test that we can handle invalid UTF-8 on Unix-like systems. 
+ #[test] + #[cfg(unix)] + fn error() { + use std::os::unix::ffi::OsStringExt; + + let (args, errs) = parse_reader(&b"\ +quux +foo\xFFbar +baz +"[..]).unwrap(); + assert!(errs.is_empty()); + assert_eq!(args, vec![ + OsString::from("quux"), + OsString::from_vec(b"foo\xFFbar".to_vec()), + OsString::from("baz"), + ]); + } + + // ... but test that invalid UTF-8 fails on Windows. + #[test] + #[cfg(not(unix))] + fn error() { + let (args, errs) = parse_reader(&b"\ +quux +foo\xFFbar +baz +"[..]).unwrap(); + assert_eq!(errs.len(), 1); + assert_eq!(args, vec![ + OsString::from("quux"), + OsString::from("baz"), + ]); + } +} diff -Nru ripgrep-0.6.0/src/decoder.rs ripgrep-0.10.0.3/src/decoder.rs --- ripgrep-0.6.0/src/decoder.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/decoder.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,456 +0,0 @@ -use std::cmp; -use std::io::{self, Read}; - -use encoding_rs::{Decoder, Encoding, UTF_8}; - -/// A BOM is at least 2 bytes and at most 3 bytes. -/// -/// If fewer than 2 bytes are available to be read at the beginning of a -/// reader, then a BOM is `None`. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -struct Bom { - bytes: [u8; 3], - len: usize, -} - -impl Bom { - fn as_slice(&self) -> &[u8] { - &self.bytes[0..self.len] - } - - fn decoder(&self) -> Option { - let bom = self.as_slice(); - if bom.len() < 3 { - return None; - } - if let Some((enc, _)) = Encoding::for_bom(bom) { - if enc != UTF_8 { - return Some(enc.new_decoder_with_bom_removal()); - } - } - None - } -} - -/// BomPeeker wraps `R` and satisfies the `io::Read` interface while also -/// providing a peek at the BOM if one exists. Peeking at the BOM does not -/// advance the reader. -struct BomPeeker { - rdr: R, - bom: Option, - nread: usize, -} - -impl BomPeeker { - /// Create a new BomPeeker. - /// - /// The first three bytes can be read using the `peek_bom` method, but - /// will not advance the reader. 
- fn new(rdr: R) -> BomPeeker { - BomPeeker { rdr: rdr, bom: None, nread: 0 } - } - - /// Peek at the first three bytes of the underlying reader. - /// - /// This does not advance the reader provided by `BomPeeker`. - /// - /// If the underlying reader does not have at least two bytes available, - /// then `None` is returned. - fn peek_bom(&mut self) -> io::Result { - if let Some(bom) = self.bom { - return Ok(bom); - } - self.bom = Some(Bom { bytes: [0; 3], len: 0 }); - let mut buf = [0u8; 3]; - let bom_len = try!(read_full(&mut self.rdr, &mut buf)); - self.bom = Some(Bom { bytes: buf, len: bom_len }); - Ok(self.bom.unwrap()) - } -} - -impl io::Read for BomPeeker { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - if self.nread < 3 { - let bom = try!(self.peek_bom()); - let bom = bom.as_slice(); - if self.nread < bom.len() { - let rest = &bom[self.nread..]; - let len = cmp::min(buf.len(), rest.len()); - buf[..len].copy_from_slice(&rest[..len]); - self.nread += len; - return Ok(len); - } - } - let nread = try!(self.rdr.read(buf)); - self.nread += nread; - Ok(nread) - } -} - -/// Like io::Read::read_exact, except it never returns UnexpectedEof and -/// instead returns the number of bytes read if EOF is seen before filling -/// `buf`. -fn read_full( - mut rdr: R, - mut buf: &mut [u8], -) -> io::Result { - let mut nread = 0; - while !buf.is_empty() { - match rdr.read(buf) { - Ok(0) => break, - Ok(n) => { - nread += n; - let tmp = buf; - buf = &mut tmp[n..]; - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - Ok(nread) -} - -/// A reader that transcodes to UTF-8. The source encoding is determined by -/// inspecting the BOM from the stream read from `R`, if one exists. If a -/// UTF-16 BOM exists, then the source stream is trancoded to UTF-8 with -/// invalid UTF-16 sequences translated to the Unicode replacement character. -/// In all other cases, the underlying reader is passed through unchanged. 
-/// -/// `R` is the type of the underlying reader and `B` is the type of an internal -/// buffer used to store the results of trancoding. -/// -/// Note that not all methods on `io::Read` work with this implementation. -/// For example, the `bytes` adapter method attempts to read a single byte at -/// a time, but this implementation requires a buffer of size at least `4`. If -/// a buffer of size less than 4 is given, then an error is returned. -pub struct DecodeReader { - /// The underlying reader, wrapped in a peeker for reading a BOM if one - /// exists. - rdr: BomPeeker, - /// The internal buffer to store transcoded bytes before they are read by - /// callers. - buf: B, - /// The current position in `buf`. Subsequent reads start here. - pos: usize, - /// The number of transcoded bytes in `buf`. Subsequent reads end here. - buflen: usize, - /// Whether this is the first read or not (in which we inspect the BOM). - first: bool, - /// Whether a "last" read has occurred. After this point, EOF will always - /// be returned. - last: bool, - /// The underlying text decoder derived from the BOM, if one exists. - decoder: Option, -} - -impl> DecodeReader { - /// Create a new transcoder that converts a source stream to valid UTF-8. - /// - /// If an encoding is specified, then it is used to transcode `rdr` to - /// UTF-8. Otherwise, if no encoding is specified, and if a UTF-16 BOM is - /// found, then the corresponding UTF-16 encoding is used to transcode - /// `rdr` to UTF-8. In all other cases, `rdr` is assumed to be at least - /// ASCII-compatible and passed through untouched. - /// - /// Errors in the encoding of `rdr` are handled with the Unicode - /// replacement character. If no encoding of `rdr` is specified, then - /// errors are not handled. 
- pub fn new( - rdr: R, - buf: B, - enc: Option<&'static Encoding>, - ) -> DecodeReader { - DecodeReader { - rdr: BomPeeker::new(rdr), - buf: buf, - buflen: 0, - pos: 0, - first: enc.is_none(), - last: false, - decoder: enc.map(|enc| enc.new_decoder_with_bom_removal()), - } - } - - /// Fill the internal buffer from the underlying reader. - /// - /// If there are unread bytes in the internal buffer, then we move them - /// to the beginning of the internal buffer and fill the remainder. - /// - /// If the internal buffer is too small to read additional bytes, then an - /// error is returned. - #[inline(always)] // massive perf benefit (???) - fn fill(&mut self) -> io::Result<()> { - if self.pos < self.buflen { - if self.buflen >= self.buf.as_mut().len() { - return Err(io::Error::new( - io::ErrorKind::Other, - "DecodeReader: internal buffer exhausted")); - } - let newlen = self.buflen - self.pos; - let mut tmp = Vec::with_capacity(newlen); - tmp.extend_from_slice(&self.buf.as_mut()[self.pos..self.buflen]); - self.buf.as_mut()[..newlen].copy_from_slice(&tmp); - self.buflen = newlen; - } else { - self.buflen = 0; - } - self.pos = 0; - self.buflen += - try!(self.rdr.read(&mut self.buf.as_mut()[self.buflen..])); - Ok(()) - } - - /// Transcode the inner stream to UTF-8 in `buf`. This assumes that there - /// is a decoder capable of transcoding the inner stream to UTF-8. This - /// returns the number of bytes written to `buf`. - /// - /// When this function returns, exactly one of the following things will - /// be true: - /// - /// 1. A non-zero number of bytes were written to `buf`. - /// 2. The underlying reader reached EOF. - /// 3. An error is returned: the internal buffer ran out of room. - /// 4. An I/O error occurred. - /// - /// Note that `buf` must have at least 4 bytes of space. 
- fn transcode(&mut self, buf: &mut [u8]) -> io::Result { - assert!(buf.len() >= 4); - if self.last { - return Ok(0); - } - if self.pos >= self.buflen { - try!(self.fill()); - } - let mut nwrite = 0; - loop { - let (_, nin, nout, _) = - self.decoder.as_mut().unwrap().decode_to_utf8( - &self.buf.as_mut()[self.pos..self.buflen], buf, false); - self.pos += nin; - nwrite += nout; - // If we've written at least one byte to the caller-provided - // buffer, then our mission is complete. - if nwrite > 0 { - break; - } - // Otherwise, we know that our internal buffer has insufficient - // data to transcode at least one char, so we attempt to refill it. - try!(self.fill()); - // Quit on EOF. - if self.buflen == 0 { - self.pos = 0; - self.last = true; - let (_, _, nout, _) = - self.decoder.as_mut().unwrap().decode_to_utf8( - &[], buf, true); - return Ok(nout); - } - } - Ok(nwrite) - } - - #[inline(never)] // impacts perf... - fn detect(&mut self) -> io::Result<()> { - let bom = try!(self.rdr.peek_bom()); - self.decoder = bom.decoder(); - Ok(()) - } -} - -impl> io::Read for DecodeReader { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - if self.first { - self.first = false; - try!(self.detect()); - } - if self.decoder.is_none() { - return self.rdr.read(buf); - } - // When decoding UTF-8, we need at least 4 bytes of space to guarantee - // that we can decode at least one codepoint. If we don't have it, we - // can either return `0` for the number of bytes read or return an - // error. Since `0` would be interpreted as a possibly premature EOF, - // we opt for an error. 
- if buf.len() < 4 { - return Err(io::Error::new( - io::ErrorKind::Other, - "DecodeReader: byte buffer must have length at least 4")); - } - self.transcode(buf) - } -} - -#[cfg(test)] -mod tests { - use std::io::Read; - - use encoding_rs::Encoding; - - use super::{Bom, BomPeeker, DecodeReader}; - - fn read_to_string(mut rdr: R) -> String { - let mut s = String::new(); - rdr.read_to_string(&mut s).unwrap(); - s - } - - #[test] - fn peeker_empty() { - let buf = []; - let mut peeker = BomPeeker::new(&buf[..]); - assert_eq!(Bom { bytes: [0; 3], len: 0}, peeker.peek_bom().unwrap()); - - let mut tmp = [0; 100]; - assert_eq!(0, peeker.read(&mut tmp).unwrap()); - } - - #[test] - fn peeker_one() { - let buf = [1]; - let mut peeker = BomPeeker::new(&buf[..]); - assert_eq!( - Bom { bytes: [1, 0, 0], len: 1}, - peeker.peek_bom().unwrap()); - - let mut tmp = [0; 100]; - assert_eq!(1, peeker.read(&mut tmp).unwrap()); - assert_eq!(1, tmp[0]); - assert_eq!(0, peeker.read(&mut tmp).unwrap()); - } - - #[test] - fn peeker_two() { - let buf = [1, 2]; - let mut peeker = BomPeeker::new(&buf[..]); - assert_eq!( - Bom { bytes: [1, 2, 0], len: 2}, - peeker.peek_bom().unwrap()); - - let mut tmp = [0; 100]; - assert_eq!(2, peeker.read(&mut tmp).unwrap()); - assert_eq!(1, tmp[0]); - assert_eq!(2, tmp[1]); - assert_eq!(0, peeker.read(&mut tmp).unwrap()); - } - - #[test] - fn peeker_three() { - let buf = [1, 2, 3]; - let mut peeker = BomPeeker::new(&buf[..]); - assert_eq!( - Bom { bytes: [1, 2, 3], len: 3}, - peeker.peek_bom().unwrap()); - - let mut tmp = [0; 100]; - assert_eq!(3, peeker.read(&mut tmp).unwrap()); - assert_eq!(1, tmp[0]); - assert_eq!(2, tmp[1]); - assert_eq!(3, tmp[2]); - assert_eq!(0, peeker.read(&mut tmp).unwrap()); - } - - #[test] - fn peeker_four() { - let buf = [1, 2, 3, 4]; - let mut peeker = BomPeeker::new(&buf[..]); - assert_eq!( - Bom { bytes: [1, 2, 3], len: 3}, - peeker.peek_bom().unwrap()); - - let mut tmp = [0; 100]; - assert_eq!(3, peeker.read(&mut tmp).unwrap()); 
- assert_eq!(1, tmp[0]); - assert_eq!(2, tmp[1]); - assert_eq!(3, tmp[2]); - assert_eq!(1, peeker.read(&mut tmp).unwrap()); - assert_eq!(4, tmp[0]); - assert_eq!(0, peeker.read(&mut tmp).unwrap()); - } - - #[test] - fn peeker_one_at_a_time() { - let buf = [1, 2, 3, 4]; - let mut peeker = BomPeeker::new(&buf[..]); - - let mut tmp = [0; 1]; - assert_eq!(0, peeker.read(&mut tmp[..0]).unwrap()); - assert_eq!(0, tmp[0]); - assert_eq!(1, peeker.read(&mut tmp).unwrap()); - assert_eq!(1, tmp[0]); - assert_eq!(1, peeker.read(&mut tmp).unwrap()); - assert_eq!(2, tmp[0]); - assert_eq!(1, peeker.read(&mut tmp).unwrap()); - assert_eq!(3, tmp[0]); - assert_eq!(1, peeker.read(&mut tmp).unwrap()); - assert_eq!(4, tmp[0]); - } - - // In cases where all we have is a bom, we expect the bytes to be - // passed through unchanged. - #[test] - fn trans_utf16_bom() { - let srcbuf = vec![0xFF, 0xFE]; - let mut dstbuf = vec![0; 8 * (1<<10)]; - let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None); - let n = rdr.read(&mut dstbuf).unwrap(); - assert_eq!(&*srcbuf, &dstbuf[..n]); - - let srcbuf = vec![0xFE, 0xFF]; - let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None); - let n = rdr.read(&mut dstbuf).unwrap(); - assert_eq!(&*srcbuf, &dstbuf[..n]); - - let srcbuf = vec![0xEF, 0xBB, 0xBF]; - let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None); - let n = rdr.read(&mut dstbuf).unwrap(); - assert_eq!(&*srcbuf, &dstbuf[..n]); - } - - // Test basic UTF-16 decoding. - #[test] - fn trans_utf16_basic() { - let srcbuf = vec![0xFF, 0xFE, 0x61, 0x00]; - let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None); - assert_eq!("a", read_to_string(&mut rdr)); - - let srcbuf = vec![0xFE, 0xFF, 0x00, 0x61]; - let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None); - assert_eq!("a", read_to_string(&mut rdr)); - } - - // Test incomplete UTF-16 decoding. 
This ensures we see a replacement char - // if the stream ends with an unpaired code unit. - #[test] - fn trans_utf16_incomplete() { - let srcbuf = vec![0xFF, 0xFE, 0x61, 0x00, 0x00]; - let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None); - assert_eq!("a\u{FFFD}", read_to_string(&mut rdr)); - } - - macro_rules! test_trans_simple { - ($name:ident, $enc:expr, $srcbytes:expr, $dst:expr) => { - #[test] - fn $name() { - let srcbuf = &$srcbytes[..]; - let enc = Encoding::for_label($enc.as_bytes()); - let mut rdr = DecodeReader::new( - &*srcbuf, vec![0; 8 * (1<<10)], enc); - assert_eq!($dst, read_to_string(&mut rdr)); - } - } - } - - // This isn't exhaustive obviously, but it lets us test base level support. - test_trans_simple!(trans_simple_auto, "does not exist", b"\xD0\x96", "Ж"); - test_trans_simple!(trans_simple_utf8, "utf-8", b"\xD0\x96", "Ж"); - test_trans_simple!(trans_simple_utf16le, "utf-16le", b"\x16\x04", "Ж"); - test_trans_simple!(trans_simple_utf16be, "utf-16be", b"\x04\x16", "Ж"); - test_trans_simple!(trans_simple_chinese, "chinese", b"\xA7\xA8", "Ж"); - test_trans_simple!(trans_simple_korean, "korean", b"\xAC\xA8", "Ж"); - test_trans_simple!( - trans_simple_big5_hkscs, "big5-hkscs", b"\xC7\xFA", "Ж"); - test_trans_simple!(trans_simple_gbk, "gbk", b"\xA7\xA8", "Ж"); - test_trans_simple!(trans_simple_sjis, "sjis", b"\x84\x47", "Ж"); - test_trans_simple!(trans_simple_eucjp, "euc-jp", b"\xA7\xA8", "Ж"); - test_trans_simple!(trans_simple_latin1, "latin1", b"\xA9", "©"); -} diff -Nru ripgrep-0.6.0/src/logger.rs ripgrep-0.10.0.3/src/logger.rs --- ripgrep-0.6.0/src/logger.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/src/logger.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,68 @@ +// This module defines a super simple logger that works with the `log` crate. +// We don't need anything fancy; just basic log levels and the ability to +// print to stderr. 
We therefore avoid bringing in extra dependencies just +// for this functionality. + +use log::{self, Log}; + +/// The simplest possible logger that logs to stderr. +/// +/// This logger does no filtering. Instead, it relies on the `log` crates +/// filtering via its global max_level setting. +#[derive(Debug)] +pub struct Logger(()); + +const LOGGER: &'static Logger = &Logger(()); + +impl Logger { + /// Create a new logger that logs to stderr and initialize it as the + /// global logger. If there was a problem setting the logger, then an + /// error is returned. + pub fn init() -> Result<(), log::SetLoggerError> { + log::set_logger(LOGGER) + } +} + +impl Log for Logger { + fn enabled(&self, _: &log::Metadata) -> bool { + // We set the log level via log::set_max_level, so we don't need to + // implement filtering here. + true + } + + fn log(&self, record: &log::Record) { + match (record.file(), record.line()) { + (Some(file), Some(line)) => { + eprintln!( + "{}|{}|{}:{}: {}", + record.level(), + record.target(), + file, + line, + record.args() + ); + } + (Some(file), None) => { + eprintln!( + "{}|{}|{}: {}", + record.level(), + record.target(), + file, + record.args() + ); + } + _ => { + eprintln!( + "{}|{}: {}", + record.level(), + record.target(), + record.args() + ); + } + } + } + + fn flush(&self) { + // We use eprintln! which is flushed on every call. 
+ } +} diff -Nru ripgrep-0.6.0/src/main.rs ripgrep-0.10.0.3/src/main.rs --- ripgrep-0.6.0/src/main.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/main.rs 2018-09-10 21:10:55.000000000 +0000 @@ -1,331 +1,287 @@ -extern crate atty; -extern crate bytecount; #[macro_use] extern crate clap; -extern crate encoding_rs; -extern crate env_logger; extern crate grep; extern crate ignore; #[macro_use] extern crate lazy_static; #[macro_use] extern crate log; -extern crate memchr; -extern crate memmap; extern crate num_cpus; extern crate regex; -extern crate same_file; +#[macro_use] +extern crate serde_json; extern crate termcolor; -use std::error::Error; +use std::io::{self, Write}; use std::process; -use std::result; -use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::mpsc; -use std::thread; +use std::sync::{Arc, Mutex}; +use std::time::Instant; -use args::Args; -use worker::Work; +use ignore::WalkState; -macro_rules! errored { - ($($tt:tt)*) => { - return Err(From::from(format!($($tt)*))); - } -} +use args::Args; +use subject::Subject; -macro_rules! 
eprintln { - ($($tt:tt)*) => {{ - use std::io::Write; - let _ = writeln!(&mut ::std::io::stderr(), $($tt)*); - }} -} +#[macro_use] +mod messages; mod app; mod args; -mod decoder; -mod pathutil; -mod printer; -mod search_buffer; -mod search_stream; -mod unescape; -mod worker; +mod config; +mod logger; +mod path_printer; +mod search; +mod subject; -pub type Result = result::Result>; +type Result = ::std::result::Result>; fn main() { - match Args::parse().map(Arc::new).and_then(run) { - Ok(0) => process::exit(1), - Ok(_) => process::exit(0), + match Args::parse().and_then(try_main) { + Ok(true) => process::exit(0), + Ok(false) => process::exit(1), Err(err) => { eprintln!("{}", err); - process::exit(1); + process::exit(2); } } } -fn run(args: Arc) -> Result { - if args.never_match() { - return Ok(0); - } - let threads = args.threads(); - if args.files() { - if threads == 1 || args.is_one_path() { - run_files_one_thread(args) - } else { - run_files_parallel(args) - } - } else if args.type_list() { - run_types(args) - } else if threads == 1 || args.is_one_path() { - run_one_thread(args) - } else { - run_parallel(args) +fn try_main(args: Args) -> Result { + use args::Command::*; + + match args.command()? { + Search => search(args), + SearchParallel => search_parallel(args), + SearchNever => Ok(false), + Files => files(args), + FilesParallel => files_parallel(args), + Types => types(args), } } -fn run_parallel(args: Arc) -> Result { - let bufwtr = Arc::new(args.buffer_writer()); - let quiet_matched = args.quiet_matched(); - let paths_searched = Arc::new(AtomicUsize::new(0)); - let match_count = Arc::new(AtomicUsize::new(0)); - - args.walker_parallel().run(|| { +/// The top-level entry point for single-threaded search. This recursively +/// steps through the file list (current directory by default) and searches +/// each file sequentially. 
+fn search(args: Args) -> Result { + let started_at = Instant::now(); + let quit_after_match = args.quit_after_match()?; + let subject_builder = args.subject_builder(); + let mut stats = args.stats()?; + let mut searcher = args.search_worker(args.stdout())?; + let mut matched = false; + + for result in args.walker()? { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => continue, + }; + let search_result = match searcher.search(&subject) { + Ok(search_result) => search_result, + Err(err) => { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + break; + } + message!("{}: {}", subject.path().display(), err); + continue; + } + }; + matched = matched || search_result.has_match(); + if let Some(ref mut stats) = stats { + *stats += search_result.stats().unwrap(); + } + if matched && quit_after_match { + break; + } + } + if let Some(ref stats) = stats { + let elapsed = Instant::now().duration_since(started_at); + // We don't care if we couldn't print this successfully. + let _ = searcher.print_stats(elapsed, stats); + } + Ok(matched) +} + +/// The top-level entry point for multi-threaded search. The parallelism is +/// itself achieved by the recursive directory traversal. All we need to do is +/// feed it a worker for performing a search on each file. 
+fn search_parallel(args: Args) -> Result { + use std::sync::atomic::AtomicBool; + use std::sync::atomic::Ordering::SeqCst; + + let quit_after_match = args.quit_after_match()?; + let started_at = Instant::now(); + let subject_builder = Arc::new(args.subject_builder()); + let bufwtr = Arc::new(args.buffer_writer()?); + let stats = Arc::new(args.stats()?.map(Mutex::new)); + let matched = Arc::new(AtomicBool::new(false)); + let mut searcher_err = None; + args.walker_parallel()?.run(|| { let args = args.clone(); - let quiet_matched = quiet_matched.clone(); - let paths_searched = paths_searched.clone(); - let match_count = match_count.clone(); - let bufwtr = bufwtr.clone(); - let mut buf = bufwtr.buffer(); - let mut worker = args.worker(); - Box::new(move |result| { - use ignore::WalkState::*; - - if quiet_matched.has_match() { - return Quit; + let bufwtr = Arc::clone(&bufwtr); + let stats = Arc::clone(&stats); + let matched = Arc::clone(&matched); + let subject_builder = Arc::clone(&subject_builder); + let mut searcher = match args.search_worker(bufwtr.buffer()) { + Ok(searcher) => searcher, + Err(err) => { + searcher_err = Some(err); + return Box::new(move |_| { + WalkState::Quit + }); } - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.no_messages(), - ) { - None => return Continue, - Some(dent) => dent, + }; + + Box::new(move |result| { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => return WalkState::Continue, + }; + searcher.printer().get_mut().clear(); + let search_result = match searcher.search(&subject) { + Ok(search_result) => search_result, + Err(err) => { + message!("{}: {}", subject.path().display(), err); + return WalkState::Continue; + } }; - paths_searched.fetch_add(1, Ordering::SeqCst); - buf.clear(); - { - // This block actually executes the search and prints the - // results into outbuf. 
- let mut printer = args.printer(&mut buf); - let count = - if dent.is_stdin() { - worker.run(&mut printer, Work::Stdin) - } else { - worker.run(&mut printer, Work::DirEntry(dent)) - }; - match_count.fetch_add(count as usize, Ordering::SeqCst); - if quiet_matched.set_match(count > 0) { - return Quit; + if search_result.has_match() { + matched.store(true, SeqCst); + } + if let Some(ref locked_stats) = *stats { + let mut stats = locked_stats.lock().unwrap(); + *stats += search_result.stats().unwrap(); + } + if let Err(err) = bufwtr.print(searcher.printer().get_mut()) { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + return WalkState::Quit; } + // Otherwise, we continue on our merry way. + message!("{}: {}", subject.path().display(), err); + } + if matched.load(SeqCst) && quit_after_match { + WalkState::Quit + } else { + WalkState::Continue } - // BUG(burntsushi): We should handle this error instead of ignoring - // it. See: https://github.com/BurntSushi/ripgrep/issues/200 - let _ = bufwtr.print(&buf); - Continue }) }); - if !args.paths().is_empty() && paths_searched.load(Ordering::SeqCst) == 0 { - if !args.no_messages() { - eprint_nothing_searched(); - } + if let Some(err) = searcher_err.take() { + return Err(err); } - Ok(match_count.load(Ordering::SeqCst) as u64) -} - -fn run_one_thread(args: Arc) -> Result { - let stdout = args.stdout(); - let mut stdout = stdout.lock(); - let mut worker = args.worker(); - let mut paths_searched: u64 = 0; - let mut match_count = 0; - for result in args.walker() { - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.no_messages(), - ) { + if let Some(ref locked_stats) = *stats { + let elapsed = Instant::now().duration_since(started_at); + let stats = locked_stats.lock().unwrap(); + let mut searcher = args.search_worker(args.stdout())?; + // We don't care if we couldn't print this successfully. 
+ let _ = searcher.print_stats(elapsed, &stats); + } + Ok(matched.load(SeqCst)) +} + +/// The top-level entry point for listing files without searching them. This +/// recursively steps through the file list (current directory by default) and +/// prints each path sequentially using a single thread. +fn files(args: Args) -> Result { + let quit_after_match = args.quit_after_match()?; + let subject_builder = args.subject_builder(); + let mut matched = false; + let mut path_printer = args.path_printer(args.stdout())?; + for result in args.walker()? { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, None => continue, - Some(dent) => dent, }; - let mut printer = args.printer(&mut stdout); - if match_count > 0 { - if args.quiet() { + matched = true; + if quit_after_match { + break; + } + if let Err(err) = path_printer.write_path(subject.path()) { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { break; } - if let Some(sep) = args.file_separator() { - printer = printer.file_separator(sep); - } + // Otherwise, we have some other error that's preventing us from + // writing to stdout, so we should bubble it up. 
+ return Err(err.into()); } - paths_searched += 1; - match_count += - if dent.is_stdin() { - worker.run(&mut printer, Work::Stdin) - } else { - worker.run(&mut printer, Work::DirEntry(dent)) - }; } - if !args.paths().is_empty() && paths_searched == 0 { - if !args.no_messages() { - eprint_nothing_searched(); - } - } - Ok(match_count) + Ok(matched) } -fn run_files_parallel(args: Arc) -> Result { - let print_args = args.clone(); - let (tx, rx) = mpsc::channel::(); - let print_thread = thread::spawn(move || { - let stdout = print_args.stdout(); - let mut printer = print_args.printer(stdout.lock()); - let mut file_count = 0; - for dent in rx.iter() { - if !print_args.quiet() { - printer.path(dent.path()); - } - file_count += 1; +/// The top-level entry point for listing files without searching them. This +/// recursively steps through the file list (current directory by default) and +/// prints each path sequentially using multiple threads. +fn files_parallel(args: Args) -> Result { + use std::sync::atomic::AtomicBool; + use std::sync::atomic::Ordering::SeqCst; + use std::sync::mpsc; + use std::thread; + + let quit_after_match = args.quit_after_match()?; + let subject_builder = Arc::new(args.subject_builder()); + let mut path_printer = args.path_printer(args.stdout())?; + let matched = Arc::new(AtomicBool::new(false)); + let (tx, rx) = mpsc::channel::(); + + let print_thread = thread::spawn(move || -> io::Result<()> { + for subject in rx.iter() { + path_printer.write_path(subject.path())?; } - file_count + Ok(()) }); - args.walker_parallel().run(move || { - let args = args.clone(); + args.walker_parallel()?.run(|| { + let subject_builder = Arc::clone(&subject_builder); + let matched = Arc::clone(&matched); let tx = tx.clone(); + Box::new(move |result| { - if let Some(dent) = get_or_log_dir_entry( - result, - args.stdout_handle(), - args.no_messages(), - ) { - tx.send(dent).unwrap(); + let subject = match subject_builder.build_from_result(result) { + Some(subject) => 
subject, + None => return WalkState::Continue, + }; + matched.store(true, SeqCst); + if quit_after_match { + WalkState::Quit + } else { + match tx.send(subject) { + Ok(_) => WalkState::Continue, + Err(_) => WalkState::Quit, + } } - ignore::WalkState::Continue }) }); - Ok(print_thread.join().unwrap()) -} - -fn run_files_one_thread(args: Arc) -> Result { - let stdout = args.stdout(); - let mut printer = args.printer(stdout.lock()); - let mut file_count = 0; - for result in args.walker() { - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.no_messages(), - ) { - None => continue, - Some(dent) => dent, - }; - if !args.quiet() { - printer.path(dent.path()); + drop(tx); + if let Err(err) = print_thread.join().unwrap() { + // A broken pipe means graceful termination, so fall through. + // Otherwise, something bad happened while writing to stdout, so bubble + // it up. + if err.kind() != io::ErrorKind::BrokenPipe { + return Err(err.into()); } - file_count += 1; } - Ok(file_count) + Ok(matched.load(SeqCst)) } -fn run_types(args: Arc) -> Result { - let stdout = args.stdout(); - let mut printer = args.printer(stdout.lock()); - let mut ty_count = 0; - for def in args.type_defs() { - printer.type_def(def); - ty_count += 1; - } - Ok(ty_count) -} - -fn get_or_log_dir_entry( - result: result::Result, - stdout_handle: Option<&same_file::Handle>, - no_messages: bool, -) -> Option { - match result { - Err(err) => { - if !no_messages { - eprintln!("{}", err); +/// The top-level entry point for --type-list. +fn types(args: Args) -> Result { + let mut count = 0; + let mut stdout = args.stdout(); + for def in args.type_defs()? 
{ + count += 1; + stdout.write_all(def.name().as_bytes())?; + stdout.write_all(b": ")?; + + let mut first = true; + for glob in def.globs() { + if !first { + stdout.write_all(b", ")?; } - None - } - Ok(dent) => { - if let Some(err) = dent.error() { - if !no_messages { - eprintln!("{}", err); - } - } - let ft = match dent.file_type() { - None => return Some(dent), // entry is stdin - Some(ft) => ft, - }; - // A depth of 0 means the user gave the path explicitly, so we - // should always try to search it. - if dent.depth() == 0 && !ft.is_dir() { - return Some(dent); - } else if !ft.is_file() { - return None; - } - // If we are redirecting stdout to a file, then don't search that - // file. - if is_stdout_file(&dent, stdout_handle, no_messages) { - return None; - } - Some(dent) + stdout.write_all(glob.as_bytes())?; + first = false; } + stdout.write_all(b"\n")?; } -} - -fn is_stdout_file( - dent: &ignore::DirEntry, - stdout_handle: Option<&same_file::Handle>, - no_messages: bool, -) -> bool { - let stdout_handle = match stdout_handle { - None => return false, - Some(stdout_handle) => stdout_handle, - }; - // If we know for sure that these two things aren't equal, then avoid - // the costly extra stat call to determine equality. - if !maybe_dent_eq_handle(dent, stdout_handle) { - return false; - } - match same_file::Handle::from_path(dent.path()) { - Ok(h) => stdout_handle == &h, - Err(err) => { - if !no_messages { - eprintln!("{}: {}", dent.path().display(), err); - } - false - } - } -} - -#[cfg(unix)] -fn maybe_dent_eq_handle( - dent: &ignore::DirEntry, - handle: &same_file::Handle, -) -> bool { - dent.ino() == Some(handle.ino()) -} - -#[cfg(not(unix))] -fn maybe_dent_eq_handle(_: &ignore::DirEntry, _: &same_file::Handle) -> bool { - true -} - -fn eprint_nothing_searched() { - eprintln!("No files were searched, which means ripgrep probably \ - applied a filter you didn't expect. 
\ - Try running again with --debug."); + Ok(count > 0) } diff -Nru ripgrep-0.6.0/src/messages.rs ripgrep-0.10.0.3/src/messages.rs --- ripgrep-0.6.0/src/messages.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/src/messages.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,50 @@ +use std::sync::atomic::{ATOMIC_BOOL_INIT, AtomicBool, Ordering}; + +static MESSAGES: AtomicBool = ATOMIC_BOOL_INIT; +static IGNORE_MESSAGES: AtomicBool = ATOMIC_BOOL_INIT; + +#[macro_export] +macro_rules! message { + ($($tt:tt)*) => { + if ::messages::messages() { + eprintln!($($tt)*); + } + } +} + +#[macro_export] +macro_rules! ignore_message { + ($($tt:tt)*) => { + if ::messages::messages() && ::messages::ignore_messages() { + eprintln!($($tt)*); + } + } +} + +/// Returns true if and only if messages should be shown. +pub fn messages() -> bool { + MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether messages should be shown or not. +/// +/// By default, they are not shown. +pub fn set_messages(yes: bool) { + MESSAGES.store(yes, Ordering::SeqCst) +} + +/// Returns true if and only if "ignore" related messages should be shown. +pub fn ignore_messages() -> bool { + IGNORE_MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether "ignore" related messages should be shown or not. +/// +/// By default, they are not shown. +/// +/// Note that this is overridden if `messages` is disabled. Namely, if +/// `messages` is disabled, then "ignore" messages are never shown, regardless +/// of this setting. 
+pub fn set_ignore_messages(yes: bool) { + IGNORE_MESSAGES.store(yes, Ordering::SeqCst) +} diff -Nru ripgrep-0.6.0/src/path_printer.rs ripgrep-0.10.0.3/src/path_printer.rs --- ripgrep-0.6.0/src/path_printer.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/src/path_printer.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,101 @@ +use std::io; +use std::path::Path; + +use grep::printer::{ColorSpecs, PrinterPath}; +use termcolor::WriteColor; + +/// A configuration for describing how paths should be written. +#[derive(Clone, Debug)] +struct Config { + colors: ColorSpecs, + separator: Option, + terminator: u8, +} + +impl Default for Config { + fn default() -> Config { + Config { + colors: ColorSpecs::default(), + separator: None, + terminator: b'\n', + } + } +} + +/// A builder for constructing things to search over. +#[derive(Clone, Debug)] +pub struct PathPrinterBuilder { + config: Config, +} + +impl PathPrinterBuilder { + /// Return a new subject builder with a default configuration. + pub fn new() -> PathPrinterBuilder { + PathPrinterBuilder { config: Config::default() } + } + + /// Create a new path printer with the current configuration that writes + /// paths to the given writer. + pub fn build(&self, wtr: W) -> PathPrinter { + PathPrinter { + config: self.config.clone(), + wtr: wtr, + } + } + + /// Set the color specification for this printer. + /// + /// Currently, only the `path` component of the given specification is + /// used. + pub fn color_specs( + &mut self, + specs: ColorSpecs, + ) -> &mut PathPrinterBuilder { + self.config.colors = specs; + self + } + + /// A path separator. + /// + /// When provided, the path's default separator will be replaced with + /// the given separator. + /// + /// This is not set by default, and the system's default path separator + /// will be used. + pub fn separator(&mut self, sep: Option) -> &mut PathPrinterBuilder { + self.config.separator = sep; + self + } + + /// A path terminator. 
+ /// + /// When printing a path, it will be by terminated by the given byte. + /// + /// This is set to `\n` by default. + pub fn terminator(&mut self, terminator: u8) -> &mut PathPrinterBuilder { + self.config.terminator = terminator; + self + } +} + +/// A printer for emitting paths to a writer, with optional color support. +#[derive(Debug)] +pub struct PathPrinter { + config: Config, + wtr: W, +} + +impl PathPrinter { + /// Write the given path to the underlying writer. + pub fn write_path(&mut self, path: &Path) -> io::Result<()> { + let ppath = PrinterPath::with_separator(path, self.config.separator); + if !self.wtr.supports_color() { + self.wtr.write_all(ppath.as_bytes())?; + } else { + self.wtr.set_color(self.config.colors.path())?; + self.wtr.write_all(ppath.as_bytes())?; + self.wtr.reset()?; + } + self.wtr.write_all(&[self.config.terminator]) + } +} diff -Nru ripgrep-0.6.0/src/pathutil.rs ripgrep-0.10.0.3/src/pathutil.rs --- ripgrep-0.6.0/src/pathutil.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/pathutil.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -/*! -The pathutil module provides platform specific operations on paths that are -typically faster than the same operations as provided in `std::path`. In -particular, we really want to avoid the costly operation of parsing the path -into its constituent components. We give up on Windows, but on Unix, we deal -with the raw bytes directly. - -On large repositories (like chromium), this can have a ~25% performance -improvement on just listing the files to search (!). -*/ -use std::path::Path; - -/// Strip `prefix` from the `path` and return the remainder. -/// -/// If `path` doesn't have a prefix `prefix`, then return `None`. 
-#[cfg(unix)] -pub fn strip_prefix<'a, P: AsRef + ?Sized>( - prefix: &'a P, - path: &'a Path, -) -> Option<&'a Path> { - use std::ffi::OsStr; - use std::os::unix::ffi::OsStrExt; - - let prefix = prefix.as_ref().as_os_str().as_bytes(); - let path = path.as_os_str().as_bytes(); - if prefix.len() > path.len() || prefix != &path[0..prefix.len()] { - None - } else { - Some(Path::new(OsStr::from_bytes(&path[prefix.len()..]))) - } -} - -/// Strip `prefix` from the `path` and return the remainder. -/// -/// If `path` doesn't have a prefix `prefix`, then return `None`. -#[cfg(not(unix))] -pub fn strip_prefix<'a, P: AsRef + ?Sized>( - prefix: &'a P, - path: &'a Path, -) -> Option<&'a Path> { - path.strip_prefix(prefix).ok() -} diff -Nru ripgrep-0.6.0/src/printer.rs ripgrep-0.10.0.3/src/printer.rs --- ripgrep-0.6.0/src/printer.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/printer.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,847 +0,0 @@ -use std::error; -use std::fmt; -use std::path::Path; -use std::str::FromStr; - -use regex::bytes::{Captures, Regex, Replacer}; -use termcolor::{Color, ColorSpec, ParseColorError, WriteColor}; - -use pathutil::strip_prefix; -use ignore::types::FileTypeDef; - -/// CountingReplacer implements the Replacer interface for Regex, -/// and counts how often replacement is being performed. -struct CountingReplacer<'r> { - replace: &'r [u8], - count: &'r mut usize, -} - -impl<'r> CountingReplacer<'r> { - fn new(replace: &'r [u8], count: &'r mut usize) -> CountingReplacer<'r> { - CountingReplacer { replace: replace, count: count } - } -} - -impl<'r> Replacer for CountingReplacer<'r> { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec) { - *self.count += 1; - caps.expand(self.replace, dst); - } -} - -/// Printer encapsulates all output logic for searching. -/// -/// Note that we currently ignore all write errors. 
It's probably worthwhile -/// to fix this, but printers are only ever used for writes to stdout or -/// writes to memory, neither of which commonly fail. -pub struct Printer { - /// The underlying writer. - wtr: W, - /// Whether anything has been printed to wtr yet. - has_printed: bool, - /// Whether to show column numbers for the first match or not. - column: bool, - /// The string to use to separate non-contiguous runs of context lines. - context_separator: Vec, - /// The end-of-line terminator used by the printer. In general, eols are - /// printed via the match directly, but occasionally we need to insert them - /// ourselves (for example, to print a context separator). - eol: u8, - /// A file separator to show before any matches are printed. - file_separator: Option>, - /// Whether to show file name as a heading or not. - /// - /// N.B. If with_filename is false, then this setting has no effect. - heading: bool, - /// Whether to show every match on its own line. - line_per_match: bool, - /// Whether to print NUL bytes after a file path instead of new lines - /// or `:`. - null: bool, - /// Print only the matched (non-empty) parts of a matching line - only_matching: bool, - /// A string to use as a replacement of each match in a matching line. - replace: Option>, - /// Whether to prefix each match with the corresponding file name. - with_filename: bool, - /// The color specifications. - colors: ColorSpecs, - /// The separator to use for file paths. If empty, this is ignored. - path_separator: Option, - /// Restrict lines to this many columns. - max_columns: Option -} - -impl Printer { - /// Create a new printer that writes to wtr with the given color settings. 
- pub fn new(wtr: W) -> Printer { - Printer { - wtr: wtr, - has_printed: false, - column: false, - context_separator: "--".to_string().into_bytes(), - eol: b'\n', - file_separator: None, - heading: false, - line_per_match: false, - null: false, - only_matching: false, - replace: None, - with_filename: false, - colors: ColorSpecs::default(), - path_separator: None, - max_columns: None, - } - } - - /// Set the color specifications. - pub fn colors(mut self, colors: ColorSpecs) -> Printer { - self.colors = colors; - self - } - - /// When set, column numbers will be printed for the first match on each - /// line. - pub fn column(mut self, yes: bool) -> Printer { - self.column = yes; - self - } - - /// Set the context separator. The default is `--`. - pub fn context_separator(mut self, sep: Vec) -> Printer { - self.context_separator = sep; - self - } - - /// Set the end-of-line terminator. The default is `\n`. - pub fn eol(mut self, eol: u8) -> Printer { - self.eol = eol; - self - } - - /// If set, the separator is printed before any matches. By default, no - /// separator is printed. - pub fn file_separator(mut self, sep: Vec) -> Printer { - self.file_separator = Some(sep); - self - } - - /// Whether to show file name as a heading or not. - /// - /// N.B. If with_filename is false, then this setting has no effect. - pub fn heading(mut self, yes: bool) -> Printer { - self.heading = yes; - self - } - - /// Whether to show every match on its own line. - pub fn line_per_match(mut self, yes: bool) -> Printer { - self.line_per_match = yes; - self - } - - /// Whether to cause NUL bytes to follow file paths instead of other - /// visual separators (like `:`, `-` and `\n`). - pub fn null(mut self, yes: bool) -> Printer { - self.null = yes; - self - } - - /// Print only the matched (non-empty) parts of a matching line - pub fn only_matching(mut self, yes: bool) -> Printer { - self.only_matching = yes; - self - } - - /// A separator to use when printing file paths. 
When empty, use the - /// default separator for the current platform. (/ on Unix, \ on Windows.) - pub fn path_separator(mut self, sep: Option) -> Printer { - self.path_separator = sep; - self - } - - /// Replace every match in each matching line with the replacement string - /// given. - pub fn replace(mut self, replacement: Vec) -> Printer { - self.replace = Some(replacement); - self - } - - /// When set, each match is prefixed with the file name that it came from. - pub fn with_filename(mut self, yes: bool) -> Printer { - self.with_filename = yes; - self - } - - /// Configure the max. number of columns used for printing matching lines. - pub fn max_columns(mut self, max_columns: Option) -> Printer { - self.max_columns = max_columns; - self - } - - /// Returns true if and only if something has been printed. - pub fn has_printed(&self) -> bool { - self.has_printed - } - - /// Flushes the underlying writer and returns it. - #[allow(dead_code)] - pub fn into_inner(mut self) -> W { - let _ = self.wtr.flush(); - self.wtr - } - - /// Prints a type definition. - pub fn type_def(&mut self, def: &FileTypeDef) { - self.write(def.name().as_bytes()); - self.write(b": "); - let mut first = true; - for glob in def.globs() { - if !first { - self.write(b", "); - } - self.write(glob.as_bytes()); - first = false; - } - self.write_eol(); - } - - /// Prints the given path. - pub fn path>(&mut self, path: P) { - let path = strip_prefix("./", path.as_ref()).unwrap_or(path.as_ref()); - self.write_path(path); - self.write_path_eol(); - } - - /// Prints the given path and a count of the number of matches found. - pub fn path_count>(&mut self, path: P, count: u64) { - if self.with_filename { - self.write_path(path); - self.write_path_sep(b':'); - } - self.write(count.to_string().as_bytes()); - self.write_eol(); - } - - /// Prints the context separator. 
- pub fn context_separate(&mut self) { - if self.context_separator.is_empty() { - return; - } - let _ = self.wtr.write_all(&self.context_separator); - self.write_eol(); - } - - pub fn matched>( - &mut self, - re: &Regex, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - ) { - if !self.line_per_match && !self.only_matching { - let mat = re - .find(&buf[start..end]) - .map(|m| (m.start(), m.end())) - .unwrap_or((0, 0)); - return self.write_match( - re, path, buf, start, end, line_number, mat.0, mat.1); - } - for m in re.find_iter(&buf[start..end]) { - self.write_match( - re, path.as_ref(), buf, start, end, - line_number, m.start(), m.end()); - } - } - - fn write_match>( - &mut self, - re: &Regex, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - match_start: usize, - match_end: usize, - ) { - if self.heading && self.with_filename && !self.has_printed { - self.write_file_sep(); - self.write_path(path); - self.write_path_eol(); - } else if !self.heading && self.with_filename { - self.write_path(path); - self.write_path_sep(b':'); - } - if let Some(line_number) = line_number { - self.line_number(line_number, b':'); - } - if self.column { - self.column_number(match_start as u64 + 1, b':'); - } - if self.replace.is_some() { - let mut count = 0; - let line = { - let replacer = CountingReplacer::new( - self.replace.as_ref().unwrap(), &mut count); - re.replace_all(&buf[start..end], replacer) - }; - if self.max_columns.map_or(false, |m| line.len() > m) { - let msg = format!( - "[Omitted long line with {} replacements]", count); - self.write_colored(msg.as_bytes(), |colors| colors.matched()); - self.write_eol(); - return; - } - self.write(&line); - if line.last() != Some(&self.eol) { - self.write_eol(); - } - } else { - if self.only_matching { - let buf = &buf[start + match_start..start + match_end]; - self.write_matched_line(re, buf, true); - } else { - self.write_matched_line(re, &buf[start..end], false); - } - } - 
} - - fn write_matched_line( - &mut self, - re: &Regex, - buf: &[u8], - only_match: bool, - ) { - if self.max_columns.map_or(false, |m| buf.len() > m) { - let count = re.find_iter(buf).count(); - let msg = format!("[Omitted long line with {} matches]", count); - self.write_colored(msg.as_bytes(), |colors| colors.matched()); - self.write_eol(); - return; - } - if !self.wtr.supports_color() || self.colors.matched().is_none() { - self.write(buf); - } else if only_match { - self.write_colored(buf, |colors| colors.matched()); - } else { - let mut last_written = 0; - for m in re.find_iter(buf) { - self.write(&buf[last_written..m.start()]); - self.write_colored( - &buf[m.start()..m.end()], |colors| colors.matched()); - last_written = m.end(); - } - self.write(&buf[last_written..]); - } - if buf.last() != Some(&self.eol) { - self.write_eol(); - } - } - - pub fn context>( - &mut self, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - ) { - if self.heading && self.with_filename && !self.has_printed { - self.write_file_sep(); - self.write_path(path); - self.write_path_eol(); - } else if !self.heading && self.with_filename { - self.write_path(path); - self.write_path_sep(b'-'); - } - if let Some(line_number) = line_number { - self.line_number(line_number, b'-'); - } - if self.max_columns.map_or(false, |m| end - start > m) { - self.write(format!("[Omitted long context line]").as_bytes()); - self.write_eol(); - return; - } - self.write(&buf[start..end]); - if buf[start..end].last() != Some(&self.eol) { - self.write_eol(); - } - } - - fn separator(&mut self, sep: &[u8]) { - self.write(&sep); - } - - fn write_path_sep(&mut self, sep: u8) { - if self.null { - self.write(b"\x00"); - } else { - self.separator(&[sep]); - } - } - - fn write_path_eol(&mut self) { - if self.null { - self.write(b"\x00"); - } else { - self.write_eol(); - } - } - - #[cfg(unix)] - fn write_path>(&mut self, path: P) { - use std::os::unix::ffi::OsStrExt; - let path = 
path.as_ref().as_os_str().as_bytes(); - self.write_path_replace_separator(path); - } - - #[cfg(not(unix))] - fn write_path>(&mut self, path: P) { - let path = path.as_ref().to_string_lossy(); - self.write_path_replace_separator(path.as_bytes()); - } - - fn write_path_replace_separator(&mut self, path: &[u8]) { - match self.path_separator { - None => self.write_colored(path, |colors| colors.path()), - Some(sep) => { - let transformed_path: Vec<_> = path.iter().map(|&b| { - if b == b'/' || (cfg!(windows) && b == b'\\') { - sep - } else { - b - } - }).collect(); - self.write_colored(&transformed_path, |colors| colors.path()); - } - } - } - - fn line_number(&mut self, n: u64, sep: u8) { - self.write_colored(n.to_string().as_bytes(), |colors| colors.line()); - self.separator(&[sep]); - } - - fn column_number(&mut self, n: u64, sep: u8) { - self.write_colored(n.to_string().as_bytes(), |colors| colors.column()); - self.separator(&[sep]); - } - - fn write(&mut self, buf: &[u8]) { - self.has_printed = true; - let _ = self.wtr.write_all(buf); - } - - fn write_eol(&mut self) { - let eol = self.eol; - self.write(&[eol]); - } - - fn write_colored(&mut self, buf: &[u8], get_color: F) - where F: Fn(&ColorSpecs) -> &ColorSpec - { - let _ = self.wtr.set_color( get_color(&self.colors) ); - self.write(buf); - let _ = self.wtr.reset(); - } - - fn write_file_sep(&mut self) { - if let Some(ref sep) = self.file_separator { - self.has_printed = true; - let _ = self.wtr.write_all(sep); - let _ = self.wtr.write_all(b"\n"); - } - } -} - -/// An error that can occur when parsing color specifications. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Error { - /// This occurs when an unrecognized output type is used. - UnrecognizedOutType(String), - /// This occurs when an unrecognized spec type is used. - UnrecognizedSpecType(String), - /// This occurs when an unrecognized color name is used. - UnrecognizedColor(String, String), - /// This occurs when an unrecognized style attribute is used. 
- UnrecognizedStyle(String), - /// This occurs when the format of a color specification is invalid. - InvalidFormat(String), -} - -impl error::Error for Error { - fn description(&self) -> &str { - match *self { - Error::UnrecognizedOutType(_) => "unrecognized output type", - Error::UnrecognizedSpecType(_) => "unrecognized spec type", - Error::UnrecognizedColor(_, _) => "unrecognized color name", - Error::UnrecognizedStyle(_) => "unrecognized style attribute", - Error::InvalidFormat(_) => "invalid color spec", - } - } - - fn cause(&self) -> Option<&error::Error> { - None - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::UnrecognizedOutType(ref name) => { - write!(f, "Unrecognized output type '{}'. Choose from: \ - path, line, column, match.", name) - } - Error::UnrecognizedSpecType(ref name) => { - write!(f, "Unrecognized spec type '{}'. Choose from: \ - fg, bg, style, none.", name) - } - Error::UnrecognizedColor(_, ref msg) => { - write!(f, "{}", msg) - } - Error::UnrecognizedStyle(ref name) => { - write!(f, "Unrecognized style attribute '{}'. Choose from: \ - nobold, bold, nointense, intense.", name) - } - Error::InvalidFormat(ref original) => { - write!( - f, - "Invalid color speci format: '{}'. Valid format \ - is '(path|line|column|match):(fg|bg|style):(value)'.", - original) - } - } - } -} - -impl From for Error { - fn from(err: ParseColorError) -> Error { - Error::UnrecognizedColor(err.invalid().to_string(), err.to_string()) - } -} - -/// A merged set of color specifications. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct ColorSpecs { - path: ColorSpec, - line: ColorSpec, - column: ColorSpec, - matched: ColorSpec, -} - -/// A single color specification provided by the user. -/// -/// A `ColorSpecs` can be built by merging a sequence of `Spec`s. -/// -/// ## Example -/// -/// The only way to build a `Spec` is to parse it from a string. 
Once multiple -/// `Spec`s have been constructed, then can be merged into a single -/// `ColorSpecs` value. -/// -/// ```rust -/// use termcolor::{Color, ColorSpecs, Spec}; -/// -/// let spec1: Spec = "path:fg:blue".parse().unwrap(); -/// let spec2: Spec = "match:bg:green".parse().unwrap(); -/// let specs = ColorSpecs::new(&[spec1, spec2]); -/// -/// assert_eq!(specs.path().fg(), Some(Color::Blue)); -/// assert_eq!(specs.matched().bg(), Some(Color::Green)); -/// ``` -/// -/// ## Format -/// -/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each -/// component is defined as follows: -/// -/// * `{type}` can be one of `path`, `line`, `column` or `match`. -/// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also -/// be the special value `none`, in which case, `{value}` can be omitted. -/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction. -/// -/// `{type}` controls which part of the output should be styled and is -/// application dependent. -/// -/// When `{attribute}` is `none`, then this should cause any existing color -/// settings to be cleared. -/// -/// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it -/// should be a style instruction when `{attribute}` is `style`. When -/// `{attribute}` is `none`, `{value}` must be omitted. -/// -/// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`, -/// `yellow`, `white`. -/// -/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Spec { - ty: OutType, - value: SpecValue, -} - -/// The actual value given by the specification. -#[derive(Clone, Debug, Eq, PartialEq)] -enum SpecValue { - None, - Fg(Color), - Bg(Color), - Style(Style), -} - -/// The set of configurable portions of ripgrep's output. -#[derive(Clone, Debug, Eq, PartialEq)] -enum OutType { - Path, - Line, - Column, - Match, -} - -/// The specification type. 
-#[derive(Clone, Debug, Eq, PartialEq)] -enum SpecType { - Fg, - Bg, - Style, - None, -} - -/// The set of available styles for use in the terminal. -#[derive(Clone, Debug, Eq, PartialEq)] -enum Style { - Bold, - NoBold, - Intense, - NoIntense, -} - -impl ColorSpecs { - /// Create color specifications from a list of user supplied - /// specifications. - pub fn new(user_specs: &[Spec]) -> ColorSpecs { - let mut specs = ColorSpecs::default(); - for user_spec in user_specs { - match user_spec.ty { - OutType::Path => user_spec.merge_into(&mut specs.path), - OutType::Line => user_spec.merge_into(&mut specs.line), - OutType::Column => user_spec.merge_into(&mut specs.column), - OutType::Match => user_spec.merge_into(&mut specs.matched), - } - } - specs - } - - /// Return the color specification for coloring file paths. - fn path(&self) -> &ColorSpec { - &self.path - } - - /// Return the color specification for coloring line numbers. - fn line(&self) -> &ColorSpec { - &self.line - } - - /// Return the color specification for coloring column numbers. - fn column(&self) -> &ColorSpec { - &self.column - } - - /// Return the color specification for coloring matched text. - fn matched(&self) -> &ColorSpec { - &self.matched - } -} - -impl Spec { - /// Merge this spec into the given color specification. - fn merge_into(&self, cspec: &mut ColorSpec) { - self.value.merge_into(cspec); - } -} - -impl SpecValue { - /// Merge this spec value into the given color specification. 
- fn merge_into(&self, cspec: &mut ColorSpec) { - match *self { - SpecValue::None => cspec.clear(), - SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); } - SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); } - SpecValue::Style(ref style) => { - match *style { - Style::Bold => { cspec.set_bold(true); } - Style::NoBold => { cspec.set_bold(false); } - Style::Intense => { cspec.set_intense(true); } - Style::NoIntense => { cspec.set_intense(false); } - } - } - } - } -} - -impl FromStr for Spec { - type Err = Error; - - fn from_str(s: &str) -> Result { - let pieces: Vec<&str> = s.split(':').collect(); - if pieces.len() <= 1 || pieces.len() > 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let otype: OutType = try!(pieces[0].parse()); - match try!(pieces[1].parse()) { - SpecType::None => Ok(Spec { ty: otype, value: SpecValue::None }), - SpecType::Style => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let style: Style = try!(pieces[2].parse()); - Ok(Spec { ty: otype, value: SpecValue::Style(style) }) - } - SpecType::Fg => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let color: Color = try!(pieces[2].parse()); - Ok(Spec { ty: otype, value: SpecValue::Fg(color) }) - } - SpecType::Bg => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let color: Color = try!(pieces[2].parse()); - Ok(Spec { ty: otype, value: SpecValue::Bg(color) }) - } - } - } -} - -impl FromStr for OutType { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "path" => Ok(OutType::Path), - "line" => Ok(OutType::Line), - "column" => Ok(OutType::Column), - "match" => Ok(OutType::Match), - _ => Err(Error::UnrecognizedOutType(s.to_string())), - } - } -} - -impl FromStr for SpecType { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "fg" => Ok(SpecType::Fg), - "bg" => Ok(SpecType::Bg), - 
"style" => Ok(SpecType::Style), - "none" => Ok(SpecType::None), - _ => Err(Error::UnrecognizedSpecType(s.to_string())), - } - } -} - -impl FromStr for Style { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "bold" => Ok(Style::Bold), - "nobold" => Ok(Style::NoBold), - "intense" => Ok(Style::Intense), - "nointense" => Ok(Style::NoIntense), - _ => Err(Error::UnrecognizedStyle(s.to_string())), - } - } -} - -#[cfg(test)] -mod tests { - use termcolor::{Color, ColorSpec}; - use super::{ColorSpecs, Error, OutType, Spec, SpecValue, Style}; - - #[test] - fn merge() { - let user_specs: &[Spec] = &[ - "match:fg:blue".parse().unwrap(), - "match:none".parse().unwrap(), - "match:style:bold".parse().unwrap(), - ]; - let mut expect_matched = ColorSpec::new(); - expect_matched.set_bold(true); - assert_eq!(ColorSpecs::new(user_specs), ColorSpecs { - path: ColorSpec::default(), - line: ColorSpec::default(), - column: ColorSpec::default(), - matched: expect_matched, - }); - } - - #[test] - fn specs() { - let spec: Spec = "path:fg:blue".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Path, - value: SpecValue::Fg(Color::Blue), - }); - - let spec: Spec = "path:bg:red".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Path, - value: SpecValue::Bg(Color::Red), - }); - - let spec: Spec = "match:style:bold".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Match, - value: SpecValue::Style(Style::Bold), - }); - - let spec: Spec = "match:style:intense".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Match, - value: SpecValue::Style(Style::Intense), - }); - - let spec: Spec = "line:none".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Line, - value: SpecValue::None, - }); - - let spec: Spec = "column:bg:green".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Column, - value: SpecValue::Bg(Color::Green), - }); - } - - #[test] - fn spec_errors() { - let err = "line:nonee".parse::().unwrap_err(); - 
assert_eq!(err, Error::UnrecognizedSpecType("nonee".to_string())); - - let err = "".parse::().unwrap_err(); - assert_eq!(err, Error::InvalidFormat("".to_string())); - - let err = "foo".parse::().unwrap_err(); - assert_eq!(err, Error::InvalidFormat("foo".to_string())); - - let err = "line:style:italic".parse::().unwrap_err(); - assert_eq!(err, Error::UnrecognizedStyle("italic".to_string())); - - let err = "line:fg:brown".parse::().unwrap_err(); - match err { - Error::UnrecognizedColor(name, _) => assert_eq!(name, "brown"), - err => assert!(false, "unexpected error: {:?}", err), - } - - let err = "foo:fg:brown".parse::().unwrap_err(); - assert_eq!(err, Error::UnrecognizedOutType("foo".to_string())); - } -} diff -Nru ripgrep-0.6.0/src/search_buffer.rs ripgrep-0.10.0.3/src/search_buffer.rs --- ripgrep-0.6.0/src/search_buffer.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/search_buffer.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,353 +0,0 @@ -/*! -The `search_buffer` module is responsible for searching a single file all in a -single buffer. Typically, the source of the buffer is a memory map. This can -be useful for when memory maps are faster than streaming search. - -Note that this module doesn't quite support everything that `search_stream` -does. Notably, showing contexts. 
-*/ -use std::cmp; -use std::path::Path; - -use grep::Grep; -use termcolor::WriteColor; - -use printer::Printer; -use search_stream::{IterLines, Options, count_lines, is_binary}; - -pub struct BufferSearcher<'a, W: 'a> { - opts: Options, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - buf: &'a [u8], - match_count: u64, - line_count: Option, - last_line: usize, -} - -impl<'a, W: WriteColor> BufferSearcher<'a, W> { - pub fn new( - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - buf: &'a [u8], - ) -> BufferSearcher<'a, W> { - BufferSearcher { - opts: Options::default(), - printer: printer, - grep: grep, - path: path, - buf: buf, - match_count: 0, - line_count: None, - last_line: 0, - } - } - - /// If enabled, searching will print a count instead of each match. - /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. - pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files that *don't* match - /// the given pattern. - /// - /// Disabled by default. - pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. 
- /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. - pub fn quiet(mut self, yes: bool) -> Self { - self.opts.quiet = yes; - self - } - - /// If enabled, search binary files as if they were text. - pub fn text(mut self, yes: bool) -> Self { - self.opts.text = yes; - self - } - - #[inline(never)] - pub fn run(mut self) -> u64 { - let binary_upto = cmp::min(10240, self.buf.len()); - if !self.opts.text && is_binary(&self.buf[..binary_upto], true) { - return 0; - } - - self.match_count = 0; - self.line_count = if self.opts.line_number { Some(0) } else { None }; - let mut last_end = 0; - for m in self.grep.iter(self.buf) { - if self.opts.invert_match { - self.print_inverted_matches(last_end, m.start()); - } else { - self.print_match(m.start(), m.end()); - } - last_end = m.end(); - if self.opts.terminate(self.match_count) { - break; - } - } - if self.opts.invert_match && !self.opts.terminate(self.match_count) { - let upto = self.buf.len(); - self.print_inverted_matches(last_end, upto); - } - if self.opts.count && self.match_count > 0 { - self.printer.path_count(self.path, self.match_count); - } - if self.opts.files_with_matches && self.match_count > 0 { - self.printer.path(self.path); - } - if self.opts.files_without_matches && self.match_count == 0 { - self.printer.path(self.path); - } - self.match_count - } - - #[inline(always)] - pub fn print_match(&mut self, start: usize, end: usize) { - self.match_count += 1; - if self.opts.skip_matches() { - return; - } - self.count_lines(start); - self.add_line(end); - self.printer.matched( - self.grep.regex(), self.path, self.buf, - start, end, self.line_count); - } - - #[inline(always)] - fn print_inverted_matches(&mut self, start: usize, end: usize) { - debug_assert!(self.opts.invert_match); - let mut it = 
IterLines::new(self.opts.eol, start); - while let Some((s, e)) = it.next(&self.buf[..end]) { - if self.opts.terminate(self.match_count) { - return; - } - self.print_match(s, e); - } - } - - #[inline(always)] - fn count_lines(&mut self, upto: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += count_lines( - &self.buf[self.last_line..upto], self.opts.eol); - self.last_line = upto; - } - } - - #[inline(always)] - fn add_line(&mut self, line_end: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += 1; - self.last_line = line_end; - } - } -} - -#[cfg(test)] -mod tests { - use std::path::Path; - - use grep::GrepBuilder; - - use printer::Printer; - use termcolor; - - use super::BufferSearcher; - - const SHERLOCK: &'static str = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -can extract a clew from a wisp of straw or a flake of cigar ash; -but Doctor Watson has to have it taken out for him and dusted, -and exhibited clearly, with a label attached.\ -"; - - fn test_path() -> &'static Path { - &Path::new("/baz.rs") - } - - type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor>>; - - fn search TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = BufferSearcher::new( - &mut pp, &grep, test_path(), haystack.as_bytes()); - map(searcher).run() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - #[test] - fn basic_search() { - let (count, out) = search("Sherlock", SHERLOCK, |s|s); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:be, to a very 
large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn binary() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s|s); - assert_eq!(0, count); - assert_eq!(out, ""); - } - - - #[test] - fn binary_text() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); - } - - #[test] - fn line_numbers() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.line_number(true)); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn count() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.count(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:2\n"); - } - - #[test] - fn files_with_matches() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); - assert_eq!(1, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn files_without_matches() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.files_without_matches(true)); - assert_eq!(0, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn max_count() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match_max_count() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.invert_match(true)); - assert_eq!(4, count); - assert_eq!(out, "\ 
-/baz.rs:Holmeses, success in the province of detective work must always -/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_line_numbers() { - let (count, out) = search("Sherlock", SHERLOCK, |s| { - s.invert_match(true).line_number(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_count() { - let (count, out) = search("Sherlock", SHERLOCK, |s| { - s.invert_match(true).count(true) - }); - assert_eq!(4, count); - assert_eq!(out, "/baz.rs:4\n"); - } -} diff -Nru ripgrep-0.6.0/src/search.rs ripgrep-0.10.0.3/src/search.rs --- ripgrep-0.6.0/src/search.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/src/search.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,488 @@ +use std::fs::File; +use std::io; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use grep::cli; +use grep::matcher::Matcher; +#[cfg(feature = "pcre2")] +use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher}; +use grep::printer::{JSON, Standard, Summary, Stats}; +use grep::regex::{RegexMatcher as RustRegexMatcher}; +use grep::searcher::Searcher; +use ignore::overrides::Override; +use serde_json as json; +use termcolor::WriteColor; + +use subject::Subject; + +/// The configuration for the search worker. Among a few other things, the +/// configuration primarily controls the way we show search results to users +/// at a very high level. 
+#[derive(Clone, Debug)] +struct Config { + json_stats: bool, + preprocessor: Option, + preprocessor_globs: Override, + search_zip: bool, +} + +impl Default for Config { + fn default() -> Config { + Config { + json_stats: false, + preprocessor: None, + preprocessor_globs: Override::empty(), + search_zip: false, + } + } +} + +/// A builder for configuring and constructing a search worker. +#[derive(Clone, Debug)] +pub struct SearchWorkerBuilder { + config: Config, + command_builder: cli::CommandReaderBuilder, + decomp_builder: cli::DecompressionReaderBuilder, +} + +impl Default for SearchWorkerBuilder { + fn default() -> SearchWorkerBuilder { + SearchWorkerBuilder::new() + } +} + +impl SearchWorkerBuilder { + /// Create a new builder for configuring and constructing a search worker. + pub fn new() -> SearchWorkerBuilder { + let mut cmd_builder = cli::CommandReaderBuilder::new(); + cmd_builder.async_stderr(true); + + let mut decomp_builder = cli::DecompressionReaderBuilder::new(); + decomp_builder.async_stderr(true); + + SearchWorkerBuilder { + config: Config::default(), + command_builder: cmd_builder, + decomp_builder: decomp_builder, + } + } + + /// Create a new search worker using the given searcher, matcher and + /// printer. + pub fn build( + &self, + matcher: PatternMatcher, + searcher: Searcher, + printer: Printer, + ) -> SearchWorker { + let config = self.config.clone(); + let command_builder = self.command_builder.clone(); + let decomp_builder = self.decomp_builder.clone(); + SearchWorker { + config, command_builder, decomp_builder, + matcher, searcher, printer, + } + } + + /// Forcefully use JSON to emit statistics, even if the underlying printer + /// is not the JSON printer. + /// + /// This is useful for implementing flag combinations like + /// `--json --quiet`, which uses the summary printer for implementing + /// `--quiet` but still wants to emit summary statistics, which should + /// be JSON formatted because of the `--json` flag. 
+ pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder { + self.config.json_stats = yes; + self + } + + /// Set the path to a preprocessor command. + /// + /// When this is set, instead of searching files directly, the given + /// command will be run with the file path as the first argument, and the + /// output of that command will be searched instead. + pub fn preprocessor( + &mut self, + cmd: Option, + ) -> &mut SearchWorkerBuilder { + self.config.preprocessor = cmd; + self + } + + /// Set the globs for determining which files should be run through the + /// preprocessor. By default, with no globs and a preprocessor specified, + /// every file is run through the preprocessor. + pub fn preprocessor_globs( + &mut self, + globs: Override, + ) -> &mut SearchWorkerBuilder { + self.config.preprocessor_globs = globs; + self + } + + /// Enable the decompression and searching of common compressed files. + /// + /// When enabled, if a particular file path is recognized as a compressed + /// file, then it is decompressed before searching. + /// + /// Note that if a preprocessor command is set, then it overrides this + /// setting. + pub fn search_zip(&mut self, yes: bool) -> &mut SearchWorkerBuilder { + self.config.search_zip = yes; + self + } +} + +/// The result of executing a search. +/// +/// Generally speaking, the "result" of a search is sent to a printer, which +/// writes results to an underlying writer such as stdout or a file. However, +/// every search also has some aggregate statistics or meta data that may be +/// useful to higher level routines. +#[derive(Clone, Debug, Default)] +pub struct SearchResult { + has_match: bool, + stats: Option, +} + +impl SearchResult { + /// Whether the search found a match or not. + pub fn has_match(&self) -> bool { + self.has_match + } + + /// Return aggregate search statistics for a single search, if available. 
+ /// + /// It can be expensive to compute statistics, so these are only present + /// if explicitly enabled in the printer provided by the caller. + pub fn stats(&self) -> Option<&Stats> { + self.stats.as_ref() + } +} + +/// The pattern matcher used by a search worker. +#[derive(Clone, Debug)] +pub enum PatternMatcher { + RustRegex(RustRegexMatcher), + #[cfg(feature = "pcre2")] + PCRE2(PCRE2RegexMatcher), +} + +/// The printer used by a search worker. +/// +/// The `W` type parameter refers to the type of the underlying writer. +#[derive(Debug)] +pub enum Printer { + /// Use the standard printer, which supports the classic grep-like format. + Standard(Standard), + /// Use the summary printer, which supports aggregate displays of search + /// results. + Summary(Summary), + /// A JSON printer, which emits results in the JSON Lines format. + JSON(JSON), +} + +impl Printer { + fn print_stats( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + match *self { + Printer::JSON(_) => { + self.print_stats_json(total_duration, stats) + } + Printer::Standard(_) | Printer::Summary(_) => { + self.print_stats_human(total_duration, stats) + } + } + } + + fn print_stats_human( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + write!( + self.get_mut(), + " +{matches} matches +{lines} matched lines +{searches_with_match} files contained matches +{searches} files searched +{bytes_printed} bytes printed +{bytes_searched} bytes searched +{search_time:0.6} seconds spent searching +{process_time:0.6} seconds +", + matches = stats.matches(), + lines = stats.matched_lines(), + searches_with_match = stats.searches_with_match(), + searches = stats.searches(), + bytes_printed = stats.bytes_printed(), + bytes_searched = stats.bytes_searched(), + search_time = fractional_seconds(stats.elapsed()), + process_time = fractional_seconds(total_duration) + ) + } + + fn print_stats_json( + &mut self, + total_duration: Duration, + stats: 
&Stats, + ) -> io::Result<()> { + // We specifically match the format laid out by the JSON printer in + // the grep-printer crate. We simply "extend" it with the 'summary' + // message type. + let fractional = fractional_seconds(total_duration); + json::to_writer(self.get_mut(), &json!({ + "type": "summary", + "data": { + "stats": stats, + "elapsed_total": { + "secs": total_duration.as_secs(), + "nanos": total_duration.subsec_nanos(), + "human": format!("{:0.6}s", fractional), + }, + } + }))?; + write!(self.get_mut(), "\n") + } + + /// Return a mutable reference to the underlying printer's writer. + pub fn get_mut(&mut self) -> &mut W { + match *self { + Printer::Standard(ref mut p) => p.get_mut(), + Printer::Summary(ref mut p) => p.get_mut(), + Printer::JSON(ref mut p) => p.get_mut(), + } + } +} + +/// A worker for executing searches. +/// +/// It is intended for a single worker to execute many searches, and is +/// generally intended to be used from a single thread. When searching using +/// multiple threads, it is better to create a new worker for each thread. +#[derive(Debug)] +pub struct SearchWorker { + config: Config, + command_builder: cli::CommandReaderBuilder, + decomp_builder: cli::DecompressionReaderBuilder, + matcher: PatternMatcher, + searcher: Searcher, + printer: Printer, +} + +impl SearchWorker { + /// Execute a search over the given subject. + pub fn search(&mut self, subject: &Subject) -> io::Result { + self.search_impl(subject) + } + + /// Return a mutable reference to the underlying printer. + pub fn printer(&mut self) -> &mut Printer { + &mut self.printer + } + + /// Print the given statistics to the underlying writer in a way that is + /// consistent with this searcher's printer's format. + /// + /// While `Stats` contains a duration itself, this only corresponds to the + /// time spent searching, where as `total_duration` should roughly + /// approximate the lifespan of the ripgrep process itself. 
+ pub fn print_stats( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + if self.config.json_stats { + self.printer().print_stats_json(total_duration, stats) + } else { + self.printer().print_stats(total_duration, stats) + } + } + + /// Search the given subject using the appropriate strategy. + fn search_impl(&mut self, subject: &Subject) -> io::Result { + let path = subject.path(); + if subject.is_stdin() { + let stdin = io::stdin(); + // A `return` here appeases the borrow checker. NLL will fix this. + return self.search_reader(path, stdin.lock()); + } else if self.should_preprocess(path) { + self.search_preprocessor(path) + } else if self.should_decompress(path) { + self.search_decompress(path) + } else { + self.search_path(path) + } + } + + /// Returns true if and only if the given file path should be + /// decompressed before searching. + fn should_decompress(&self, path: &Path) -> bool { + if !self.config.search_zip { + return false; + } + self.decomp_builder.get_matcher().has_command(path) + } + + /// Returns true if and only if the given file path should be run through + /// the preprocessor. + fn should_preprocess(&self, path: &Path) -> bool { + if !self.config.preprocessor.is_some() { + return false; + } + if self.config.preprocessor_globs.is_empty() { + return true; + } + !self.config.preprocessor_globs.matched(path, false).is_ignore() + } + + /// Search the given file path by first asking the preprocessor for the + /// data to search instead of opening the path directly. 
+ fn search_preprocessor( + &mut self, + path: &Path, + ) -> io::Result { + let bin = self.config.preprocessor.clone().unwrap(); + let mut cmd = Command::new(&bin); + cmd.arg(path).stdin(Stdio::from(File::open(path)?)); + + let rdr = self.command_builder.build(&mut cmd)?; + self.search_reader(path, rdr).map_err(|err| { + io::Error::new( + io::ErrorKind::Other, + format!("preprocessor command failed: '{:?}': {}", cmd, err), + ) + }) + } + + /// Attempt to decompress the data at the given file path and search the + /// result. If the given file path isn't recognized as a compressed file, + /// then search it without doing any decompression. + fn search_decompress( + &mut self, + path: &Path, + ) -> io::Result { + let rdr = self.decomp_builder.build(path)?; + self.search_reader(path, rdr) + } + + /// Search the contents of the given file path. + fn search_path(&mut self, path: &Path) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_path(m, searcher, printer, path), + #[cfg(feature = "pcre2")] + PCRE2(ref m) => search_path(m, searcher, printer, path), + } + } + + /// Executes a search on the given reader, which may or may not correspond + /// directly to the contents of the given file path. Instead, the reader + /// may actually cause something else to be searched (for example, when + /// a preprocessor is set or when decompression is enabled). In those + /// cases, the file path is used for visual purposes only. + /// + /// Generally speaking, this method should only be used when there is no + /// other choice. Searching via `search_path` provides more opportunities + /// for optimizations (such as memory maps). 
+ fn search_reader( + &mut self, + path: &Path, + rdr: R, + ) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr), + #[cfg(feature = "pcre2")] + PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr), + } + } +} + +/// Search the contents of the given file path using the given matcher, +/// searcher and printer. +fn search_path( + matcher: M, + searcher: &mut Searcher, + printer: &mut Printer, + path: &Path, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } + } +} + +/// Search the contents of the given reader using the given matcher, searcher +/// and printer. 
+fn search_reader( + matcher: M, + searcher: &mut Searcher, + printer: &mut Printer, + path: &Path, + rdr: R, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } + } +} + +/// Return the given duration as fractional seconds. +fn fractional_seconds(duration: Duration) -> f64 { + (duration.as_secs() as f64) + (duration.subsec_nanos() as f64 * 1e-9) +} diff -Nru ripgrep-0.6.0/src/search_stream.rs ripgrep-0.10.0.3/src/search_stream.rs --- ripgrep-0.6.0/src/search_stream.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/search_stream.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1329 +0,0 @@ -/*! -The `search_stream` module is responsible for searching a single file and -printing matches. In particular, it searches the file in a streaming fashion -using `read` calls and a (roughly) fixed size buffer. -*/ - -use std::cmp; -use std::error::Error as StdError; -use std::fmt; -use std::io; -use std::path::{Path, PathBuf}; - -use bytecount; -use grep::{Grep, Match}; -use memchr::{memchr, memrchr}; -use termcolor::WriteColor; - -use printer::Printer; - -/// The default read size (capacity of input buffer). -const READ_SIZE: usize = 8 * (1<<10); - -/// Error describes errors that can occur while searching. 
-#[derive(Debug)] -pub enum Error { - /// A standard I/O error attached to a particular file path. - Io { - err: io::Error, - path: PathBuf, - } -} - -impl Error { - fn from_io>(err: io::Error, path: P) -> Error { - Error::Io { err: err, path: path.as_ref().to_path_buf() } - } -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::Io { ref err, .. } => err.description(), - } - } - - fn cause(&self) -> Option<&StdError> { - match *self { - Error::Io { ref err, .. } => Some(err), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Io { ref err, ref path } => { - write!(f, "{}: {}", path.display(), err) - } - } - } -} - -pub struct Searcher<'a, R, W: 'a> { - opts: Options, - inp: &'a mut InputBuffer, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - haystack: R, - match_count: u64, - line_count: Option, - last_match: Match, - last_printed: usize, - last_line: usize, - after_context_remaining: usize, -} - -/// Options for configuring search. -#[derive(Clone)] -pub struct Options { - pub after_context: usize, - pub before_context: usize, - pub count: bool, - pub files_with_matches: bool, - pub files_without_matches: bool, - pub eol: u8, - pub invert_match: bool, - pub line_number: bool, - pub max_count: Option, - pub quiet: bool, - pub text: bool, -} - -impl Default for Options { - fn default() -> Options { - Options { - after_context: 0, - before_context: 0, - count: false, - files_with_matches: false, - files_without_matches: false, - eol: b'\n', - invert_match: false, - line_number: false, - max_count: None, - quiet: false, - text: false, - } - } - -} - -impl Options { - /// Several options (--quiet, --count, --files-with-matches, - /// --files-without-match) imply that we shouldn't ever display matches. 
- pub fn skip_matches(&self) -> bool { - self.count || self.files_with_matches || self.files_without_matches - || self.quiet - } - - /// Some options (--quiet, --files-with-matches, --files-without-match) - /// imply that we can stop searching after the first match. - pub fn stop_after_first_match(&self) -> bool { - self.files_with_matches || self.files_without_matches || self.quiet - } - - /// Returns true if the search should terminate based on the match count. - pub fn terminate(&self, match_count: u64) -> bool { - if match_count > 0 && self.stop_after_first_match() { - return true; - } - if self.max_count.map_or(false, |max| match_count >= max) { - return true; - } - false - } -} - -impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { - /// Create a new searcher. - /// - /// `inp` is a reusable input buffer that is used as scratch space by this - /// searcher. - /// - /// `printer` is used to output all results of searching. - /// - /// `grep` is the actual matcher. - /// - /// `path` is the file path being searched. - /// - /// `haystack` is a reader of text to search. - pub fn new( - inp: &'a mut InputBuffer, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - haystack: R, - ) -> Searcher<'a, R, W> { - Searcher { - opts: Options::default(), - inp: inp, - printer: printer, - grep: grep, - path: path, - haystack: haystack, - match_count: 0, - line_count: None, - last_match: Match::default(), - last_printed: 0, - last_line: 0, - after_context_remaining: 0, - } - } - - /// The number of contextual lines to show after each match. The default - /// is zero. - pub fn after_context(mut self, count: usize) -> Self { - self.opts.after_context = count; - self - } - - /// The number of contextual lines to show before each match. The default - /// is zero. - pub fn before_context(mut self, count: usize) -> Self { - self.opts.before_context = count; - self - } - - /// If enabled, searching will print a count instead of each match. 
- /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. - pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files without any matches. - /// - /// Disabled by default. - pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. - /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. - pub fn quiet(mut self, yes: bool) -> Self { - self.opts.quiet = yes; - self - } - - /// If enabled, search binary files as if they were text. - pub fn text(mut self, yes: bool) -> Self { - self.opts.text = yes; - self.inp.text(yes); - self - } - - /// Execute the search. Results are written to the printer and the total - /// number of matches is returned. 
- #[inline(never)] - pub fn run(mut self) -> Result { - self.inp.reset(); - self.match_count = 0; - self.line_count = if self.opts.line_number { Some(0) } else { None }; - self.last_match = Match::default(); - self.after_context_remaining = 0; - while !self.terminate() { - let upto = self.inp.lastnl; - self.print_after_context(upto); - if !try!(self.fill()) { - break; - } - while !self.terminate() && self.inp.pos < self.inp.lastnl { - let matched = self.grep.read_match( - &mut self.last_match, - &self.inp.buf[..self.inp.lastnl], - self.inp.pos); - if self.opts.invert_match { - let upto = - if matched { - self.last_match.start() - } else { - self.inp.lastnl - }; - if upto > self.inp.pos { - let upto_context = self.inp.pos; - self.print_after_context(upto_context); - self.print_before_context(upto_context); - self.print_inverted_matches(upto); - } - } else if matched { - let start = self.last_match.start(); - let end = self.last_match.end(); - self.print_after_context(start); - self.print_before_context(start); - self.print_match(start, end); - } - if matched { - self.inp.pos = self.last_match.end(); - } else { - self.inp.pos = self.inp.lastnl; - } - } - } - if self.match_count > 0 { - if self.opts.count { - self.printer.path_count(self.path, self.match_count); - } else if self.opts.files_with_matches { - self.printer.path(self.path); - } - } else if self.opts.files_without_matches { - self.printer.path(self.path); - } - Ok(self.match_count) - } - - #[inline(always)] - fn terminate(&self) -> bool { - self.opts.terminate(self.match_count) - } - - #[inline(always)] - fn fill(&mut self) -> Result { - let mut keep = self.inp.lastnl; - if self.opts.before_context > 0 || self.opts.after_context > 0 { - let lines = 1 + cmp::max( - self.opts.before_context, self.opts.after_context); - keep = start_of_previous_lines( - self.opts.eol, - &self.inp.buf, - self.inp.lastnl.saturating_sub(1), - lines); - } - if keep < self.last_printed { - self.last_printed -= keep; - } else { - 
self.last_printed = 0; - } - if keep <= self.last_line { - self.last_line -= keep; - } else { - self.count_lines(keep); - self.last_line = 0; - } - let ok = try!(self.inp.fill(&mut self.haystack, keep).map_err(|err| { - Error::from_io(err, &self.path) - })); - Ok(ok) - } - - #[inline(always)] - fn print_inverted_matches(&mut self, upto: usize) { - debug_assert!(self.opts.invert_match); - let mut it = IterLines::new(self.opts.eol, self.inp.pos); - while let Some((start, end)) = it.next(&self.inp.buf[..upto]) { - if self.terminate() { - return; - } - self.print_match(start, end); - self.inp.pos = end; - } - } - - #[inline(always)] - fn print_before_context(&mut self, upto: usize) { - if self.opts.skip_matches() || self.opts.before_context == 0 { - return; - } - let start = self.last_printed; - let end = upto; - if start >= end { - return; - } - let before_context_start = - start + start_of_previous_lines( - self.opts.eol, - &self.inp.buf[start..], - end - start - 1, - self.opts.before_context); - let mut it = IterLines::new(self.opts.eol, before_context_start); - while let Some((s, e)) = it.next(&self.inp.buf[..end]) { - self.print_separator(s); - self.print_context(s, e); - } - } - - #[inline(always)] - fn print_after_context(&mut self, upto: usize) { - if self.opts.skip_matches() || self.after_context_remaining == 0 { - return; - } - let start = self.last_printed; - let end = upto; - let mut it = IterLines::new(self.opts.eol, start); - while let Some((s, e)) = it.next(&self.inp.buf[..end]) { - self.print_context(s, e); - self.after_context_remaining -= 1; - if self.after_context_remaining == 0 { - break; - } - } - } - - #[inline(always)] - fn print_match(&mut self, start: usize, end: usize) { - self.match_count += 1; - if self.opts.skip_matches() { - return; - } - self.print_separator(start); - self.count_lines(start); - self.add_line(end); - self.printer.matched( - self.grep.regex(), self.path, - &self.inp.buf, start, end, self.line_count); - self.last_printed = 
end; - self.after_context_remaining = self.opts.after_context; - } - - #[inline(always)] - fn print_context(&mut self, start: usize, end: usize) { - self.count_lines(start); - self.add_line(end); - self.printer.context( - &self.path, &self.inp.buf, start, end, self.line_count); - self.last_printed = end; - } - - #[inline(always)] - fn print_separator(&mut self, before: usize) { - if self.opts.before_context == 0 && self.opts.after_context == 0 { - return; - } - if !self.printer.has_printed() { - return; - } - if (self.last_printed == 0 && before > 0) - || self.last_printed < before { - self.printer.context_separate(); - } - } - - #[inline(always)] - fn count_lines(&mut self, upto: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += count_lines( - &self.inp.buf[self.last_line..upto], self.opts.eol); - self.last_line = upto; - } - } - - #[inline(always)] - fn add_line(&mut self, line_end: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += 1; - self.last_line = line_end; - } - } -} - -/// `InputBuffer` encapsulates the logic of maintaining a ~fixed sized buffer -/// on which to search. There are three key pieces of complexity: -/// -/// 1. We must be able to handle lines that are longer than the size of the -/// buffer. For this reason, the buffer is allowed to expand (and is -/// therefore not technically fixed). Note that once a buffer expands, it -/// will never contract. -/// 2. The contents of the buffer may end with a partial line, so we must keep -/// track of where the last complete line ends. Namely, the partial line -/// is only completed on subsequent reads *after* searching up through -/// the last complete line is done. -/// 3. When printing the context of a match, the last N lines of the buffer -/// may need to be rolled over into the next buffer. 
For example, a match -/// may occur at the beginning of a buffer, in which case, lines at the end -/// of the previous contents of the buffer need to be printed. -/// -/// An `InputBuffer` is designed to be reused and isn't tied to any particular -/// reader. -pub struct InputBuffer { - /// The number of bytes to attempt to read at a time. Once set, this is - /// never changed. - read_size: usize, - /// The end-of-line terminator used in this buffer. - eol: u8, - /// A scratch buffer. - tmp: Vec, - /// A buffer to read bytes into. All searches are executed directly against - /// this buffer and pos/lastnl/end point into it. - buf: Vec, - /// The current position in buf. The current position represents where the - /// next search should start. - pos: usize, - /// The position immediately following the last line terminator in buf. - /// This may be equal to end. - /// - /// Searching should never cross this boundary. In particular, the contents - /// of the buffer following this position may correspond to *partial* line. - /// All contents before this position are complete lines. - lastnl: usize, - /// The end position of the buffer. Data after this position is not - /// specified. - end: usize, - /// Set to true if and only if no reads have occurred yet. - first: bool, - /// Set to true if all binary data should be treated as if it were text. - text: bool, -} - -impl InputBuffer { - /// Create a new buffer with a default capacity. - pub fn new() -> InputBuffer { - InputBuffer::with_capacity(READ_SIZE) - } - - /// Create a new buffer with the capacity given. - /// - /// The capacity determines the size of each read from the underlying - /// reader. - /// - /// `cap` must be a minimum of `1`. 
- pub fn with_capacity(mut cap: usize) -> InputBuffer { - if cap == 0 { - cap = 1; - } - InputBuffer { - read_size: cap, - eol: b'\n', - buf: vec![0; cap], - tmp: vec![], - pos: 0, - lastnl: 0, - end: 0, - first: true, - text: false, - } - } - - /// Set the end-of-line terminator used by this input buffer. - pub fn eol(&mut self, eol: u8) -> &mut Self { - self.eol = eol; - self - } - - /// If enabled, search binary files as if they were text. - /// - /// Note that this may cause the buffer to load the entire contents of a - /// file into memory. - pub fn text(&mut self, yes: bool) -> &mut Self { - self.text = yes; - self - } - - /// Resets this buffer so that it may be reused with a new reader. - fn reset(&mut self) { - self.pos = 0; - self.lastnl = 0; - self.end = 0; - self.first = true; - } - - /// Fill the contents of this buffer with the reader given. The reader - /// given should be the same in every call to fill unless reset has been - /// called. - /// - /// The bytes in buf[keep_from..end] are rolled over into the beginning - /// of the buffer. - fn fill( - &mut self, - rdr: &mut R, - keep_from: usize, - ) -> Result { - // Rollover bytes from buf[keep_from..end] and update our various - // pointers. N.B. This could be done with the ptr::copy, but I haven't - // been able to produce a benchmark that notices a difference in - // performance. (Invariably, ptr::copy is seems clearer IMO, but it is - // not safe.) - self.tmp.clear(); - self.tmp.extend_from_slice(&self.buf[keep_from..self.end]); - self.buf[0..self.tmp.len()].copy_from_slice(&self.tmp); - self.pos = self.lastnl - keep_from; - self.lastnl = 0; - self.end = self.tmp.len(); - while self.lastnl == 0 { - // If our buffer isn't big enough to hold the contents of a full - // read, expand it. 
- if self.buf.len() - self.end < self.read_size { - let min_len = self.read_size + self.buf.len() - self.end; - let new_len = cmp::max(min_len, self.buf.len() * 2); - self.buf.resize(new_len, 0); - } - let n = try!(rdr.read( - &mut self.buf[self.end..self.end + self.read_size])); - if !self.text { - if is_binary(&self.buf[self.end..self.end + n], self.first) { - return Ok(false); - } - } - self.first = false; - // We assume that reading 0 bytes means we've hit EOF. - if n == 0 { - // If we've searched everything up to the end of the buffer, - // then there's nothing left to do. - if self.end - self.pos == 0 { - return Ok(false); - } - // Even if we hit EOF, we might still have to search the - // last line if it didn't contain a trailing terminator. - self.lastnl = self.end; - break; - } - self.lastnl = - memrchr(self.eol, &self.buf[self.end..self.end + n]) - .map(|i| self.end + i + 1) - .unwrap_or(0); - self.end += n; - } - Ok(true) - } -} - -/// Returns true if and only if the given buffer is determined to be "binary" -/// or otherwise not contain text data that is usefully searchable. -/// -/// Note that this may return both false positives and false negatives. -#[inline(always)] -pub fn is_binary(buf: &[u8], first: bool) -> bool { - if first && buf.len() >= 4 && &buf[0..4] == b"%PDF" { - return true; - } - memchr(b'\x00', buf).is_some() -} - -/// Count the number of lines in the given buffer. -#[inline(never)] -pub fn count_lines(buf: &[u8], eol: u8) -> u64 { - bytecount::count(buf, eol) as u64 -} - -/// Replaces a with b in buf. -#[allow(dead_code)] -fn replace_buf(buf: &mut [u8], a: u8, b: u8) { - if a == b { - return; - } - let mut pos = 0; - while let Some(i) = memchr(a, &buf[pos..]).map(|i| pos + i) { - buf[i] = b; - pos = i + 1; - while buf.get(pos) == Some(&a) { - buf[pos] = b; - pos += 1; - } - } -} - -/// An "iterator" over lines in a particular buffer. 
-/// -/// Idiomatic Rust would borrow the buffer and use it as internal state to -/// advance over the positions of each line. We neglect that approach to avoid -/// the borrow in the search code. (Because the borrow prevents composition -/// through other mutable methods.) -pub struct IterLines { - eol: u8, - pos: usize, -} - -impl IterLines { - /// Creates a new iterator over lines starting at the position given. - /// - /// The buffer is passed to the `next` method. - #[inline(always)] - pub fn new(eol: u8, start: usize) -> IterLines { - IterLines { - eol: eol, - pos: start, - } - } - - /// Return the start and end position of the next line in the buffer. The - /// buffer given should be the same on every call. - /// - /// The range returned includes the new line. - #[inline(always)] - pub fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> { - match memchr(self.eol, &buf[self.pos..]) { - None => { - if self.pos < buf.len() { - let start = self.pos; - self.pos = buf.len(); - Some((start, buf.len())) - } else { - None - } - } - Some(end) => { - let start = self.pos; - let end = self.pos + end + 1; - self.pos = end; - Some((start, end)) - } - } - } -} - -/// Returns the starting index of the Nth line preceding `end`. -/// -/// If `buf` is empty, then `0` is returned. If `count` is `0`, then `end` is -/// returned. -/// -/// If `end` points at a new line in `buf`, then searching starts as if `end` -/// pointed immediately before the new line. -/// -/// The position returned corresponds to the first byte in the given line. -#[inline(always)] -fn start_of_previous_lines( - eol: u8, - buf: &[u8], - mut end: usize, - mut count: usize, -) -> usize { - // TODO(burntsushi): This function needs to be badly simplified. The case - // analysis is impossible to follow. 
- if buf[..end].is_empty() { - return 0; - } - if count == 0 { - return end; - } - if end == buf.len() { - end -= 1; - } - if buf[end] == eol { - if end == 0 { - return end + 1; - } - end -= 1; - } - while count > 0 { - if buf[end] == eol { - count -= 1; - if count == 0 { - return end + 1; - } - if end == 0 { - return end; - } - end -= 1; - continue; - } - match memrchr(eol, &buf[..end]) { - None => { - return 0; - } - Some(i) => { - count -= 1; - end = i; - if end == 0 { - if buf[end] == eol && count == 0 { - end += 1; - } - return end; - } - end -= 1; - } - } - } - end + 2 -} - -#[cfg(test)] -mod tests { - use std::io; - use std::path::Path; - - use grep::GrepBuilder; - use printer::Printer; - use termcolor; - - use super::{InputBuffer, Searcher, start_of_previous_lines}; - - const SHERLOCK: &'static str = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -can extract a clew from a wisp of straw or a flake of cigar ash; -but Doctor Watson has to have it taken out for him and dusted, -and exhibited clearly, with a label attached.\ -"; - - const CODE: &'static str = "\ -extern crate snap; - -use std::io; - -fn main() { - let stdin = io::stdin(); - let stdout = io::stdout(); - - // Wrap the stdin reader in a Snappy reader. 
- let mut rdr = snap::Reader::new(stdin.lock()); - let mut wtr = stdout.lock(); - io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); -} -"; - - fn hay(s: &str) -> io::Cursor> { - io::Cursor::new(s.to_string().into_bytes()) - } - - fn test_path() -> &'static Path { - &Path::new("/baz.rs") - } - - type TestSearcher<'a> = Searcher< - 'a, - io::Cursor>, - termcolor::NoColor>, - >; - - fn search_smallcap TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let mut inp = InputBuffer::with_capacity(1); - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = Searcher::new( - &mut inp, &mut pp, &grep, test_path(), hay(haystack)); - map(searcher).run().unwrap() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - fn search TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let mut inp = InputBuffer::with_capacity(4096); - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = Searcher::new( - &mut inp, &mut pp, &grep, test_path(), hay(haystack)); - map(searcher).run().unwrap() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - #[test] - fn previous_lines() { - let eol = b'\n'; - let text = SHERLOCK.as_bytes(); - assert_eq!(366, text.len()); - - assert_eq!(0, start_of_previous_lines(eol, text, 366, 100)); - assert_eq!(366, start_of_previous_lines(eol, text, 366, 0)); - - assert_eq!(321, start_of_previous_lines(eol, text, 366, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 365, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 364, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 322, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 321, 1)); 
- assert_eq!(258, start_of_previous_lines(eol, text, 320, 1)); - - assert_eq!(258, start_of_previous_lines(eol, text, 366, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 365, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 364, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 322, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 321, 2)); - assert_eq!(193, start_of_previous_lines(eol, text, 320, 2)); - - assert_eq!(65, start_of_previous_lines(eol, text, 66, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 66, 2)); - assert_eq!(64, start_of_previous_lines(eol, text, 64, 0)); - assert_eq!(0, start_of_previous_lines(eol, text, 64, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 64, 2)); - - assert_eq!(0, start_of_previous_lines(eol, text, 0, 2)); - assert_eq!(0, start_of_previous_lines(eol, text, 0, 1)); - } - - #[test] - fn previous_lines_short() { - let eol = b'\n'; - let text = &b"a\nb\nc\nd\ne\nf\n"[..]; - assert_eq!(12, text.len()); - - assert_eq!(10, start_of_previous_lines(eol, text, 12, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 12, 2)); - assert_eq!(6, start_of_previous_lines(eol, text, 12, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 12, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 12, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 12, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 12, 7)); - assert_eq!(10, start_of_previous_lines(eol, text, 11, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 11, 2)); - assert_eq!(6, start_of_previous_lines(eol, text, 11, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 11, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 11, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 11, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 11, 7)); - assert_eq!(10, start_of_previous_lines(eol, text, 10, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 10, 2)); - assert_eq!(6, 
start_of_previous_lines(eol, text, 10, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 10, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 10, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 7)); - - assert_eq!(8, start_of_previous_lines(eol, text, 9, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 8, 1)); - - assert_eq!(6, start_of_previous_lines(eol, text, 7, 1)); - assert_eq!(6, start_of_previous_lines(eol, text, 6, 1)); - - assert_eq!(4, start_of_previous_lines(eol, text, 5, 1)); - assert_eq!(4, start_of_previous_lines(eol, text, 4, 1)); - - assert_eq!(2, start_of_previous_lines(eol, text, 3, 1)); - assert_eq!(2, start_of_previous_lines(eol, text, 2, 1)); - - assert_eq!(0, start_of_previous_lines(eol, text, 1, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 0, 1)); - } - - #[test] - fn previous_lines_empty() { - let eol = b'\n'; - let text = &b"\n\n\nd\ne\nf\n"[..]; - assert_eq!(9, text.len()); - - assert_eq!(7, start_of_previous_lines(eol, text, 9, 1)); - assert_eq!(5, start_of_previous_lines(eol, text, 9, 2)); - assert_eq!(3, start_of_previous_lines(eol, text, 9, 3)); - assert_eq!(2, start_of_previous_lines(eol, text, 9, 4)); - assert_eq!(1, start_of_previous_lines(eol, text, 9, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 9, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 9, 7)); - - let text = &b"a\n\n\nd\ne\nf\n"[..]; - assert_eq!(10, text.len()); - - assert_eq!(8, start_of_previous_lines(eol, text, 10, 1)); - assert_eq!(6, start_of_previous_lines(eol, text, 10, 2)); - assert_eq!(4, start_of_previous_lines(eol, text, 10, 3)); - assert_eq!(3, start_of_previous_lines(eol, text, 10, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 10, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 7)); - } - - #[test] - fn basic_search1() { - let (count, out) = 
search_smallcap("Sherlock", SHERLOCK, |s|s); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn binary() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s|s); - assert_eq!(0, count); - assert_eq!(out, ""); - } - - #[test] - fn binary_text() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); - } - - #[test] - fn line_numbers() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.line_number(true)); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn count() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.count(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:2\n"); - } - - #[test] - fn files_with_matches() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); - assert_eq!(1, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn files_without_matches() { - let (count, out) = search_smallcap( - "zzzz", SHERLOCK, |s| s.files_without_matches(true)); - assert_eq!(0, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn max_count() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match_max_count() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the 
Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.invert_match(true)); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:Holmeses, success in the province of detective work must always -/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_line_numbers() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.invert_match(true).line_number(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_count() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.invert_match(true).count(true) - }); - assert_eq!(4, count); - assert_eq!(out, "/baz.rs:4\n"); - } - - #[test] - fn before_context_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(1) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. 
Sherlock Holmes -"); - } - - #[test] - fn before_context_invert_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(1).invert_match(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs-1-For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn before_context_invert_one2() { - let (count, out) = search_smallcap(" a ", SHERLOCK, |s| { - s.line_number(true).before_context(1).invert_match(true) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always --- -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn before_context_two1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn before_context_two2() { - let (count, out) = search_smallcap("dusted", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs-3-be, to a very large extent, the result of luck. 
Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn before_context_two3() { - let (count, out) = search_smallcap( - "success|attached", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-1-For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always --- -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn before_context_two4() { - let (count, out) = search("stdin", CODE, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs-4- -/baz.rs-5-fn main() { -/baz.rs:6: let stdin = io::stdin(); -/baz.rs-7- let stdout = io::stdout(); -/baz.rs-8- -/baz.rs:9: // Wrap the stdin reader in a Snappy reader. -/baz.rs:10: let mut rdr = snap::Reader::new(stdin.lock()); -"); - } - - #[test] - fn before_context_two5() { - let (count, out) = search("stdout", CODE, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-5-fn main() { -/baz.rs-6- let stdin = io::stdin(); -/baz.rs:7: let stdout = io::stdout(); --- -/baz.rs-9- // Wrap the stdin reader in a Snappy reader. 
-/baz.rs-10- let mut rdr = snap::Reader::new(stdin.lock()); -/baz.rs:11: let mut wtr = stdout.lock(); -"); - } - - #[test] - fn before_context_three1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(3) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn after_context_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(1) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -"); - } - - #[test] - fn after_context_invert_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(1).invert_match(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. 
-"); - } - - #[test] - fn after_context_invert_one2() { - let (count, out) = search_smallcap(" a ", SHERLOCK, |s| { - s.line_number(true).after_context(1).invert_match(true) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes --- -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_two1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn after_context_two2() { - let (count, out) = search_smallcap("dusted", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_two3() { - let (count, out) = search_smallcap( - "success|attached", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. 
Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; --- -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_three1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(3) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn before_after_context_two1() { - let (count, out) = search( - r"fn main|let mut rdr", CODE, |s| { - s.line_number(true).after_context(2).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-3-use std::io; -/baz.rs-4- -/baz.rs:5:fn main() { -/baz.rs-6- let stdin = io::stdin(); -/baz.rs-7- let stdout = io::stdout(); -/baz.rs-8- -/baz.rs-9- // Wrap the stdin reader in a Snappy reader. -/baz.rs:10: let mut rdr = snap::Reader::new(stdin.lock()); -/baz.rs-11- let mut wtr = stdout.lock(); -/baz.rs-12- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); -"); - } -} diff -Nru ripgrep-0.6.0/src/subject.rs ripgrep-0.10.0.3/src/subject.rs --- ripgrep-0.6.0/src/subject.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/src/subject.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,138 @@ +use std::path::Path; + +use ignore::{self, DirEntry}; + +/// A configuration for describing how subjects should be built. 
+#[derive(Clone, Debug)] +struct Config { + strip_dot_prefix: bool, +} + +impl Default for Config { + fn default() -> Config { + Config { + strip_dot_prefix: false, + } + } +} + +/// A builder for constructing things to search over. +#[derive(Clone, Debug)] +pub struct SubjectBuilder { + config: Config, +} + +impl SubjectBuilder { + /// Return a new subject builder with a default configuration. + pub fn new() -> SubjectBuilder { + SubjectBuilder { config: Config::default() } + } + + /// Create a new subject from a possibly missing directory entry. + /// + /// If the directory entry isn't present, then the corresponding error is + /// logged if messages have been configured. Otherwise, if the subject is + /// deemed searchable, then it is returned. + pub fn build_from_result( + &self, + result: Result, + ) -> Option { + match result { + Ok(dent) => self.build(dent), + Err(err) => { + message!("{}", err); + None + } + } + } + + /// Create a new subject using this builder's configuration. + /// + /// If a subject could not be created or should otherwise not be searched, + /// then this returns `None` after emitting any relevant log messages. + pub fn build(&self, dent: DirEntry) -> Option { + let subj = Subject { + dent: dent, + strip_dot_prefix: self.config.strip_dot_prefix, + }; + if let Some(ignore_err) = subj.dent.error() { + ignore_message!("{}", ignore_err); + } + // If this entry represents stdin, then we always search it. + if subj.dent.is_stdin() { + return Some(subj); + } + // If this subject has a depth of 0, then it was provided explicitly + // by an end user (or via a shell glob). In this case, we always want + // to search it if it even smells like a file (e.g., a symlink). + if subj.dent.depth() == 0 && !subj.is_dir() { + return Some(subj); + } + // At this point, we only want to search something it's explicitly a + // file. This omits symlinks. 
(If ripgrep was configured to follow + // symlinks, then they have already been followed by the directory + // traversal.) + if subj.is_file() { + return Some(subj); + } + // We got nothin. Emit a debug message, but only if this isn't a + // directory. Otherwise, emitting messages for directories is just + // noisy. + if !subj.is_dir() { + debug!( + "ignoring {}: failed to pass subject filter: \ + file type: {:?}, metadata: {:?}", + subj.dent.path().display(), + subj.dent.file_type(), + subj.dent.metadata() + ); + } + None + } + + /// When enabled, if the subject's file path starts with `./` then it is + /// stripped. + /// + /// This is useful when implicitly searching the current working directory. + pub fn strip_dot_prefix(&mut self, yes: bool) -> &mut SubjectBuilder { + self.config.strip_dot_prefix = yes; + self + } +} + +/// A subject is a thing we want to search. Generally, a subject is either a +/// file or stdin. +#[derive(Clone, Debug)] +pub struct Subject { + dent: DirEntry, + strip_dot_prefix: bool, +} + +impl Subject { + /// Return the file path corresponding to this subject. + /// + /// If this subject corresponds to stdin, then a special `` path + /// is returned instead. + pub fn path(&self) -> &Path { + if self.strip_dot_prefix && self.dent.path().starts_with("./") { + self.dent.path().strip_prefix("./").unwrap() + } else { + self.dent.path() + } + } + + /// Returns true if and only if this entry corresponds to stdin. + pub fn is_stdin(&self) -> bool { + self.dent.is_stdin() + } + + /// Returns true if and only if this subject points to a directory. + fn is_dir(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_dir()) + } + + /// Returns true if and only if this subject points to a file. 
+ fn is_file(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_file()) + } +} diff -Nru ripgrep-0.6.0/src/unescape.rs ripgrep-0.10.0.3/src/unescape.rs --- ripgrep-0.6.0/src/unescape.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/unescape.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,128 +0,0 @@ -/// A single state in the state machine used by `unescape`. -#[derive(Clone, Copy, Eq, PartialEq)] -enum State { - /// The state after seeing a `\`. - Escape, - /// The state after seeing a `\x`. - HexFirst, - /// The state after seeing a `\x[0-9A-Fa-f]`. - HexSecond(char), - /// Default state. - Literal, -} - -/// Unescapes a string given on the command line. It supports a limited set of -/// escape sequences: -/// -/// * \t, \r and \n are mapped to their corresponding ASCII bytes. -/// * \xZZ hexadecimal escapes are mapped to their byte. -pub fn unescape(s: &str) -> Vec { - use self::State::*; - - let mut bytes = vec![]; - let mut state = Literal; - for c in s.chars() { - match state { - Escape => { - match c { - 'n' => { bytes.push(b'\n'); state = Literal; } - 'r' => { bytes.push(b'\r'); state = Literal; } - 't' => { bytes.push(b'\t'); state = Literal; } - 'x' => { state = HexFirst; } - c => { - bytes.extend(format!(r"\{}", c).into_bytes()); - state = Literal; - } - } - } - HexFirst => { - match c { - '0'...'9' | 'A'...'F' | 'a'...'f' => { - state = HexSecond(c); - } - c => { - bytes.extend(format!(r"\x{}", c).into_bytes()); - state = Literal; - } - } - } - HexSecond(first) => { - match c { - '0'...'9' | 'A'...'F' | 'a'...'f' => { - let ordinal = format!("{}{}", first, c); - let byte = u8::from_str_radix(&ordinal, 16).unwrap(); - bytes.push(byte); - state = Literal; - } - c => { - let original = format!(r"\x{}{}", first, c); - bytes.extend(original.into_bytes()); - state = Literal; - } - } - } - Literal => { - match c { - '\\' => { state = Escape; } - c => { bytes.extend(c.to_string().as_bytes()); } - } - } - } - } - match state { - Escape 
=> bytes.push(b'\\'), - HexFirst => bytes.extend(b"\\x"), - HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()), - Literal => {} - } - bytes -} - -#[cfg(test)] -mod tests { - use super::unescape; - - fn b(bytes: &'static [u8]) -> Vec { - bytes.to_vec() - } - - #[test] - fn unescape_nul() { - assert_eq!(b(b"\x00"), unescape(r"\x00")); - } - - #[test] - fn unescape_nl() { - assert_eq!(b(b"\n"), unescape(r"\n")); - } - - #[test] - fn unescape_tab() { - assert_eq!(b(b"\t"), unescape(r"\t")); - } - - #[test] - fn unescape_carriage() { - assert_eq!(b(b"\r"), unescape(r"\r")); - } - - #[test] - fn unescape_nothing_simple() { - assert_eq!(b(b"\\a"), unescape(r"\a")); - } - - #[test] - fn unescape_nothing_hex0() { - assert_eq!(b(b"\\x"), unescape(r"\x")); - } - - #[test] - fn unescape_nothing_hex1() { - assert_eq!(b(b"\\xz"), unescape(r"\xz")); - } - - #[test] - fn unescape_nothing_hex2() { - assert_eq!(b(b"\\xzz"), unescape(r"\xzz")); - } -} diff -Nru ripgrep-0.6.0/src/worker.rs ripgrep-0.10.0.3/src/worker.rs --- ripgrep-0.6.0/src/worker.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/src/worker.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,313 +0,0 @@ -use std::fs::File; -use std::io; -use std::path::Path; - -use encoding_rs::Encoding; -use grep::Grep; -use ignore::DirEntry; -use memmap::{Mmap, Protection}; -use termcolor::WriteColor; - -use decoder::DecodeReader; -use pathutil::strip_prefix; -use printer::Printer; -use search_buffer::BufferSearcher; -use search_stream::{InputBuffer, Searcher}; - -use Result; - -pub enum Work { - Stdin, - DirEntry(DirEntry), -} - -pub struct WorkerBuilder { - grep: Grep, - opts: Options, -} - -#[derive(Clone, Debug)] -struct Options { - mmap: bool, - encoding: Option<&'static Encoding>, - after_context: usize, - before_context: usize, - count: bool, - files_with_matches: bool, - files_without_matches: bool, - eol: u8, - invert_match: bool, - line_number: bool, - max_count: Option, - no_messages: bool, - quiet: bool, 
- text: bool, -} - -impl Default for Options { - fn default() -> Options { - Options { - mmap: false, - encoding: None, - after_context: 0, - before_context: 0, - count: false, - files_with_matches: false, - files_without_matches: false, - eol: b'\n', - invert_match: false, - line_number: false, - max_count: None, - no_messages: false, - quiet: false, - text: false, - } - } -} - -impl WorkerBuilder { - /// Create a new builder for a worker. - /// - /// A reusable input buffer and a grep matcher are required, but there - /// are numerous additional options that can be configured on this builder. - pub fn new(grep: Grep) -> WorkerBuilder { - WorkerBuilder { - grep: grep, - opts: Options::default(), - } - } - - /// Create the worker from this builder. - pub fn build(self) -> Worker { - let mut inpbuf = InputBuffer::new(); - inpbuf.eol(self.opts.eol); - Worker { - grep: self.grep, - inpbuf: inpbuf, - decodebuf: vec![0; 8 * (1<<10)], - opts: self.opts, - } - } - - /// The number of contextual lines to show after each match. The default - /// is zero. - pub fn after_context(mut self, count: usize) -> Self { - self.opts.after_context = count; - self - } - - /// The number of contextual lines to show before each match. The default - /// is zero. - pub fn before_context(mut self, count: usize) -> Self { - self.opts.before_context = count; - self - } - - /// If enabled, searching will print a count instead of each match. - /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// Set the encoding to use to read each file. - /// - /// If the encoding is `None` (the default), then the encoding is - /// automatically detected on a best-effort per-file basis. - pub fn encoding(mut self, enc: Option<&'static Encoding>) -> Self { - self.opts.encoding = enc; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. 
- pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files without any matches. - /// - /// Disabled by default. - pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. - /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, try to use memory maps for searching if possible. - pub fn mmap(mut self, yes: bool) -> Self { - self.opts.mmap = yes; - self - } - - /// If enabled, error messages are suppressed. - /// - /// This is disabled by default. - pub fn no_messages(mut self, yes: bool) -> Self { - self.opts.no_messages = yes; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. - pub fn quiet(mut self, yes: bool) -> Self { - self.opts.quiet = yes; - self - } - - /// If enabled, search binary files as if they were text. - pub fn text(mut self, yes: bool) -> Self { - self.opts.text = yes; - self - } -} - -/// Worker is responsible for executing searches on file paths, while choosing -/// streaming search or memory map search as appropriate. 
-pub struct Worker { - grep: Grep, - inpbuf: InputBuffer, - decodebuf: Vec, - opts: Options, -} - -impl Worker { - /// Execute the worker with the given printer and work item. - /// - /// A work item can either be stdin or a file path. - pub fn run( - &mut self, - printer: &mut Printer, - work: Work, - ) -> u64 { - let result = match work { - Work::Stdin => { - let stdin = io::stdin(); - let stdin = stdin.lock(); - self.search(printer, Path::new(""), stdin) - } - Work::DirEntry(dent) => { - let mut path = dent.path(); - let file = match File::open(path) { - Ok(file) => file, - Err(err) => { - if !self.opts.no_messages { - eprintln!("{}: {}", path.display(), err); - } - return 0; - } - }; - if let Some(p) = strip_prefix("./", path) { - path = p; - } - if self.opts.mmap { - self.search_mmap(printer, path, &file) - } else { - self.search(printer, path, file) - } - } - }; - match result { - Ok(count) => { - count - } - Err(err) => { - if !self.opts.no_messages { - eprintln!("{}", err); - } - 0 - } - } - } - - fn search( - &mut self, - printer: &mut Printer, - path: &Path, - rdr: R, - ) -> Result { - let rdr = DecodeReader::new( - rdr, &mut self.decodebuf, self.opts.encoding); - let searcher = Searcher::new( - &mut self.inpbuf, printer, &self.grep, path, rdr); - searcher - .after_context(self.opts.after_context) - .before_context(self.opts.before_context) - .count(self.opts.count) - .files_with_matches(self.opts.files_with_matches) - .files_without_matches(self.opts.files_without_matches) - .eol(self.opts.eol) - .line_number(self.opts.line_number) - .invert_match(self.opts.invert_match) - .max_count(self.opts.max_count) - .quiet(self.opts.quiet) - .text(self.opts.text) - .run() - .map_err(From::from) - } - - fn search_mmap( - &mut self, - printer: &mut Printer, - path: &Path, - file: &File, - ) -> Result { - if try!(file.metadata()).len() == 0 { - // Opening a memory map with an empty file results in an error. - // However, this may not actually be an empty file! 
For example, - // /proc/cpuinfo reports itself as an empty file, but it can - // produce data when it's read from. Therefore, we fall back to - // regular read calls. - return self.search(printer, path, file); - } - let mmap = try!(Mmap::open(file, Protection::Read)); - let buf = unsafe { mmap.as_slice() }; - if buf.len() >= 3 && Encoding::for_bom(buf).is_some() { - // If we have a UTF-16 bom in our memory map, then we need to fall - // back to the stream reader, which will do transcoding. - return self.search(printer, path, file); - } - let searcher = BufferSearcher::new(printer, &self.grep, path, buf); - Ok(searcher - .count(self.opts.count) - .files_with_matches(self.opts.files_with_matches) - .files_without_matches(self.opts.files_without_matches) - .eol(self.opts.eol) - .line_number(self.opts.line_number) - .invert_match(self.opts.invert_match) - .max_count(self.opts.max_count) - .quiet(self.opts.quiet) - .text(self.opts.text) - .run()) - } -} diff -Nru ripgrep-0.6.0/termcolor/Cargo.toml ripgrep-0.10.0.3/termcolor/Cargo.toml --- ripgrep-0.6.0/termcolor/Cargo.toml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/termcolor/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -[package] -name = "termcolor" -version = "0.3.2" #:version -authors = ["Andrew Gallant "] -description = """ -A simple cross platform library for writing colored text to a terminal. 
-""" -documentation = "https://docs.rs/termcolor" -homepage = "https://github.com/BurntSushi/ripgrep/tree/master/termcolor" -repository = "https://github.com/BurntSushi/ripgrep/tree/master/termcolor" -readme = "README.md" -keywords = ["windows", "win", "color", "ansi", "console"] -license = "Unlicense/MIT" - -[lib] -name = "termcolor" -bench = false - -[target.'cfg(windows)'.dependencies] -wincolor = { version = "0.1.3", path = "../wincolor" } diff -Nru ripgrep-0.6.0/termcolor/COPYING ripgrep-0.10.0.3/termcolor/COPYING --- ripgrep-0.6.0/termcolor/COPYING 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/termcolor/COPYING 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -This project is dual-licensed under the Unlicense and MIT licenses. - -You may use this code under the terms of either license. diff -Nru ripgrep-0.6.0/termcolor/LICENSE-MIT ripgrep-0.10.0.3/termcolor/LICENSE-MIT --- ripgrep-0.6.0/termcolor/LICENSE-MIT 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/termcolor/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Andrew Gallant - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff -Nru ripgrep-0.6.0/termcolor/README.md ripgrep-0.10.0.3/termcolor/README.md --- ripgrep-0.6.0/termcolor/README.md 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/termcolor/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -1,86 +1,2 @@ -termcolor -========= -A simple cross platform library for writing colored text to a terminal. This -library writes colored text either using standard ANSI escape sequences or -by interacting with the Windows console. Several convenient abstractions -are provided for use in single-threaded or multi-threaded command line -applications. - -[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) -[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) -[![](https://img.shields.io/crates/v/termcolor.svg)](https://crates.io/crates/termcolor) - -Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). - -### Documentation - -[https://docs.rs/termcolor](https://docs.rs/termcolor) - -### Usage - -Add this to your `Cargo.toml`: - -```toml -[dependencies] -termcolor = "0.3" -``` - -and this to your crate root: - -```rust -extern crate termcolor; -``` - -### Organization - -The `WriteColor` trait extends the `io::Write` trait with methods for setting -colors or resetting them. - -`StandardStream` and `StandardStreamLock` both satisfy `WriteColor` and are -analogous to `std::io::Stdout` and `std::io::StdoutLock`, or `std::io::Stderr` -and `std::io::StderrLock`. - -`Buffer` is an in memory buffer that supports colored text. In a parallel -program, each thread might write to its own buffer. 
A buffer can be printed to -stdout or stderr using a `BufferWriter`. The advantage of this design is that -each thread can work in parallel on a buffer without having to synchronize -access to global resources such as the Windows console. Moreover, this design -also prevents interleaving of buffer output. - -`Ansi` and `NoColor` both satisfy `WriteColor` for arbitrary implementors of -`io::Write`. These types are useful when you know exactly what you need. An -analogous type for the Windows console is not provided since it cannot exist. - -### Example: using `StandardStream` - -The `StandardStream` type in this crate works similarly to `std::io::Stdout`, -except it is augmented with methods for coloring by the `WriteColor` trait. -For example, to write some green text: - -```rust -use std::io::Write; -use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; - -let mut stdout = StandardStream::stdout(ColorChoice::Always); -try!(stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green)))); -try!(writeln!(&mut stdout, "green text!")); -``` - -### Example: using `BufferWriter` - -A `BufferWriter` can create buffers and write buffers to stdout or stderr. It -does *not* implement `io::Write` or `WriteColor` itself. Instead, `Buffer` -implements `io::Write` and `io::WriteColor`. - -This example shows how to print some green text to stderr. 
- -```rust -use std::io::Write; -use termcolor::{BufferWriter, Color, ColorChoice, ColorSpec, WriteColor}; - -let mut bufwtr = BufferWriter::stderr(ColorChoice::Always); -let mut buffer = bufwtr.buffer(); -try!(buffer.set_color(ColorSpec::new().set_fg(Some(Color::Green)))); -try!(writeln!(&mut buffer, "green text!")); -try!(bufwtr.print(&buffer)); -``` +termcolor has moved to its own repository: +https://github.com/BurntSushi/termcolor diff -Nru ripgrep-0.6.0/termcolor/src/lib.rs ripgrep-0.10.0.3/termcolor/src/lib.rs --- ripgrep-0.6.0/termcolor/src/lib.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/termcolor/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1322 +0,0 @@ -/*! -This crate provides a cross platform abstraction for writing colored text to -a terminal. Colors are written using either ANSI escape sequences or by -communicating with a Windows console. Much of this API was motivated by use -inside command line applications, where colors or styles can be configured -by the end user and/or the environment. - -This crate also provides platform independent support for writing colored text -to an in memory buffer. While this is easy to do with ANSI escape sequences -(because they are in the buffer themselves), it is trickier to do with the -Windows console API, which requires synchronous communication. - -# Organization - -The `WriteColor` trait extends the `io::Write` trait with methods for setting -colors or resetting them. - -`StandardStream` and `StandardStreamLock` both satisfy `WriteColor` and are -analogous to `std::io::Stdout` and `std::io::StdoutLock`, or `std::io::Stderr` -and `std::io::StderrLock`. - -`Buffer` is an in memory buffer that supports colored text. In a parallel -program, each thread might write to its own buffer. A buffer can be printed to -using a `BufferWriter`. 
The advantage of this design is that each thread can -work in parallel on a buffer without having to synchronize access to global -resources such as the Windows console. Moreover, this design also prevents -interleaving of buffer output. - -`Ansi` and `NoColor` both satisfy `WriteColor` for arbitrary implementors of -`io::Write`. These types are useful when you know exactly what you need. An -analogous type for the Windows console is not provided since it cannot exist. - -# Example: using `StandardStream` - -The `StandardStream` type in this crate works similarly to `std::io::Stdout`, -except it is augmented with methods for coloring by the `WriteColor` trait. -For example, to write some green text: - -```rust,no_run -# fn test() -> Result<(), Box<::std::error::Error>> { -use std::io::Write; -use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; - -let mut stdout = StandardStream::stdout(ColorChoice::Always); -try!(stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green)))); -try!(writeln!(&mut stdout, "green text!")); -# Ok(()) } -``` - -# Example: using `BufferWriter` - -A `BufferWriter` can create buffers and write buffers to stdout or stderr. It -does *not* implement `io::Write` or `WriteColor` itself. Instead, `Buffer` -implements `io::Write` and `io::WriteColor`. - -This example shows how to print some green text to stderr. 
- -```rust,no_run -# fn test() -> Result<(), Box<::std::error::Error>> { -use std::io::Write; -use termcolor::{BufferWriter, Color, ColorChoice, ColorSpec, WriteColor}; - -let mut bufwtr = BufferWriter::stderr(ColorChoice::Always); -let mut buffer = bufwtr.buffer(); -try!(buffer.set_color(ColorSpec::new().set_fg(Some(Color::Green)))); -try!(writeln!(&mut buffer, "green text!")); -try!(bufwtr.print(&buffer)); -# Ok(()) } -``` -*/ - -#![deny(missing_docs)] - -#[cfg(windows)] -extern crate wincolor; - -use std::env; -use std::error; -use std::fmt; -use std::io::{self, Write}; -use std::str::FromStr; -#[cfg(windows)] -use std::sync::{Mutex, MutexGuard}; -use std::sync::atomic::{AtomicBool, Ordering}; - -/// This trait describes the behavior of writers that support colored output. -pub trait WriteColor: io::Write { - /// Returns true if and only if the underlying writer supports colors. - fn supports_color(&self) -> bool; - - /// Set the color settings of the writer. - /// - /// Subsequent writes to this writer will use these settings until either - /// `reset` is called or new color settings are set. - /// - /// If there was a problem setting the color settings, then an error is - /// returned. - fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()>; - - /// Reset the current color settings to their original settings. - /// - /// If there was a problem resetting the color settings, then an error is - /// returned. - fn reset(&mut self) -> io::Result<()>; -} - -impl<'a, T: WriteColor> WriteColor for &'a mut T { - fn supports_color(&self) -> bool { (&**self).supports_color() } - fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - (&mut **self).set_color(spec) - } - fn reset(&mut self) -> io::Result<()> { (&mut **self).reset() } -} - -/// ColorChoice represents the color preferences of an end user. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum ColorChoice { - /// Try very hard to emit colors. 
This includes emitting ANSI colors - /// on Windows if the console API is unavailable. - Always, - /// AlwaysAnsi is like Always, except it never tries to use anything other - /// than emitting ANSI color codes. - AlwaysAnsi, - /// Try to use colors, but don't force the issue. If the console isn't - /// available on Windows, or if TERM=dumb, for example, then don't use - /// colors. - Auto, - /// Never emit colors. - Never, -} - -impl ColorChoice { - /// Returns true if we should attempt to write colored output. - #[cfg(not(windows))] - fn should_attempt_color(&self) -> bool { - match *self { - ColorChoice::Always => true, - ColorChoice::AlwaysAnsi => true, - ColorChoice::Never => false, - ColorChoice::Auto => { - match env::var("TERM") { - Err(_) => false, - Ok(k) => k != "dumb", - } - } - } - } - - /// Returns true if we should attempt to write colored output. - #[cfg(windows)] - fn should_attempt_color(&self) -> bool { - match *self { - ColorChoice::Always => true, - ColorChoice::AlwaysAnsi => true, - ColorChoice::Never => false, - ColorChoice::Auto => { - match env::var("TERM") { - Err(_) => true, - Ok(k) => k != "dumb", - } - } - } - } - - /// Returns true if this choice should forcefully use ANSI color codes. - /// - /// It's possible that ANSI is still the correct choice even if this - /// returns false. - #[cfg(windows)] - fn should_ansi(&self) -> bool { - match *self { - ColorChoice::Always => false, - ColorChoice::AlwaysAnsi => true, - ColorChoice::Never => false, - ColorChoice::Auto => { - match env::var("TERM") { - Err(_) => false, - // cygwin doesn't seem to support ANSI escape sequences - // and instead has its own variety. However, the Windows - // console API may be available. - Ok(k) => k != "dumb" && k != "cygwin", - } - } - } - } -} - -/// `std::io` implements `Stdout` and `Stderr` (and their `Lock` variants) as -/// separate types, which makes it difficult to abstract over them. We use -/// some simple internal enum types to work around this. 
- -enum StandardStreamType { - Stdout, - Stderr, -} - -enum IoStandardStream { - Stdout(io::Stdout), - Stderr(io::Stderr), -} - -impl IoStandardStream { - fn new(sty: StandardStreamType) -> IoStandardStream { - match sty { - StandardStreamType::Stdout => IoStandardStream::Stdout(io::stdout()), - StandardStreamType::Stderr => IoStandardStream::Stderr(io::stderr()), - } - } - - fn lock(&self) -> IoStandardStreamLock { - match *self { - IoStandardStream::Stdout(ref s) => IoStandardStreamLock::StdoutLock(s.lock()), - IoStandardStream::Stderr(ref s) => IoStandardStreamLock::StderrLock(s.lock()), - } - } -} - -impl io::Write for IoStandardStream { - fn write(&mut self, b: &[u8]) -> io::Result { - match *self { - IoStandardStream::Stdout(ref mut s) => s.write(b), - IoStandardStream::Stderr(ref mut s) => s.write(b), - } - } - - fn flush(&mut self) -> io::Result<()> { - match *self { - IoStandardStream::Stdout(ref mut s) => s.flush(), - IoStandardStream::Stderr(ref mut s) => s.flush(), - } - } -} - -/// Same rigamorale for the locked variants of the standard streams. - -enum IoStandardStreamLock<'a> { - StdoutLock(io::StdoutLock<'a>), - StderrLock(io::StderrLock<'a>), -} - -impl<'a> io::Write for IoStandardStreamLock<'a> { - fn write(&mut self, b: &[u8]) -> io::Result { - match *self { - IoStandardStreamLock::StdoutLock(ref mut s) => s.write(b), - IoStandardStreamLock::StderrLock(ref mut s) => s.write(b), - } - } - - fn flush(&mut self) -> io::Result<()> { - match *self { - IoStandardStreamLock::StdoutLock(ref mut s) => s.flush(), - IoStandardStreamLock::StderrLock(ref mut s) => s.flush(), - } - } -} - -/// Satisfies `io::Write` and `WriteColor`, and supports optional coloring -/// to either of the standard output streams, stdout and stderr. -pub struct StandardStream { - wtr: LossyStandardStream>, -} - -/// `StandardStreamLock` is a locked reference to a `StandardStream`. 
-/// -/// This implements the `io::Write` and `WriteColor` traits, and is constructed -/// via the `Write::lock` method. -/// -/// The lifetime `'a` refers to the lifetime of the corresponding `StandardStream`. -pub struct StandardStreamLock<'a> { - wtr: LossyStandardStream>>, -} - -/// WriterInner is a (limited) generic representation of a writer. It is -/// limited because W should only ever be stdout/stderr on Windows. -enum WriterInner<'a, W> { - NoColor(NoColor), - Ansi(Ansi), - /// What a gross hack. On Windows, we need to specify a lifetime for the - /// console when in a locked state, but obviously don't need to do that - /// on Unix, which make the `'a` unused. To satisfy the compiler, we need - /// a PhantomData. - #[allow(dead_code)] - Unreachable(::std::marker::PhantomData<&'a ()>), - #[cfg(windows)] - Windows { wtr: W, console: Mutex }, - #[cfg(windows)] - WindowsLocked { wtr: W, console: MutexGuard<'a, wincolor::Console> }, -} - -impl StandardStream { - /// Create a new `StandardStream` with the given color preferences. - /// - /// The specific color/style settings can be configured when writing via - /// the `WriteColor` trait. - #[cfg(not(windows))] - fn create(sty: StandardStreamType, choice: ColorChoice) -> StandardStream { - let wtr = - if choice.should_attempt_color() { - WriterInner::Ansi(Ansi(IoStandardStream::new(sty))) - } else { - WriterInner::NoColor(NoColor(IoStandardStream::new(sty))) - }; - StandardStream { wtr: LossyStandardStream::new(wtr) } - } - - /// Create a new `StandardStream` with the given color preferences. - /// - /// If coloring is desired and a Windows console could not be found, then - /// ANSI escape sequences are used instead. - /// - /// The specific color/style settings can be configured when writing via - /// the `WriteColor` trait. 
- #[cfg(windows)] - fn create(sty: StandardStreamType, choice: ColorChoice) -> StandardStream { - let con = match sty { - StandardStreamType::Stdout => wincolor::Console::stdout(), - StandardStreamType::Stderr => wincolor::Console::stderr(), - }; - let is_win_console = con.is_ok(); - let wtr = - if choice.should_attempt_color() { - if choice.should_ansi() { - WriterInner::Ansi(Ansi(IoStandardStream::new(sty))) - } else if let Ok(console) = con { - WriterInner::Windows { - wtr: IoStandardStream::new(sty), - console: Mutex::new(console), - } - } else { - WriterInner::Ansi(Ansi(IoStandardStream::new(sty))) - } - } else { - WriterInner::NoColor(NoColor(IoStandardStream::new(sty))) - }; - StandardStream { wtr: LossyStandardStream::new(wtr).is_console(is_win_console) } - } - - /// Create a new `StandardStream` with the given color preferences that - /// writes to standard output. - /// - /// On Windows, if coloring is desired and a Windows console could not be - /// found, then ANSI escape sequences are used instead. - /// - /// The specific color/style settings can be configured when writing via - /// the `WriteColor` trait. - pub fn stdout(choice: ColorChoice) -> StandardStream { - StandardStream::create(StandardStreamType::Stdout, choice) - } - - /// Create a new `StandardStream` with the given color preferences that - /// writes to standard error. - /// - /// On Windows, if coloring is desired and a Windows console could not be - /// found, then ANSI escape sequences are used instead. - /// - /// The specific color/style settings can be configured when writing via - /// the `WriteColor` trait. - pub fn stderr(choice: ColorChoice) -> StandardStream { - StandardStream::create(StandardStreamType::Stderr, choice) - } - - /// Lock the underlying writer. - /// - /// The lock guard returned also satisfies `io::Write` and - /// `WriteColor`. - /// - /// This method is **not reentrant**. It may panic if `lock` is called - /// while a `StandardStreamLock` is still alive. 
- pub fn lock(&self) -> StandardStreamLock { - StandardStreamLock::from_stream(self) - } -} - -impl<'a> StandardStreamLock<'a> { - #[cfg(not(windows))] - fn from_stream(stream: &StandardStream) -> StandardStreamLock { - let locked = match *stream.wtr.get_ref() { - WriterInner::Unreachable(_) => unreachable!(), - WriterInner::NoColor(ref w) => { - WriterInner::NoColor(NoColor(w.0.lock())) - } - WriterInner::Ansi(ref w) => { - WriterInner::Ansi(Ansi(w.0.lock())) - } - }; - StandardStreamLock { wtr: stream.wtr.wrap(locked) } - } - - #[cfg(windows)] - fn from_stream(stream: &StandardStream) -> StandardStreamLock { - let locked = match *stream.wtr.get_ref() { - WriterInner::Unreachable(_) => unreachable!(), - WriterInner::NoColor(ref w) => { - WriterInner::NoColor(NoColor(w.0.lock())) - } - WriterInner::Ansi(ref w) => { - WriterInner::Ansi(Ansi(w.0.lock())) - } - #[cfg(windows)] - WriterInner::Windows { ref wtr, ref console } => { - WriterInner::WindowsLocked { - wtr: wtr.lock(), - console: console.lock().unwrap(), - } - } - #[cfg(windows)] - WriterInner::WindowsLocked{..} => { - panic!("cannot call StandardStream.lock while a StandardStreamLock is alive"); - } - }; - StandardStreamLock { wtr: stream.wtr.wrap(locked) } - } -} - -impl io::Write for StandardStream { - fn write(&mut self, b: &[u8]) -> io::Result { self.wtr.write(b) } - fn flush(&mut self) -> io::Result<()> { self.wtr.flush() } -} - -impl WriteColor for StandardStream { - fn supports_color(&self) -> bool { self.wtr.supports_color() } - fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - self.wtr.set_color(spec) - } - fn reset(&mut self) -> io::Result<()> { self.wtr.reset() } -} - -impl<'a> io::Write for StandardStreamLock<'a> { - fn write(&mut self, b: &[u8]) -> io::Result { self.wtr.write(b) } - fn flush(&mut self) -> io::Result<()> { self.wtr.flush() } -} - -impl<'a> WriteColor for StandardStreamLock<'a> { - fn supports_color(&self) -> bool { self.wtr.supports_color() } - fn set_color(&mut 
self, spec: &ColorSpec) -> io::Result<()> { - self.wtr.set_color(spec) - } - fn reset(&mut self) -> io::Result<()> { self.wtr.reset() } -} - -impl<'a, W: io::Write> io::Write for WriterInner<'a, W> { - fn write(&mut self, buf: &[u8]) -> io::Result { - match *self { - WriterInner::Unreachable(_) => unreachable!(), - WriterInner::NoColor(ref mut wtr) => wtr.write(buf), - WriterInner::Ansi(ref mut wtr) => wtr.write(buf), - #[cfg(windows)] - WriterInner::Windows { ref mut wtr, .. } => wtr.write(buf), - #[cfg(windows)] - WriterInner::WindowsLocked { ref mut wtr, .. } => wtr.write(buf), - } - } - - fn flush(&mut self) -> io::Result<()> { - match *self { - WriterInner::Unreachable(_) => unreachable!(), - WriterInner::NoColor(ref mut wtr) => wtr.flush(), - WriterInner::Ansi(ref mut wtr) => wtr.flush(), - #[cfg(windows)] - WriterInner::Windows { ref mut wtr, .. } => wtr.flush(), - #[cfg(windows)] - WriterInner::WindowsLocked { ref mut wtr, .. } => wtr.flush(), - } - } -} - -impl<'a, W: io::Write> WriteColor for WriterInner<'a, W> { - fn supports_color(&self) -> bool { - match *self { - WriterInner::Unreachable(_) => unreachable!(), - WriterInner::NoColor(_) => false, - WriterInner::Ansi(_) => true, - #[cfg(windows)] - WriterInner::Windows { .. } => true, - #[cfg(windows)] - WriterInner::WindowsLocked { .. 
} => true, - } - } - - fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - match *self { - WriterInner::Unreachable(_) => unreachable!(), - WriterInner::NoColor(ref mut wtr) => wtr.set_color(spec), - WriterInner::Ansi(ref mut wtr) => wtr.set_color(spec), - #[cfg(windows)] - WriterInner::Windows { ref mut wtr, ref console } => { - try!(wtr.flush()); - let mut console = console.lock().unwrap(); - spec.write_console(&mut *console) - } - #[cfg(windows)] - WriterInner::WindowsLocked { ref mut wtr, ref mut console } => { - try!(wtr.flush()); - spec.write_console(console) - } - } - } - - fn reset(&mut self) -> io::Result<()> { - match *self { - WriterInner::Unreachable(_) => unreachable!(), - WriterInner::NoColor(ref mut wtr) => wtr.reset(), - WriterInner::Ansi(ref mut wtr) => wtr.reset(), - #[cfg(windows)] - WriterInner::Windows { ref mut wtr, ref mut console } => { - try!(wtr.flush()); - try!(console.lock().unwrap().reset()); - Ok(()) - } - #[cfg(windows)] - WriterInner::WindowsLocked { ref mut wtr, ref mut console } => { - try!(wtr.flush()); - try!(console.reset()); - Ok(()) - } - } - } -} - -/// Writes colored buffers to stdout or stderr. -/// -/// Writable buffers can be obtained by calling `buffer` on a `BufferWriter`. -/// -/// This writer works with terminals that support ANSI escape sequences or -/// with a Windows console. -/// -/// It is intended for a `BufferWriter` to be put in an `Arc` and written to -/// from multiple threads simultaneously. -pub struct BufferWriter { - stream: LossyStandardStream, - printed: AtomicBool, - separator: Option>, - color_choice: ColorChoice, - #[cfg(windows)] - console: Option>, -} - -impl BufferWriter { - /// Create a new `BufferWriter` that writes to a standard stream with the - /// given color preferences. - /// - /// The specific color/style settings can be configured when writing to - /// the buffers themselves. 
- #[cfg(not(windows))] - fn create(sty: StandardStreamType, choice: ColorChoice) -> BufferWriter { - BufferWriter { - stream: LossyStandardStream::new(IoStandardStream::new(sty)), - printed: AtomicBool::new(false), - separator: None, - color_choice: choice, - } - } - - /// Create a new `BufferWriter` that writes to a standard stream with the - /// given color preferences. - /// - /// If coloring is desired and a Windows console could not be found, then - /// ANSI escape sequences are used instead. - /// - /// The specific color/style settings can be configured when writing to - /// the buffers themselves. - #[cfg(windows)] - fn create(sty: StandardStreamType, choice: ColorChoice) -> BufferWriter { - let con = match sty { - StandardStreamType::Stdout => wincolor::Console::stdout(), - StandardStreamType::Stderr => wincolor::Console::stderr(), - }.ok().map(Mutex::new); - let stream = LossyStandardStream::new(IoStandardStream::new(sty)).is_console(con.is_some()); - BufferWriter { - stream: stream, - printed: AtomicBool::new(false), - separator: None, - color_choice: choice, - console: con, - } - } - - /// Create a new `BufferWriter` that writes to stdout with the given - /// color preferences. - /// - /// On Windows, if coloring is desired and a Windows console could not be - /// found, then ANSI escape sequences are used instead. - /// - /// The specific color/style settings can be configured when writing to - /// the buffers themselves. - pub fn stdout(choice: ColorChoice) -> BufferWriter { - BufferWriter::create(StandardStreamType::Stdout, choice) - } - - /// Create a new `BufferWriter` that writes to stderr with the given - /// color preferences. - /// - /// On Windows, if coloring is desired and a Windows console could not be - /// found, then ANSI escape sequences are used instead. - /// - /// The specific color/style settings can be configured when writing to - /// the buffers themselves. 
- pub fn stderr(choice: ColorChoice) -> BufferWriter { - BufferWriter::create(StandardStreamType::Stderr, choice) - } - - /// If set, the separator given is printed between buffers. By default, no - /// separator is printed. - /// - /// The default value is `None`. - pub fn separator(&mut self, sep: Option>) { - self.separator = sep; - } - - /// Creates a new `Buffer` with the current color preferences. - /// - /// A `Buffer` satisfies both `io::Write` and `WriteColor`. A `Buffer` can - /// be printed using the `print` method. - #[cfg(not(windows))] - pub fn buffer(&self) -> Buffer { - Buffer::new(self.color_choice) - } - - /// Creates a new `Buffer` with the current color preferences. - /// - /// A `Buffer` satisfies both `io::Write` and `WriteColor`. A `Buffer` can - /// be printed using the `print` method. - #[cfg(windows)] - pub fn buffer(&self) -> Buffer { - Buffer::new(self.color_choice, self.console.is_some()) - } - - /// Prints the contents of the given buffer. - /// - /// It is safe to call this from multiple threads simultaneously. In - /// particular, all buffers are written atomically. No interleaving will - /// occur. - pub fn print(&self, buf: &Buffer) -> io::Result<()> { - if buf.is_empty() { - return Ok(()); - } - let mut stream = self.stream.wrap(self.stream.get_ref().lock()); - if let Some(ref sep) = self.separator { - if self.printed.load(Ordering::SeqCst) { - try!(stream.write_all(sep)); - try!(stream.write_all(b"\n")); - } - } - match buf.0 { - BufferInner::NoColor(ref b) => try!(stream.write_all(&b.0)), - BufferInner::Ansi(ref b) => try!(stream.write_all(&b.0)), - #[cfg(windows)] - BufferInner::Windows(ref b) => { - // We guarantee by construction that we have a console here. - // Namely, a BufferWriter is the only way to produce a Buffer. 
- let console_mutex = self.console.as_ref() - .expect("got Windows buffer but have no Console"); - let mut console = console_mutex.lock().unwrap(); - try!(b.print(&mut *console, &mut stream)); - } - } - self.printed.store(true, Ordering::SeqCst); - Ok(()) - } -} - -/// Write colored text to memory. -/// -/// `Buffer` is a platform independent abstraction for printing colored text to -/// an in memory buffer. When the buffer is printed using a `BufferWriter`, the -/// color information will be applied to the output device (a tty on Unix and a -/// console on Windows). -/// -/// A `Buffer` is typically created by calling the `BufferWriter.buffer` -/// method, which will take color preferences and the environment into -/// account. However, buffers can also be manually created using `no_color`, -/// `ansi` or `console` (on Windows). -pub struct Buffer(BufferInner); - -/// BufferInner is an enumeration of different buffer types. -enum BufferInner { - /// No coloring information should be applied. This ignores all coloring - /// directives. - NoColor(NoColor>), - /// Apply coloring using ANSI escape sequences embedded into the buffer. - Ansi(Ansi>), - /// Apply coloring using the Windows console APIs. This buffer saves - /// color information in memory and only interacts with the console when - /// the buffer is printed. - #[cfg(windows)] - Windows(WindowsBuffer), -} - -impl Buffer { - /// Create a new buffer with the given color settings. - #[cfg(not(windows))] - fn new(choice: ColorChoice) -> Buffer { - if choice.should_attempt_color() { - Buffer::ansi() - } else { - Buffer::no_color() - } - } - - /// Create a new buffer with the given color settings. - /// - /// On Windows, one can elect to create a buffer capable of being written - /// to a console. Only enable it if a console is available. - /// - /// If coloring is desired and `console` is false, then ANSI escape - /// sequences are used instead. 
- #[cfg(windows)] - fn new(choice: ColorChoice, console: bool) -> Buffer { - if choice.should_attempt_color() { - if !console || choice.should_ansi() { - Buffer::ansi() - } else { - Buffer::console() - } - } else { - Buffer::no_color() - } - } - - /// Create a buffer that drops all color information. - pub fn no_color() -> Buffer { - Buffer(BufferInner::NoColor(NoColor(vec![]))) - } - - /// Create a buffer that uses ANSI escape sequences. - pub fn ansi() -> Buffer { - Buffer(BufferInner::Ansi(Ansi(vec![]))) - } - - /// Create a buffer that can be written to a Windows console. - #[cfg(windows)] - pub fn console() -> Buffer { - Buffer(BufferInner::Windows(WindowsBuffer::new())) - } - - /// Returns true if and only if this buffer is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the length of this buffer in bytes. - pub fn len(&self) -> usize { - match self.0 { - BufferInner::NoColor(ref b) => b.0.len(), - BufferInner::Ansi(ref b) => b.0.len(), - #[cfg(windows)] - BufferInner::Windows(ref b) => b.buf.len(), - } - } - - /// Clears this buffer. - pub fn clear(&mut self) { - match self.0 { - BufferInner::NoColor(ref mut b) => b.0.clear(), - BufferInner::Ansi(ref mut b) => b.0.clear(), - #[cfg(windows)] - BufferInner::Windows(ref mut b) => b.clear(), - } - } - - /// Consume this buffer and return the underlying raw data. - /// - /// On Windows, this unrecoverably drops all color information associated - /// with the buffer. - pub fn into_inner(self) -> Vec { - match self.0 { - BufferInner::NoColor(b) => b.0, - BufferInner::Ansi(b) => b.0, - #[cfg(windows)] - BufferInner::Windows(b) => b.buf, - } - } - - /// Return the underlying data of the buffer. - pub fn as_slice(&self) -> &[u8] { - match self.0 { - BufferInner::NoColor(ref b) => &b.0, - BufferInner::Ansi(ref b) => &b.0, - #[cfg(windows)] - BufferInner::Windows(ref b) => &b.buf, - } - } - - /// Return the underlying data of the buffer as a mutable slice. 
- pub fn as_mut_slice(&mut self) -> &mut [u8] { - match self.0 { - BufferInner::NoColor(ref mut b) => &mut b.0, - BufferInner::Ansi(ref mut b) => &mut b.0, - #[cfg(windows)] - BufferInner::Windows(ref mut b) => &mut b.buf, - } - } -} - -impl io::Write for Buffer { - fn write(&mut self, buf: &[u8]) -> io::Result { - match self.0 { - BufferInner::NoColor(ref mut w) => w.write(buf), - BufferInner::Ansi(ref mut w) => w.write(buf), - #[cfg(windows)] - BufferInner::Windows(ref mut w) => w.write(buf), - } - } - - fn flush(&mut self) -> io::Result<()> { - match self.0 { - BufferInner::NoColor(ref mut w) => w.flush(), - BufferInner::Ansi(ref mut w) => w.flush(), - #[cfg(windows)] - BufferInner::Windows(ref mut w) => w.flush(), - } - } -} - -impl WriteColor for Buffer { - fn supports_color(&self) -> bool { - match self.0 { - BufferInner::NoColor(_) => false, - BufferInner::Ansi(_) => true, - #[cfg(windows)] - BufferInner::Windows(_) => true, - } - } - - fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - match self.0 { - BufferInner::NoColor(ref mut w) => w.set_color(spec), - BufferInner::Ansi(ref mut w) => w.set_color(spec), - #[cfg(windows)] - BufferInner::Windows(ref mut w) => w.set_color(spec), - } - } - - fn reset(&mut self) -> io::Result<()> { - match self.0 { - BufferInner::NoColor(ref mut w) => w.reset(), - BufferInner::Ansi(ref mut w) => w.reset(), - #[cfg(windows)] - BufferInner::Windows(ref mut w) => w.reset(), - } - } -} - -/// Satisfies `WriteColor` but ignores all color options. -pub struct NoColor(W); - -impl NoColor { - /// Create a new writer that satisfies `WriteColor` but drops all color - /// information. - pub fn new(wtr: W) -> NoColor { NoColor(wtr) } - - /// Consume this `NoColor` value and return the inner writer. - pub fn into_inner(self) -> W { self.0 } - - /// Return a reference to the inner writer. - pub fn get_ref(&self) -> &W { &self.0 } - - /// Return a mutable reference to the inner writer. 
- pub fn get_mut(&mut self) -> &mut W { &mut self.0 } -} - -impl io::Write for NoColor { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.0.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.0.flush() - } -} - -impl WriteColor for NoColor { - fn supports_color(&self) -> bool { false } - fn set_color(&mut self, _: &ColorSpec) -> io::Result<()> { Ok(()) } - fn reset(&mut self) -> io::Result<()> { Ok(()) } -} - -/// Satisfies `WriteColor` using standard ANSI escape sequences. -pub struct Ansi(W); - -impl Ansi { - /// Create a new writer that satisfies `WriteColor` using standard ANSI - /// escape sequences. - pub fn new(wtr: W) -> Ansi { Ansi(wtr) } - - /// Consume this `Ansi` value and return the inner writer. - pub fn into_inner(self) -> W { self.0 } - - /// Return a reference to the inner writer. - pub fn get_ref(&self) -> &W { &self.0 } - - /// Return a mutable reference to the inner writer. - pub fn get_mut(&mut self) -> &mut W { &mut self.0 } -} - -impl io::Write for Ansi { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.0.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.0.flush() - } -} - -impl WriteColor for Ansi { - fn supports_color(&self) -> bool { true } - - fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - try!(self.reset()); - if let Some(ref c) = spec.fg_color { - try!(self.write_color(true, c, spec.intense)); - } - if let Some(ref c) = spec.bg_color { - try!(self.write_color(false, c, spec.intense)); - } - if spec.bold { - try!(self.write_str("\x1B[1m")); - } - Ok(()) - } - - fn reset(&mut self) -> io::Result<()> { - self.write_str("\x1B[m") - } -} - -impl Ansi { - fn write_str(&mut self, s: &str) -> io::Result<()> { - self.write_all(s.as_bytes()) - } - - fn write_color( - &mut self, - fg: bool, - c: &Color, - intense: bool, - ) -> io::Result<()> { - macro_rules! 
write_intense { - ($clr:expr) => { - if fg { - self.write_str(concat!("\x1B[38;5;", $clr, "m")) - } else { - self.write_str(concat!("\x1B[48;5;", $clr, "m")) - } - } - } - macro_rules! write_normal { - ($clr:expr) => { - if fg { - self.write_str(concat!("\x1B[3", $clr, "m")) - } else { - self.write_str(concat!("\x1B[4", $clr, "m")) - } - } - } - if intense { - match *c { - Color::Black => write_intense!("8"), - Color::Blue => write_intense!("12"), - Color::Green => write_intense!("10"), - Color::Red => write_intense!("9"), - Color::Cyan => write_intense!("14"), - Color::Magenta => write_intense!("13"), - Color::Yellow => write_intense!("11"), - Color::White => write_intense!("15"), - Color::__Nonexhaustive => unreachable!(), - } - } else { - match *c { - Color::Black => write_normal!("0"), - Color::Blue => write_normal!("4"), - Color::Green => write_normal!("2"), - Color::Red => write_normal!("1"), - Color::Cyan => write_normal!("6"), - Color::Magenta => write_normal!("5"), - Color::Yellow => write_normal!("3"), - Color::White => write_normal!("7"), - Color::__Nonexhaustive => unreachable!(), - } - } - } -} - -/// An in-memory buffer that provides Windows console coloring. -/// -/// This doesn't actually communicate with the Windows console. Instead, it -/// acts like a normal buffer but also saves the color information associated -/// with positions in the buffer. It is only when the buffer is written to the -/// console that coloring is actually applied. -/// -/// This is roughly isomorphic to the ANSI based approach (i.e., -/// `Ansi>`), except with ANSI, the color information is embedded -/// directly into the buffer. -/// -/// Note that there is no way to write something generic like -/// `WindowsConsole` since coloring on Windows is tied -/// specifically to the console APIs, and therefore can't work on arbitrary -/// writers. -#[cfg(windows)] -#[derive(Clone, Debug)] -struct WindowsBuffer { - /// The actual content that should be printed. 
- buf: Vec, - /// A sequence of position oriented color specifications. Namely, each - /// element is a position and a color spec, where the color spec should - /// be applied at the position inside of `buf`. - /// - /// A missing color spec implies the underlying console should be reset. - colors: Vec<(usize, Option)>, -} - -#[cfg(windows)] -impl WindowsBuffer { - /// Create a new empty buffer for Windows console coloring. - fn new() -> WindowsBuffer { - WindowsBuffer { - buf: vec![], - colors: vec![], - } - } - - /// Push the given color specification into this buffer. - /// - /// This has the effect of setting the given color information at the - /// current position in the buffer. - fn push(&mut self, spec: Option) { - let pos = self.buf.len(); - self.colors.push((pos, spec)); - } - - /// Print the contents to the given stream handle, and use the console - /// for coloring. - fn print( - &self, - console: &mut wincolor::Console, - stream: &mut LossyStandardStream, - ) -> io::Result<()> { - let mut last = 0; - for &(pos, ref spec) in &self.colors { - try!(stream.write_all(&self.buf[last..pos])); - try!(stream.flush()); - last = pos; - match *spec { - None => try!(console.reset()), - Some(ref spec) => try!(spec.write_console(console)), - } - } - try!(stream.write_all(&self.buf[last..])); - stream.flush() - } - - /// Clear the buffer. - fn clear(&mut self) { - self.buf.clear(); - self.colors.clear(); - } -} - -#[cfg(windows)] -impl io::Write for WindowsBuffer { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.buf.extend_from_slice(buf); - Ok(buf.len()) - } - - fn flush(&mut self) -> io::Result<()> { - Ok(()) - } -} - -#[cfg(windows)] -impl WriteColor for WindowsBuffer { - fn supports_color(&self) -> bool { true } - - fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - self.push(Some(spec.clone())); - Ok(()) - } - - fn reset(&mut self) -> io::Result<()> { - self.push(None); - Ok(()) - } -} - -/// A color specification. 
-#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct ColorSpec { - fg_color: Option, - bg_color: Option, - bold: bool, - intense: bool, -} - -impl ColorSpec { - /// Create a new color specification that has no colors or styles. - pub fn new() -> ColorSpec { - ColorSpec::default() - } - - /// Get the foreground color. - pub fn fg(&self) -> Option<&Color> { self.fg_color.as_ref() } - - /// Set the foreground color. - pub fn set_fg(&mut self, color: Option) -> &mut ColorSpec { - self.fg_color = color; - self - } - - /// Get the background color. - pub fn bg(&self) -> Option<&Color> { self.bg_color.as_ref() } - - /// Set the background color. - pub fn set_bg(&mut self, color: Option) -> &mut ColorSpec { - self.bg_color = color; - self - } - - /// Get whether this is bold or not. - /// - /// Note that the bold setting has no effect in a Windows console. - pub fn bold(&self) -> bool { self.bold } - - /// Set whether the text is bolded or not. - /// - /// Note that the bold setting has no effect in a Windows console. - pub fn set_bold(&mut self, yes: bool) -> &mut ColorSpec { - self.bold = yes; - self - } - - /// Get whether this is intense or not. - pub fn intense(&self) -> bool { self.intense } - - /// Set whether the text is intense or not. - pub fn set_intense(&mut self, yes: bool) -> &mut ColorSpec { - self.intense = yes; - self - } - - /// Returns true if this color specification has no colors or styles. - pub fn is_none(&self) -> bool { - self.fg_color.is_none() && self.bg_color.is_none() && !self.bold - } - - /// Clears this color specification so that it has no color/style settings. - pub fn clear(&mut self) { - self.fg_color = None; - self.bg_color = None; - self.bold = false; - } - - /// Writes this color spec to the given Windows console. 
- #[cfg(windows)] - fn write_console( - &self, - console: &mut wincolor::Console, - ) -> io::Result<()> { - use wincolor::Intense; - - let intense = if self.intense { Intense::Yes } else { Intense::No }; - if let Some(color) = self.fg_color.as_ref().map(|c| c.to_windows()) { - try!(console.fg(intense, color)); - } - if let Some(color) = self.bg_color.as_ref().map(|c| c.to_windows()) { - try!(console.bg(intense, color)); - } - Ok(()) - } -} - -/// The set of available English colors for the terminal foreground/background. -/// -/// Note that this set may expand over time. -#[allow(missing_docs)] -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Color { - Black, - Blue, - Green, - Red, - Cyan, - Magenta, - Yellow, - White, - #[doc(hidden)] - __Nonexhaustive, -} - -#[cfg(windows)] -impl Color { - /// Translate this color to a wincolor::Color. - fn to_windows(&self) -> wincolor::Color { - match *self { - Color::Black => wincolor::Color::Black, - Color::Blue => wincolor::Color::Blue, - Color::Green => wincolor::Color::Green, - Color::Red => wincolor::Color::Red, - Color::Cyan => wincolor::Color::Cyan, - Color::Magenta => wincolor::Color::Magenta, - Color::Yellow => wincolor::Color::Yellow, - Color::White => wincolor::Color::White, - Color::__Nonexhaustive => unreachable!(), - } - } -} - -/// An error from parsing an invalid color name. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ParseColorError(String); - -impl ParseColorError { - /// Return the string that couldn't be parsed as a valid color. - pub fn invalid(&self) -> &str { &self.0 } -} - -impl error::Error for ParseColorError { - fn description(&self) -> &str { "unrecognized color name" } -} - -impl fmt::Display for ParseColorError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Unrecognized color name '{}'. 
Choose from: \ - black, blue, green, red, cyan, magenta, yellow, white.", - self.0) - } -} - -impl FromStr for Color { - type Err = ParseColorError; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "black" => Ok(Color::Black), - "blue" => Ok(Color::Blue), - "green" => Ok(Color::Green), - "red" => Ok(Color::Red), - "cyan" => Ok(Color::Cyan), - "magenta" => Ok(Color::Magenta), - "yellow" => Ok(Color::Yellow), - "white" => Ok(Color::White), - _ => Err(ParseColorError(s.to_string())), - } - } -} - -struct LossyStandardStream { - wtr: W, - #[cfg(windows)] - is_console: bool, -} - -impl LossyStandardStream { - #[cfg(not(windows))] - fn new(wtr: W) -> LossyStandardStream { LossyStandardStream { wtr: wtr } } - - #[cfg(windows)] - fn new(wtr: W) -> LossyStandardStream { - LossyStandardStream { wtr: wtr, is_console: false } - } - - #[cfg(not(windows))] - fn wrap(&self, wtr: Q) -> LossyStandardStream { - LossyStandardStream::new(wtr) - } - - #[cfg(windows)] - fn wrap(&self, wtr: Q) -> LossyStandardStream { - LossyStandardStream::new(wtr).is_console(self.is_console) - } - - #[cfg(windows)] - fn is_console(mut self, yes: bool) -> LossyStandardStream { - self.is_console = yes; - self - } - - fn get_ref(&self) -> &W { - &self.wtr - } -} - -impl WriteColor for LossyStandardStream { - fn supports_color(&self) -> bool { self.wtr.supports_color() } - fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - self.wtr.set_color(spec) - } - fn reset(&mut self) -> io::Result<()> { self.wtr.reset() } -} - -impl io::Write for LossyStandardStream { - #[cfg(not(windows))] - fn write(&mut self, buf: &[u8]) -> io::Result { - self.wtr.write(buf) - } - - #[cfg(windows)] - fn write(&mut self, buf: &[u8]) -> io::Result { - if self.is_console { - write_lossy_utf8(&mut self.wtr, buf) - } else { - self.wtr.write(buf) - } - } - - fn flush(&mut self) -> io::Result<()> { - self.wtr.flush() - } -} - -#[cfg(windows)] -fn write_lossy_utf8(mut w: W, buf: &[u8]) -> io::Result { - 
match ::std::str::from_utf8(buf) { - Ok(s) => w.write(s.as_bytes()), - Err(ref e) if e.valid_up_to() == 0 => { - try!(w.write(b"\xEF\xBF\xBD")); - Ok(1) - } - Err(e) => w.write(&buf[..e.valid_up_to()]), - } -} diff -Nru ripgrep-0.6.0/termcolor/UNLICENSE ripgrep-0.10.0.3/termcolor/UNLICENSE --- ripgrep-0.6.0/termcolor/UNLICENSE 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/termcolor/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. 
- -For more information, please refer to Binary files /tmp/tmpWb_x3N/8wrkwDuYlx/ripgrep-0.6.0/tests/data/sherlock.bz2 and /tmp/tmpWb_x3N/i6IbzxLPq8/ripgrep-0.10.0.3/tests/data/sherlock.bz2 differ Binary files /tmp/tmpWb_x3N/8wrkwDuYlx/ripgrep-0.6.0/tests/data/sherlock.gz and /tmp/tmpWb_x3N/i6IbzxLPq8/ripgrep-0.10.0.3/tests/data/sherlock.gz differ Binary files /tmp/tmpWb_x3N/8wrkwDuYlx/ripgrep-0.6.0/tests/data/sherlock.lz4 and /tmp/tmpWb_x3N/i6IbzxLPq8/ripgrep-0.10.0.3/tests/data/sherlock.lz4 differ Binary files /tmp/tmpWb_x3N/8wrkwDuYlx/ripgrep-0.6.0/tests/data/sherlock.lzma and /tmp/tmpWb_x3N/i6IbzxLPq8/ripgrep-0.10.0.3/tests/data/sherlock.lzma differ Binary files /tmp/tmpWb_x3N/8wrkwDuYlx/ripgrep-0.6.0/tests/data/sherlock.xz and /tmp/tmpWb_x3N/i6IbzxLPq8/ripgrep-0.10.0.3/tests/data/sherlock.xz differ diff -Nru ripgrep-0.6.0/tests/feature.rs ripgrep-0.10.0.3/tests/feature.rs --- ripgrep-0.6.0/tests/feature.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/tests/feature.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,631 @@ +use hay::{SHERLOCK, SHERLOCK_CRLF}; +use util::{Dir, TestCommand, sort_lines}; + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_sjis, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes( + "foo", + b"\x84Y\x84u\x84\x82\x84|\x84\x80\x84{ \x84V\x84\x80\x84|\x84}\x84\x83" + ); + cmd.arg("-Esjis").arg("Шерлок Холмс"); + eqnice!("foo:Шерлок Холмс\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_utf16_auto, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes( + "foo", + b"\xff\xfe(\x045\x04@\x04;\x04>\x04:\x04 \x00%\x04>\x04;\x04<\x04A\x04" + ); + cmd.arg("Шерлок Холмс"); + eqnice!("foo:Шерлок Холмс\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_utf16_explicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes( + "foo", + b"\xff\xfe(\x045\x04@\x04;\x04>\x04:\x04 \x00%\x04>\x04;\x04<\x04A\x04" + ); + 
cmd.arg("-Eutf-16le").arg("Шерлок Холмс"); + eqnice!("foo:Шерлок Холмс\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_eucjp, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes( + "foo", + b"\xa7\xba\xa7\xd6\xa7\xe2\xa7\xdd\xa7\xe0\xa7\xdc \xa7\xb7\xa7\xe0\xa7\xdd\xa7\xde\xa7\xe3" + ); + cmd.arg("-Eeuc-jp").arg("Шерлок Холмс"); + eqnice!("foo:Шерлок Холмс\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_unknown_encoding, |_: Dir, mut cmd: TestCommand| { + cmd.arg("-Efoobar").assert_non_empty_stderr(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_replacement_encoding, |_: Dir, mut cmd: TestCommand| { + cmd.arg("-Ecsiso2022kr").assert_non_empty_stderr(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/7 +rgtest!(f7, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("pat", "Sherlock\nHolmes"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("-fpat").arg("sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/7 +rgtest!(f7_stdin, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("-f-").pipe("Sherlock")); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/20 +rgtest!(f20_no_filename, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--no-filename"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.arg("--no-filename").arg("Sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/34 +rgtest!(f34_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:Sherlock +sherlock:Sherlock +"; + eqnice!(expected, cmd.arg("-o").arg("Sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/34 +rgtest!(f34_only_matching_line_column, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:1:57:Sherlock +sherlock:3:49:Sherlock +"; + cmd.arg("-o").arg("--column").arg("-n").arg("Sherlock"); + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +rgtest!(f45_relative_cwd, |dir: Dir, mut cmd: TestCommand| { + dir.create(".not-an-ignore", "foo\n/bar"); + dir.create_dir("bar"); + dir.create_dir("baz/bar"); + dir.create_dir("baz/baz/bar"); + dir.create("bar/test", "test"); + dir.create("baz/bar/test", "test"); + dir.create("baz/baz/bar/test", "test"); + dir.create("baz/foo", "test"); + dir.create("baz/test", "test"); + dir.create("foo", "test"); + dir.create("test", "test"); + + cmd.arg("-l").arg("test"); + + // First, get a baseline without applying ignore rules. + let expected = " +bar/test +baz/bar/test +baz/baz/bar/test +baz/foo +baz/test +foo +test +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); + + // Now try again with the ignore file activated. + cmd.arg("--ignore-file").arg(".not-an-ignore"); + let expected = " +baz/bar/test +baz/baz/bar/test +baz/test +test +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); + + // Now do it again, but inside the baz directory. Since the ignore file + // is interpreted relative to the CWD, this will cause the /bar anchored + // pattern to filter out baz/bar, which is a subtle difference between true + // parent ignore files and manually specified ignore files. 
+ let mut cmd = dir.command(); + cmd.args(&["--ignore-file", "../.not-an-ignore", "-l", "test"]); + cmd.current_dir(dir.path().join("baz")); + let expected = " +baz/bar/test +test +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +rgtest!(f45_precedence_with_others, |dir: Dir, mut cmd: TestCommand| { + dir.create(".not-an-ignore", "*.log"); + dir.create(".ignore", "!imp.log"); + dir.create("imp.log", "test"); + dir.create("wat.log", "test"); + + cmd.arg("--ignore-file").arg(".not-an-ignore").arg("test"); + eqnice!("imp.log:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +rgtest!(f45_precedence_internal, |dir: Dir, mut cmd: TestCommand| { + dir.create(".not-an-ignore1", "*.log"); + dir.create(".not-an-ignore2", "!imp.log"); + dir.create("imp.log", "test"); + dir.create("wat.log", "test"); + + cmd.args(&[ + "--ignore-file", ".not-an-ignore1", + "--ignore-file", ".not-an-ignore2", + "test", + ]); + eqnice!("imp.log:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/68 +rgtest!(f68_no_ignore_vcs, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "foo"); + dir.create(".ignore", "bar"); + dir.create("foo", "test"); + dir.create("bar", "test"); + + eqnice!("foo:test\n", cmd.arg("--no-ignore-vcs").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/70 +rgtest!(f70_smart_case, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.arg("-S").arg("sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_files_with_matches, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + cmd.arg("--null").arg("--files-with-matches").arg("Sherlock"); + eqnice!("sherlock\x00", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_files_without_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "foo"); + + cmd.arg("--null").arg("--files-without-match").arg("Sherlock"); + eqnice!("file.py\x00", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_count, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + cmd.arg("--null").arg("--count").arg("Sherlock"); + eqnice!("sherlock\x002\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_files, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + eqnice!("sherlock\x00", cmd.arg("--null").arg("--files").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock\x00Holmeses, success in the province of detective work must always +sherlock\x00be, to a very large extent, the result of luck. 
Sherlock Holmes +sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.arg("--null").arg("-C1").arg("Sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/109 +rgtest!(f109_max_depth, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("one"); + dir.create("one/pass", "far"); + dir.create_dir("one/too"); + dir.create("one/too/many", "far"); + + cmd.arg("--maxdepth").arg("2").arg("far"); + eqnice!("one/pass:far\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/124 +rgtest!(f109_case_sensitive_part1, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "tEsT"); + + cmd.arg("--smart-case").arg("--case-sensitive").arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/124 +rgtest!(f109_case_sensitive_part2, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "tEsT"); + cmd.arg("--ignore-case").arg("--case-sensitive").arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/129 +rgtest!(f129_matches, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\ntest abcdefghijklmnopqrstuvwxyz test"); + + let expected = "foo:test\nfoo:[Omitted long matching line]\n"; + eqnice!(expected, cmd.arg("-M26").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/129 +rgtest!(f129_context, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\nabcdefghijklmnopqrstuvwxyz"); + + let expected = "foo:test\nfoo-[Omitted long context line]\n"; + eqnice!(expected, cmd.arg("-M20").arg("-C1").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/129 +rgtest!(f129_replace, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\ntest abcdefghijklmnopqrstuvwxyz test"); + + let expected = "foo:foo\nfoo:[Omitted long line with 2 matches]\n"; + eqnice!(expected, cmd.arg("-M26").arg("-rfoo").arg("test").stdout()); +}); + +// See: 
https://github.com/BurntSushi/ripgrep/issues/159 +rgtest!(f159_max_count, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\ntest"); + + eqnice!("foo:test\n", cmd.arg("-m1").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/159 +rgtest!(f159_max_count_zero, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\ntest"); + + cmd.arg("-m0").arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/196 +rgtest!(f196_persistent_config, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("sherlock").arg("sherlock"); + + // Make sure we get no matches by default. + cmd.assert_err(); + + // Now add our config file, and make sure it impacts ripgrep. + dir.create(".ripgreprc", "--ignore-case"); + cmd.cmd().env("RIPGREP_CONFIG_PATH", ".ripgreprc"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/243 +rgtest!(f243_column_line, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test"); + + eqnice!("foo:1:1:test\n", cmd.arg("--column").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/263 +rgtest!(f263_sort_files, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test"); + dir.create("abc", "test"); + dir.create("zoo", "test"); + dir.create("bar", "test"); + + let expected = "abc:test\nbar:test\nfoo:test\nzoo:test\n"; + eqnice!(expected, cmd.arg("--sort-files").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/275 +rgtest!(f275_pathsep, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + dir.create("foo/bar", "test"); + + cmd.arg("test").arg("--path-separator").arg("Z"); + eqnice!("fooZbar:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/362 
+rgtest!(f362_dfa_size_limit, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + // This should fall back to the nfa engine but should still produce the + // expected result. + cmd.arg("--dfa-size-limit").arg("10").arg(r"For\s").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/362 +rgtest!(f362_exceeds_regex_size_limit, |dir: Dir, mut cmd: TestCommand| { + // --regex-size-limit doesn't apply to PCRE2. + if dir.is_pcre2() { + return; + } + cmd.arg("--regex-size-limit").arg("10K").arg(r"[0-9]\w+").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/362 +#[cfg(target_pointer_width = "32")] +rgtest!(f362_u64_to_narrow_usize_overflow, |dir: Dir, mut cmd: TestCommand| { + // --dfa-size-limit doesn't apply to PCRE2. + if dir.is_pcre2() { + return; + } + dir.create_size("foo", 1000000); + + // 2^35 * 2^20 is ok for u64, but not for usize + cmd.arg("--dfa-size-limit").arg("34359738368M").arg("--files"); + cmd.assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/411 +rgtest!(f411_single_threaded_search_stats, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let lines = cmd.arg("--stats").arg("Sherlock").stdout(); + assert!(lines.contains("2 matched lines")); + assert!(lines.contains("1 files contained matches")); + assert!(lines.contains("1 files searched")); + assert!(lines.contains("seconds")); +}); + +rgtest!(f411_parallel_search_stats, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock_1", SHERLOCK); + dir.create("sherlock_2", SHERLOCK); + + let lines = cmd.arg("--stats").arg("Sherlock").stdout(); + assert!(lines.contains("4 matched lines")); + assert!(lines.contains("2 files contained matches")); + assert!(lines.contains("2 files searched")); + assert!(lines.contains("seconds")); +}); + +// See: 
https://github.com/BurntSushi/ripgrep/issues/416 +rgtest!(f416_crlf, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK_CRLF); + cmd.arg("--crlf").arg(r"Sherlock$").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock\r +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/416 +rgtest!(f416_crlf_multiline, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK_CRLF); + cmd.arg("--crlf").arg("-U").arg(r"Sherlock$").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock\r +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/416 +rgtest!(f416_crlf_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK_CRLF); + cmd.arg("--crlf").arg("-o").arg(r"Sherlock$").arg("sherlock"); + + let expected = "\ +Sherlock\r +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/419 +rgtest!(f419_zero_as_shortcut_for_null, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + cmd.arg("-0").arg("--count").arg("Sherlock"); + eqnice!("sherlock\x002\n", cmd.stdout()); +}); + +rgtest!(f740_passthru, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "\nfoo\nbar\nfoobar\n\nbaz\n"); + dir.create("patterns", "foo\nbar\n"); + + // We can't assume that the way colour specs are translated to ANSI + // sequences will remain stable, and --replace doesn't currently work with + // pass-through, so for now we don't actually test the match sub-strings + let common_args = &["-n", "--passthru"]; + let foo_expected = "\ +1- +2:foo +3-bar +4:foobar +5- +6-baz +"; + + // With single pattern + cmd.args(common_args).arg("foo").arg("file"); + eqnice!(foo_expected, cmd.stdout()); + + let foo_bar_expected = "\ +1- +2:foo +3:bar +4:foobar +5- +6-baz +"; + + // With multiple -e patterns + let 
mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-e", "foo", "-e", "bar", "file"]); + eqnice!(foo_bar_expected, cmd.stdout()); + + // With multiple -f patterns + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-f", "patterns", "file"]); + eqnice!(foo_bar_expected, cmd.stdout()); + + // -c should override + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-c", "foo", "file"]); + eqnice!("2\n", cmd.stdout()); + + let only_foo_expected = "\ +1- +2:foo +3-bar +4:foo +5- +6-baz +"; + + // -o should work + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-o", "foo", "file"]); + eqnice!(only_foo_expected, cmd.stdout()); + + let replace_foo_expected = "\ +1- +2:wat +3-bar +4:watbar +5- +6-baz +"; + + // -r should work + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-r", "wat", "foo", "file"]); + eqnice!(replace_foo_expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/948 +rgtest!(f948_exit_code_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("."); + + cmd.assert_exit_code(0); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/948 +rgtest!(f948_exit_code_no_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("NADA"); + + cmd.assert_exit_code(1); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/948 +rgtest!(f948_exit_code_error, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("*"); + + cmd.assert_exit_code(2); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/917 +rgtest!(f917_trim, |dir: Dir, mut cmd: TestCommand| { +const SHERLOCK: &'static str = "\ +zzz + For the Doctor Watsons of this world, as opposed to the Sherlock + Holmeses, success in the province of detective work must always +\tbe, to a very large extent, the result of luck. 
Sherlock Holmes + can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, + and exhibited clearly, with a label attached. +"; + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", "-B1", "-A2", "--trim", "Holmeses", "sherlock", + ]); + + let expected = "\ +2-For the Doctor Watsons of this world, as opposed to the Sherlock +3:Holmeses, success in the province of detective work must always +4-be, to a very large extent, the result of luck. Sherlock Holmes +5-can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/917 +// +// This is like f917_trim, except this tests that trimming occurs even when the +// whitespace is part of a match. +rgtest!(f917_trim_match, |dir: Dir, mut cmd: TestCommand| { +const SHERLOCK: &'static str = "\ +zzz + For the Doctor Watsons of this world, as opposed to the Sherlock + Holmeses, success in the province of detective work must always +\tbe, to a very large extent, the result of luck. Sherlock Holmes + can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, + and exhibited clearly, with a label attached. +"; + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", "-B1", "-A2", "--trim", r"\s+Holmeses", "sherlock", + ]); + + let expected = "\ +2-For the Doctor Watsons of this world, as opposed to the Sherlock +3:Holmeses, success in the province of detective work must always +4-be, to a very large extent, the result of luck. 
Sherlock Holmes +5-can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/993 +rgtest!(f993_null_data, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "foo\x00bar\x00\x00\x00baz\x00"); + cmd.arg("--null-data").arg(r".+").arg("test"); + + // If we just used -a instead of --null-data, then the result would include + // all NUL bytes. + let expected = "foo\x00bar\x00baz\x00"; + eqnice!(expected, cmd.stdout()); +}); diff -Nru ripgrep-0.6.0/tests/hay.rs ripgrep-0.10.0.3/tests/hay.rs --- ripgrep-0.6.0/tests/hay.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/tests/hay.rs 2018-09-10 21:10:55.000000000 +0000 @@ -7,18 +7,11 @@ and exhibited clearly, with a label attached. "; -pub const CODE: &'static str = "\ -extern crate snap; - -use std::io; - -fn main() { - let stdin = io::stdin(); - let stdout = io::stdout(); - - // Wrap the stdin reader in a Snappy reader. - let mut rdr = snap::Reader::new(stdin.lock()); - let mut wtr = stdout.lock(); - io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); -} +pub const SHERLOCK_CRLF: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock\r +Holmeses, success in the province of detective work must always\r +be, to a very large extent, the result of luck. 
Sherlock Holmes\r +can extract a clew from a wisp of straw or a flake of cigar ash;\r +but Doctor Watson has to have it taken out for him and dusted,\r +and exhibited clearly, with a label attached.\r "; diff -Nru ripgrep-0.6.0/tests/json.rs ripgrep-0.10.0.3/tests/json.rs --- ripgrep-0.6.0/tests/json.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/tests/json.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,306 @@ +use std::time; + +use serde_json as json; + +use hay::{SHERLOCK, SHERLOCK_CRLF}; +use util::{Dir, TestCommand}; + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(tag = "type", content = "data")] +#[serde(rename_all = "snake_case")] +enum Message { + Begin(Begin), + End(End), + Match(Match), + Context(Context), + Summary(Summary), +} + +impl Message { + fn unwrap_begin(&self) -> Begin { + match *self { + Message::Begin(ref x) => x.clone(), + ref x => panic!("expected Message::Begin but got {:?}", x), + } + } + + fn unwrap_end(&self) -> End { + match *self { + Message::End(ref x) => x.clone(), + ref x => panic!("expected Message::End but got {:?}", x), + } + } + + fn unwrap_match(&self) -> Match { + match *self { + Message::Match(ref x) => x.clone(), + ref x => panic!("expected Message::Match but got {:?}", x), + } + } + + fn unwrap_context(&self) -> Context { + match *self { + Message::Context(ref x) => x.clone(), + ref x => panic!("expected Message::Context but got {:?}", x), + } + } + + fn unwrap_summary(&self) -> Summary { + match *self { + Message::Summary(ref x) => x.clone(), + ref x => panic!("expected Message::Summary but got {:?}", x), + } + } +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Begin { + path: Option<Data>, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct End { + path: Option<Data>, + binary_offset: Option<u64>, + stats: Stats, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Summary { + elapsed_total: Duration, + stats: Stats, +} + +#[derive(Clone, Debug, Deserialize, 
PartialEq, Eq)] +struct Match { + path: Option<Data>, + lines: Data, + line_number: Option<u64>, + absolute_offset: u64, + submatches: Vec<SubMatch>, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Context { + path: Option<Data>, + lines: Data, + line_number: Option<u64>, + absolute_offset: u64, + submatches: Vec<SubMatch>, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct SubMatch { + #[serde(rename = "match")] + m: Data, + start: usize, + end: usize, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(untagged)] +enum Data { + Text { text: String }, + // This variant is used when the data isn't valid UTF-8. The bytes are + // base64 encoded, so using a String here is OK. + Bytes { bytes: String }, +} + +impl Data { + fn text(s: &str) -> Data { Data::Text { text: s.to_string() } } + fn bytes(s: &str) -> Data { Data::Bytes { bytes: s.to_string() } } +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Stats { + elapsed: Duration, + searches: u64, + searches_with_match: u64, + bytes_searched: u64, + bytes_printed: u64, + matched_lines: u64, + matches: u64, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Duration { + #[serde(flatten)] + duration: time::Duration, + human: String, +} + +/// Decode JSON Lines into a Vec<Message>. If there was an error decoding, +/// this function panics. 
+fn json_decode(jsonlines: &str) -> Vec<Message> { + json::Deserializer::from_str(jsonlines) + .into_iter() + .collect::<Result<Vec<Message>, _>>() + .unwrap() +} + +rgtest!(basic, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--json").arg("-B1").arg("Sherlock Holmes").arg("sherlock"); + + let msgs = json_decode(&cmd.stdout()); + + assert_eq!( + msgs[0].unwrap_begin(), + Begin { path: Some(Data::text("sherlock")) } + ); + assert_eq!( + msgs[1].unwrap_context(), + Context { + path: Some(Data::text("sherlock")), + lines: Data::text("Holmeses, success in the province of detective work must always\n"), + line_number: Some(2), + absolute_offset: 65, + submatches: vec![], + } + ); + assert_eq!( + msgs[2].unwrap_match(), + Match { + path: Some(Data::text("sherlock")), + lines: Data::text("be, to a very large extent, the result of luck. Sherlock Holmes\n"), + line_number: Some(3), + absolute_offset: 129, + submatches: vec![ + SubMatch { + m: Data::text("Sherlock Holmes"), + start: 48, + end: 63, + }, + ], + } + ); + assert_eq!( + msgs[3].unwrap_end().path, + Some(Data::text("sherlock")) + ); + assert_eq!( + msgs[3].unwrap_end().binary_offset, + None + ); + assert_eq!( + msgs[4].unwrap_summary().stats.searches_with_match, + 1 + ); + assert_eq!( + msgs[4].unwrap_summary().stats.bytes_printed, + 494 + ); +}); + +#[cfg(unix)] +rgtest!(notutf8, |dir: Dir, mut cmd: TestCommand| { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + // This test does not work with PCRE2 because PCRE2 does not support the + // `u` flag. + if dir.is_pcre2() { + return; + } + // macOS doesn't like this either... sigh. + if cfg!(target_os = "macos") { + return; + } + + let name = &b"foo\xFFbar"[..]; + let contents = &b"quux\xFFbaz"[..]; + + // APFS does not support creating files with invalid UTF-8 bytes, so just + // skip the test if we can't create our file. 
+ if !dir.try_create_bytes(OsStr::from_bytes(name), contents).is_ok() { + return; + } + cmd.arg("--json").arg(r"(?-u)\xFF"); + + let msgs = json_decode(&cmd.stdout()); + + assert_eq!( + msgs[0].unwrap_begin(), + Begin { path: Some(Data::bytes("Zm9v/2Jhcg==")) } + ); + assert_eq!( + msgs[1].unwrap_match(), + Match { + path: Some(Data::bytes("Zm9v/2Jhcg==")), + lines: Data::bytes("cXV1eP9iYXo="), + line_number: Some(1), + absolute_offset: 0, + submatches: vec![ + SubMatch { + m: Data::bytes("/w=="), + start: 4, + end: 5, + }, + ], + } + ); +}); + +rgtest!(notutf8_file, |dir: Dir, mut cmd: TestCommand| { + use std::ffi::OsStr; + + // This test does not work with PCRE2 because PCRE2 does not support the + // `u` flag. + if dir.is_pcre2() { + return; + } + + let name = "foo"; + let contents = &b"quux\xFFbaz"[..]; + + // APFS does not support creating files with invalid UTF-8 bytes, so just + // skip the test if we can't create our file. + if !dir.try_create_bytes(OsStr::new(name), contents).is_ok() { + return; + } + cmd.arg("--json").arg(r"(?-u)\xFF"); + + let msgs = json_decode(&cmd.stdout()); + + assert_eq!( + msgs[0].unwrap_begin(), + Begin { path: Some(Data::text("foo")) } + ); + assert_eq!( + msgs[1].unwrap_match(), + Match { + path: Some(Data::text("foo")), + lines: Data::bytes("cXV1eP9iYXo="), + line_number: Some(1), + absolute_offset: 0, + submatches: vec![ + SubMatch { + m: Data::bytes("/w=="), + start: 4, + end: 5, + }, + ], + } + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/416 +// +// This test in particular checks that our match does _not_ include the `\r` +// even though the '$' may be rewritten as '(?:\r??$)' and could thus include +// `\r` in the match. 
+rgtest!(crlf, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK_CRLF); + cmd.arg("--json").arg("--crlf").arg(r"Sherlock$").arg("sherlock"); + + let msgs = json_decode(&cmd.stdout()); + + assert_eq!( + msgs[1].unwrap_match().submatches[0].clone(), + SubMatch { + m: Data::text("Sherlock"), + start: 56, + end: 64, + }, + ); +}); diff -Nru ripgrep-0.6.0/tests/macros.rs ripgrep-0.10.0.3/tests/macros.rs --- ripgrep-0.6.0/tests/macros.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/tests/macros.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,61 @@ +#[macro_export] +macro_rules! rgtest { + ($name:ident, $fun:expr) => { + #[test] + fn $name() { + let (dir, cmd) = ::util::setup(stringify!($name)); + $fun(dir, cmd); + + if cfg!(feature = "pcre2") { + let (dir, cmd) = ::util::setup_pcre2(stringify!($name)); + $fun(dir, cmd); + } + } + } +} + +#[macro_export] +macro_rules! eqnice { + ($expected:expr, $got:expr) => { + let expected = &*$expected; + let got = &*$got; + if expected != got { + panic!(" +printed outputs differ! + +expected: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +got: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +", expected, got); + } + } +} + +#[macro_export] +macro_rules! eqnice_repr { + ($expected:expr, $got:expr) => { + let expected = &*$expected; + let got = &*$got; + if expected != got { + panic!(" +printed outputs differ! 
+ +expected: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{:?} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +got: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{:?} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +", expected, got); + } + } +} diff -Nru ripgrep-0.6.0/tests/misc.rs ripgrep-0.10.0.3/tests/misc.rs --- ripgrep-0.6.0/tests/misc.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/tests/misc.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,966 @@ +use hay::SHERLOCK; +use util::{Dir, TestCommand, cmd_exists, sort_lines}; + +// This file contains "miscellaneous" tests that were either written before +// features were tracked more explicitly, or were simply written without +// linking them to a specific issue number. We should try to minimize the +// addition of more tests in this file and instead add them to either the +// regression test suite or the feature test suite (found in regression.rs and +// feature.rs, respectively). + +rgtest!(single_file, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("Sherlock").arg("sherlock").stdout()); +}); + +rgtest!(dir, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("Sherlock").stdout()); +}); + +rgtest!(line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +3:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.arg("-n").arg("Sherlock").arg("sherlock").stdout()); +}); + +rgtest!(columns, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--column").arg("Sherlock").arg("sherlock"); + + let expected = "\ +1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +3:49:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(with_filename, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-H").arg("Sherlock").arg("sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(with_heading, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + // This forces the issue since --with-filename is disabled by default + // when searching one file. + "--with-filename", "--heading", + "Sherlock", "sherlock", + ]); + + let expected = "\ +sherlock +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(with_heading_default, |dir: Dir, mut cmd: TestCommand| { + // Search two or more and get --with-filename enabled by default. + // Use -j1 to get deterministic results. + dir.create("sherlock", SHERLOCK); + dir.create("foo", "Sherlock Holmes lives on Baker Street."); + cmd.arg("-j1").arg("--heading").arg("Sherlock"); + + let expected = "\ +foo +Sherlock Holmes lives on Baker Street. + +sherlock +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); +}); + +rgtest!(inverted, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-v").arg("Sherlock").arg("sherlock"); + + let expected = "\ +Holmeses, success in the province of detective work must always +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(inverted_line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-n").arg("-v").arg("Sherlock").arg("sherlock"); + + let expected = "\ +2:Holmeses, success in the province of detective work must always +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(case_insensitive, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-i").arg("sherlock").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(word, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-w").arg("as").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(line, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-x", + "Watson|and exhibited clearly, with a label attached.", + "sherlock", + ]); + + let expected = "\ +and exhibited clearly, with a label attached. 
+"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(literal, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file", "blib\n()\nblab\n"); + cmd.arg("-F").arg("()").arg("file"); + + eqnice!("()\n", cmd.stdout()); +}); + +rgtest!(quiet, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-q").arg("Sherlock").arg("sherlock"); + + assert!(cmd.stdout().is_empty()); +}); + +rgtest!(replace, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-r").arg("FooBar").arg("Sherlock").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the FooBar +be, to a very large extent, the result of luck. FooBar Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(replace_groups, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-r", "$2, $1", "([A-Z][a-z]+) ([A-Z][a-z]+)", "sherlock", + ]); + + let expected = "\ +For the Watsons, Doctor of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Holmes, Sherlock +but Watson, Doctor has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(replace_named_groups, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-r", "$last, $first", + "(?P<first>[A-Z][a-z]+) (?P<last>[A-Z][a-z]+)", + "sherlock", + ]); + + let expected = "\ +For the Watsons, Doctor of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Holmes, Sherlock +but Watson, Doctor has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(replace_with_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-o").arg("-r").arg("$1").arg(r"of (\w+)").arg("sherlock"); + + let expected = "\ +this +detective +luck +straw +cigar +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(file_types, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("-t").arg("rust").arg("Sherlock"); + + eqnice!("file.rs:Sherlock\n", cmd.stdout()); +}); + +rgtest!(file_types_all, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + cmd.arg("-t").arg("all").arg("Sherlock"); + + eqnice!("file.py:Sherlock\n", cmd.stdout()); +}); + +rgtest!(file_types_negate, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.remove("sherlock"); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("-T").arg("rust").arg("Sherlock"); + + eqnice!("file.py:Sherlock\n", cmd.stdout()); +}); + +rgtest!(file_types_negate_all, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + cmd.arg("-T").arg("all").arg("Sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(file_type_clear, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("--type-clear").arg("rust").arg("-t").arg("rust").arg("Sherlock"); + + cmd.assert_non_empty_stderr(); +}); + +rgtest!(file_type_add, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + dir.create("file.wat", "Sherlock"); + cmd.args(&[ + "--type-add", "wat:*.wat", "-t", "wat", "Sherlock", + ]); + + eqnice!("file.wat:Sherlock\n", cmd.stdout()); +}); + +rgtest!(file_type_add_compose, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + dir.create("file.wat", "Sherlock"); + cmd.args(&[ + "--type-add", "wat:*.wat", + "--type-add", "combo:include:wat,py", + "-t", "combo", + "Sherlock", + ]); + + let expected = "\ +file.py:Sherlock +file.wat:Sherlock +"; + eqnice!(expected, sort_lines(&cmd.stdout())); +}); + +rgtest!(glob, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("-g").arg("*.rs").arg("Sherlock"); + + eqnice!("file.rs:Sherlock\n", cmd.stdout()); +}); + +rgtest!(glob_negate, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.remove("sherlock"); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("-g").arg("!*.rs").arg("Sherlock"); + + eqnice!("file.py:Sherlock\n", cmd.stdout()); +}); + +rgtest!(glob_case_insensitive, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.HTML", "Sherlock"); + cmd.arg("--iglob").arg("*.html").arg("Sherlock"); + + eqnice!("file.HTML:Sherlock\n", cmd.stdout()); +}); + +rgtest!(glob_case_sensitive, |dir: Dir, mut 
cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file1.HTML", "Sherlock"); + dir.create("file2.html", "Sherlock"); + cmd.arg("--glob").arg("*.html").arg("Sherlock"); + + eqnice!("file2.html:Sherlock\n", cmd.stdout()); +}); + +rgtest!(byte_offset_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-b").arg("-o").arg("Sherlock"); + + let expected = "\ +sherlock:56:Sherlock +sherlock:177:Sherlock +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(count, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--count").arg("Sherlock"); + + let expected = "sherlock:2\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(count_matches, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--count-matches").arg("the"); + + let expected = "sherlock:4\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(count_matches_inverted, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--count-matches").arg("--invert-match").arg("Sherlock"); + + let expected = "sherlock:4\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(count_matches_via_only, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--count").arg("--only-matching").arg("the"); + + let expected = "sherlock:4\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(files_with_matches, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--files-with-matches").arg("Sherlock"); + + let expected = "sherlock\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(files_without_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "foo"); + cmd.arg("--files-without-match").arg("Sherlock"); + + let expected = "file.py\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(after_context, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + 
cmd.arg("-A").arg("1").arg("Sherlock").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(after_context_line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-A").arg("1").arg("-n").arg("Sherlock").arg("sherlock"); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2-Holmeses, success in the province of detective work must always +3:be, to a very large extent, the result of luck. Sherlock Holmes +4-can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(before_context, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-B").arg("1").arg("Sherlock").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(before_context_line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-B").arg("1").arg("-n").arg("Sherlock").arg("sherlock"); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2-Holmeses, success in the province of detective work must always +3:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(context, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-C").arg("1").arg("world|attached").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +-- +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(context_line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-C").arg("1").arg("-n").arg("world|attached").arg("sherlock"); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2-Holmeses, success in the province of detective work must always +-- +5-but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(max_filesize_parse_errro_length, |_: Dir, mut cmd: TestCommand| { + cmd.arg("--max-filesize").arg("44444444444444444444"); + cmd.assert_non_empty_stderr(); +}); + +rgtest!(max_filesize_parse_error_suffix, |_: Dir, mut cmd: TestCommand| { + cmd.arg("--max-filesize").arg("45k"); + cmd.assert_non_empty_stderr(); +}); + +rgtest!(max_filesize_parse_no_suffix, |dir: Dir, mut cmd: TestCommand| { + dir.create_size("foo", 40); + dir.create_size("bar", 60); + cmd.arg("--max-filesize").arg("50").arg("--files"); + + eqnice!("foo\n", cmd.stdout()); +}); + +rgtest!(max_filesize_parse_k_suffix, |dir: Dir, mut cmd: TestCommand| { + dir.create_size("foo", 3048); + dir.create_size("bar", 4100); + cmd.arg("--max-filesize").arg("4K").arg("--files"); + + eqnice!("foo\n", cmd.stdout()); +}); + +rgtest!(max_filesize_parse_m_suffix, |dir: Dir, mut cmd: TestCommand| { + dir.create_size("foo", 1000000); + dir.create_size("bar", 1400000); + 
cmd.arg("--max-filesize").arg("1M").arg("--files"); + + eqnice!("foo\n", cmd.stdout()); +}); + +rgtest!(max_filesize_suffix_overflow, |dir: Dir, mut cmd: TestCommand| { + dir.create_size("foo", 1000000); + + // 2^35 * 2^30 would otherwise overflow + cmd.arg("--max-filesize").arg("34359738368G").arg("--files"); + cmd.assert_non_empty_stderr(); +}); + +rgtest!(ignore_hidden, |dir: Dir, mut cmd: TestCommand| { + dir.create(".sherlock", SHERLOCK); + cmd.arg("Sherlock").assert_err(); +}); + +rgtest!(no_ignore_hidden, |dir: Dir, mut cmd: TestCommand| { + dir.create(".sherlock", SHERLOCK); + cmd.arg("--hidden").arg("Sherlock"); + + let expected = "\ +.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(ignore_git, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create_dir(".git"); + dir.create(".gitignore", "sherlock\n"); + cmd.arg("Sherlock"); + + cmd.assert_err(); +}); + +rgtest!(ignore_generic, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create(".ignore", "sherlock\n"); + cmd.arg("Sherlock"); + + cmd.assert_err(); +}); + +rgtest!(ignore_ripgrep, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create(".rgignore", "sherlock\n"); + cmd.arg("Sherlock"); + + cmd.assert_err(); +}); + +rgtest!(no_ignore, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create(".gitignore", "sherlock\n"); + cmd.arg("--no-ignore").arg("Sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(ignore_git_parent, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create("foo/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + + // Even though we search in foo/, which has no .gitignore, ripgrep will + // traverse parent directories and respect the gitignore files found. + cmd.current_dir(dir.path().join("foo")); + cmd.assert_err(); +}); + +rgtest!(ignore_git_parent_stop, |dir: Dir, mut cmd: TestCommand| { + // This tests that searching parent directories for .gitignore files stops + // after it sees a .git directory. To test this, we create this directory + // hierarchy: + // + // .gitignore (contains `sherlock`) + // foo/ + // .git/ + // bar/ + // sherlock + // + // And we perform the search inside `foo/bar/`. ripgrep will stop looking + // for .gitignore files after it sees `foo/.git/`, and therefore not + // respect the top-level `.gitignore` containing `sherlock`. + dir.create(".gitignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create_dir("foo/.git"); + dir.create_dir("foo/bar"); + dir.create("foo/bar/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + cmd.current_dir(dir.path().join("foo").join("bar")); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like ignore_git_parent_stop, but with a .git file instead of a .git +// directory. +rgtest!(ignore_git_parent_stop_file, |dir: Dir, mut cmd: TestCommand| { + // This tests that searching parent directories for .gitignore files stops + // after it sees a .git *file*. A .git file is used for submodules. 
To test + // this, we create this directory hierarchy: + // + // .gitignore (contains `sherlock`) + // foo/ + // .git + // bar/ + // sherlock + // + // And we perform the search inside `foo/bar/`. ripgrep will stop looking + // for .gitignore files after it sees `foo/.git`, and therefore not + // respect the top-level `.gitignore` containing `sherlock`. + dir.create(".gitignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create("foo/.git", ""); + dir.create_dir("foo/bar"); + dir.create("foo/bar/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + cmd.current_dir(dir.path().join("foo").join("bar")); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(ignore_ripgrep_parent_no_stop, |dir: Dir, mut cmd: TestCommand| { + // This is like the `ignore_git_parent_stop` test, except it checks that + // ripgrep *doesn't* stop checking for .rgignore files. + dir.create(".rgignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create_dir("foo/.git"); + dir.create_dir("foo/bar"); + dir.create("foo/bar/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + cmd.current_dir(dir.path().join("foo").join("bar")); + + // The top-level .rgignore applies. + cmd.assert_err(); +}); + +rgtest!(no_parent_ignore_git, |dir: Dir, mut cmd: TestCommand| { + // Set up a directory hierarchy like this: + // + // .git/ + // .gitignore + // foo/ + // .gitignore + // sherlock + // watson + // + // Where `.gitignore` contains `sherlock` and `foo/.gitignore` contains + // `watson`. + // + // Now *do the search* from the foo directory. By default, ripgrep will + // search parent directories for .gitignore files. The --no-ignore-parent + // flag should prevent that. At the same time, the `foo/.gitignore` file + // will still be respected (since the search is happening in `foo/`). 
+ // + // In other words, we should only see results from `sherlock`, not from + // `watson`. + dir.create_dir(".git"); + dir.create(".gitignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create("foo/.gitignore", "watson\n"); + dir.create("foo/sherlock", SHERLOCK); + dir.create("foo/watson", SHERLOCK); + cmd.arg("--no-ignore-parent").arg("Sherlock"); + cmd.current_dir(dir.path().join("foo")); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(symlink_nofollow, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + dir.create_dir("foo/bar"); + dir.link_dir("foo/baz", "foo/bar/baz"); + dir.create_dir("foo/baz"); + dir.create("foo/baz/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + cmd.current_dir(dir.path().join("foo/bar")); + + cmd.assert_err(); +}); + +#[cfg(not(windows))] +rgtest!(symlink_follow, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + dir.create_dir("foo/bar"); + dir.create_dir("foo/baz"); + dir.create("foo/baz/sherlock", SHERLOCK); + dir.link_dir("foo/baz", "foo/bar/baz"); + cmd.arg("-L").arg("Sherlock"); + cmd.current_dir(dir.path().join("foo/bar")); + + let expected = "\ +baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(unrestricted1, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create(".gitignore", "sherlock\n"); + cmd.arg("-u").arg("Sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(unrestricted2, |dir: Dir, mut cmd: TestCommand| { + dir.create(".sherlock", SHERLOCK); + cmd.arg("-uu").arg("Sherlock"); + + let expected = "\ +.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(unrestricted3, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("-uuu").arg("foo"); + + let expected = "\ +file:foo\x00bar +file:foo\x00baz +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--vimgrep").arg("Sherlock|Watson"); + + let expected = "\ +sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes +sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(vimgrep_no_line, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--vimgrep").arg("-N").arg("Sherlock|Watson"); + + let expected = "\ +sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:49:be, to a very large extent, the result of luck. 
Sherlock Holmes +sherlock:12:but Doctor Watson has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(vimgrep_no_line_no_column, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--vimgrep").arg("-N").arg("--no-column").arg("Sherlock|Watson"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +sherlock:but Doctor Watson has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(preprocessing, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("xzcat") { + return; + } + + dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz")); + cmd.arg("--pre").arg("xzcat").arg("Sherlock").arg("sherlock.xz"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(preprocessing_glob, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("xzcat") { + return; + } + + dir.create("sherlock", SHERLOCK); + dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz")); + cmd.args(&["--pre", "xzcat", "--pre-glob", "*.xz", "Sherlock"]); + + let expected = "\ +sherlock.xz:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock.xz:be, to a very large extent, the result of luck. Sherlock Holmes +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); +}); + +rgtest!(compressed_gzip, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("gzip") { + return; + } + + dir.create_bytes("sherlock.gz", include_bytes!("./data/sherlock.gz")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.gz"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_bzip2, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("bzip2") { + return; + } + + dir.create_bytes("sherlock.bz2", include_bytes!("./data/sherlock.bz2")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.bz2"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_xz, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("xz") { + return; + } + + dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.xz"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_lz4, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("lz4") { + return; + } + + dir.create_bytes("sherlock.lz4", include_bytes!("./data/sherlock.lz4")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.lz4"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_lzma, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("xz") { + return; + } + + dir.create_bytes("sherlock.lzma", include_bytes!("./data/sherlock.lzma")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.lzma"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_failing_gzip, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("gzip") { + return; + } + + dir.create("sherlock.gz", SHERLOCK); + cmd.arg("-z").arg("Sherlock").arg("sherlock.gz"); + + cmd.assert_non_empty_stderr(); +}); + +rgtest!(binary_nosearch, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("foo").arg("file"); + + cmd.assert_err(); +}); + +// The following two tests show a discrepancy in search results between +// searching with memory mapped files and stream searching. Stream searching +// uses a heuristic (that GNU grep also uses) where NUL bytes are replaced with +// the EOL terminator, which tends to avoid allocating large amounts of memory +// for really long "lines." The memory map searcher has no need to worry about +// such things, and more than that, it would be pretty hard for it to match the +// semantics of streaming search in this case. +// +// Binary files with lots of NULs aren't really part of the use case of ripgrep +// (or any other grep-like tool for that matter), so we shouldn't feel too bad +// about it. 
+rgtest!(binary_search_mmap, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("-a").arg("--mmap").arg("foo").arg("file"); + eqnice!("foo\x00bar\nfoo\x00baz\n", cmd.stdout()); +}); + +rgtest!(binary_search_no_mmap, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file"); + eqnice!("foo\x00bar\nfoo\x00baz\n", cmd.stdout()); +}); + +rgtest!(files, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", ""); + dir.create_dir("dir"); + dir.create("dir/file", ""); + cmd.arg("--files"); + + eqnice!(sort_lines("file\ndir/file\n"), sort_lines(&cmd.stdout())); +}); + +rgtest!(type_list, |_: Dir, mut cmd: TestCommand| { + cmd.arg("--type-list"); + // This can change over time, so just make sure we print something. + assert!(!cmd.stdout().is_empty()); +}); diff -Nru ripgrep-0.6.0/tests/multiline.rs ripgrep-0.10.0.3/tests/multiline.rs --- ripgrep-0.6.0/tests/multiline.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/tests/multiline.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,109 @@ +use hay::SHERLOCK; +use util::{Dir, TestCommand}; + +// This tests that multiline matches that span multiple lines, but where +// multiple matches may begin and end on the same line work correctly. +rgtest!(overlap1, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "xxx\nabc\ndefxxxabc\ndefxxx\nxxx"); + cmd.arg("-n").arg("-U").arg("abc\ndef").arg("test"); + eqnice!("2:abc\n3:defxxxabc\n4:defxxx\n", cmd.stdout()); +}); + +// Like overlap1, but tests the case where one match ends at precisely the same +// location at which the next match begins. +rgtest!(overlap2, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "xxx\nabc\ndefabc\ndefxxx\nxxx"); + cmd.arg("-n").arg("-U").arg("abc\ndef").arg("test"); + eqnice!("2:abc\n3:defabc\n4:defxxx\n", cmd.stdout()); +}); + +// Tests that even in a multiline search, a '.' does not match a newline. 
+rgtest!(dot_no_newline, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", "-U", "of this world.+detective work", "sherlock", + ]); + cmd.assert_err(); +}); + +// Tests that the --multiline-dotall flag causes '.' to match a newline. +rgtest!(dot_all, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", "-U", "--multiline-dotall", + "of this world.+detective work", "sherlock", + ]); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +"; + eqnice!(expected, cmd.stdout()); +}); + +// Tests that --only-matching works in multiline mode. +rgtest!(only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", "-U", "--only-matching", + r"Watson|Sherlock\p{Any}+?Holmes", "sherlock", + ]); + + let expected = "\ +1:Watson +1:Sherlock +2:Holmes +3:Sherlock Holmes +5:Watson +"; + eqnice!(expected, cmd.stdout()); +}); + +// Tests that --vimgrep works in multiline mode. +rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", "-U", "--vimgrep", + r"Watson|Sherlock\p{Any}+?Holmes", "sherlock", + ]); + + let expected = "\ +sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:2:57:Holmeses, success in the province of detective work must always +sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes +sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +// Tests that multiline search works when reading from stdin. This is an +// important test because multiline search must read the entire contents of +// what it is searching into memory before executing the search. 
+rgtest!(stdin, |_: Dir, mut cmd: TestCommand| { + cmd.args(&[ + "-n", "-U", r"of this world\p{Any}+?detective work", + ]); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +"; + eqnice!(expected, cmd.pipe(SHERLOCK)); +}); + +// Test that multiline search and contextual matches work. +rgtest!(context, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", "-U", "-C1", + r"detective work\p{Any}+?result of luck", "sherlock", + ]); + + let expected = "\ +1-For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +3:be, to a very large extent, the result of luck. Sherlock Holmes +4-can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); diff -Nru ripgrep-0.6.0/tests/regression.rs ripgrep-0.10.0.3/tests/regression.rs --- ripgrep-0.6.0/tests/regression.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/tests/regression.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,564 @@ +use hay::SHERLOCK; +use util::{Dir, TestCommand, sort_lines}; + +// See: https://github.com/BurntSushi/ripgrep/issues/16 +rgtest!(r16, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "ghi/"); + dir.create_dir("ghi"); + dir.create_dir("def/ghi"); + dir.create("ghi/toplevel.txt", "xyz"); + dir.create("def/ghi/subdir.txt", "xyz"); + + cmd.arg("xyz").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/25 +rgtest!(r25, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "/llvm/"); + dir.create_dir("src/llvm"); + dir.create("src/llvm/foo", "test"); + + cmd.arg("test"); + eqnice!("src/llvm/foo:test\n", cmd.stdout()); + + cmd.current_dir(dir.path().join("src")); + eqnice!("llvm/foo:test\n", cmd.stdout()); +}); + +// See: 
https://github.com/BurntSushi/ripgrep/issues/30 +rgtest!(r30, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", "vendor/**\n!vendor/manifest"); + dir.create_dir("vendor"); + dir.create("vendor/manifest", "test"); + + eqnice!("vendor/manifest:test\n", cmd.arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/49 +rgtest!(r49, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", "foo/bar"); + dir.create_dir("test/foo/bar"); + dir.create("test/foo/bar/baz", "test"); + + cmd.arg("xyz").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/50 +rgtest!(r50, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", "XXX/YYY/"); + dir.create_dir("abc/def/XXX/YYY"); + dir.create_dir("ghi/XXX/YYY"); + dir.create("abc/def/XXX/YYY/bar", "test"); + dir.create("ghi/XXX/YYY/bar", "test"); + + cmd.arg("xyz").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/64 +rgtest!(r64, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("dir"); + dir.create_dir("foo"); + dir.create("dir/abc", ""); + dir.create("foo/abc", ""); + + eqnice!("foo/abc\n", cmd.arg("--files").arg("foo").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/65 +rgtest!(r65, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "a/"); + dir.create_dir("a"); + dir.create("a/foo", "xyz"); + dir.create("a/bar", "xyz"); + + cmd.arg("xyz").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/67 +rgtest!(r67, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "/*\n!/dir"); + dir.create_dir("dir"); + dir.create_dir("foo"); + dir.create("foo/bar", "test"); + dir.create("dir/bar", "test"); + + eqnice!("dir/bar:test\n", cmd.arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/87 +rgtest!(r87, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", 
"foo\n**no-vcs**"); + dir.create("foo", "test"); + + cmd.arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/90 +rgtest!(r90, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "!.foo"); + dir.create(".foo", "test"); + + eqnice!(".foo:test\n", cmd.arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/93 +rgtest!(r93, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "192.168.1.1"); + + eqnice!("foo:192.168.1.1\n", cmd.arg(r"(\d{1,3}\.){3}\d{1,3}").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/99 +rgtest!(r99, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo1", "test"); + dir.create("foo2", "zzz"); + dir.create("bar", "test"); + + eqnice!( + sort_lines("bar\ntest\n\nfoo1\ntest\n"), + sort_lines(&cmd.arg("-j1").arg("--heading").arg("test").stdout()) + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/105 +rgtest!(r105_part1, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "zztest"); + + eqnice!("foo:1:3:zztest\n", cmd.arg("--vimgrep").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/105 +rgtest!(r105_part2, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "zztest"); + + eqnice!("foo:1:3:zztest\n", cmd.arg("--column").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/127 +rgtest!(r127, |dir: Dir, mut cmd: TestCommand| { + // Set up a directory hierarchy like this: + // + // .gitignore + // foo/ + // sherlock + // watson + // + // Where `.gitignore` contains `foo/sherlock`. + // + // ripgrep should ignore 'foo/sherlock' giving us results only from + // 'foo/watson' but on Windows ripgrep will include both 'foo/sherlock' and + // 'foo/watson' in the search results. 
+ dir.create_dir(".git"); + dir.create(".gitignore", "foo/sherlock\n"); + dir.create_dir("foo"); + dir.create("foo/sherlock", SHERLOCK); + dir.create("foo/watson", SHERLOCK); + + let expected = "\ +foo/watson:For the Doctor Watsons of this world, as opposed to the Sherlock +foo/watson:be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq!(expected, cmd.arg("Sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/128 +rgtest!(r128, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("foo", b"01234567\x0b\n\x0b\n\x0b\n\x0b\nx"); + + eqnice!("foo:5:x\n", cmd.arg("-n").arg("x").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/131 +// +// TODO(burntsushi): Darwin doesn't like this test for some reason. Probably +// due to the weird file path. +#[cfg(not(target_os = "macos"))] +rgtest!(r131, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "TopÑapa"); + dir.create("TopÑapa", "test"); + + cmd.arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/137 +// +// TODO(burntsushi): Figure out how to make this test work on Windows. Right +// now it gives "access denied" errors when trying to create a file symlink. +// For now, disable test on Windows. +#[cfg(not(windows))] +rgtest!(r137, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.link_file("sherlock", "sym1"); + dir.link_file("sherlock", "sym2"); + + let expected = "\ +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +sym1:For the Doctor Watsons of this world, as opposed to the Sherlock +sym1:be, to a very large extent, the result of luck. Sherlock Holmes +sym2:For the Doctor Watsons of this world, as opposed to the Sherlock +sym2:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + cmd.arg("-j1").arg("Sherlock").arg("./").arg("sym1").arg("sym2"); + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/156 +rgtest!(r156, |dir: Dir, mut cmd: TestCommand| { + let expected = r#"#parse('widgets/foo_bar_macros.vm') +#parse ( 'widgets/mobile/foo_bar_macros.vm' ) +#parse ("widgets/foobarhiddenformfields.vm") +#parse ( "widgets/foo_bar_legal.vm" ) +#include( 'widgets/foo_bar_tips.vm' ) +#include('widgets/mobile/foo_bar_macros.vm') +#include ("widgets/mobile/foo_bar_resetpw.vm") +#parse('widgets/foo-bar-macros.vm') +#parse ( 'widgets/mobile/foo-bar-macros.vm' ) +#parse ("widgets/foo-bar-hiddenformfields.vm") +#parse ( "widgets/foo-bar-legal.vm" ) +#include( 'widgets/foo-bar-tips.vm' ) +#include('widgets/mobile/foo-bar-macros.vm') +#include ("widgets/mobile/foo-bar-resetpw.vm") +"#; + dir.create("testcase.txt", expected); + + cmd.arg("-N"); + cmd.arg(r#"#(?:parse|include)\s*\(\s*(?:"|')[./A-Za-z_-]+(?:"|')"#); + cmd.arg("testcase.txt"); + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/184 +rgtest!(r184, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", ".*"); + dir.create_dir("foo/bar"); + dir.create("foo/bar/baz", "test"); + + cmd.arg("test"); + eqnice!("foo/bar/baz:test\n", cmd.stdout()); + + cmd.current_dir(dir.path().join("./foo/bar")); + eqnice!("baz:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/199 +rgtest!(r199, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "tEsT"); + + eqnice!("foo:tEsT\n", cmd.arg("--smart-case").arg(r"\btest\b").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/206 +rgtest!(r206, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + dir.create("foo/bar.txt", "test"); + + cmd.arg("test").arg("-g").arg("*.txt"); + eqnice!("foo/bar.txt:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/210 
+#[cfg(unix)] +rgtest!(r210, |dir: Dir, mut cmd: TestCommand| { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let badutf8 = OsStr::from_bytes(&b"foo\xffbar"[..]); + + // APFS does not support creating files with invalid UTF-8 bytes. + // https://github.com/BurntSushi/ripgrep/issues/559 + if dir.try_create(badutf8, "test").is_ok() { + cmd.arg("-H").arg("test").arg(badutf8); + assert_eq!(b"foo\xffbar:test\n".to_vec(), cmd.output().stdout); + } +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/228 +rgtest!(r228, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + + cmd.arg("--ignore-file").arg("foo").arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/229 +rgtest!(r229, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "economie"); + + cmd.arg("-S").arg("[E]conomie").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/251 +rgtest!(r251, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "привет\nПривет\nПрИвЕт"); + + let expected = "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n"; + eqnice!(expected, cmd.arg("-i").arg("привет").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/256 +#[cfg(not(windows))] +rgtest!(r256, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("bar"); + dir.create("bar/baz", "test"); + dir.link_dir("bar", "foo"); + + eqnice!("foo/baz:test\n", cmd.arg("test").arg("foo").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/256 +#[cfg(not(windows))] +rgtest!(r256_j1, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("bar"); + dir.create("bar/baz", "test"); + dir.link_dir("bar", "foo"); + + eqnice!("foo/baz:test\n", cmd.arg("-j1").arg("test").arg("foo").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/270 +rgtest!(r270, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "-test"); + + cmd.arg("-e").arg("-test"); + eqnice!("foo:-test\n", cmd.stdout()); +}); + +// See: 
https://github.com/BurntSushi/ripgrep/issues/279 +rgtest!(r279, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test"); + + eqnice!("", cmd.arg("-q").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/391 +rgtest!(r391, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create("lock", ""); + dir.create("bar.py", ""); + dir.create(".git/packed-refs", ""); + dir.create(".git/description", ""); + + cmd.args(&[ + "--no-ignore", "--hidden", "--follow", "--files", + "--glob", + "!{.git,node_modules,plugged}/**", + "--glob", + "*.{js,json,php,md,styl,scss,sass,pug,html,config,py,cpp,c,go,hs}", + ]); + eqnice!("bar.py\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/405 +rgtest!(r405, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo/bar"); + dir.create_dir("bar/foo"); + dir.create("foo/bar/file1.txt", "test"); + dir.create("bar/foo/file2.txt", "test"); + + cmd.arg("-g").arg("!/foo/**").arg("test"); + eqnice!("bar/foo/file2.txt:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/428 +#[cfg(not(windows))] +rgtest!(r428_color_context_path, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", "foo\nbar"); + cmd.args(&[ + "-A1", "-H", "--no-heading", "-N", + "--colors=match:none", "--color=always", + "foo", + ]); + + let expected = format!( + "{colored_path}:foo\n{colored_path}-bar\n", + colored_path= + "\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6dsherlock\x1b\x5b\x30\x6d" + ); + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/428 +rgtest!(r428_unrecognized_style, |_: Dir, mut cmd: TestCommand| { + cmd.arg("--colors=match:style:").arg("Sherlock"); + cmd.assert_err(); + + let output = cmd.cmd().output().unwrap(); + let stderr = String::from_utf8_lossy(&output.stderr); + let expected = "\ +unrecognized style attribute ''. Choose from: nobold, bold, nointense, \ +intense, nounderline, underline. 
+"; + eqnice!(expected, stderr); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/451 +rgtest!(r451_only_matching_as_in_issue, |dir: Dir, mut cmd: TestCommand| { + dir.create("digits.txt", "1 2 3\n"); + cmd.arg("--only-matching").arg(r"[0-9]+").arg("digits.txt"); + + let expected = "\ +1 +2 +3 +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/451 +rgtest!(r451_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("digits.txt", "1 2 3\n123\n"); + cmd.args(&[ + "--only-matching", "--column", r"[0-9]", "digits.txt", + ]); + + let expected = "\ +1:1:1 +1:3:2 +1:5:3 +2:1:1 +2:2:2 +2:3:3 +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/483 +rgtest!(r483_matching_no_stdout, |dir: Dir, mut cmd: TestCommand| { + dir.create("file.py", ""); + cmd.arg("--quiet").arg("--files").arg("--glob").arg("*.py"); + eqnice!("", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/483 +rgtest!(r483_non_matching_exit_code, |dir: Dir, mut cmd: TestCommand| { + dir.create("file.rs", ""); + cmd.arg("--quiet").arg("--files").arg("--glob").arg("*.py"); + cmd.assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/493 +rgtest!(r493, |dir: Dir, mut cmd: TestCommand| { + dir.create("input.txt", "peshwaship 're seminomata"); + + cmd.arg("-o").arg(r"\b 're \b").arg("input.txt"); + assert_eq!(" 're \n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/506 +rgtest!(r506_word_not_parenthesized, |dir: Dir, mut cmd: TestCommand| { + dir.create("wb.txt", "min minimum amin\nmax maximum amax"); + cmd.arg("-w").arg("-o").arg("min|max").arg("wb.txt"); + eqnice!("min\nmax\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/553 +rgtest!(r553_switch, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as 
opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + cmd.arg("-i").arg("sherlock"); + eqnice!(expected, cmd.stdout()); + + // Repeat the `i` flag to make sure everything still works. + eqnice!(expected, cmd.arg("-i").stdout()); +}); + +rgtest!(r553_flag, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +-- +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + cmd.arg("-C").arg("1").arg(r"world|attached").arg("sherlock"); + eqnice!(expected, cmd.stdout()); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.arg("-C").arg("0").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/568 +rgtest!(r568_leading_hyphen_option_args, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo bar -baz\n"); + cmd.arg("-e-baz").arg("-e").arg("-baz").arg("file"); + eqnice!("foo bar -baz\n", cmd.stdout()); + + let mut cmd = dir.command(); + cmd.arg("-rni").arg("bar").arg("file"); + eqnice!("foo ni -baz\n", cmd.stdout()); + + let mut cmd = dir.command(); + cmd.arg("-r").arg("-n").arg("-i").arg("bar").arg("file"); + eqnice!("foo -n -baz\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/599 +// +// This test used to check that we emitted color escape sequences even for +// empty matches, but with the addition of the JSON output format, clients no +// longer need to rely on escape sequences to parse matches. Therefore, we no +// longer emit useless escape sequences. 
+rgtest!(r599, |dir: Dir, mut cmd: TestCommand| { + dir.create("input.txt", "\n\ntest\n"); + cmd.args(&[ + "--color", "ansi", + "--colors", "path:none", + "--colors", "line:none", + "--colors", "match:fg:red", + "--colors", "match:style:nobold", + "--line-number", + r"^$", + "input.txt", + ]); + + let expected = "\ +1: +2: +"; + eqnice_repr!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/693 +rgtest!(r693_context_in_contextless_mode, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "xyz\n"); + dir.create("bar", "xyz\n"); + + cmd.arg("-C1").arg("-c").arg("--sort-files").arg("xyz"); + eqnice!("bar:1\nfoo:1\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/807 +rgtest!(r807, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", ".a/b"); + dir.create_dir(".a/b"); + dir.create_dir(".a/c"); + dir.create(".a/b/file", "test"); + dir.create(".a/c/file", "test"); + + eqnice!(".a/c/file:test\n", cmd.arg("--hidden").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/900 +rgtest!(r900, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("pat", ""); + + cmd.arg("-fpat").arg("sherlock").assert_err(); +}); diff -Nru ripgrep-0.6.0/tests/tests.rs ripgrep-0.10.0.3/tests/tests.rs --- ripgrep-0.6.0/tests/tests.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/tests/tests.rs 2018-09-10 21:10:55.000000000 +0000 @@ -1,1771 +1,25 @@ -/*! -This module contains *integration* tests. Their purpose is to test the CLI -interface. Namely, that passing a flag does what it says on the tin. - -Tests for more fine grained behavior (like the search or the globber) should be -unit tests in their respective modules. 
-*/ - -#![allow(dead_code, unused_imports)] - -use std::process::Command; - -use workdir::WorkDir; +extern crate serde; +#[macro_use] +extern crate serde_derive; +extern crate serde_json; + +// Macros useful for testing. +#[macro_use] +mod macros; +// Corpora. mod hay; -mod workdir; - -macro_rules! sherlock { - ($name:ident, $fun:expr) => { - sherlock!($name, "Sherlock", $fun); - }; - ($name:ident, $query:expr, $fun:expr) => { - sherlock!($name, $query, "sherlock", $fun); - }; - ($name:ident, $query:expr, $path:expr, $fun:expr) => { - #[test] - fn $name() { - let wd = WorkDir::new(stringify!($name)); - wd.create("sherlock", hay::SHERLOCK); - let mut cmd = wd.command(); - cmd.arg($query).arg($path); - $fun(wd, cmd); - } - }; -} - -macro_rules! clean { - ($name:ident, $query:expr, $path:expr, $fun:expr) => { - #[test] - fn $name() { - let wd = WorkDir::new(stringify!($name)); - let mut cmd = wd.command(); - cmd.arg($query).arg($path); - $fun(wd, cmd); - } - }; -} - -fn path(unix: &str) -> String { - if cfg!(windows) { - unix.replace("/", "\\") - } else { - unix.to_string() - } -} - -fn paths(unix: &[&str]) -> Vec { - let mut xs: Vec<_> = unix.iter().map(|s| path(s)).collect(); - xs.sort(); - xs -} - -fn paths_from_stdout(stdout: String) -> Vec { - let mut paths: Vec<_> = stdout.lines().map(|s| { - s.split(":").next().unwrap().to_string() - }).collect(); - paths.sort(); - paths -} - -fn sort_lines(lines: &str) -> String { - let mut lines: Vec = - lines.trim().lines().map(|s| s.to_owned()).collect(); - lines.sort(); - format!("{}\n", lines.join("\n")) -} - -sherlock!(single_file, |wd: WorkDir, mut cmd| { - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -be, to a very large extent, the result of luck. 
Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd| { - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(line_numbers, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-n"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -1:For the Doctor Watsons of this world, as opposed to the Sherlock -3:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(columns, |wd: WorkDir, mut cmd: Command| { - cmd.arg("--column"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -1:57:For the Doctor Watsons of this world, as opposed to the Sherlock -3:49:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(with_filename, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-H"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(with_heading, |wd: WorkDir, mut cmd: Command| { - // This forces the issue since --with-filename is disabled by default - // when searching one fil.e - cmd.arg("--with-filename").arg("--heading"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock -For the Doctor Watsons of this world, as opposed to the Sherlock -be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(with_heading_default, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - // Search two or more and get --with-filename enabled by default. - // Use -j1 to get deterministic results. 
- wd.create("foo", "Sherlock Holmes lives on Baker Street."); - cmd.arg("-j1").arg("--heading"); - let lines: String = wd.stdout(&mut cmd); - let expected1 = "\ -foo -Sherlock Holmes lives on Baker Street. - -sherlock -For the Doctor Watsons of this world, as opposed to the Sherlock -be, to a very large extent, the result of luck. Sherlock Holmes -"; - let expected2 = "\ -sherlock -For the Doctor Watsons of this world, as opposed to the Sherlock -be, to a very large extent, the result of luck. Sherlock Holmes - -foo -Sherlock Holmes lives on Baker Street. -"; - if lines != expected1 { - assert_eq!(lines, expected2); - } else { - assert_eq!(lines, expected1); - } -}); - -sherlock!(inverted, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-v"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -Holmeses, success in the province of detective work must always -can extract a clew from a wisp of straw or a flake of cigar ash; -but Doctor Watson has to have it taken out for him and dusted, -and exhibited clearly, with a label attached. -"; - assert_eq!(lines, expected); -}); - -sherlock!(inverted_line_numbers, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-n").arg("-v"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -2:Holmeses, success in the province of detective work must always -4:can extract a clew from a wisp of straw or a flake of cigar ash; -5:but Doctor Watson has to have it taken out for him and dusted, -6:and exhibited clearly, with a label attached. -"; - assert_eq!(lines, expected); -}); - -sherlock!(case_insensitive, "sherlock", |wd: WorkDir, mut cmd: Command| { - cmd.arg("-i"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -be, to a very large extent, the result of luck. 
Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(word, "as", |wd: WorkDir, mut cmd: Command| { - cmd.arg("-w"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -"; - assert_eq!(lines, expected); -}); - -sherlock!(line, "Watson|and exhibited clearly, with a label attached.", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("-x"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -and exhibited clearly, with a label attached. -"; - assert_eq!(lines, expected); -}); - -sherlock!(literal, "()", "file", |wd: WorkDir, mut cmd: Command| { - wd.create("file", "blib\n()\nblab\n"); - cmd.arg("-F"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "()\n"); -}); - -sherlock!(quiet, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-q"); - let lines: String = wd.stdout(&mut cmd); - assert!(lines.is_empty()); -}); - -sherlock!(replace, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-r").arg("FooBar"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the FooBar -be, to a very large extent, the result of luck. FooBar Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(replace_groups, "([A-Z][a-z]+) ([A-Z][a-z]+)", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("-r").arg("$2, $1"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Watsons, Doctor of this world, as opposed to the Sherlock -be, to a very large extent, the result of luck. Holmes, Sherlock -but Watson, Doctor has to have it taken out for him and dusted, -"; - assert_eq!(lines, expected); -}); - -sherlock!(replace_named_groups, "(?P[A-Z][a-z]+) (?P[A-Z][a-z]+)", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("-r").arg("$last, $first"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Watsons, Doctor of this world, as opposed to the Sherlock -be, to a very large extent, the result of luck. 
Holmes, Sherlock -but Watson, Doctor has to have it taken out for him and dusted, -"; - assert_eq!(lines, expected); -}); - -sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "Sherlock"); - wd.create("file.rs", "Sherlock"); - cmd.arg("-t").arg("rust"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.rs:Sherlock\n"); -}); - -sherlock!(file_types_all, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "Sherlock"); - cmd.arg("-t").arg("all"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); -}); - -sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create("file.py", "Sherlock"); - wd.create("file.rs", "Sherlock"); - cmd.arg("-T").arg("rust"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); -}); - -sherlock!(file_types_negate_all, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "Sherlock"); - cmd.arg("-T").arg("all"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes -"); -}); - -sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "Sherlock"); - wd.create("file.rs", "Sherlock"); - cmd.arg("--type-clear").arg("rust").arg("-t").arg("rust"); - wd.assert_err(&mut cmd); -}); - -sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "Sherlock"); - wd.create("file.rs", "Sherlock"); - wd.create("file.wat", "Sherlock"); - cmd.arg("--type-add").arg("wat:*.wat").arg("-t").arg("wat"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.wat:Sherlock\n"); -}); - -sherlock!(file_type_add_compose, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "Sherlock"); - wd.create("file.rs", "Sherlock"); - wd.create("file.wat", "Sherlock"); - cmd.arg("--type-add").arg("wat:*.wat"); - cmd.arg("--type-add").arg("combo:include:wat,py").arg("-t").arg("combo"); - let lines: String = wd.stdout(&mut cmd); - println!("{}", lines); - assert_eq!(sort_lines(&lines), "file.py:Sherlock\nfile.wat:Sherlock\n"); -}); - -sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "Sherlock"); - wd.create("file.rs", "Sherlock"); - cmd.arg("-g").arg("*.rs"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.rs:Sherlock\n"); -}); - -sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create("file.py", "Sherlock"); - wd.create("file.rs", "Sherlock"); - cmd.arg("-g").arg("!*.rs"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); -}); - -sherlock!(iglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file.HTML", "Sherlock"); - cmd.arg("--iglob").arg("*.html"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.HTML:Sherlock\n"); -}); - -sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file1.HTML", "Sherlock"); - 
wd.create("file2.html", "Sherlock"); - cmd.arg("--glob").arg("*.html"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file2.html:Sherlock\n"); -}); - -sherlock!(count, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - cmd.arg("--count"); - let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:2\n"; - assert_eq!(lines, expected); -}); - -sherlock!(files_with_matches, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - cmd.arg("--files-with-matches"); - let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock\n"; - assert_eq!(lines, expected); -}); - -sherlock!(files_without_matches, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "foo"); - cmd.arg("--files-without-match"); - let lines: String = wd.stdout(&mut cmd); - let expected = "file.py\n"; - assert_eq!(lines, expected); -}); - -sherlock!(after_context, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-A").arg("1"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -can extract a clew from a wisp of straw or a flake of cigar ash; -"; - assert_eq!(lines, expected); -}); - -sherlock!(after_context_line_numbers, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-A").arg("1").arg("-n"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -1:For the Doctor Watsons of this world, as opposed to the Sherlock -2-Holmeses, success in the province of detective work must always -3:be, to a very large extent, the result of luck. 
Sherlock Holmes -4-can extract a clew from a wisp of straw or a flake of cigar ash; -"; - assert_eq!(lines, expected); -}); - -sherlock!(before_context, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-B").arg("1"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(before_context_line_numbers, |wd: WorkDir, mut cmd: Command| { - cmd.arg("-B").arg("1").arg("-n"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -1:For the Doctor Watsons of this world, as opposed to the Sherlock -2-Holmeses, success in the province of detective work must always -3:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(context, "world|attached", |wd: WorkDir, mut cmd: Command| { - cmd.arg("-C").arg("1"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always --- -but Doctor Watson has to have it taken out for him and dusted, -and exhibited clearly, with a label attached. -"; - assert_eq!(lines, expected); -}); - -sherlock!(context_line_numbers, "world|attached", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("-C").arg("1").arg("-n"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -1:For the Doctor Watsons of this world, as opposed to the Sherlock -2-Holmeses, success in the province of detective work must always --- -5-but Doctor Watson has to have it taken out for him and dusted, -6:and exhibited clearly, with a label attached. 
-"; - assert_eq!(lines, expected); -}); - -sherlock!(max_filesize_parse_error_length, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--max-filesize").arg("44444444444444444444"); - wd.assert_err(&mut cmd); -}); - -sherlock!(max_filesize_parse_error_suffix, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--max-filesize").arg("45k"); - wd.assert_err(&mut cmd); -}); - -sherlock!(max_filesize_parse_no_suffix, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create_size("foo", 40); - wd.create_size("bar", 60); - - cmd.arg("--max-filesize").arg("50").arg("--files"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -foo -"; - assert_eq!(lines, expected); -}); - -sherlock!(max_filesize_parse_k_suffix, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create_size("foo", 3048); - wd.create_size("bar", 4100); - - cmd.arg("--max-filesize").arg("4K").arg("--files"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -foo -"; - assert_eq!(lines, expected); -}); - -sherlock!(max_filesize_parse_m_suffix, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create_size("foo", 1000000); - wd.create_size("bar", 1400000); - - cmd.arg("--max-filesize").arg("1M").arg("--files"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -foo -"; - assert_eq!(lines, expected); -}); - -sherlock!(max_filesize_suffix_overflow, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create_size("foo", 1000000); - - // 2^35 * 2^30 would otherwise overflow - cmd.arg("--max-filesize").arg("34359738368G").arg("--files"); - wd.assert_err(&mut cmd); -}); - -sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create(".sherlock", hay::SHERLOCK); - wd.assert_err(&mut cmd); -}); - -sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - 
wd.remove("sherlock"); - wd.create(".sherlock", hay::SHERLOCK); - - cmd.arg("--hidden"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(ignore_git, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "sherlock\n"); - wd.assert_err(&mut cmd); -}); - -sherlock!(ignore_generic, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".ignore", "sherlock\n"); - wd.assert_err(&mut cmd); -}); - -sherlock!(ignore_ripgrep, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".rgignore", "sherlock\n"); - wd.assert_err(&mut cmd); -}); - -sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "sherlock\n"); - cmd.arg("--no-ignore"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(ignore_git_parent, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create(".gitignore", "sherlock\n"); - wd.create_dir(".git"); - wd.create_dir("foo"); - wd.create("foo/sherlock", hay::SHERLOCK); - // Even though we search in foo/, which has no .gitignore, ripgrep will - // search parent directories and respect the gitignore files found. - cmd.current_dir(wd.path().join("foo")); - wd.assert_err(&mut cmd); -}); - -sherlock!(ignore_git_parent_stop, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - // This tests that searching parent directories for .gitignore files stops - // after it sees a .git directory. 
To test this, we create this directory - // hierarchy: - // - // .gitignore (contains `sherlock`) - // foo/ - // .git - // bar/ - // sherlock - // - // And we perform the search inside `foo/bar/`. ripgrep will stop looking - // for .gitignore files after it sees `foo/.git/`, and therefore not - // respect the top-level `.gitignore` containing `sherlock`. - wd.remove("sherlock"); - wd.create(".gitignore", "sherlock\n"); - wd.create_dir("foo"); - wd.create_dir("foo/.git"); - wd.create_dir("foo/bar"); - wd.create("foo/bar/sherlock", hay::SHERLOCK); - cmd.current_dir(wd.path().join("foo").join("bar")); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(ignore_ripgrep_parent_no_stop, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - // This is like the `ignore_git_parent_stop` test, except it checks that - // ripgrep *doesn't* stop checking for .rgignore files. - wd.remove("sherlock"); - wd.create(".rgignore", "sherlock\n"); - wd.create_dir("foo"); - wd.create_dir("foo/.git"); - wd.create_dir("foo/bar"); - wd.create("foo/bar/sherlock", hay::SHERLOCK); - cmd.current_dir(wd.path().join("foo").join("bar")); - // The top-level .rgignore applies. - wd.assert_err(&mut cmd); -}); - -sherlock!(no_parent_ignore_git, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - // Set up a directory hierarchy like this: - // - // .gitignore - // foo/ - // .gitignore - // sherlock - // watson - // - // Where `.gitignore` contains `sherlock` and `foo/.gitignore` contains - // `watson`. - // - // Now *do the search* from the foo directory. By default, ripgrep will - // search parent directories for .gitignore files. The --no-ignore-parent - // flag should prevent that. 
At the same time, the `foo/.gitignore` file - // will still be respected (since the search is happening in `foo/`). - // - // In other words, we should only see results from `sherlock`, not from - // `watson`. - wd.remove("sherlock"); - wd.create(".gitignore", "sherlock\n"); - wd.create_dir("foo"); - wd.create("foo/.gitignore", "watson\n"); - wd.create("foo/sherlock", hay::SHERLOCK); - wd.create("foo/watson", hay::SHERLOCK); - cmd.current_dir(wd.path().join("foo")); - cmd.arg("--no-ignore-parent"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -#[cfg(not(windows))] -sherlock!(symlink_nofollow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create_dir("foo"); - wd.create_dir("foo/bar"); - wd.link_dir("foo/baz", "foo/bar/baz"); - wd.create_dir("foo/baz"); - wd.create("foo/baz/sherlock", hay::SHERLOCK); - cmd.current_dir(wd.path().join("foo/bar")); - wd.assert_err(&mut cmd); -}); - -#[cfg(not(windows))] -sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create_dir("foo"); - wd.create_dir("foo/bar"); - wd.create_dir("foo/baz"); - wd.create("foo/baz/sherlock", hay::SHERLOCK); - wd.link_dir("foo/baz", "foo/bar/baz"); - cmd.arg("-L"); - cmd.current_dir(wd.path().join("foo/bar")); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -baz/sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes -"; - assert_eq!(lines, path(expected)); -}); - -sherlock!(unrestricted1, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "sherlock\n"); - cmd.arg("-u"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create(".sherlock", hay::SHERLOCK); - cmd.arg("-uu"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("file", "foo\x00bar\nfoo\x00baz\n"); - cmd.arg("-uuu"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n"); -}); - -sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| { - cmd.arg("--vimgrep"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:3:49:be, to a very large extent, the result of luck. 
Sherlock Holmes -sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, -"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/16 -clean!(regression_16, "xyz", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "ghi/"); - wd.create_dir("ghi"); - wd.create_dir("def/ghi"); - wd.create("ghi/toplevel.txt", "xyz"); - wd.create("def/ghi/subdir.txt", "xyz"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/25 -clean!(regression_25, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "/llvm/"); - wd.create_dir("src/llvm"); - wd.create("src/llvm/foo", "test"); - - let lines: String = wd.stdout(&mut cmd); - let expected = path("src/llvm/foo:test\n"); - assert_eq!(lines, expected); - - cmd.current_dir(wd.path().join("src")); - let lines: String = wd.stdout(&mut cmd); - let expected = path("llvm/foo:test\n"); - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/30 -clean!(regression_30, "test", ".", |wd: WorkDir, mut cmd: Command| { - if cfg!(windows) { - wd.create(".gitignore", "vendor/**\n!vendor\\manifest"); - } else { - wd.create(".gitignore", "vendor/**\n!vendor/manifest"); - } - wd.create_dir("vendor"); - wd.create("vendor/manifest", "test"); - - let lines: String = wd.stdout(&mut cmd); - let expected = path("vendor/manifest:test\n"); - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/49 -clean!(regression_49, "xyz", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "foo/bar"); - wd.create_dir("test/foo/bar"); - wd.create("test/foo/bar/baz", "test"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/50 -clean!(regression_50, "xyz", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "XXX/YYY/"); - wd.create_dir("abc/def/XXX/YYY"); - wd.create_dir("ghi/XXX/YYY"); - 
wd.create("abc/def/XXX/YYY/bar", "test"); - wd.create("ghi/XXX/YYY/bar", "test"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/65 -clean!(regression_65, "xyz", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "a/"); - wd.create_dir("a"); - wd.create("a/foo", "xyz"); - wd.create("a/bar", "xyz"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/67 -clean!(regression_67, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "/*\n!/dir"); - wd.create_dir("dir"); - wd.create_dir("foo"); - wd.create("foo/bar", "test"); - wd.create("dir/bar", "test"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("dir/bar:test\n")); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/87 -clean!(regression_87, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "foo\n**no-vcs**"); - wd.create("foo", "test"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/90 -clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "!.foo"); - wd.create(".foo", "test"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, ".foo:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/93 -clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create("foo", "192.168.1.1"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:192.168.1.1\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/99 -clean!(regression_99, "test", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create("foo1", "test"); - wd.create("foo2", "zzz"); - wd.create("bar", "test"); - cmd.arg("-j1").arg("--heading"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(sort_lines(&lines), sort_lines("bar\ntest\n\nfoo1\ntest\n")); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/105 
-clean!(regression_105_part1, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "zztest"); - cmd.arg("--vimgrep"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:3:zztest\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/105 -clean!(regression_105_part2, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "zztest"); - cmd.arg("--column"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:3:zztest\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/127 -clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - // Set up a directory hierarchy like this: - // - // .gitignore - // foo/ - // sherlock - // watson - // - // Where `.gitignore` contains `foo/sherlock`. - // - // ripgrep should ignore 'foo/sherlock' giving us results only from - // 'foo/watson' but on Windows ripgrep will include both 'foo/sherlock' and - // 'foo/watson' in the search results. - wd.create(".gitignore", "foo/sherlock\n"); - wd.create_dir("foo"); - wd.create("foo/sherlock", hay::SHERLOCK); - wd.create("foo/watson", hay::SHERLOCK); - - let lines: String = wd.stdout(&mut cmd); - let expected = format!("\ -{path}:For the Doctor Watsons of this world, as opposed to the Sherlock -{path}:be, to a very large extent, the result of luck. Sherlock Holmes -", path=path("foo/watson")); - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/128 -clean!(regression_128, "x", ".", |wd: WorkDir, mut cmd: Command| { - wd.create_bytes("foo", b"01234567\x0b\n\x0b\n\x0b\n\x0b\nx"); - cmd.arg("-n"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:5:x\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/131 -// -// TODO(burntsushi): Darwin doesn't like this test for some reason. 
-#[cfg(not(target_os = "macos"))] -clean!(regression_131, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "TopÑapa"); - wd.create("TopÑapa", "test"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/137 -// -// TODO(burntsushi): Figure out why Windows gives "access denied" errors -// when trying to create a file symlink. For now, disable test on Windows. -#[cfg(not(windows))] -sherlock!(regression_137, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { - wd.link_file("sherlock", "sym1"); - wd.link_file("sherlock", "sym2"); - cmd.arg("sym1"); - cmd.arg("sym2"); - cmd.arg("-j1"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -sym1:For the Doctor Watsons of this world, as opposed to the Sherlock -sym1:be, to a very large extent, the result of luck. Sherlock Holmes -sym2:For the Doctor Watsons of this world, as opposed to the Sherlock -sym2:be, to a very large extent, the result of luck. 
Sherlock Holmes -"; - assert_eq!(lines, path(expected)); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/156 -clean!( - regression_156, - r#"#(?:parse|include)\s*\(\s*(?:"|')[./A-Za-z_-]+(?:"|')"#, - "testcase.txt", -|wd: WorkDir, mut cmd: Command| { - const TESTCASE: &'static str = r#"#parse('widgets/foo_bar_macros.vm') -#parse ( 'widgets/mobile/foo_bar_macros.vm' ) -#parse ("widgets/foobarhiddenformfields.vm") -#parse ( "widgets/foo_bar_legal.vm" ) -#include( 'widgets/foo_bar_tips.vm' ) -#include('widgets/mobile/foo_bar_macros.vm') -#include ("widgets/mobile/foo_bar_resetpw.vm") -#parse('widgets/foo-bar-macros.vm') -#parse ( 'widgets/mobile/foo-bar-macros.vm' ) -#parse ("widgets/foo-bar-hiddenformfields.vm") -#parse ( "widgets/foo-bar-legal.vm" ) -#include( 'widgets/foo-bar-tips.vm' ) -#include('widgets/mobile/foo-bar-macros.vm') -#include ("widgets/mobile/foo-bar-resetpw.vm") -"#; - wd.create("testcase.txt", TESTCASE); - cmd.arg("-N"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, TESTCASE); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/184 -clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", ".*"); - wd.create_dir("foo/bar"); - wd.create("foo/bar/baz", "test"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("foo/bar/baz"))); - - cmd.current_dir(wd.path().join("./foo/bar")); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "baz:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/199 -clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "tEsT"); - cmd.arg("--smart-case"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:tEsT\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/206 -clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create_dir("foo"); - wd.create("foo/bar.txt", "test"); - 
cmd.arg("-g").arg("*.txt"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("foo/bar.txt"))); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/210 -#[cfg(unix)] -#[test] -fn regression_210() { - use std::ffi::OsStr; - use std::os::unix::ffi::OsStrExt; - - let badutf8 = OsStr::from_bytes(&b"foo\xffbar"[..]); - - let wd = WorkDir::new("regression_210"); - let mut cmd = wd.command(); - wd.create(badutf8, "test"); - cmd.arg("-H").arg("test").arg(badutf8); - - let out = wd.output(&mut cmd); - assert_eq!(out.stdout, b"foo\xffbar:test\n".to_vec()); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/228 -clean!(regression_228, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create_dir("foo"); - cmd.arg("--ignore-file").arg("foo"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/229 -clean!(regression_229, "[E]conomie", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "economie"); - cmd.arg("-S"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/251 -clean!(regression_251, "привет", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "привет\nПривет\nПрИвЕт"); - cmd.arg("-i"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/256 -#[cfg(not(windows))] -clean!(regression_256, "test", "foo", |wd: WorkDir, mut cmd: Command| { - wd.create_dir("bar"); - wd.create("bar/baz", "test"); - wd.link_dir("bar", "foo"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo/baz:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/256 -#[cfg(not(windows))] -clean!(regression_256_j1, "test", "foo", |wd: WorkDir, mut cmd: Command| { - wd.create_dir("bar"); - wd.create("bar/baz", "test"); - wd.link_dir("bar", "foo"); - cmd.arg("-j1"); - - let lines: String = wd.stdout(&mut cmd); - 
assert_eq!(lines, "foo/baz:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/279 -clean!(regression_279, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "test"); - cmd.arg("-q"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, ""); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/405 -clean!(regression_405, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create_dir("foo/bar"); - wd.create_dir("bar/foo"); - wd.create("foo/bar/file1.txt", "test"); - wd.create("bar/foo/file2.txt", "test"); - cmd.arg("-g").arg("!/foo/**"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("bar/foo/file2.txt"))); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/428 -#[cfg(not(windows))] -clean!(regression_428_color_context_path, "foo", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create("sherlock", "foo\nbar"); - cmd.arg("-A1").arg("-H").arg("--no-heading").arg("-N") - .arg("--colors=match:none").arg("--color=always"); - - let lines: String = wd.stdout(&mut cmd); - let expected = format!( - "{colored_path}:foo\n{colored_path}-bar\n", - colored_path=format!( - "\x1b\x5b\x6d\x1b\x5b\x33\x35\x6d{path}\x1b\x5b\x6d", - path=path("sherlock"))); - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/428 -clean!(regression_428_unrecognized_style, "Sherlok", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--colors=match:style:"); - wd.assert_err(&mut cmd); - - let output = cmd.output().unwrap(); - let err = String::from_utf8_lossy(&output.stderr); - let expected = "\ -Unrecognized style attribute ''. Choose from: nobold, bold, nointense, intense. 
-"; - assert_eq!(err, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/493 -clean!(regression_493, " 're ", "input.txt", |wd: WorkDir, mut cmd: Command| { - wd.create("input.txt", "peshwaship 're seminomata"); - cmd.arg("-o").arg("-w"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, " 're \n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/1 -clean!(feature_1_sjis, "Шерлок Холмс", ".", |wd: WorkDir, mut cmd: Command| { - let sherlock = - b"\x84Y\x84u\x84\x82\x84|\x84\x80\x84{ \x84V\x84\x80\x84|\x84}\x84\x83"; - wd.create_bytes("foo", &sherlock[..]); - cmd.arg("-Esjis"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/1 -clean!(feature_1_utf16_auto, "Шерлок Холмс", ".", -|wd: WorkDir, mut cmd: Command| { - let sherlock = - b"\xff\xfe(\x045\x04@\x04;\x04>\x04:\x04 \x00%\x04>\x04;\x04<\x04A\x04"; - wd.create_bytes("foo", &sherlock[..]); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/1 -clean!(feature_1_utf16_explicit, "Шерлок Холмс", ".", -|wd: WorkDir, mut cmd: Command| { - let sherlock = - b"\xff\xfe(\x045\x04@\x04;\x04>\x04:\x04 \x00%\x04>\x04;\x04<\x04A\x04"; - wd.create_bytes("foo", &sherlock[..]); - cmd.arg("-Eutf-16le"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/1 -clean!(feature_1_eucjp, "Шерлок Холмс", ".", -|wd: WorkDir, mut cmd: Command| { - let sherlock = - b"\xa7\xba\xa7\xd6\xa7\xe2\xa7\xdd\xa7\xe0\xa7\xdc \xa7\xb7\xa7\xe0\xa7\xdd\xa7\xde\xa7\xe3"; - wd.create_bytes("foo", &sherlock[..]); - cmd.arg("-Eeuc-jp"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/1 
-sherlock!(feature_1_unknown_encoding, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("-Efoobar"); - wd.assert_non_empty_stderr(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/1 -// Specific: https://github.com/BurntSushi/ripgrep/pull/398/files#r111109265 -sherlock!(feature_1_replacement_encoding, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("-Ecsiso2022kr"); - wd.assert_non_empty_stderr(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/7 -sherlock!(feature_7, "-fpat", "sherlock", |wd: WorkDir, mut cmd: Command| { - wd.create("pat", "Sherlock\nHolmes"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/7 -sherlock!(feature_7_dash, "-f-", ".", |wd: WorkDir, mut cmd: Command| { - let output = wd.pipe(&mut cmd, "Sherlock"); - let lines = String::from_utf8_lossy(&output.stdout); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/20 -sherlock!(feature_20_no_filename, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--no-filename"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -be, to a very large extent, the result of luck. 
Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/34 -sherlock!(feature_34_only_matching, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--only-matching"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:Sherlock -sherlock:Sherlock -"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/34 -sherlock!(feature_34_only_matching_line_column, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--only-matching").arg("--column").arg("--line-number"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock:1:57:Sherlock -sherlock:3:49:Sherlock -"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/45 -sherlock!(feature_45_relative_cwd, "test", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create(".not-an-ignore", "foo\n/bar"); - wd.create_dir("bar"); - wd.create_dir("baz/bar"); - wd.create_dir("baz/baz/bar"); - wd.create("bar/test", "test"); - wd.create("baz/bar/test", "test"); - wd.create("baz/baz/bar/test", "test"); - wd.create("baz/foo", "test"); - wd.create("baz/test", "test"); - wd.create("foo", "test"); - wd.create("test", "test"); - - // First, get a baseline without applying ignore rules. - let lines = paths_from_stdout(wd.stdout(&mut cmd)); - assert_eq!(lines, paths(&[ - "bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo", - "baz/test", "foo", "test", - ])); - - // Now try again with the ignore file activated. - cmd.arg("--ignore-file").arg(".not-an-ignore"); - let lines = paths_from_stdout(wd.stdout(&mut cmd)); - assert_eq!(lines, paths(&[ - "baz/bar/test", "baz/baz/bar/test", "baz/test", "test", - ])); - - // Now do it again, but inside the baz directory. 
- // Since the ignore file is interpreted relative to the CWD, this will - // cause the /bar anchored pattern to filter out baz/bar, which is a - // subtle difference between true parent ignore files and manually - // specified ignore files. - let mut cmd = wd.command(); - cmd.arg("test").arg(".").arg("--ignore-file").arg("../.not-an-ignore"); - cmd.current_dir(wd.path().join("baz")); - let lines = paths_from_stdout(wd.stdout(&mut cmd)); - assert_eq!(lines, paths(&["baz/bar/test", "test"])); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/45 -sherlock!(feature_45_precedence_with_others, "test", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create(".not-an-ignore", "*.log"); - wd.create(".ignore", "!imp.log"); - wd.create("imp.log", "test"); - wd.create("wat.log", "test"); - - cmd.arg("--ignore-file").arg(".not-an-ignore"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "imp.log:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/45 -sherlock!(feature_45_precedence_internal, "test", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create(".not-an-ignore1", "*.log"); - wd.create(".not-an-ignore2", "!imp.log"); - wd.create("imp.log", "test"); - wd.create("wat.log", "test"); - - cmd.arg("--ignore-file").arg(".not-an-ignore1"); - cmd.arg("--ignore-file").arg(".not-an-ignore2"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "imp.log:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/68 -clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create(".gitignore", "foo"); - wd.create(".ignore", "bar"); - wd.create("foo", "test"); - wd.create("bar", "test"); - cmd.arg("--no-ignore-vcs"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/70 -sherlock!(feature_70_smart_case, "sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--smart-case"); - - let lines: String = 
wd.stdout(&mut cmd); - let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/89 -sherlock!(feature_89_files_with_matches, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--null").arg("--files-with-matches"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x00"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/89 -sherlock!(feature_89_files_without_matches, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create("file.py", "foo"); - cmd.arg("--null").arg("--files-without-match"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py\x00"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/89 -sherlock!(feature_89_count, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--null").arg("--count"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x002\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/89 -sherlock!(feature_89_files, "NADA", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--null").arg("--files"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x00"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/89 -sherlock!(feature_89_match, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--null").arg("-C1"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock\x00Holmeses, success in the province of detective work must always -sherlock\x00be, to a very large extent, the result of luck. 
Sherlock Holmes -sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; -"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/109 -clean!(feature_109_max_depth, "far", ".", |wd: WorkDir, mut cmd: Command| { - wd.create_dir("one"); - wd.create("one/pass", "far"); - wd.create_dir("one/too"); - wd.create("one/too/many", "far"); - - cmd.arg("--maxdepth").arg("2"); - - let lines: String = wd.stdout(&mut cmd); - let expected = path("one/pass:far\n"); - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/124 -clean!(feature_109_case_sensitive_part1, "test", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create("foo", "tEsT"); - cmd.arg("--smart-case").arg("--case-sensitive"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/124 -clean!(feature_109_case_sensitive_part2, "test", ".", -|wd: WorkDir, mut cmd: Command| { - wd.create("foo", "tEsT"); - cmd.arg("--ignore-case").arg("--case-sensitive"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/129 -clean!(feature_129_matches, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "test\ntest abcdefghijklmnopqrstuvwxyz test"); - cmd.arg("-M26"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "foo:test\nfoo:[Omitted long line with 2 matches]\n"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/129 -clean!(feature_129_context, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "test\nabcdefghijklmnopqrstuvwxyz"); - cmd.arg("-M20").arg("-C1"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "foo:test\nfoo-[Omitted long context line]\n"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/129 -clean!(feature_129_replace, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "test\ntest 
abcdefghijklmnopqrstuvwxyz test"); - cmd.arg("-M26").arg("-rfoo"); - - let lines: String = wd.stdout(&mut cmd); - let expected = "foo:foo\nfoo:[Omitted long line with 2 replacements]\n"; - assert_eq!(lines, expected); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/159 -clean!(feature_159_works, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "test\ntest"); - cmd.arg("-m1"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/159 -clean!(feature_159_zero_max, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "test\ntest"); - cmd.arg("-m0"); - wd.assert_err(&mut cmd); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/243 -clean!(feature_243_column_line, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "test"); - cmd.arg("--column"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:1:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/263 -clean!(feature_263_sort_files, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create("foo", "test"); - wd.create("abc", "test"); - wd.create("zoo", "test"); - wd.create("bar", "test"); - cmd.arg("--sort-files"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "abc:test\nbar:test\nfoo:test\nzoo:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/275 -clean!(feature_275_pathsep, "test", ".", |wd: WorkDir, mut cmd: Command| { - wd.create_dir("foo"); - wd.create("foo/bar", "test"); - cmd.arg("--path-separator").arg("Z"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "fooZbar:test\n"); -}); - -// See: https://github.com/BurntSushi/ripgrep/issues/362 -sherlock!(feature_362_dfa_size_limit, r"For\s", -|wd: WorkDir, mut cmd: Command| { - // This should fall back to the nfa engine but should still produce the - // expected result. 
- cmd.arg("--dfa-size-limit").arg("10"); - let lines: String = wd.stdout(&mut cmd); - let expected = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -"; - assert_eq!(lines, expected); -}); - -sherlock!(feature_362_exceeds_regex_size_limit, r"[0-9]\w+", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("--regex-size-limit").arg("10K"); - wd.assert_err(&mut cmd); -}); - -#[cfg(target_pointer_width = "32")] -sherlock!(feature_362_u64_to_narrow_usize_suffix_overflow, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - wd.remove("sherlock"); - wd.create_size("foo", 1000000); - - // 2^35 * 2^20 is ok for u64, but not for usize - cmd.arg("--dfa-size-limit").arg("34359738368M").arg("--files"); - wd.assert_err(&mut cmd); -}); - - -// See: https://github.com/BurntSushi/ripgrep/issues/419 -sherlock!(feature_419_zero_as_shortcut_for_null, "Sherlock", ".", -|wd: WorkDir, mut cmd: Command| { - cmd.arg("-0").arg("--count"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x002\n"); -}); - -#[test] -fn binary_nosearch() { - let wd = WorkDir::new("binary_nosearch"); - wd.create("file", "foo\x00bar\nfoo\x00baz\n"); - let mut cmd = wd.command(); - cmd.arg("foo").arg("file"); - wd.assert_err(&mut cmd); -} - -// The following two tests show a discrepancy in search results between -// searching with memory mapped files and stream searching. Stream searching -// uses a heuristic (that GNU grep also uses) where NUL bytes are replaced with -// the EOL terminator, which tends to avoid allocating large amounts of memory -// for really long "lines." The memory map searcher has no need to worry about -// such things, and more than that, it would be pretty hard for it to match -// the semantics of streaming search in this case. -// -// Binary files with lots of NULs aren't really part of the use case of ripgrep -// (or any other grep-like tool for that matter), so we shouldn't feel too bad -// about it. 
-#[test] -fn binary_search_mmap() { - let wd = WorkDir::new("binary_search_mmap"); - wd.create("file", "foo\x00bar\nfoo\x00baz\n"); - let mut cmd = wd.command(); - cmd.arg("-a").arg("--mmap").arg("foo").arg("file"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo\x00bar\nfoo\x00baz\n"); -} - -#[test] -fn binary_search_no_mmap() { - let wd = WorkDir::new("binary_search_no_mmap"); - wd.create("file", "foo\x00bar\nfoo\x00baz\n"); - let mut cmd = wd.command(); - cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo\x00bar\nfoo\x00baz\n"); -} - -#[test] -fn files() { - let wd = WorkDir::new("files"); - wd.create("file", ""); - wd.create_dir("dir"); - wd.create("dir/file", ""); - - let mut cmd = wd.command(); - cmd.arg("--files"); - let lines: String = wd.stdout(&mut cmd); - assert!(lines == path("file\ndir/file\n") - || lines == path("dir/file\nfile\n")); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/64 -#[test] -fn regression_64() { - let wd = WorkDir::new("regression_64"); - wd.create_dir("dir"); - wd.create_dir("foo"); - wd.create("dir/abc", ""); - wd.create("foo/abc", ""); - - let mut cmd = wd.command(); - cmd.arg("--files").arg("foo"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("foo/abc\n")); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/270 -#[test] -fn regression_270() { - let wd = WorkDir::new("regression_270"); - wd.create("foo", "-test"); - - let mut cmd = wd.command(); - cmd.arg("-e").arg("-test"); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("foo:-test\n")); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/391 -#[test] -fn regression_391() { - let wd = WorkDir::new("regression_391"); - wd.create_dir(".git"); - wd.create("lock", ""); - wd.create("bar.py", ""); - wd.create(".git/packed-refs", ""); - wd.create(".git/description", ""); - - let mut cmd = wd.command(); - 
cmd.arg("--no-ignore").arg("--hidden").arg("--follow").arg("--files") - .arg("--glob") - .arg("!{.git,node_modules,plugged}/**") - .arg("--glob") - .arg("*.{js,json,php,md,styl,scss,sass,pug,html,config,py,cpp,c,go,hs}"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "bar.py\n"); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/451 -#[test] -fn regression_451_only_matching_as_in_issue() { - let wd = WorkDir::new("regression_451_only_matching"); - let path = "digits.txt"; - wd.create(path, "1 2 3\n"); - - let mut cmd = wd.command(); - cmd.arg("[0-9]+").arg(path).arg("--only-matching"); - let lines: String = wd.stdout(&mut cmd); - - let expected = "\ -1 -2 -3 -"; - - assert_eq!(lines, expected); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/451 -#[test] -fn regression_451_only_matching() { - let wd = WorkDir::new("regression_451_only_matching"); - let path = "digits.txt"; - wd.create(path, "1 2 3\n123\n"); - - let mut cmd = wd.command(); - cmd.arg("[0-9]").arg(path) - .arg("--only-matching") - .arg("--column"); - let lines: String = wd.stdout(&mut cmd); - - let expected = "\ -1:1:1 -1:3:2 -1:5:3 -2:1:1 -2:2:2 -2:3:3 -"; - - assert_eq!(lines, expected); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/483 -#[test] -fn regression_483_matching_no_stdout() { - let wd = WorkDir::new("regression_483_matching_no_stdout"); - wd.create("file.py", ""); - - let mut cmd = wd.command(); - cmd.arg("--quiet") - .arg("--files") - .arg("--glob").arg("*.py"); - - let lines: String = wd.stdout(&mut cmd); - assert!(lines.is_empty()); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/483 -#[test] -fn regression_483_non_matching_exit_code() { - let wd = WorkDir::new("regression_483_non_matching_exit_code"); - wd.create("file.rs", ""); - - let mut cmd = wd.command(); - cmd.arg("--quiet") - .arg("--files") - .arg("--glob").arg("*.py"); - - wd.assert_err(&mut cmd); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/506 
-#[test] -fn regression_506_word_boundaries_not_parenthesized() { - let wd = WorkDir::new("regression_506_word_boundaries_not_parenthesized"); - let path = "wb.txt"; - wd.create(path, "min minimum amin\n\ - max maximum amax"); - - let mut cmd = wd.command(); - cmd.arg("-w").arg("min|max").arg(path).arg("--only-matching"); - let lines: String = wd.stdout(&mut cmd); - - let expected = "min\nmax\n"; - - assert_eq!(lines, expected); -} - -// See: https://github.com/BurntSushi/ripgrep/issues/568 -#[test] -fn regression_568_leading_hyphen_option_arguments() { - let wd = WorkDir::new("regression_568_leading_hyphen_option_arguments"); - let path = "file"; - wd.create(path, "foo bar -baz\n"); - - let mut cmd = wd.command(); - cmd.arg("-e-baz").arg("-e").arg("-baz").arg(path); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo bar -baz\n"); - - let mut cmd = wd.command(); - cmd.arg("-rni").arg("bar").arg(path); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo ni -baz\n"); - - let mut cmd = wd.command(); - cmd.arg("-r").arg("-n").arg("-i").arg("bar").arg(path); - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo -n -baz\n"); -} - -#[test] -fn type_list() { - let wd = WorkDir::new("type_list"); +// Utilities for making tests nicer to read and easier to write. +mod util; - let mut cmd = wd.command(); - cmd.arg("--type-list"); - let lines: String = wd.stdout(&mut cmd); - // This can change over time, so just make sure we print something. - assert!(!lines.is_empty()); -} +// Tests related to most features in ripgrep. If you're adding something new +// to ripgrep, tests should probably go in here. +mod feature; +// Tests for ripgrep's JSON format. +mod json; +// Miscellaneous tests grouped in a haphazard manner. Try not to add more. +mod misc; +// Tests for ripgrep's multiline search support. +mod multiline; +// Regression tests. 
+mod regression; diff -Nru ripgrep-0.6.0/tests/util.rs ripgrep-0.10.0.3/tests/util.rs --- ripgrep-0.6.0/tests/util.rs 1970-01-01 00:00:00.000000000 +0000 +++ ripgrep-0.10.0.3/tests/util.rs 2018-09-10 21:10:55.000000000 +0000 @@ -0,0 +1,441 @@ +use std::env; +use std::error; +use std::ffi::OsStr; +use std::fs::{self, File}; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; +use std::process::{self, Command}; +use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering}; +use std::thread; +use std::time::Duration; + +static TEST_DIR: &'static str = "ripgrep-tests"; +static NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT; + +/// Setup an empty work directory and return a command pointing to the ripgrep +/// executable whose CWD is set to the work directory. +/// +/// The name given will be used to create the directory. Generally, it should +/// correspond to the test name. +pub fn setup(test_name: &str) -> (Dir, TestCommand) { + let dir = Dir::new(test_name); + let cmd = dir.command(); + (dir, cmd) +} + +/// Like `setup`, but uses PCRE2 as the underlying regex engine. +pub fn setup_pcre2(test_name: &str) -> (Dir, TestCommand) { + let mut dir = Dir::new(test_name); + dir.pcre2(true); + let cmd = dir.command(); + (dir, cmd) +} + +/// Break the given string into lines, sort them and then join them back +/// together. This is useful for testing output from ripgrep that may not +/// always be in the same order. +pub fn sort_lines(lines: &str) -> String { + let mut lines: Vec<&str> = lines.trim().lines().collect(); + lines.sort(); + format!("{}\n", lines.join("\n")) +} + +/// Returns true if and only if the given program can be successfully executed +/// with a `--help` flag. +pub fn cmd_exists(program: &str) -> bool { + Command::new(program).arg("--help").output().is_ok() +} + +/// Dir represents a directory in which tests should be run. +/// +/// Directories are created from a global atomic counter to avoid duplicates. 
+#[derive(Clone, Debug)] +pub struct Dir { + /// The directory in which this test executable is running. + root: PathBuf, + /// The directory in which the test should run. If a test needs to create + /// files, they should go in here. This directory is also used as the CWD + /// for any processes created by the test. + dir: PathBuf, + /// Set to true when the test should use PCRE2 as the regex engine. + pcre2: bool, +} + +impl Dir { + /// Create a new test working directory with the given name. The name + /// does not need to be distinct for each invocation, but should correspond + /// to a logical grouping of tests. + pub fn new(name: &str) -> Dir { + let id = NEXT_ID.fetch_add(1, Ordering::SeqCst); + let root = env::current_exe() + .unwrap() + .parent() + .expect("executable's directory") + .to_path_buf(); + let dir = env::temp_dir() + .join(TEST_DIR) + .join(name) + .join(&format!("{}", id)); + nice_err(&dir, repeat(|| fs::create_dir_all(&dir))); + Dir { + root: root, + dir: dir, + pcre2: false, + } + } + + /// Use PCRE2 for this test. + pub fn pcre2(&mut self, yes: bool) { + self.pcre2 = yes; + } + + /// Returns true if and only if this test is configured to use PCRE2 as + /// the regex engine. + pub fn is_pcre2(&self) -> bool { + self.pcre2 + } + + /// Create a new file with the given name and contents in this directory, + /// or panic on error. + pub fn create>(&self, name: P, contents: &str) { + self.create_bytes(name, contents.as_bytes()); + } + + /// Try to create a new file with the given name and contents in this + /// directory. + #[allow(dead_code)] // unused on Windows + pub fn try_create>( + &self, + name: P, + contents: &str, + ) -> io::Result<()> { + let path = self.dir.join(name); + self.try_create_bytes(path, contents.as_bytes()) + } + + /// Create a new file with the given name and size. 
+ pub fn create_size>(&self, name: P, filesize: u64) { + let path = self.dir.join(name); + let file = nice_err(&path, File::create(&path)); + nice_err(&path, file.set_len(filesize)); + } + + /// Create a new file with the given name and contents in this directory, + /// or panic on error. + pub fn create_bytes>(&self, name: P, contents: &[u8]) { + let path = self.dir.join(&name); + nice_err(&path, self.try_create_bytes(name, contents)); + } + + /// Try to create a new file with the given name and contents in this + /// directory. + pub fn try_create_bytes>( + &self, + name: P, + contents: &[u8], + ) -> io::Result<()> { + let path = self.dir.join(name); + let mut file = File::create(path)?; + file.write_all(contents)?; + file.flush() + } + + /// Remove a file with the given name from this directory. + pub fn remove>(&self, name: P) { + let path = self.dir.join(name); + nice_err(&path, fs::remove_file(&path)); + } + + /// Create a new directory with the given path (and any directories above + /// it) inside this directory. + pub fn create_dir>(&self, path: P) { + let path = self.dir.join(path); + nice_err(&path, repeat(|| fs::create_dir_all(&path))); + } + + /// Creates a new command that is set to use the ripgrep executable in + /// this working directory. + /// + /// This also: + /// + /// * Unsets the `RIPGREP_CONFIG_PATH` environment variable. + /// * Sets the `--path-separator` to `/` so that paths have the same output + /// on all systems. Tests that need to check `--path-separator` itself + /// can simply pass it again to override it. + pub fn command(&self) -> TestCommand { + let mut cmd = process::Command::new(&self.bin()); + cmd.env_remove("RIPGREP_CONFIG_PATH"); + cmd.current_dir(&self.dir); + cmd.arg("--path-separator").arg("/"); + if self.is_pcre2() { + cmd.arg("--pcre2"); + } + TestCommand { dir: self.clone(), cmd: cmd } + } + + /// Returns the path to the ripgrep executable. 
+ pub fn bin(&self) -> PathBuf { + if cfg!(windows) { + self.root.join("../rg.exe") + } else { + self.root.join("../rg") + } + } + + /// Returns the path to this directory. + pub fn path(&self) -> &Path { + &self.dir + } + + /// Creates a directory symlink to the src with the given target name + /// in this directory. + #[cfg(not(windows))] + pub fn link_dir, T: AsRef>(&self, src: S, target: T) { + use std::os::unix::fs::symlink; + let src = self.dir.join(src); + let target = self.dir.join(target); + let _ = fs::remove_file(&target); + nice_err(&target, symlink(&src, &target)); + } + + /// Creates a directory symlink to the src with the given target name + /// in this directory. + #[cfg(windows)] + pub fn link_dir, T: AsRef>(&self, src: S, target: T) { + use std::os::windows::fs::symlink_dir; + let src = self.dir.join(src); + let target = self.dir.join(target); + let _ = fs::remove_dir(&target); + nice_err(&target, symlink_dir(&src, &target)); + } + + /// Creates a file symlink to the src with the given target name + /// in this directory. + #[cfg(not(windows))] + pub fn link_file, T: AsRef>( + &self, + src: S, + target: T, + ) { + self.link_dir(src, target); + } + + /// Creates a file symlink to the src with the given target name + /// in this directory. + #[cfg(windows)] + #[allow(dead_code)] // unused on Windows + pub fn link_file, T: AsRef>( + &self, + src: S, + target: T, + ) { + use std::os::windows::fs::symlink_file; + let src = self.dir.join(src); + let target = self.dir.join(target); + let _ = fs::remove_file(&target); + nice_err(&target, symlink_file(&src, &target)); + } +} + +/// A simple wrapper around a process::Command with some conveniences. +#[derive(Debug)] +pub struct TestCommand { + /// The dir used to launched this command. + dir: Dir, + /// The actual command we use to control the process. + cmd: Command, +} + +impl TestCommand { + /// Returns a mutable reference to the underlying command. 
+ pub fn cmd(&mut self) -> &mut Command { + &mut self.cmd + } + + /// Add an argument to pass to the command. + pub fn arg>(&mut self, arg: A) -> &mut TestCommand { + self.cmd.arg(arg); + self + } + + /// Add any number of arguments to the command. + pub fn args( + &mut self, + args: I, + ) -> &mut TestCommand + where I: IntoIterator, + A: AsRef + { + self.cmd.args(args); + self + } + + /// Set the working directory for this command. + /// + /// Note that this does not need to be called normally, since the creation + /// of this TestCommand causes its working directory to be set to the + /// test's directory automatically. + pub fn current_dir>(&mut self, dir: P) -> &mut TestCommand { + self.cmd.current_dir(dir); + self + } + + /// Runs and captures the stdout of the given command. + pub fn stdout(&mut self) -> String { + let o = self.output(); + let stdout = String::from_utf8_lossy(&o.stdout); + match stdout.parse() { + Ok(t) => t, + Err(err) => { + panic!( + "could not convert from string: {:?}\n\n{}", + err, + stdout + ); + } + } + } + + /// Pipe `input` to a command, and collect the output. + pub fn pipe(&mut self, input: &str) -> String { + self.cmd.stdin(process::Stdio::piped()); + self.cmd.stdout(process::Stdio::piped()); + self.cmd.stderr(process::Stdio::piped()); + + let mut child = self.cmd.spawn().unwrap(); + + // Pipe input to child process using a separate thread to avoid + // risk of deadlock between parent and child process. + let mut stdin = child.stdin.take().expect("expected standard input"); + let input = input.to_owned(); + let worker = thread::spawn(move || { + write!(stdin, "{}", input) + }); + + let output = self.expect_success(child.wait_with_output().unwrap()); + worker.join().unwrap().unwrap(); + + let stdout = String::from_utf8_lossy(&output.stdout); + match stdout.parse() { + Ok(t) => t, + Err(err) => { + panic!( + "could not convert from string: {:?}\n\n{}", + err, + stdout + ); + } + } + } + + /// Gets the output of a command. 
If the command failed, then this panics. + pub fn output(&mut self) -> process::Output { + let output = self.cmd.output().unwrap(); + self.expect_success(output) + } + + /// Runs the command and asserts that it resulted in an error exit code. + pub fn assert_err(&mut self) { + let o = self.cmd.output().unwrap(); + if o.status.success() { + panic!( + "\n\n===== {:?} =====\n\ + command succeeded but expected failure!\ + \n\ncwd: {}\ + \n\nstatus: {}\ + \n\nstdout: {}\n\nstderr: {}\ + \n\n=====\n", + self.cmd, + self.dir.dir.display(), + o.status, + String::from_utf8_lossy(&o.stdout), + String::from_utf8_lossy(&o.stderr) + ); + } + } + + /// Runs the command and asserts that its exit code matches expected exit + /// code. + pub fn assert_exit_code(&mut self, expected_code: i32) { + let code = self.cmd.output().unwrap().status.code().unwrap(); + assert_eq!( + expected_code, code, + "\n\n===== {:?} =====\n\ + expected exit code did not match\ + \n\nexpected: {}\ + \n\nfound: {}\ + \n\n=====\n", + self.cmd, + expected_code, + code + ); + } + + /// Runs the command and asserts that something was printed to stderr. 
+ pub fn assert_non_empty_stderr(&mut self) { + let o = self.cmd.output().unwrap(); + if o.status.success() || o.stderr.is_empty() { + panic!( + "\n\n===== {:?} =====\n\ + command succeeded but expected failure!\ + \n\ncwd: {}\ + \n\nstatus: {}\ + \n\nstdout: {}\n\nstderr: {}\ + \n\n=====\n", + self.cmd, + self.dir.dir.display(), + o.status, + String::from_utf8_lossy(&o.stdout), + String::from_utf8_lossy(&o.stderr) + ); + } + } + + fn expect_success(&self, o: process::Output) -> process::Output { + if !o.status.success() { + let suggest = + if o.stderr.is_empty() { + "\n\nDid your search end up with no results?".to_string() + } else { + "".to_string() + }; + + panic!("\n\n==========\n\ + command failed but expected success!\ + {}\ + \n\ncommand: {:?}\ + \ncwd: {}\ + \n\nstatus: {}\ + \n\nstdout: {}\ + \n\nstderr: {}\ + \n\n==========\n", + suggest, self.cmd, self.dir.dir.display(), o.status, + String::from_utf8_lossy(&o.stdout), + String::from_utf8_lossy(&o.stderr)); + } + o + } +} + +fn nice_err( + path: &Path, + res: Result, +) -> T { + match res { + Ok(t) => t, + Err(err) => panic!("{}: {:?}", path.display(), err), + } +} + +fn repeat io::Result<()>>(mut f: F) -> io::Result<()> { + let mut last_err = None; + for _ in 0..10 { + if let Err(err) = f() { + last_err = Some(err); + thread::sleep(Duration::from_millis(500)); + } else { + return Ok(()); + } + } + Err(last_err.unwrap()) +} diff -Nru ripgrep-0.6.0/tests/workdir.rs ripgrep-0.10.0.3/tests/workdir.rs --- ripgrep-0.6.0/tests/workdir.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/tests/workdir.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,301 +0,0 @@ -use std::env; -use std::error; -use std::fmt; -use std::fs::{self, File}; -use std::io::{self, Write}; -use std::path::{Path, PathBuf}; -use std::process; -use std::str::FromStr; -use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering}; -use std::thread; -use std::time::Duration; - -static TEST_DIR: &'static str = "ripgrep-tests"; -static 
NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT; - -/// WorkDir represents a directory in which tests are run. -/// -/// Directories are created from a global atomic counter to avoid duplicates. -#[derive(Debug)] -pub struct WorkDir { - /// The directory in which this test executable is running. - root: PathBuf, - /// The directory in which the test should run. If a test needs to create - /// files, they should go in here. - dir: PathBuf, -} - -impl WorkDir { - /// Create a new test working directory with the given name. The name - /// does not need to be distinct for each invocation, but should correspond - /// to a logical grouping of tests. - pub fn new(name: &str) -> WorkDir { - let id = NEXT_ID.fetch_add(1, Ordering::SeqCst); - let root = env::current_exe().unwrap() - .parent().expect("executable's directory").to_path_buf(); - let dir = root.join(TEST_DIR).join(name).join(&format!("{}", id)); - nice_err(&dir, repeat(|| fs::create_dir_all(&dir))); - WorkDir { - root: root, - dir: dir, - } - } - - /// Create a new file with the given name and contents in this directory. - pub fn create>(&self, name: P, contents: &str) { - self.create_bytes(name, contents.as_bytes()); - } - - /// Create a new file with the given name and size. - pub fn create_size>(&self, name: P, filesize: u64) { - let path = self.dir.join(name); - let file = nice_err(&path, File::create(&path)); - nice_err(&path, file.set_len(filesize)); - } - - /// Create a new file with the given name and contents in this directory. - pub fn create_bytes>(&self, name: P, contents: &[u8]) { - let path = self.dir.join(name); - let mut file = nice_err(&path, File::create(&path)); - nice_err(&path, file.write_all(contents)); - nice_err(&path, file.flush()); - } - - /// Remove a file with the given name from this directory. 
- pub fn remove>(&self, name: P) { - let path = self.dir.join(name); - nice_err(&path, fs::remove_file(&path)); - } - - /// Create a new directory with the given path (and any directories above - /// it) inside this directory. - pub fn create_dir>(&self, path: P) { - let path = self.dir.join(path); - nice_err(&path, repeat(|| fs::create_dir_all(&path))); - } - - /// Creates a new command that is set to use the ripgrep executable in - /// this working directory. - pub fn command(&self) -> process::Command { - let mut cmd = process::Command::new(&self.bin()); - cmd.current_dir(&self.dir); - cmd - } - - /// Returns the path to the ripgrep executable. - #[cfg(not(windows))] - pub fn bin(&self) -> PathBuf { - let path = self.root.join("rg"); - if !path.is_file() { - // Looks like a recent version of Cargo changed the cwd or the - // location of the test executable. - self.root.join("../rg") - } else { - path - } - } - - /// Returns the path to the ripgrep executable. - #[cfg(windows)] - pub fn bin(&self) -> PathBuf { - let path = self.root.join("rg.exe"); - if !path.is_file() { - // Looks like a recent version of Cargo changed the cwd or the - // location of the test executable. - self.root.join("../rg.exe") - } else { - path - } - } - - /// Returns the path to this directory. - pub fn path(&self) -> &Path { - &self.dir - } - - /// Creates a directory symlink to the src with the given target name - /// in this directory. - #[cfg(not(windows))] - pub fn link_dir, T: AsRef>(&self, src: S, target: T) { - use std::os::unix::fs::symlink; - let src = self.dir.join(src); - let target = self.dir.join(target); - let _ = fs::remove_file(&target); - nice_err(&target, symlink(&src, &target)); - } - - /// Creates a directory symlink to the src with the given target name - /// in this directory. 
- #[cfg(windows)] - pub fn link_dir, T: AsRef>(&self, src: S, target: T) { - use std::os::windows::fs::symlink_dir; - let src = self.dir.join(src); - let target = self.dir.join(target); - let _ = fs::remove_dir(&target); - nice_err(&target, symlink_dir(&src, &target)); - } - - /// Creates a file symlink to the src with the given target name - /// in this directory. - #[cfg(not(windows))] - pub fn link_file, T: AsRef>( - &self, - src: S, - target: T, - ) { - self.link_dir(src, target); - } - - /// Creates a file symlink to the src with the given target name - /// in this directory. - #[cfg(windows)] - pub fn link_file, T: AsRef>( - &self, - src: S, - target: T, - ) { - use std::os::windows::fs::symlink_file; - let src = self.dir.join(src); - let target = self.dir.join(target); - let _ = fs::remove_file(&target); - nice_err(&target, symlink_file(&src, &target)); - } - - /// Runs and captures the stdout of the given command. - /// - /// If the return type could not be created from a string, then this - /// panics. - pub fn stdout>( - &self, - cmd: &mut process::Command, - ) -> T { - let o = self.output(cmd); - let stdout = String::from_utf8_lossy(&o.stdout); - match stdout.parse() { - Ok(t) => t, - Err(err) => { - panic!("could not convert from string: {:?}\n\n{}", err, stdout); - } - } - } - - /// Gets the output of a command. If the command failed, then this panics. - pub fn output(&self, cmd: &mut process::Command) -> process::Output { - let output = cmd.output().unwrap(); - self.expect_success(cmd, output) - } - - /// Pipe `input` to a command, and collect the output. - pub fn pipe( - &self, - cmd: &mut process::Command, - input: &str - ) -> process::Output { - cmd.stdin(process::Stdio::piped()); - cmd.stdout(process::Stdio::piped()); - cmd.stderr(process::Stdio::piped()); - - let mut child = cmd.spawn().unwrap(); - - // Pipe input to child process using a separate thread to avoid - // risk of deadlock between parent and child process. 
- let mut stdin = child.stdin.take().expect("expected standard input"); - let input = input.to_owned(); - let worker = thread::spawn(move || { - write!(stdin, "{}", input) - }); - - let output = self.expect_success(cmd, child.wait_with_output().unwrap()); - worker.join().unwrap().unwrap(); - output - } - - /// If `o` is not the output of a successful process run - fn expect_success( - &self, - cmd: &process::Command, - o: process::Output - ) -> process::Output { - if !o.status.success() { - let suggest = - if o.stderr.is_empty() { - "\n\nDid your search end up with no results?".to_string() - } else { - "".to_string() - }; - - panic!("\n\n==========\n\ - command failed but expected success!\ - {}\ - \n\ncommand: {:?}\ - \ncwd: {}\ - \n\nstatus: {}\ - \n\nstdout: {}\ - \n\nstderr: {}\ - \n\n==========\n", - suggest, cmd, self.dir.display(), o.status, - String::from_utf8_lossy(&o.stdout), - String::from_utf8_lossy(&o.stderr)); - } - o - } - - /// Runs the given command and asserts that it resulted in an error exit - /// code. - pub fn assert_err(&self, cmd: &mut process::Command) { - let o = cmd.output().unwrap(); - if o.status.success() { - panic!("\n\n===== {:?} =====\n\ - command succeeded but expected failure!\ - \n\ncwd: {}\ - \n\nstatus: {}\ - \n\nstdout: {}\n\nstderr: {}\ - \n\n=====\n", - cmd, self.dir.display(), o.status, - String::from_utf8_lossy(&o.stdout), - String::from_utf8_lossy(&o.stderr)); - } - } - - /// Runs the given command and asserts that something was printed to - /// stderr. 
- pub fn assert_non_empty_stderr(&self, cmd: &mut process::Command) { - let o = cmd.output().unwrap(); - if o.status.success() || o.stderr.is_empty() { - panic!("\n\n===== {:?} =====\n\ - command succeeded but expected failure!\ - \n\ncwd: {}\ - \n\nstatus: {}\ - \n\nstdout: {}\n\nstderr: {}\ - \n\n=====\n", - cmd, self.dir.display(), o.status, - String::from_utf8_lossy(&o.stdout), - String::from_utf8_lossy(&o.stderr)); - } - } -} - -fn nice_err, T, E: error::Error>( - path: P, - res: Result, -) -> T { - match res { - Ok(t) => t, - Err(err) => { - panic!("{}: {:?}", path.as_ref().display(), err); - } - } -} - -fn repeat io::Result<()>>(mut f: F) -> io::Result<()> { - let mut last_err = None; - for _ in 0..10 { - if let Err(err) = f() { - last_err = Some(err); - thread::sleep(Duration::from_millis(500)); - } else { - return Ok(()); - } - } - Err(last_err.unwrap()) -} diff -Nru ripgrep-0.6.0/.travis.yml ripgrep-0.10.0.3/.travis.yml --- ripgrep-0.6.0/.travis.yml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/.travis.yml 2018-09-10 21:10:55.000000000 +0000 @@ -1,21 +1,30 @@ language: rust - env: global: - - PROJECT_NAME=ripgrep + - PROJECT_NAME: ripgrep - RUST_BACKTRACE: full - addons: apt: packages: - # Needed for completion-function test + # For generating man page. + - libxslt1-dev + - asciidoc + - docbook-xsl + - xsltproc + - libxml2-utils + # Needed for completion-function test. - zsh - + # Needed for testing decompression search. + - xz-utils + - liblz4-tool + # For building MUSL static builds on Linux. + - musl-tools matrix: + fast_finish: true include: # Nightly channel. - # (All *nix releases are done on the nightly channel to take advantage - # of the regex library's multiple pattern SIMD search.) + # All *nix releases are done on the nightly channel to take advantage + # of the regex library's multiple pattern SIMD search. 
- os: linux rust: nightly env: TARGET=i686-unknown-linux-musl @@ -24,58 +33,77 @@ env: TARGET=x86_64-unknown-linux-musl - os: osx rust: nightly - env: TARGET=x86_64-apple-darwin - # Beta channel. + # XML_CATALOG_FILES is apparently necessary for asciidoc on macOS. + env: TARGET=x86_64-apple-darwin XML_CATALOG_FILES=/usr/local/etc/xml/catalog + - os: linux + rust: nightly + env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8 + addons: + apt: + packages: + - gcc-4.8-arm-linux-gnueabihf + - binutils-arm-linux-gnueabihf + - libc6-armhf-cross + - libc6-dev-armhf-cross + # For generating man page. + - libxslt1-dev + - asciidoc + - docbook-xsl + - xsltproc + - libxml2-utils + # Beta channel. We enable these to make sure there are no regressions in + # Rust beta releases. - os: linux rust: beta env: TARGET=x86_64-unknown-linux-musl - os: linux rust: beta env: TARGET=x86_64-unknown-linux-gnu - # Minimum Rust supported channel. + # Minimum Rust supported channel. We enable these to make sure ripgrep + # continues to work on the advertised minimum Rust version. - os: linux - rust: 1.17.0 + rust: 1.28.0 env: TARGET=x86_64-unknown-linux-gnu - os: linux - rust: 1.17.0 + rust: 1.28.0 env: TARGET=x86_64-unknown-linux-musl - -before_install: - - export PATH="$PATH:$HOME/.cargo/bin" - -install: - - bash ci/install.sh - -script: - - bash ci/script.sh - -before_deploy: - - bash ci/before_deploy.sh - + - os: linux + rust: 1.28.0 + env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8 + addons: + apt: + packages: + - gcc-4.8-arm-linux-gnueabihf + - binutils-arm-linux-gnueabihf + - libc6-armhf-cross + - libc6-dev-armhf-cross + # For generating man page. 
+ - libxslt1-dev + - asciidoc + - docbook-xsl + - xsltproc + - libxml2-utils +install: ci/install.sh +script: ci/script.sh +before_deploy: ci/before_deploy.sh deploy: provider: releases - api_key: - secure: "IbSnsbGkxSydR/sozOf1/SRvHplzwRUHzcTjM7BKnr7GccL86gRPUrsrvD103KjQUGWIc1TnK1YTq5M0Onswg/ORDjqa1JEJPkPdPnVh9ipbF7M2De/7IlB4X4qXLKoApn8+bx2x/mfYXu4G+G1/2QdbaKK2yfXZKyjz0YFx+6CNrVCT2Nk8q7aHvOOzAL58vsG8iPDpupuhxlMDDn/UhyOWVInmPPQ0iJR1ZUJN8xJwXvKvBbfp3AhaBiAzkhXHNLgBR8QC5noWWMXnuVDMY3k4f3ic0V+p/qGUCN/nhptuceLxKFicMCYObSZeUzE5RAI0/OBW7l3z2iCoc+TbAnn+JrX/ObJCfzgAOXAU3tLaBFMiqQPGFKjKg1ltSYXomOFP/F7zALjpvFp4lYTBajRR+O3dqaxA9UQuRjw27vOeUpMcga4ZzL4VXFHzrxZKBHN//XIGjYAVhJ1NSSeGpeJV5/+jYzzWKfwSagRxQyVCzMooYFFXzn8Yxdm3PJlmp3GaAogNkdB9qKcrEvRINCelalzALPi0hD/HUDi8DD2PNTCLLMo6VSYtvc685Zbe+KgNzDV1YyTrRCUW6JotrS0r2ULLwnsh40hSB//nNv3XmwNmC/CmW5QAnIGj8cBMF4S2t6ohADIndojdAfNiptmaZOIT6owK7bWMgPMyopo=" file_glob: true - file: ${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.* - # don't delete the artifacts from previous phases + file: deployment/${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.tar.gz skip_cleanup: true - # deploy when a new tag is pushed on: - # channel to use to produce the release artifacts - # NOTE make sure you only release *once* per target - # TODO you may want to pick a different channel condition: $TRAVIS_RUST_VERSION = nightly + branch: master # i guess we do need this after all? 
tags: true - + api_key: + secure: "IbSnsbGkxSydR/sozOf1/SRvHplzwRUHzcTjM7BKnr7GccL86gRPUrsrvD103KjQUGWIc1TnK1YTq5M0Onswg/ORDjqa1JEJPkPdPnVh9ipbF7M2De/7IlB4X4qXLKoApn8+bx2x/mfYXu4G+G1/2QdbaKK2yfXZKyjz0YFx+6CNrVCT2Nk8q7aHvOOzAL58vsG8iPDpupuhxlMDDn/UhyOWVInmPPQ0iJR1ZUJN8xJwXvKvBbfp3AhaBiAzkhXHNLgBR8QC5noWWMXnuVDMY3k4f3ic0V+p/qGUCN/nhptuceLxKFicMCYObSZeUzE5RAI0/OBW7l3z2iCoc+TbAnn+JrX/ObJCfzgAOXAU3tLaBFMiqQPGFKjKg1ltSYXomOFP/F7zALjpvFp4lYTBajRR+O3dqaxA9UQuRjw27vOeUpMcga4ZzL4VXFHzrxZKBHN//XIGjYAVhJ1NSSeGpeJV5/+jYzzWKfwSagRxQyVCzMooYFFXzn8Yxdm3PJlmp3GaAogNkdB9qKcrEvRINCelalzALPi0hD/HUDi8DD2PNTCLLMo6VSYtvc685Zbe+KgNzDV1YyTrRCUW6JotrS0r2ULLwnsh40hSB//nNv3XmwNmC/CmW5QAnIGj8cBMF4S2t6ohADIndojdAfNiptmaZOIT6owK7bWMgPMyopo=" branches: only: # Pushes and PR to the master branch - master - # IMPORTANT Ruby regex to match tags. Required, or travis won't trigger deploys when a new tag - # is pushed. This regex matches semantic versions like v1.2.3-rc4+2016.02.22 + # Ruby regex to match tags. Required, or travis won't trigger deploys when + # a new tag is pushed. - /^\d+\.\d+\.\d+.*$/ - notifications: email: on_success: never diff -Nru ripgrep-0.6.0/wincolor/Cargo.toml ripgrep-0.10.0.3/wincolor/Cargo.toml --- ripgrep-0.6.0/wincolor/Cargo.toml 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/wincolor/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -[package] -name = "wincolor" -version = "0.1.4" #:version -authors = ["Andrew Gallant "] -description = """ -A simple Windows specific API for controlling text color in a Windows console. 
-""" -documentation = "https://docs.rs/wincolor" -homepage = "https://github.com/BurntSushi/ripgrep/tree/master/wincolor" -repository = "https://github.com/BurntSushi/ripgrep/tree/master/wincolor" -readme = "README.md" -keywords = ["windows", "win", "color", "ansi", "console"] -license = "Unlicense/MIT" - -[lib] -name = "wincolor" -bench = false - -[dependencies] -kernel32-sys = "0.2.2" -winapi = "0.2.8" diff -Nru ripgrep-0.6.0/wincolor/COPYING ripgrep-0.10.0.3/wincolor/COPYING --- ripgrep-0.6.0/wincolor/COPYING 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/wincolor/COPYING 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -This project is dual-licensed under the Unlicense and MIT licenses. - -You may use this code under the terms of either license. diff -Nru ripgrep-0.6.0/wincolor/LICENSE-MIT ripgrep-0.10.0.3/wincolor/LICENSE-MIT --- ripgrep-0.6.0/wincolor/LICENSE-MIT 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/wincolor/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Andrew Gallant - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff -Nru ripgrep-0.6.0/wincolor/README.md ripgrep-0.10.0.3/wincolor/README.md --- ripgrep-0.6.0/wincolor/README.md 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/wincolor/README.md 2018-09-10 21:10:55.000000000 +0000 @@ -1,44 +1,2 @@ -wincolor -======== -A simple Windows specific API for controlling text color in a Windows console. -The purpose of this crate is to expose the full inflexibility of the Windows -console without any platform independent abstraction. - -[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) -[![](https://img.shields.io/crates/v/wincolor.svg)](https://crates.io/crates/wincolor) - -Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 
- -### Documentation - -[https://docs.rs/wincolor](https://docs.rs/wincolor) - -### Usage - -Add this to your `Cargo.toml`: - -```toml -[dependencies] -wincolor = "0.1" -``` - -and this to your crate root: - -```rust -extern crate wincolor; -``` - -### Example - -This is a simple example that shows how to write text with a foreground color -of cyan and the intense attribute set: - -```rust -use wincolor::{Console, Color, Intense}; - -let mut con = Console::stdout().unwrap(); -con.fg(Intense::Yes, Color::Cyan).unwrap(); -println!("This text will be intense cyan."); -con.reset().unwrap(); -println!("This text will be normal."); -``` +wincolor has moved to the termcolor repository: +https://github.com/BurntSushi/termcolor diff -Nru ripgrep-0.6.0/wincolor/src/lib.rs ripgrep-0.10.0.3/wincolor/src/lib.rs --- ripgrep-0.6.0/wincolor/src/lib.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/wincolor/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -/*! -This crate provides a safe and simple Windows specific API to control -text attributes in the Windows console. Text attributes are limited to -foreground/background colors, as well as whether to make colors intense or not. - -Note that on non-Windows platforms, this crate is empty but will compile. 
- -# Example - -```no_run -use wincolor::{Console, Color, Intense}; - -let mut con = Console::stdout().unwrap(); -con.fg(Intense::Yes, Color::Cyan).unwrap(); -println!("This text will be intense cyan."); -con.reset().unwrap(); -println!("This text will be normal."); -``` -*/ - -#![deny(missing_docs)] - -#[cfg(windows)] -extern crate kernel32; -#[cfg(windows)] -extern crate winapi; - -#[cfg(windows)] -pub use win::*; - -#[cfg(windows)] -mod win; diff -Nru ripgrep-0.6.0/wincolor/src/win.rs ripgrep-0.10.0.3/wincolor/src/win.rs --- ripgrep-0.6.0/wincolor/src/win.rs 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/wincolor/src/win.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,230 +0,0 @@ -use std::io; -use std::mem; - -use kernel32; -use winapi::{DWORD, WORD}; -use winapi::winbase::{STD_ERROR_HANDLE, STD_OUTPUT_HANDLE}; -use winapi::wincon::{ - FOREGROUND_BLUE as FG_BLUE, - FOREGROUND_GREEN as FG_GREEN, - FOREGROUND_RED as FG_RED, - FOREGROUND_INTENSITY as FG_INTENSITY, -}; - -const FG_CYAN: DWORD = FG_BLUE | FG_GREEN; -const FG_MAGENTA: DWORD = FG_BLUE | FG_RED; -const FG_YELLOW: DWORD = FG_GREEN | FG_RED; -const FG_WHITE: DWORD = FG_BLUE | FG_GREEN | FG_RED; - -/// A Windows console. -/// -/// This represents a very limited set of functionality available to a Windows -/// console. In particular, it can only change text attributes such as color -/// and intensity. -/// -/// There is no way to "write" to this console. Simply write to -/// stdout or stderr instead, while interleaving instructions to the console -/// to change text attributes. -/// -/// A common pitfall when using a console is to forget to flush writes to -/// stdout before setting new text attributes. -#[derive(Debug)] -pub struct Console { - handle_id: DWORD, - start_attr: TextAttributes, - cur_attr: TextAttributes, -} - -impl Console { - /// Get a console for a standard I/O stream. 
- fn create_for_stream(handle_id: DWORD) -> io::Result { - let mut info = unsafe { mem::zeroed() }; - let res = unsafe { - let handle = kernel32::GetStdHandle(handle_id); - kernel32::GetConsoleScreenBufferInfo(handle, &mut info) - }; - if res == 0 { - return Err(io::Error::last_os_error()); - } - let attr = TextAttributes::from_word(info.wAttributes); - Ok(Console { - handle_id: handle_id, - start_attr: attr, - cur_attr: attr, - }) - } - - /// Create a new Console to stdout. - /// - /// If there was a problem creating the console, then an error is returned. - pub fn stdout() -> io::Result { - Self::create_for_stream(STD_OUTPUT_HANDLE) - } - - /// Create a new Console to stderr. - /// - /// If there was a problem creating the console, then an error is returned. - pub fn stderr() -> io::Result { - Self::create_for_stream(STD_ERROR_HANDLE) - } - - /// Applies the current text attributes. - fn set(&mut self) -> io::Result<()> { - let attr = self.cur_attr.to_word(); - let res = unsafe { - let handle = kernel32::GetStdHandle(self.handle_id); - kernel32::SetConsoleTextAttribute(handle, attr) - }; - if res == 0 { - return Err(io::Error::last_os_error()); - } - Ok(()) - } - - /// Apply the given intensity and color attributes to the console - /// foreground. - /// - /// If there was a problem setting attributes on the console, then an error - /// is returned. - pub fn fg(&mut self, intense: Intense, color: Color) -> io::Result<()> { - self.cur_attr.fg_color = color; - self.cur_attr.fg_intense = intense; - self.set() - } - - /// Apply the given intensity and color attributes to the console - /// background. - /// - /// If there was a problem setting attributes on the console, then an error - /// is returned. - pub fn bg(&mut self, intense: Intense, color: Color) -> io::Result<()> { - self.cur_attr.bg_color = color; - self.cur_attr.bg_intense = intense; - self.set() - } - - /// Reset the console text attributes to their original settings. 
- /// - /// The original settings correspond to the text attributes on the console - /// when this `Console` value was created. - /// - /// If there was a problem setting attributes on the console, then an error - /// is returned. - pub fn reset(&mut self) -> io::Result<()> { - self.cur_attr = self.start_attr; - self.set() - } -} - -/// A representation of text attributes for the Windows console. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -struct TextAttributes { - fg_color: Color, - fg_intense: Intense, - bg_color: Color, - bg_intense: Intense, -} - -impl TextAttributes { - fn to_word(&self) -> WORD { - let mut w = 0; - w |= self.fg_color.to_fg(); - w |= self.fg_intense.to_fg(); - w |= self.bg_color.to_bg(); - w |= self.bg_intense.to_bg(); - w as WORD - } - - fn from_word(word: WORD) -> TextAttributes { - let attr = word as DWORD; - TextAttributes { - fg_color: Color::from_fg(attr), - fg_intense: Intense::from_fg(attr), - bg_color: Color::from_bg(attr), - bg_intense: Intense::from_bg(attr), - } - } -} - -/// Whether to use intense colors or not. -#[allow(missing_docs)] -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum Intense { - Yes, - No, -} - -impl Intense { - fn to_bg(&self) -> DWORD { - self.to_fg() << 4 - } - - fn from_bg(word: DWORD) -> Intense { - Intense::from_fg(word >> 4) - } - - fn to_fg(&self) -> DWORD { - match *self { - Intense::No => 0, - Intense::Yes => FG_INTENSITY, - } - } - - fn from_fg(word: DWORD) -> Intense { - if word & FG_INTENSITY > 0 { - Intense::Yes - } else { - Intense::No - } - } -} - -/// The set of available colors for use with a Windows console. 
-#[allow(missing_docs)] -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum Color { - Black, - Blue, - Green, - Red, - Cyan, - Magenta, - Yellow, - White, -} - -impl Color { - fn to_bg(&self) -> DWORD { - self.to_fg() << 4 - } - - fn from_bg(word: DWORD) -> Color { - Color::from_fg(word >> 4) - } - - fn to_fg(&self) -> DWORD { - match *self { - Color::Black => 0, - Color::Blue => FG_BLUE, - Color::Green => FG_GREEN, - Color::Red => FG_RED, - Color::Cyan => FG_CYAN, - Color::Magenta => FG_MAGENTA, - Color::Yellow => FG_YELLOW, - Color::White => FG_WHITE, - } - } - - fn from_fg(word: DWORD) -> Color { - match word & 0b111 { - FG_BLUE => Color::Blue, - FG_GREEN => Color::Green, - FG_RED => Color::Red, - FG_CYAN => Color::Cyan, - FG_MAGENTA => Color::Magenta, - FG_YELLOW => Color::Yellow, - FG_WHITE => Color::White, - _ => Color::Black, - } - } -} diff -Nru ripgrep-0.6.0/wincolor/UNLICENSE ripgrep-0.10.0.3/wincolor/UNLICENSE --- ripgrep-0.6.0/wincolor/UNLICENSE 2017-08-24 00:05:27.000000000 +0000 +++ ripgrep-0.10.0.3/wincolor/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to