From 660d2417ad86860830cbfb2b64d3a8830a7ebceb Mon Sep 17 00:00:00 2001 From: Greg Hurrell Date: Mon, 16 Oct 2023 00:02:36 +0200 Subject: [PATCH] feat: implement max_files option in a number of places MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike in the Ruby version, the Lua version allows you to set a different limit for each scanner, becauase different scanners have different performance characteristics. commandt.setup({ scanners = { file = { max_files = 100000, -- A "big" limit. }, find = { max_files = 10, -- A small limit. }, git = { max_files = 0, -- Same as no limit. }, rg = { -- No setting, no limit. }, } A subsequent commit will explain the details of how to set this up, but in brief, setting the limit to "0" (or omitting the limit) means no limit. If you have circular symlinks in a filesystem, then the limit stops Command-T for scanning forever (particularly in the case of the `:CommandT` — "find" — scanner). If you have a custom command-based scanner, then you can supply a "max_files" limit in your return value; eg. a limit of 10,000 like so: finders = { ack = { command = function(directory) local command = 'ack -f --print0' if directory ~= '' and directory ~= '.' and directory ~= './' then directory = vim.fn.shellescape(directory) command = command .. ' -- ' .. directory end local drop = 0 local max_files = 10000 return command, drop, max_files end, }, }, Disclaimer: this is only exceedingly lightly tested, so I'm going to park it on the "next" branch for a bit before merging to "main". Note: formatting changes due to running "bin/format" included. --- lua/wincent/commandt/init.lua | 48 ++++++++-- lua/wincent/commandt/lib/find.c | 15 ++-- lua/wincent/commandt/lib/find.h | 4 +- lua/wincent/commandt/lib/scanner.c | 88 ++++++++++++++----- lua/wincent/commandt/lib/scanner.h | 2 +- lua/wincent/commandt/lib/watchman.c | 8 +- .../commandt/private/finders/command.lua | 5 +- lua/wincent/commandt/private/finders/file.lua | 3 +- lua/wincent/commandt/private/lib.lua | 12 +-- .../commandt/private/scanners/command.lua | 4 +- .../commandt/private/scanners/file.lua | 6 +- ruby/command-t/ext/command-t/ext.h | 18 ++-- ruby/command-t/ext/command-t/matcher.c | 2 +- ruby/command-t/ext/command-t/watchman.c | 8 +- 14 files changed, 155 insertions(+), 68 deletions(-) diff --git a/lua/wincent/commandt/init.lua b/lua/wincent/commandt/init.lua index 20f8e15a..f6a5f0db 100644 --- a/lua/wincent/commandt/init.lua +++ b/lua/wincent/commandt/init.lua @@ -108,9 +108,24 @@ local options_spec = { scanners = { kind = 'table', keys = { + find = { + kind = 'table', + keys = { + max_files = { kind = 'number' }, + }, + optional = true, + }, + file = { + kind = 'table', + keys = { + max_files = { kind = 'number' }, + }, + optional = true, + }, git = { kind = 'table', keys = { + max_files = { kind = 'number' }, submodules = { kind = 'boolean', optional = true, @@ -129,6 +144,13 @@ local options_spec = { end, optional = true, }, + rg = { + kind = 'table', + keys = { + max_files = { kind = 'number' }, + }, + optional = true, + }, }, }, selection_highlight = { kind = 'string' }, @@ -173,7 +195,7 @@ local default_options = { end, }, find = { - command = function(directory) + command = function(directory, options) if vim.startswith(directory, './') then directory = directory:sub(3, -1) end @@ -191,9 +213,10 @@ local default_options = { -- I may end up needing to do some fancy, separate micromanagement of -- prefixes and let the matcher operate on paths without prefixes. end - -- TODO: support max depth, dot directory filter etc + -- TODO: support dot directory filter etc local command = 'find -L ' .. directory .. ' -type f -print0 2> /dev/null' - return command, drop + local max_files = options.scanners.find.max_files or 0 + return command, drop, max_files end, fallback = true, }, @@ -212,7 +235,9 @@ local default_options = { command = command .. ' -- ' .. directory end command = command .. ' 2> /dev/null' - return command + local drop = 0 + local max_files = options.scanners.git.max_files or 0 + return command, drop, max_files end, fallback = true, }, @@ -289,7 +314,7 @@ local default_options = { end, }, rg = { - command = function(directory) + command = function(directory, options) if vim.startswith(directory, './') then directory = directory:sub(3, -1) end @@ -305,7 +330,8 @@ local default_options = { command = command .. ' ' .. directory end command = command .. ' 2> /dev/null' - return command, drop + local max_files = options.scanners.rg.max_files or 0 + return command, drop, max_files end, fallback = true, }, @@ -368,10 +394,20 @@ local default_options = { position = 'center', -- 'bottom', 'center', 'top'. open = open, scanners = { + file = { + max_files = 0, + }, + find = { + max_files = 0, + }, git = { + max_files = 0, submodules = true, untracked = false, }, + rg = { + max_files = 0, + }, }, selection_highlight = 'PMenuSel', smart_case = nil, -- If nil, will infer from Neovim's `'smartcase'`. diff --git a/lua/wincent/commandt/lib/find.c b/lua/wincent/commandt/lib/find.c index adb078a2..c8745515 100644 --- a/lua/wincent/commandt/lib/find.c +++ b/lua/wincent/commandt/lib/find.c @@ -3,7 +3,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ -// TODO: implement max_depth, max_files +// TODO: implement max_depth // TODO: implement scan_dot_directories #include "find.h" @@ -25,11 +25,10 @@ static long MAX_FILES = MAX_FILES_CONF; static size_t buffer_size = MMAP_SLAB_SIZE_CONF; static const char *current_directory = "."; -find_result_t *commandt_find(const char *directory) { +find_result_t *commandt_find(const char *directory, unsigned max_files) { find_result_t *result = xcalloc(1, sizeof(find_result_t)); - // TODO: once i am passing in max_files, don't bother asking for MAX_FILES - result->files_size = sizeof(str_t) * MAX_FILES; + result->files_size = sizeof(str_t) * (max_files ? max_files + 1 : MAX_FILES); result->files = xmap(result->files_size); result->buffer_size = buffer_size; @@ -67,6 +66,10 @@ find_result_t *commandt_find(const char *directory) { memcpy(buffer, node->fts_path + drop, path_len); str_init(str, buffer, path_len - 1); // Don't count NUL byte. buffer += path_len; + + if (max_files && result->count >= max_files) { + break; + } } } if (errno != 0) { @@ -92,8 +95,8 @@ void commandt_find_result_free(find_result_t *result) { free(result); } -scanner_t *commandt_file_scanner(const char *directory) { - find_result_t *result = commandt_find(directory); +scanner_t *commandt_file_scanner(const char *directory, unsigned max_files) { + find_result_t *result = commandt_find(directory, max_files); // BUG: if there is an error here, we effectively swallow it... if (result->error) { DEBUG_LOG("%s\n", result->error); diff --git a/lua/wincent/commandt/lib/find.h b/lua/wincent/commandt/lib/find.h index 0455c71b..0a80570d 100644 --- a/lua/wincent/commandt/lib/find.h +++ b/lua/wincent/commandt/lib/find.h @@ -42,7 +42,7 @@ typedef struct { size_t buffer_size; } find_result_t; -find_result_t *commandt_find(const char *directory); +find_result_t *commandt_find(const char *directory, unsigned max_files); void commandt_find_result_free(find_result_t *result); /** @@ -51,6 +51,6 @@ void commandt_find_result_free(find_result_t *result); * new scanner takes ownership of the resources, which means you should call * `scanner_free()` on it. */ -scanner_t *commandt_file_scanner(const char *directory); +scanner_t *commandt_file_scanner(const char *directory, unsigned max_files); #endif diff --git a/lua/wincent/commandt/lib/scanner.c b/lua/wincent/commandt/lib/scanner.c index 306f12ea..31e98e2f 100644 --- a/lua/wincent/commandt/lib/scanner.c +++ b/lua/wincent/commandt/lib/scanner.c @@ -6,19 +6,21 @@ #include "scanner.h" #include /* for assert() */ +#include /* for errno */ +#include /* for SIGKILL, kill() */ #include /* for NULL */ #include /* for fileno(), fprintf(), pclose(), popen(), stderr */ #include /* for free() */ #include /* for memchr(), strlen() */ -#include /* read() */ +#include /* close(), fork(), pipe(), read(), tcgetpgrp() */ #include "debug.h" +#include "str.h" #include "xmalloc.h" #include "xmap.h" /* for xmap(), xmunmap() */ // TODO: make this capable of producing asynchronously? -// TODO make this configurable static long MAX_FILES = MAX_FILES_CONF; static size_t buffer_size = MMAP_SLAB_SIZE_CONF; @@ -37,7 +39,7 @@ scanner_t *scanner_new_copy(const char **candidates, unsigned count) { return scanner; } -scanner_t *scanner_new_command(const char *command, unsigned drop) { +scanner_t *scanner_new_command(const char *command, unsigned drop, unsigned max_files) { scanner_t *scanner = xcalloc(1, sizeof(scanner_t)); scanner->candidates_size = sizeof(str_t) * MAX_FILES; DEBUG_LOG( @@ -50,19 +52,52 @@ scanner_t *scanner_new_command(const char *command, unsigned drop) { ); scanner->buffer = xmap(scanner->buffer_size); - FILE *file = popen(command, "r"); - if (!file) { - // Rather than crashing the process, act like we got an empty result. + // Index 0 = read end of pipe; index 1 = write end of pipe. + int stdout_pipe[2]; + + if (pipe(stdout_pipe) != 0) { + DEBUG_LOG("scanner_new_command(): failed pipe()\n"); + goto out; + } + + pid_t child_pid = fork(); + if (child_pid == -1) { + DEBUG_LOG("scanner_new_command(): failed fork()\n"); goto out; + } else if (child_pid == 0) { + // In child. + DEBUG_LOG("scanner_new_command(): forked child\n"); + close(stdout_pipe[0]); // TODO check error (!= 0) etc + dup2(stdout_pipe[1], 1); + close(stdout_pipe[1]); + + // Fork a shell to mimic behavior of `popen()`. + execl("/bin/sh", "sh", "-c", command, NULL); + perror("execl"); + _exit(1); } - int fd = fileno(file); + // In parent. + DEBUG_LOG( + "scanner_new_command(): parent forked child with PID %d\n", child_pid + ); + int status = close(stdout_pipe[1]); + if (status != 0) { + // Degrade gracefully; either: + // - status -1: probably a `wait4()` call failed; or: + // - otherwise: command exited with this status. + DEBUG_LOG( + "commandt_scanner_new_command(): close() exited with %d status\n", status + ); + } char *start = scanner->buffer; char *end = scanner->buffer; ssize_t read_count; - while ((read_count = read(fd, end, 4096)) != 0) { + while ((read_count = read(stdout_pipe[0], end, 4096)) != 0) { + DEBUG_LOG("scanner_new_command(): read %d bytes\n", read_count); if (read_count < 0) { // A read error, but we may as well try and proceed gracefully. + DEBUG_LOG("scanner_new_command(): read() - %s\n", strerror(errno)); break; } end += read_count; @@ -86,27 +121,38 @@ scanner_t *scanner_new_command(const char *command, unsigned drop) { } start = next_end + 1; str_init(&scanner->candidates[scanner->count++], path, length); + DEBUG_LOG( + "commandt_scanner_new_command(): scanned %s\n", + str_c_string(&scanner->candidates[scanner->count - 1]) + ); - if (scanner->count >= MAX_FILES) { - // TODO: make this real + if (max_files && scanner->count >= max_files) { + DEBUG_LOG( + "commandt_scanner_new_command(): killing process %d because count %d\n", + child_pid, + scanner->count + ); + if (kill(child_pid, SIGKILL)) { + DEBUG_LOG( + "commandt_scanner_new_command(): kill() errno %d error %s\n", + errno, + strerror(errno) + ); + } + goto bail; } } } bail: - (void)0; - int status = pclose(file); - if (status != 0) { - // Degrade gracefully; either: - // - status -1: probably a `wait4()` call failed; or: - // - otherwise: command exited with this status. - DEBUG_LOG( - "commandt_scanner_new_command(): pclose() exited with %d status\n", - status - ); - } + DEBUG_LOG("commandt_scanner_new_command(): waiting %d\n", child_pid); + wait(&child_pid); out: + DEBUG_LOG( + "commandt_scanner_new_command(): returning scanner with count %d\n", + scanner->count + ); return scanner; } diff --git a/lua/wincent/commandt/lib/scanner.h b/lua/wincent/commandt/lib/scanner.h index 42d608df..11cf009c 100644 --- a/lua/wincent/commandt/lib/scanner.h +++ b/lua/wincent/commandt/lib/scanner.h @@ -34,7 +34,7 @@ scanner_t *scanner_new_copy(const char **candidates, unsigned count); * 0, but for commands such as `find .` which prefix all paths with "./", `drop` * would be 2. */ -scanner_t *scanner_new_command(const char *command, unsigned drop); +scanner_t *scanner_new_command(const char *command, unsigned drop, unsigned max_files); /** * Create a new `scanner_t` struct initialized with `candidates`. diff --git a/lua/wincent/commandt/lib/watchman.c b/lua/wincent/commandt/lib/watchman.c index 258df128..3ef5df13 100644 --- a/lua/wincent/commandt/lib/watchman.c +++ b/lua/wincent/commandt/lib/watchman.c @@ -70,16 +70,16 @@ static void watchman_write_string( #define WATCHMAN_SKIP_MARKER 0x0c #define WATCHMAN_HEADER \ -WATCHMAN_BINARY_MARKER "\x06\x00\x00\x00\x00\x00\x00\x00\x00" + WATCHMAN_BINARY_MARKER "\x06\x00\x00\x00\x00\x00\x00\x00\x00" // How far we have to look to figure out the size of the PDU header. #define WATCHMAN_SNIFF_BUFFER_SIZE \ -(sizeof(WATCHMAN_BINARY_MARKER) - 1 + sizeof(int8_t)) + (sizeof(WATCHMAN_BINARY_MARKER) - 1 + sizeof(int8_t)) // How far we have to peek, at most, to figure out the size of the PDU itself. #define WATCHMAN_PEEK_BUFFER_SIZE \ -(sizeof(WATCHMAN_BINARY_MARKER) - 1 + sizeof(typeof(WATCHMAN_INT64_MARKER)) + \ - sizeof(int64_t)) + (sizeof(WATCHMAN_BINARY_MARKER) - 1 + \ + sizeof(typeof(WATCHMAN_INT64_MARKER)) + sizeof(int64_t)) int commandt_watchman_connect(const char *socket_path) { int fd = socket(PF_LOCAL, SOCK_STREAM, 0); diff --git a/lua/wincent/commandt/private/finders/command.lua b/lua/wincent/commandt/private/finders/command.lua index 8604f303..23c28ef2 100644 --- a/lua/wincent/commandt/private/finders/command.lua +++ b/lua/wincent/commandt/private/finders/command.lua @@ -6,11 +6,12 @@ local ffi = require('ffi') return function(directory, command, options) local lib = require('wincent.commandt.private.lib') local drop = 0 + local max_files = 0 if type(command) == 'function' then - command, drop = command(directory, options) + command, drop, max_files = command(directory, options) end local finder = {} - finder.scanner = require('wincent.commandt.private.scanners.command').scanner(command, drop) + finder.scanner = require('wincent.commandt.private.scanners.command').scanner(command, drop, max_files) finder.matcher = lib.matcher_new(finder.scanner, options) finder.run = function(query) local results = lib.matcher_run(finder.matcher, query) diff --git a/lua/wincent/commandt/private/finders/file.lua b/lua/wincent/commandt/private/finders/file.lua index 8b444d4c..e43faf23 100644 --- a/lua/wincent/commandt/private/finders/file.lua +++ b/lua/wincent/commandt/private/finders/file.lua @@ -8,7 +8,8 @@ return function(directory, options) directory = directory or os.getenv('PWD') local lib = require('wincent.commandt.private.lib') local finder = {} - finder.scanner = require('wincent.commandt.private.scanners.file').scanner(directory) + local max_files = options.scanners.file.max_files or 0 + finder.scanner = require('wincent.commandt.private.scanners.file').scanner(directory, max_files) finder.matcher = lib.matcher_new(finder.scanner, options) finder.run = function(query) local results = lib.matcher_run(finder.matcher, query) diff --git a/lua/wincent/commandt/private/lib.lua b/lua/wincent/commandt/private/lib.lua index d45f21bd..e3fb4b0b 100644 --- a/lua/wincent/commandt/private/lib.lua +++ b/lua/wincent/commandt/private/lib.lua @@ -105,8 +105,8 @@ setmetatable(c, { // Scanner functions. - scanner_t *commandt_file_scanner(const char *directory); - scanner_t *commandt_scanner_new_command(const char *command, unsigned drop); + scanner_t *commandt_file_scanner(const char *directory, unsigned max_files); + scanner_t *commandt_scanner_new_command(const char *command, unsigned drop, unsigned max_files); scanner_t *commandt_scanner_new_copy(const char **candidates, unsigned count); scanner_t *commandt_scanner_new_str(str_t *candidates, unsigned count); void commandt_scanner_free(scanner_t *scanner); @@ -161,8 +161,8 @@ lib.epoch = function() return result['seconds'], result['microseconds'] end -lib.file_scanner = function(directory) - local scanner = c.commandt_file_scanner(directory) +lib.file_scanner = function(directory, max_files) + local scanner = c.commandt_file_scanner(directory, max_files or 0) ffi.gc(scanner, c.commandt_scanner_free) return scanner end @@ -221,8 +221,8 @@ lib.print_scanner = function(scanner) c.commandt_print_scanner(scanner) end -lib.scanner_new_command = function(command, drop) - local scanner = c.commandt_scanner_new_command(command, drop or 0) +lib.scanner_new_command = function(command, drop, max_files) + local scanner = c.commandt_scanner_new_command(command, drop or 0, max_files or 0) ffi.gc(scanner, c.commandt_scanner_free) return scanner end diff --git a/lua/wincent/commandt/private/scanners/command.lua b/lua/wincent/commandt/private/scanners/command.lua index eaad098b..45198ddd 100644 --- a/lua/wincent/commandt/private/scanners/command.lua +++ b/lua/wincent/commandt/private/scanners/command.lua @@ -3,9 +3,9 @@ local command = {} -command.scanner = function(user_command, drop) +command.scanner = function(user_command, drop, max_files) local lib = require('wincent.commandt.private.lib') - local scanner = lib.scanner_new_command(user_command, drop) + local scanner = lib.scanner_new_command(user_command, drop, max_files) return scanner end diff --git a/lua/wincent/commandt/private/scanners/file.lua b/lua/wincent/commandt/private/scanners/file.lua index e0dc6c6c..8fbc0ccc 100644 --- a/lua/wincent/commandt/private/scanners/file.lua +++ b/lua/wincent/commandt/private/scanners/file.lua @@ -3,10 +3,10 @@ local file = {} -file.scanner = function(directory) +file.scanner = function(directory, max_files) local lib = require('wincent.commandt.private.lib') - -- TODO: support max depth, dot directory filter etc - local scanner = lib.file_scanner(directory) + -- TODO: support dot directory filter etc + local scanner = lib.file_scanner(directory, max_files) return scanner end diff --git a/ruby/command-t/ext/command-t/ext.h b/ruby/command-t/ext/command-t/ext.h index 7fee70df..8fde933f 100644 --- a/ruby/command-t/ext/command-t/ext.h +++ b/ruby/command-t/ext/command-t/ext.h @@ -17,13 +17,13 @@ VALUE CommandT_option_from_hash(const char *option, VALUE hash); // Debugging macros. #define L(...) \ -{ \ -fprintf(stdout, __VA_ARGS__); \ -fflush(stdout); \ -} \ -while (0) + { \ + fprintf(stdout, __VA_ARGS__); \ + fflush(stdout); \ + } \ + while (0) #define RUBY_INSPECT(obj) \ -do { \ -rb_funcall(rb_mKernel, rb_intern("p"), 1, obj); \ -fflush(stdout); \ -} while (0) + do { \ + rb_funcall(rb_mKernel, rb_intern("p"), 1, obj); \ + fflush(stdout); \ + } while (0) diff --git a/ruby/command-t/ext/command-t/matcher.c b/ruby/command-t/ext/command-t/matcher.c index 719cb402..6c5792f3 100644 --- a/ruby/command-t/ext/command-t/matcher.c +++ b/ruby/command-t/ext/command-t/matcher.c @@ -280,7 +280,7 @@ VALUE CommandTMatcher_sorted_matches_for(int argc, VALUE *argv, VALUE self) { #ifdef HAVE_PTHREAD_H #define THREAD_THRESHOLD \ -1000 /* avoid the overhead of threading when search space is small */ + 1000 /* avoid the overhead of threading when search space is small */ if (path_count < THREAD_THRESHOLD) { thread_count = 1; } diff --git a/ruby/command-t/ext/command-t/watchman.c b/ruby/command-t/ext/command-t/watchman.c index bd57f83a..c3967c25 100644 --- a/ruby/command-t/ext/command-t/watchman.c +++ b/ruby/command-t/ext/command-t/watchman.c @@ -48,7 +48,7 @@ void watchman_dump(watchman_t *w, VALUE serializable); #define WATCHMAN_SKIP_MARKER 0x0c #define WATCHMAN_HEADER \ -WATCHMAN_BINARY_MARKER "\x06\x00\x00\x00\x00\x00\x00\x00\x00" + WATCHMAN_BINARY_MARKER "\x06\x00\x00\x00\x00\x00\x00\x00\x00" static const char watchman_array_marker = WATCHMAN_ARRAY_MARKER; static const char watchman_hash_marker = WATCHMAN_HASH_MARKER; @@ -552,12 +552,12 @@ void watchman_raise_system_call_error(int number) { // How far we have to look to figure out the size of the PDU header #define WATCHMAN_SNIFF_BUFFER_SIZE \ -sizeof(WATCHMAN_BINARY_MARKER) - 1 + sizeof(int8_t) + sizeof(WATCHMAN_BINARY_MARKER) - 1 + sizeof(int8_t) // How far we have to peek, at most, to figure out the size of the PDU itself #define WATCHMAN_PEEK_BUFFER_SIZE \ -sizeof(WATCHMAN_BINARY_MARKER) - 1 + sizeof(WATCHMAN_INT64_MARKER) + \ - sizeof(int64_t) + sizeof(WATCHMAN_BINARY_MARKER) - 1 + sizeof(WATCHMAN_INT64_MARKER) + \ + sizeof(int64_t) /** * CommandT::Watchman::Utils.query(query, socket)