diff --git a/README.md b/README.md index a03abc79..8a1b5f8f 100644 --- a/README.md +++ b/README.md @@ -5458,7 +5458,7 @@ in markdown: - ugrep 7.0.1 October 26, 2024 UGREP(1) + ugrep 7.0.2 October 27, 2024 UGREP(1) 🔝 [Back to table of contents](#toc) diff --git a/bin/win32/ug.exe b/bin/win32/ug.exe index 5e997b54..f243e83c 100755 Binary files a/bin/win32/ug.exe and b/bin/win32/ug.exe differ diff --git a/bin/win32/ugrep-indexer.exe b/bin/win32/ugrep-indexer.exe index 9cf125e5..1a24117c 100755 Binary files a/bin/win32/ugrep-indexer.exe and b/bin/win32/ugrep-indexer.exe differ diff --git a/bin/win32/ugrep.exe b/bin/win32/ugrep.exe index 5e997b54..f243e83c 100755 Binary files a/bin/win32/ugrep.exe and b/bin/win32/ugrep.exe differ diff --git a/bin/win64/ug.exe b/bin/win64/ug.exe index ef006342..c05a0fdb 100755 Binary files a/bin/win64/ug.exe and b/bin/win64/ug.exe differ diff --git a/bin/win64/ugrep-indexer.exe b/bin/win64/ugrep-indexer.exe index 55116dce..aa0b711e 100755 Binary files a/bin/win64/ugrep-indexer.exe and b/bin/win64/ugrep-indexer.exe differ diff --git a/bin/win64/ugrep.exe b/bin/win64/ugrep.exe index ef006342..c05a0fdb 100755 Binary files a/bin/win64/ugrep.exe and b/bin/win64/ugrep.exe differ diff --git a/configure b/configure index 2f55fe72..b0be4514 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.72 for ugrep 7.0.1. +# Generated by GNU Autoconf 2.72 for ugrep 7.0.2. # # Report bugs to <https://github.com/Genivia/ugrep/issues>. # @@ -606,8 +606,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='ugrep' PACKAGE_TARNAME='ugrep' -PACKAGE_VERSION='7.0.1' -PACKAGE_STRING='ugrep 7.0.1' +PACKAGE_VERSION='7.0.2' +PACKAGE_STRING='ugrep 7.0.2' PACKAGE_BUGREPORT='https://github.com/Genivia/ugrep/issues' PACKAGE_URL='https://ugrep.com' @@ -1382,7 +1382,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. 
# This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -'configure' configures ugrep 7.0.1 to adapt to many kinds of systems. +'configure' configures ugrep 7.0.2 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1453,7 +1453,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of ugrep 7.0.1:";; + short | recursive ) echo "Configuration of ugrep 7.0.2:";; esac cat <<\_ACEOF @@ -1630,7 +1630,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -ugrep configure 7.0.1 +ugrep configure 7.0.2 generated by GNU Autoconf 2.72 Copyright (C) 2023 Free Software Foundation, Inc. @@ -2184,7 +2184,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by ugrep $as_me 7.0.1, which was +It was created by ugrep $as_me 7.0.2, which was generated by GNU Autoconf 2.72. Invocation command line was $ $0$ac_configure_args_raw @@ -3694,7 +3694,7 @@ fi # Define the identity of the package. PACKAGE='ugrep' - VERSION='7.0.1' + VERSION='7.0.2' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -11790,7 +11790,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by ugrep $as_me 7.0.1, which was +This file was extended by ugrep $as_me 7.0.2, which was generated by GNU Autoconf 2.72. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -11863,7 +11863,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -ugrep config.status 7.0.1 +ugrep config.status 7.0.2 configured by $0, generated by GNU Autoconf 2.72, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 91eeed22..31f190c9 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ugrep],[7.0.1],[https://github.com/Genivia/ugrep/issues],[ugrep],[https://ugrep.com]) +AC_INIT([ugrep],[7.0.2],[https://github.com/Genivia/ugrep/issues],[ugrep],[https://ugrep.com]) AM_INIT_AUTOMAKE([foreign subdir-objects dist-xz no-dist-gzip]) AC_CONFIG_HEADERS([config.h]) AC_COPYRIGHT([Copyright (C) 2019-2024 Robert van Engelen, Genivia Inc.]) diff --git a/include/reflex/absmatcher.h b/include/reflex/absmatcher.h index 6dfb474d..33253375 100644 --- a/include/reflex/absmatcher.h +++ b/include/reflex/absmatcher.h @@ -426,6 +426,7 @@ class AbstractMatcher { #endif cno_ = 0; num_ = 0; + res_ = 0; own_ = true; eof_ = false; mat_ = false; @@ -466,10 +467,15 @@ class AbstractMatcher { } #if WITH_SPAN /// Set event handler functor to invoke when the buffer contents are shifted out, e.g. for logging the data searched. - void set_handler(Handler *handler) + inline void set_handler(Handler *handler) { evh_ = handler; } + /// Set reserved bytes for buffer shifting + inline void set_reserve(size_t n) + { + res_ = n; + } /// Get the buffered context before the matching line. 
inline Context before() { @@ -569,6 +575,7 @@ class AbstractMatcher { #endif cno_ = 0; num_ = 0; + res_ = 0; own_ = false; eof_ = true; mat_ = false; @@ -1422,10 +1429,10 @@ class AbstractMatcher { bol_ = txt_; } size_t gap = bol_ - buf_; - if (gap > 4096) + if (gap > res_) { - // make the new end_ address page-aligned to read input, retain some data before bol - gap -= 4096 - ((reinterpret_cast(buf_ + end_) - gap) & 4095); + // keep reserved bytes before the current line in the buffer, when nonzero (default is zero) + gap -= res_; // invoke user-defined handler when defined if (evh_ != NULL) (*evh_)(*this, buf_, gap, num_); @@ -1447,7 +1454,7 @@ class AbstractMatcher { else { size_t newmax = end_ + need; - // adjust max to page-sized + // adjust max to ignore last byte --max_; while (max_ < newmax) max_ *= 2; @@ -1655,6 +1662,7 @@ class AbstractMatcher { #endif size_t cno_; ///< column number count (cached) size_t num_; ///< character count of the input till bol_ + size_t res_; ///< reserve bytes to keep in the buffer before bol_ when shifting, use only w/o evh_() set bool own_; ///< true if AbstractMatcher::buf_ was allocated and should be deleted bool eof_; ///< input has reached EOF bool mat_; ///< true if AbstractMatcher::matches() was successful diff --git a/lib/pattern.cpp b/lib/pattern.cpp index 43f879bf..4f5ae3d8 100644 --- a/lib/pattern.cpp +++ b/lib/pattern.cpp @@ -3876,7 +3876,7 @@ void Pattern::analyze_dfa(DFA::State *start) fin_count += hi - lo + 1; continue; } - if (next_state->first == 0 || next_state->first > cut_depth + 1) + if (next_state->first == 0 || next_state->first > cut_depth + 1U) next_chars.insert(lo, hi); if (next_state->first == 0) { @@ -3886,7 +3886,7 @@ void Pattern::analyze_dfa(DFA::State *start) { chars.insert(lo, hi); // has a backedge to a state after the new cut? 
- if (cut_depth == 0 || next_state->first > cut_depth + 1) + if (cut_depth == 0 || next_state->first > cut_depth + 1U) has_backedge = true; backedge = true; // has a backedge to a previous state continue; @@ -3957,7 +3957,7 @@ void Pattern::analyze_dfa(DFA::State *start) if ((lo > '\n' || hi < '\n') && !edge.next_accepting()) { DFA::State *next_state = edge.state(); - if (next_state->first == 0 || next_state->first > depth + 1) + if (next_state->first == 0 || next_state->first > depth + 1U) next_chars.insert(lo, hi); } } diff --git a/man/ug.1 b/man/ug.1 index f24a592d..b14b9f5a 100644 --- a/man/ug.1 +++ b/man/ug.1 @@ -1,4 +1,4 @@ -.TH UGREP "1" "October 26, 2024" "ugrep 7.0.1" "User Commands" +.TH UGREP "1" "October 27, 2024" "ugrep 7.0.2" "User Commands" .SH NAME \fBugrep\fR, \fBug\fR -- file pattern searcher .SH SYNOPSIS diff --git a/man/ugrep-indexer.1 b/man/ugrep-indexer.1 index 66c9b5e3..339bddad 100644 --- a/man/ugrep-indexer.1 +++ b/man/ugrep-indexer.1 @@ -1,4 +1,4 @@ -.TH UGREP-INDEXER "1" "October 26, 2024" "ugrep-indexer 7.0.1" "User Commands" +.TH UGREP-INDEXER "1" "October 27, 2024" "ugrep-indexer 7.0.2" "User Commands" .SH NAME \fBugrep-indexer\fR -- file indexer to accelerate recursive searching .SH SYNOPSIS diff --git a/man/ugrep.1 b/man/ugrep.1 index f24a592d..b14b9f5a 100644 --- a/man/ugrep.1 +++ b/man/ugrep.1 @@ -1,4 +1,4 @@ -.TH UGREP "1" "October 26, 2024" "ugrep 7.0.1" "User Commands" +.TH UGREP "1" "October 27, 2024" "ugrep 7.0.2" "User Commands" .SH NAME \fBugrep\fR, \fBug\fR -- file pattern searcher .SH SYNOPSIS diff --git a/src/ugrep-indexer.cpp b/src/ugrep-indexer.cpp index 50f08c4c..8dd7caf0 100644 --- a/src/ugrep-indexer.cpp +++ b/src/ugrep-indexer.cpp @@ -35,7 +35,7 @@ */ // DO NOT ALTER THIS LINE: updated by makemake.sh and we need it physically here for MSVC++ build from source -#define UGREP_VERSION "7.0.1" +#define UGREP_VERSION "7.0.2" // use a task-parallel thread to decompress the stream into a pipe to search, also handles nested 
archives #define WITH_DECOMPRESSION_THREAD diff --git a/src/ugrep.cpp b/src/ugrep.cpp index 6b717d91..5578dd7e 100644 --- a/src/ugrep.cpp +++ b/src/ugrep.cpp @@ -11149,7 +11149,7 @@ void Grep::search(const char *pathname, uint16_t cost) } else if (flag_before_context == 0 && flag_after_context == 0 && !flag_any_line && !flag_invert_match) { - // options -ABC, -y, -v are not specified + // options -ABC, -y, -v are not specified, --hexdump context is supported (including with options -ABC) size_t lineno = 0; size_t matching = 0; @@ -11170,6 +11170,10 @@ void Grep::search(const char *pathname, uint16_t cost) // register the event handler to update restline on buffer shift matcher->set_handler(&handler); + // --hexdump: keep -B NUM+1 before context times hex column bytes in the buffer when shifting + if (flag_hex_before > 0) + matcher->set_reserve(flag_hex_before * flag_hex_columns); + // the rest of the matching line restline_data = NULL; restline_size = 0; @@ -11188,6 +11192,8 @@ void Grep::search(const char *pathname, uint16_t cost) if (flag_hex_after > 0) { size_t right = flag_hex_after * flag_hex_columns - ((restline_last - 1) % flag_hex_columns) - 1; + if (restline_last + right > matcher->first()) + right = matcher->first() - restline_last; if (right < restline_size) restline_size = right; } @@ -11309,7 +11315,14 @@ void Grep::search(const char *pathname, uint16_t cost) border = begin - bol; } - size_t left = flag_hex_before * flag_hex_columns + (first % flag_hex_columns) - flag_hex_columns; + size_t left = 0; + if (restline_last + restline_size < first) + { + left = flag_hex_before * flag_hex_columns + (first % flag_hex_columns) - flag_hex_columns; + if (restline_last + restline_size + left > first) + left = first - (restline_last + restline_size); + } + if (begin > bol + left) { bol = begin - left; diff --git a/src/ugrep.hpp b/src/ugrep.hpp index 60da17df..9a9ae97e 100644 --- a/src/ugrep.hpp +++ b/src/ugrep.hpp @@ -38,7 +38,7 @@ #define UGREP_HPP // DO NOT 
ALTER THIS LINE: updated by makemake.sh and we need it physically here for MSVC++ build from source -#define UGREP_VERSION "7.0.1" +#define UGREP_VERSION "7.0.2" // disable mmap because mmap is almost always slower than the file reading speed improvements since 3.0.0 #define WITH_NO_MMAP