[OE-core] [PATCH] grep_2.5.1a: fix grep for LSB compliance.

Li, Xin lixin.fnst at cn.fujitsu.com
Tue May 19 02:50:07 UTC 2015


Ping

There are two versions of the grep recipe.
One is 2.21, which is licensed under GPLv3.
The other is 2.5.1a, which is licensed under GPLv2.
If you use version 2.5.1a, this patch is needed to fix LSB compliance.

Thanks.

Li xin
> -----Original Message-----
> From: Li, Xin/李 欣
> Sent: Friday, May 08, 2015 11:13 AM
> To: openembedded-core at lists.openembedded.org
> Cc: Li, Xin/李 欣
> Subject: [PATCH] grep_2.5.1a: fix grep for LSB compliance.
> 
> The LSB test requires that grep, egrep and fgrep perform pattern matching in
> searches without regard to case if the -i option is specified.
> 
> Signed-off-by: Li Xin <lixin.fnst at cn.fujitsu.com>
> ---
>  .../grep/grep-2.5.1a/grep-fix-LSB-NG-Cases.patch   | 1414
> ++++++++++++++++++++
>  meta/recipes-extended/grep/grep_2.5.1a.bb          |    4 +-
>  2 files changed, 1417 insertions(+), 1 deletion(-)  create mode 100644
> meta/recipes-extended/grep/grep-2.5.1a/grep-fix-LSB-NG-Cases.patch
> 
> diff --git a/meta/recipes-extended/grep/grep-2.5.1a/grep-fix-LSB-NG-Cases.patch
> b/meta/recipes-extended/grep/grep-2.5.1a/grep-fix-LSB-NG-Cases.patch
> new file mode 100644
> index 0000000..6ed6968
> --- /dev/null
> +++ b/meta/recipes-extended/grep/grep-2.5.1a/grep-fix-LSB-NG-Cases.patch
> @@ -0,0 +1,1414 @@
> +From baf14fa6a796753d3b74d74fa69fff0d38fea540 Mon Sep 17 00:00:00 2001
> +From: Li xin <lixin.fnst at cn.fujitsu.com>
> +Date: Thu, 7 May 2015 18:28:46 +0900
> +Subject: [PATCH] grep: fix LSB NG Cases.
> +
> +NG Cases:
> +/tset/LI18NUX2K.L1/utils/egrep-tp/T.egrep-tp 5
> +/tset/LI18NUX2K.L1/utils/fgrep/T.fgrep 5
> +/tset/LI18NUX2K.L1/utils/grep-tp/T.grep-tp 5
> +
> +This patch is from Debian.
> +
> +Upstream-Status: Pending
> +
> +Signed-off-by: Li Xin <lixin.fnst at cn.fujitsu.com>
> +---
> + doc/grep.1        |   5 +-
> + doc/grep.texi     |   2 +-
> + lib/posix/regex.h |   4 +
> + src/dfa.c         |  22 +-
> + src/grep.c        |  96 ++++---
> + src/search.c      | 833
> +++++++++++++++++++++++++++++++++++++++++++++---------
> + tests/Makefile.am |   3 +-
> + tests/Makefile.in |   3 +-
> + 8 files changed, 776 insertions(+), 192 deletions(-)
> +
> +diff --git a/doc/grep.1 b/doc/grep.1
> +index 314669a..02288ff 100644
> +--- a/doc/grep.1
> ++++ b/doc/grep.1
> +@@ -191,6 +191,7 @@ Interpret
> + .I PATTERN
> + as a list of fixed strings, separated by newlines,  any of which is to
> +be matched.
> ++.TP
> + .BR \-P ", " \-\^\-perl-regexp
> + Interpret
> + .I PATTERN
> +@@ -300,9 +301,9 @@ Show only the part of a matching line that matches
> +Displays input actually coming from standard input as input coming from
> +file  .I LABEL.
> + This is especially useful for tools like zgrep, e.g.
> +-.B "gzip -cd foo.gz |grep --label=foo something"
> ++.B "gzip -cd foo.gz |grep -H --label=foo something"
> + .TP
> +-.BR \-\^\-line-buffering
> ++.BR \-\^\-line-buffered
> + Use line buffering, it can be a performance penality.
> + .TP
> + .BR \-q ", " \-\^\-quiet ", " \-\^\-silent diff --git a/doc/grep.texi
> +b/doc/grep.texi index 0aa2f4a..d84ed8a 100644
> +--- a/doc/grep.texi
> ++++ b/doc/grep.texi
> +@@ -364,7 +364,7 @@ Set the line buffering policy, this can be a performance
> penality.
> + @cindex changing name of standard input  Displays input actually
> +coming from standard input as input coming from file  @var{LABEL}. This
> +is especially useful for tools like zgrep, e.g.
> +- at command{gzip -cd foo.gz |grep --label=foo something}
> ++ at command{gzip -cd foo.gz |grep -H --label=foo something}
> +
> + @item -L
> + @itemx --files-without-match
> +diff --git a/lib/posix/regex.h b/lib/posix/regex.h index
> +63c2fef..7bb2b0e 100644
> +--- a/lib/posix/regex.h
> ++++ b/lib/posix/regex.h
> +@@ -109,6 +109,10 @@ typedef unsigned long int reg_syntax_t;
> +    If not set, \{, \}, {, and } are literals.  */  #define
> +RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
> +
> ++/* If this bit is set, then ignore case when matching.
> ++   If not set, then case is significant.  */ #define RE_ICASE
> ++(RE_INVALID_INTERVAL_ORD << 1)
> ++
> + /* If this bit is set, +, ? and | aren't recognized as operators.
> +    If not set, they are.  */
> + #define RE_LIMITED_OPS (RE_INTERVALS << 1) diff --git a/src/dfa.c
> +b/src/dfa.c index 590bfa7..27c876a 100644
> +--- a/src/dfa.c
> ++++ b/src/dfa.c
> +@@ -414,7 +414,7 @@ update_mb_len_index (unsigned char const *p, int
> +len)
> +
> + /* This function fetch a wide character, and update cur_mb_len,
> +    used only if the current locale is a multibyte environment.  */
> +-static wchar_t
> ++static wint_t
> + fetch_wc (char const *eoferr)
> + {
> +   wchar_t wc;
> +@@ -423,7 +423,7 @@ fetch_wc (char const *eoferr)
> +       if (eoferr != 0)
> + 	dfaerror (eoferr);
> +       else
> +-	return -1;
> ++	return WEOF;
> +     }
> +
> +   cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs); @@ -459,7 +459,7
> +@@ fetch_wc (char const *eoferr)  static void  parse_bracket_exp_mb ()
> +{
> +-  wchar_t wc, wc1, wc2;
> ++  wint_t wc, wc1, wc2;
> +
> +   /* Work area to build a mb_char_classes.  */
> +   struct mb_char_classes *work_mbc;
> +@@ -496,7 +496,7 @@ parse_bracket_exp_mb ()
> +     work_mbc->invert = 0;
> +   do
> +     {
> +-      wc1 = -1; /* mark wc1 is not initialized".  */
> ++      wc1 = WEOF; /* mark wc1 is not initialized".  */
> +
> +       /* Note that if we're looking at some other [:...:] construct,
> + 	 we just treat it as a bunch of ordinary characters.  We can do @@
> +-586,7 +586,7 @@ parse_bracket_exp_mb ()
> + 		      work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
> + 		    }
> +  		}
> +-	      wc = -1;
> ++	      wc1 = wc = WEOF;
> + 	    }
> + 	  else
> + 	    /* We treat '[' as a normal character here.  */ @@ -600,7 +600,7
> +@@ parse_bracket_exp_mb ()
> + 	    wc = fetch_wc(("Unbalanced ["));
> + 	}
> +
> +-      if (wc1 == -1)
> ++      if (wc1 == WEOF)
> + 	wc1 = fetch_wc(_("Unbalanced ["));
> +
> +       if (wc1 == L'-')
> +@@ -630,17 +630,17 @@ parse_bracket_exp_mb ()
> + 	    }
> + 	  REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
> + 			       range_sts_al, work_mbc->nranges + 1);
> +-	  work_mbc->range_sts[work_mbc->nranges] = wc;
> ++	  work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
> + 	  REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
> + 			       range_ends_al, work_mbc->nranges + 1);
> +-	  work_mbc->range_ends[work_mbc->nranges++] = wc2;
> ++	  work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
> + 	}
> +-      else if (wc != -1)
> ++      else if (wc != WEOF)
> + 	/* build normal characters.  */
> + 	{
> + 	  REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
> + 			       work_mbc->nchars + 1);
> +-	  work_mbc->chars[work_mbc->nchars++] = wc;
> ++	  work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
> + 	}
> +     }
> +   while ((wc = wc1) != L']');
> +@@ -2552,6 +2552,8 @@ match_mb_charset (struct dfa *d, int s, position pos,
> int index)
> +     }
> +
> +   /* match with a character?  */
> ++  if (case_fold)
> ++    wc = towlower (wc);
> +   for (i = 0; i<work_mbc->nchars; i++)
> +     {
> +       if (wc == work_mbc->chars[i])
> +diff --git a/src/grep.c b/src/grep.c
> +index 2fb2fac..3fd4b47 100644
> +--- a/src/grep.c
> ++++ b/src/grep.c
> +@@ -30,6 +30,12 @@
> + # include <sys/time.h>
> + # include <sys/resource.h>
> + #endif
> ++#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined
> ++HAVE_MBRTOWC
> ++/* We can handle multibyte string.  */ # define MBS_SUPPORT # include
> ++<wchar.h> # include <wctype.h> #endif
> + #include <stdio.h>
> + #include "system.h"
> + #include "getopt.h"
> +@@ -255,19 +261,6 @@ reset (int fd, char const *file, struct stats *stats)
> +   bufbeg[-1] = eolbyte;
> +   bufdesc = fd;
> +
> +-  if (fstat (fd, &stats->stat) != 0)
> +-    {
> +-      error (0, errno, "fstat");
> +-      return 0;
> +-    }
> +-  if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
> +-    return 0;
> +-#ifndef DJGPP
> +-  if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) ||
> +S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode))) -#else
> +-  if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) ||
> +S_ISBLK(stats->stat.st_mode))) -#endif
> +-    return 0;
> +   if (S_ISREG (stats->stat.st_mode))
> +     {
> +       if (file)
> +@@ -558,33 +551,6 @@ prline (char const *beg, char const *lim, int sep)
> +     {
> +       size_t match_size;
> +       size_t match_offset;
> +-      if(match_icase)
> +-        {
> +-	  /* Yuck, this is tricky */
> +-          char *buf = (char*) xmalloc (lim - beg);
> +-	  char *ibeg = buf;
> +-	  char *ilim = ibeg + (lim - beg);
> +-	  int i;
> +-	  for (i = 0; i < lim - beg; i++)
> +-	    ibeg[i] = tolower (beg[i]);
> +-	  while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
> +-		 != (size_t) -1)
> +-	    {
> +-	      char const *b = beg + match_offset;
> +-	      if (b == lim)
> +-		break;
> +-	      fwrite (beg, sizeof (char), match_offset, stdout);
> +-	      printf ("\33[%sm", grep_color);
> +-	      fwrite (b, sizeof (char), match_size, stdout);
> +-	      fputs ("\33[00m", stdout);
> +-	      beg = b + match_size;
> +-	      ibeg = ibeg + match_offset + match_size;
> +-	    }
> +-	  fwrite (beg, 1, lim - beg, stdout);
> +-	  free (buf);
> +-	  lastout = lim;
> +-	  return;
> +-	}
> +       while (lim-beg && (match_offset = (*execute) (beg, lim - beg,
> &match_size, 1))
> + 	     != (size_t) -1)
> + 	{
> +@@ -601,6 +567,7 @@ prline (char const *beg, char const *lim, int sep)
> + 	  fputs ("\33[00m", stdout);
> + 	  beg = b + match_size;
> + 	}
> ++      fputs ("\33[K", stdout);
> +     }
> +   fwrite (beg, 1, lim - beg, stdout);
> +   if (ferror (stdout))
> +@@ -623,7 +590,7 @@ prpending (char const *lim)
> +       size_t match_size;
> +       --pending;
> +       if (outleft
> +-	  || (((*execute) (lastout, nl - lastout, &match_size, 0) == (size_t) -1)
> ++	  || (((*execute) (lastout, nl + 1 - lastout, &match_size, 0) ==
> ++(size_t) -1)
> + 	      == !out_invert))
> + 	prline (lastout, nl + 1, '-');
> +       else
> +@@ -895,6 +862,19 @@ grepfile (char const *file, struct stats *stats)
> +     }
> +   else
> +     {
> ++      if (stat (file, &stats->stat) != 0)
> ++        {
> ++          suppressible_error (file, errno);
> ++          return 1;
> ++        }
> ++      if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
> ++        return 1;
> ++#ifndef DJGPP
> ++      if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) ||
> ++S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode) ||
> S_ISFIFO(stats->stat.st_mode))) #else
> ++      if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) ||
> ++S_ISBLK(stats->stat.st_mode))) #endif
> ++        return 1;
> +       while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
> + 	continue;
> +
> +@@ -1681,9 +1661,6 @@ warranty; not even for MERCHANTABILITY or FITNESS
> FOR A PARTICULAR PURPOSE.\n"))
> + 	  out_invert ^= 1;
> + 	  match_lines = match_words = 0;
> + 	}
> +-      else
> +-	/* Strip trailing newline. */
> +-        --keycc;
> +     }
> +   else
> +     if (optind < argc)
> +@@ -1697,6 +1674,37 @@ warranty; not even for MERCHANTABILITY or FITNESS
> FOR A PARTICULAR PURPOSE.\n"))
> +   if (!install_matcher (matcher) && !install_matcher ("default"))
> +     abort ();
> +
> ++#ifdef MBS_SUPPORT
> ++  if (MB_CUR_MAX != 1 && match_icase)
> ++    {
> ++      wchar_t wc;
> ++      mbstate_t cur_state, prev_state;
> ++      int i, len = strlen(keys);
> ++
> ++      memset(&cur_state, 0, sizeof(mbstate_t));
> ++      for (i = 0; i <= len ;)
> ++	{
> ++	  size_t mbclen;
> ++	  mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
> ++	  if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
> ++	    {
> ++	      /* An invalid sequence, or a truncated multibyte character.
> ++		 We treat it as a singlebyte character.  */
> ++	      mbclen = 1;
> ++	    }
> ++	  else
> ++	    {
> ++	      if (iswupper((wint_t)wc))
> ++		{
> ++		  wc = towlower((wint_t)wc);
> ++		  wcrtomb(keys + i, wc, &cur_state);
> ++		}
> ++	    }
> ++	  i += mbclen;
> ++	}
> ++    }
> ++#endif /* MBS_SUPPORT */
> ++
> +   (*compile)(keys, keycc);
> +
> +   if ((argc - optind > 1 && !no_filenames) || with_filenames) diff
> +--git a/src/search.c b/src/search.c index 7bd233f..3c6a485 100644
> +--- a/src/search.c
> ++++ b/src/search.c
> +@@ -18,9 +18,13 @@
> +
> + /* Written August 1992 by Mike Haertel. */
> +
> ++#ifndef _GNU_SOURCE
> ++# define _GNU_SOURCE 1
> ++#endif
> + #ifdef HAVE_CONFIG_H
> + # include <config.h>
> + #endif
> ++#include <assert.h>
> + #include <sys/types.h>
> + #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined
> +HAVE_MBRTOWC
> + /* We can handle multibyte string.  */ @@ -31,7 +35,7 @@
> +
> + #include "system.h"
> + #include "grep.h"
> +-#include "regex.h"
> ++#include <regex.h>
> + #include "dfa.h"
> + #include "kwset.h"
> + #include "error.h"
> +@@ -39,6 +43,9 @@
> + #ifdef HAVE_LIBPCRE
> + # include <pcre.h>
> + #endif
> ++#ifdef HAVE_LANGINFO_CODESET
> ++# include <langinfo.h>
> ++#endif
> +
> + #define NCHAR (UCHAR_MAX + 1)
> +
> +@@ -70,9 +77,10 @@ static kwset_t kwset;
> +    call the regexp matcher at all. */
> + static int kwset_exact_matches;
> +
> +-#if defined(MBS_SUPPORT)
> +-static char* check_multibyte_string PARAMS ((char const *buf, size_t
> +size)); -#endif
> ++/* UTF-8 encoding allows some optimizations that we can't otherwise
> ++   assume in a multibyte encoding. */
> ++static int using_utf8;
> ++
> + static void kwsinit PARAMS ((void));
> + static void kwsmusts PARAMS ((void));
> + static void Gcompile PARAMS ((char const *, size_t)); @@ -84,6 +92,15
> +@@ static void Pcompile PARAMS ((char const *, size_t ));  static
> +size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
> +
> + void
> ++check_utf8 (void)
> ++{
> ++#ifdef HAVE_LANGINFO_CODESET
> ++  if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
> ++    using_utf8 = 1;
> ++#endif
> ++}
> ++
> ++void
> + dfaerror (char const *mesg)
> + {
> +   error (2, 0, mesg);
> +@@ -141,38 +158,6 @@ kwsmusts (void)
> +     }
> + }
> +
> +-#ifdef MBS_SUPPORT
> +-/* This function allocate the array which correspond to "buf".
> +-   Then this check multibyte string and mark on the positions which
> +-   are not singlebyte character nor the first byte of a multibyte
> +-   character.  Caller must free the array.  */
> +-static char*
> +-check_multibyte_string(char const *buf, size_t size) -{
> +-  char *mb_properties = malloc(size);
> +-  mbstate_t cur_state;
> +-  int i;
> +-  memset(&cur_state, 0, sizeof(mbstate_t));
> +-  memset(mb_properties, 0, sizeof(char)*size);
> +-  for (i = 0; i < size ;)
> +-    {
> +-      size_t mbclen;
> +-      mbclen = mbrlen(buf + i, size - i, &cur_state);
> +-
> +-      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
> +-	{
> +-	  /* An invalid sequence, or a truncated multibyte character.
> +-	     We treat it as a singlebyte character.  */
> +-	  mbclen = 1;
> +-	}
> +-      mb_properties[i] = mbclen;
> +-      i += mbclen;
> +-    }
> +-
> +-  return mb_properties;
> +-}
> +-#endif
> +-
> + static void
> + Gcompile (char const *pattern, size_t size)  { @@ -181,7 +166,8 @@
> +Gcompile (char const *pattern, size_t size)
> +   size_t total = size;
> +   char const *motif = pattern;
> +
> +-  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
> ++  check_utf8 ();
> ++  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE |
> ++ (match_icase ? RE_ICASE : 0));
> +   dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase,
> + eolbyte);
> +
> +   /* For GNU regex compiler we have to pass the patterns separately to
> +detect @@ -218,6 +204,10 @@ Gcompile (char const *pattern, size_t size)
> +       motif = sep;
> +     } while (sep && total != 0);
> +
> ++  /* Strip trailing newline. */
> ++  if (size && pattern[size - 1] == '\n')
> ++    size--;
> ++
> +   /* In the match_words and match_lines cases, we use a different pattern
> +      for the DFA matcher that will quickly throw out cases that won't work.
> +      Then if DFA succeeds we do some hairy stuff using the regex
> +matcher @@ -233,7 +223,7 @@ Gcompile (char const *pattern, size_t size)
> +       static char const line_end[] = "\\)$";
> +       static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
> +       static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
> +-      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
> ++      char *n = xmalloc (sizeof word_beg - 1 + size + sizeof
> ++ word_end);
> +       size_t i;
> +       strcpy (n, match_lines ? line_beg : word_beg);
> +       i = strlen (n);
> +@@ -257,14 +247,15 @@ Ecompile (char const *pattern, size_t size)
> +   size_t total = size;
> +   char const *motif = pattern;
> +
> ++  check_utf8 ();
> +   if (strcmp (matcher, "awk") == 0)
> +     {
> +-      re_set_syntax (RE_SYNTAX_AWK);
> ++      re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
> +       dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
> +     }
> +   else
> +     {
> +-      re_set_syntax (RE_SYNTAX_POSIX_EGREP);
> ++      re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE :
> ++ 0));
> +       dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
> +     }
> +
> +@@ -301,6 +292,10 @@ Ecompile (char const *pattern, size_t size)
> +       motif = sep;
> +     } while (sep && total != 0);
> +
> ++  /* Strip trailing newline. */
> ++  if (size && pattern[size - 1] == '\n')
> ++    size--;
> ++
> +   /* In the match_words and match_lines cases, we use a different pattern
> +      for the DFA matcher that will quickly throw out cases that won't work.
> +      Then if DFA succeeds we do some hairy stuff using the regex
> +matcher @@ -316,7 +311,7 @@ Ecompile (char const *pattern, size_t size)
> +       static char const line_end[] = ")$";
> +       static char const word_beg[] = "(^|[^[:alnum:]_])(";
> +       static char const word_end[] = ")([^[:alnum:]_]|$)";
> +-      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
> ++      char *n = xmalloc (sizeof word_beg - 1 + size + sizeof
> ++ word_end);
> +       size_t i;
> +       strcpy (n, match_lines ? line_beg : word_beg);
> +       i = strlen(n);
> +@@ -339,15 +334,34 @@ EGexecute (char const *buf, size_t size, size_t
> *match_size, int exact)
> +   char eol = eolbyte;
> +   int backref, start, len;
> +   struct kwsmatch kwsm;
> +-  size_t i;
> ++  size_t i, ret_val;
> ++  static int use_dfa;
> ++  static int use_dfa_checked = 0;
> + #ifdef MBS_SUPPORT
> +-  char *mb_properties = NULL;
> ++  int mb_cur_max = MB_CUR_MAX;
> ++  mbstate_t mbs;
> ++  memset (&mbs, '\0', sizeof (mbstate_t));
> + #endif /* MBS_SUPPORT */
> +
> ++  if (!use_dfa_checked)
> ++    {
> ++      char *grep_use_dfa = getenv ("GREP_USE_DFA");
> ++      if (!grep_use_dfa)
> ++	{
> + #ifdef MBS_SUPPORT
> +-  if (MB_CUR_MAX > 1 && kwset)
> +-    mb_properties = check_multibyte_string(buf, size);
> ++	  /* Turn off DFA when processing multibyte input. */
> ++	  use_dfa = (MB_CUR_MAX == 1);
> ++#else
> ++	  use_dfa = 1;
> + #endif /* MBS_SUPPORT */
> ++	}
> ++      else
> ++	{
> ++	  use_dfa = atoi (grep_use_dfa);
> ++	}
> ++
> ++      use_dfa_checked = 1;
> ++    }
> +
> +   buflim = buf + size;
> +
> +@@ -358,47 +372,120 @@ EGexecute (char const *buf, size_t size, size_t
> *match_size, int exact)
> + 	  if (kwset)
> + 	    {
> + 	      /* Find a possible match using the KWset matcher. */
> +-	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
> ++#ifdef MBS_SUPPORT
> ++	      size_t bytes_left = 0;
> ++#endif /* MBS_SUPPORT */
> ++	      size_t offset;
> ++#ifdef MBS_SUPPORT
> ++	      /* kwsexec doesn't work with match_icase and multibyte input. */
> ++	      if (match_icase && mb_cur_max > 1)
> ++		/* Avoid kwset */
> ++		offset = 0;
> ++	      else
> ++#endif /* MBS_SUPPORT */
> ++	      offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
> + 	      if (offset == (size_t) -1)
> +-		{
> ++	        goto failure;
> + #ifdef MBS_SUPPORT
> +-		  if (MB_CUR_MAX > 1)
> +-		    free(mb_properties);
> +-#endif
> +-		  return (size_t)-1;
> ++	      if (mb_cur_max > 1 && !using_utf8)
> ++		{
> ++		  bytes_left = offset;
> ++		  while (bytes_left)
> ++		    {
> ++		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
> ++		      if (mlen == (size_t) -1 || mlen == 0)
> ++			{
> ++			  /* Incomplete character: treat as single-byte. */
> ++			  memset (&mbs, '\0', sizeof (mbstate_t));
> ++			  beg++;
> ++			  bytes_left--;
> ++			  continue;
> ++			}
> ++
> ++		      if (mlen == (size_t) -2)
> ++			/* Offset points inside multibyte character:
> ++			 * no good. */
> ++			break;
> ++
> ++		      beg += mlen;
> ++		      bytes_left -= mlen;
> ++		    }
> + 		}
> ++	      else
> ++#endif /* MBS_SUPPORT */
> + 	      beg += offset;
> + 	      /* Narrow down to the line containing the candidate, and
> + 		 run it through DFA. */
> + 	      end = memchr(beg, eol, buflim - beg);
> + 	      end++;
> + #ifdef MBS_SUPPORT
> +-	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
> ++	      if (mb_cur_max > 1 && bytes_left)
> + 		continue;
> +-#endif
> ++#endif /* MBS_SUPPORT */
> + 	      while (beg > buf && beg[-1] != eol)
> + 		--beg;
> +-	      if (kwsm.index < kwset_exact_matches)
> +-		goto success;
> +-	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
> ++	      if (
> ++#ifdef MBS_SUPPORT
> ++		  !(match_icase && mb_cur_max > 1) && #endif /* MBS_SUPPORT */
> ++		  (kwsm.index < kwset_exact_matches))
> ++		goto success_in_beg_and_end;
> ++	      if (use_dfa &&
> ++		  dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
> + 		continue;
> + 	    }
> + 	  else
> + 	    {
> + 	      /* No good fixed strings; start with DFA. */
> +-	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
> ++#ifdef MBS_SUPPORT
> ++	      size_t bytes_left = 0;
> ++#endif /* MBS_SUPPORT */
> ++	      size_t offset = 0;
> ++	      if (use_dfa)
> ++		offset = dfaexec (&dfa, beg, buflim - beg, &backref);
> + 	      if (offset == (size_t) -1)
> + 		break;
> + 	      /* Narrow down to the line we've found. */
> ++#ifdef MBS_SUPPORT
> ++	      if (mb_cur_max > 1 && !using_utf8)
> ++		{
> ++		  bytes_left = offset;
> ++		  while (bytes_left)
> ++		    {
> ++		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
> ++		      if (mlen == (size_t) -1 || mlen == 0)
> ++			{
> ++			  /* Incomplete character: treat as single-byte. */
> ++			  memset (&mbs, '\0', sizeof (mbstate_t));
> ++			  beg++;
> ++			  bytes_left--;
> ++			  continue;
> ++			}
> ++
> ++		      if (mlen == (size_t) -2)
> ++			/* Offset points inside multibyte character:
> ++			 * no good. */
> ++			break;
> ++
> ++		      beg += mlen;
> ++		      bytes_left -= mlen;
> ++		    }
> ++		}
> ++	      else
> ++#endif /* MBS_SUPPORT */
> + 	      beg += offset;
> + 	      end = memchr (beg, eol, buflim - beg);
> + 	      end++;
> ++#ifdef MBS_SUPPORT
> ++	      if (mb_cur_max > 1 && bytes_left)
> ++		continue;
> ++#endif /* MBS_SUPPORT */
> + 	      while (beg > buf && beg[-1] != eol)
> + 		--beg;
> + 	    }
> + 	  /* Successful, no backreferences encountered! */
> +-	  if (!backref)
> +-	    goto success;
> ++	  if (use_dfa && !backref)
> ++	    goto success_in_beg_and_end;
> + 	}
> +       else
> + 	end = beg + size;
> +@@ -413,14 +500,11 @@ EGexecute (char const *buf, size_t size, size_t
> *match_size, int exact)
> + 				       end - beg - 1, &(patterns[i].regs))))
> + 	    {
> + 	      len = patterns[i].regs.end[0] - start;
> +-	      if (exact)
> +-		{
> +-		  *match_size = len;
> +-		  return start;
> +-		}
> ++	      if (exact && !match_words)
> ++	        goto success_in_start_and_len;
> + 	      if ((!match_lines && !match_words)
> + 		  || (match_lines && len == end - beg - 1))
> +-		goto success;
> ++		goto success_in_beg_and_end;
> + 	      /* If -w, check if the match aligns with word boundaries.
> + 		 We do this iteratively because:
> + 		 (a) the line may contain more than one occurence of the @@ -431,10
> ++515,114 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int
> exact)
> + 	      if (match_words)
> + 		while (start >= 0)
> + 		  {
> +-		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
> +-			&& (len == end - beg - 1
> +-			    || !WCHAR ((unsigned char) beg[start + len])))
> +-		      goto success;
> ++		    int lword_match = 0;
> ++		    if (start == 0)
> ++		      lword_match = 1;
> ++		    else
> ++		      {
> ++			assert (start > 0);
> ++#ifdef MBS_SUPPORT
> ++			if (mb_cur_max > 1)
> ++			  {
> ++			    const char *s;
> ++			    size_t mr;
> ++			    wchar_t pwc;
> ++
> ++			    /* Locate the start of the multibyte character
> ++			       before the match position (== beg + start). */
> ++			    if (using_utf8)
> ++			      {
> ++				/* UTF-8 is a special case: scan backwards
> ++				   until we find a 7-bit character or a
> ++				   lead byte. */
> ++				s = beg + start - 1;
> ++				while (s > buf
> ++				       && (unsigned char) *s >= 0x80
> ++				       && (unsigned char) *s <= 0xbf)
> ++				  --s;
> ++			      }
> ++			    else
> ++			      {
> ++				/* Scan forwards to find the start of the
> ++				   last complete character before the
> ++				   match position.  */
> ++				size_t bytes_left = start - 1;
> ++				s = beg;
> ++				while (bytes_left > 0)
> ++				  {
> ++				    mr = mbrlen (s, bytes_left, &mbs);
> ++				    if (mr == (size_t) -1 || mr == 0)
> ++				      {
> ++					memset (&mbs, '\0', sizeof (mbs));
> ++					s++;
> ++					bytes_left--;
> ++					continue;
> ++				      }
> ++				    if (mr == (size_t) -2)
> ++				      {
> ++					memset (&mbs, '\0', sizeof (mbs));
> ++					break;
> ++				      }
> ++				    s += mr;
> ++				    bytes_left -= mr;
> ++				  }
> ++			      }
> ++			    mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
> ++			    if (mr == (size_t) -2 || mr == (size_t) -1 ||
> ++				mr == 0)
> ++			      {
> ++				memset (&mbs, '\0', sizeof (mbstate_t));
> ++				lword_match = 1;
> ++			      }
> ++			    else if (!(iswalnum (pwc) || pwc == L'_')
> ++				     && mr == beg + start - s)
> ++			      lword_match = 1;
> ++			  }
> ++			else
> ++#endif /* MBS_SUPPORT */
> ++			if (!WCHAR ((unsigned char) beg[start - 1]))
> ++			  lword_match = 1;
> ++		      }
> ++
> ++		    if (lword_match)
> ++		      {
> ++			int rword_match = 0;
> ++			if (start + len == end - beg - 1)
> ++			  rword_match = 1;
> ++			else
> ++			  {
> ++#ifdef MBS_SUPPORT
> ++			    if (mb_cur_max > 1)
> ++			      {
> ++				wchar_t nwc;
> ++				int mr;
> ++
> ++				mr = mbtowc (&nwc, beg + start + len,
> ++					     end - beg - start - len - 1);
> ++				if (mr <= 0)
> ++				  {
> ++				    memset (&mbs, '\0', sizeof (mbstate_t));
> ++				    rword_match = 1;
> ++				  }
> ++				else if (!iswalnum (nwc) && nwc != L'_')
> ++				  rword_match = 1;
> ++			      }
> ++			    else
> ++#endif /* MBS_SUPPORT */
> ++			    if (!WCHAR ((unsigned char) beg[start + len]))
> ++			      rword_match = 1;
> ++			  }
> ++
> ++			if (rword_match)
> ++			  {
> ++			    if (!exact)
> ++			      /* Returns the whole line. */
> ++			      goto success_in_beg_and_end;
> ++			    else
> ++			      /* Returns just this word match. */
> ++			      goto success_in_start_and_len;
> ++			  }
> ++		      }
> + 		    if (len > 0)
> + 		      {
> + 			/* Try a shorter length anchored at the same place. */ @@ -461,26
> ++649,154 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int
> exact)
> + 	    }
> + 	} /* for Regex patterns.  */
> +     } /* for (beg = end ..) */
> +-#ifdef MBS_SUPPORT
> +-  if (MB_CUR_MAX > 1 && mb_properties)
> +-    free (mb_properties);
> +-#endif /* MBS_SUPPORT */
> ++
> ++ failure:
> +   return (size_t) -1;
> +
> +- success:
> +-#ifdef MBS_SUPPORT
> +-  if (MB_CUR_MAX > 1 && mb_properties)
> +-    free (mb_properties);
> +-#endif /* MBS_SUPPORT */
> +-  *match_size = end - beg;
> +-  return beg - buf;
> ++ success_in_beg_and_end:
> ++  len = end - beg;
> ++  start = beg - buf;
> ++  /* FALLTHROUGH */
> ++
> ++ success_in_start_and_len:
> ++  *match_size = len;
> ++  return start;
> + }
> +
> ++#ifdef MBS_SUPPORT
> ++static int f_i_multibyte; /* whether we're using the new -Fi MB method
> ++*/ static struct {
> ++  wchar_t **patterns;
> ++  size_t count, maxlen;
> ++  unsigned char *match;
> ++} Fimb;
> ++#endif
> ++
> + static void
> + Fcompile (char const *pattern, size_t size) {
> ++  int mb_cur_max = MB_CUR_MAX;
> +   char const *beg, *lim, *err;
> +
> ++  check_utf8 ();
> ++#ifdef MBS_SUPPORT
> ++  /* Support -F -i for UTF-8 input. */
> ++  if (match_icase && mb_cur_max > 1)
> ++    {
> ++      mbstate_t mbs;
> ++      wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
> ++      const char *patternend = pattern;
> ++      size_t wcsize;
> ++      kwset_t fimb_kwset = NULL;
> ++      char *starts = NULL;
> ++      wchar_t *wcbeg, *wclim;
> ++      size_t allocated = 0;
> ++
> ++      memset (&mbs, '\0', sizeof (mbs)); # ifdef __GNU_LIBRARY__
> ++      wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
> ++      if (patternend != pattern + size)
> ++	wcsize = (size_t) -1;
> ++# else
> ++      {
> ++	char *patterncopy = xmalloc (size + 1);
> ++
> ++	memcpy (patterncopy, pattern, size);
> ++	patterncopy[size] = '\0';
> ++	patternend = patterncopy;
> ++	wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
> ++	if (patternend != patterncopy + size)
> ++	  wcsize = (size_t) -1;
> ++	free (patterncopy);
> ++      }
> ++# endif
> ++      if (wcsize + 2 <= 2)
> ++	{
> ++fimb_fail:
> ++	  free (wcpattern);
> ++	  free (starts);
> ++	  if (fimb_kwset)
> ++	    kwsfree (fimb_kwset);
> ++	  free (Fimb.patterns);
> ++	  Fimb.patterns = NULL;
> ++	}
> ++      else
> ++	{
> ++	  if (!(fimb_kwset = kwsalloc (NULL)))
> ++	    error (2, 0, _("memory exhausted"));
> ++
> ++	  starts = xmalloc (mb_cur_max * 3);
> ++	  wcbeg = wcpattern;
> ++	  do
> ++	    {
> ++	      int i;
> ++	      size_t wclen;
> ++
> ++	      if (Fimb.count >= allocated)
> ++		{
> ++		  if (allocated == 0)
> ++		    allocated = 128;
> ++		  else
> ++		    allocated *= 2;
> ++		  Fimb.patterns = xrealloc (Fimb.patterns,
> ++					    sizeof (wchar_t *) * allocated);
> ++		}
> ++	      Fimb.patterns[Fimb.count++] = wcbeg;
> ++	      for (wclim = wcbeg;
> ++		   wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
> ++		*wclim = towlower (*wclim);
> ++	      *wclim = L'\0';
> ++	      wclen = wclim - wcbeg;
> ++	      if (wclen > Fimb.maxlen)
> ++		Fimb.maxlen = wclen;
> ++	      if (wclen > 3)
> ++		wclen = 3;
> ++	      if (wclen == 0)
> ++		{
> ++		  if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
> ++		    error (2, 0, err);
> ++		}
> ++	      else
> ++		for (i = 0; i < (1 << wclen); i++)
> ++		  {
> ++		    char *p = starts;
> ++		    int j, k;
> ++
> ++		    for (j = 0; j < wclen; ++j)
> ++		      {
> ++			wchar_t wc = wcbeg[j];
> ++			if (i & (1 << j))
> ++			  {
> ++			    wc = towupper (wc);
> ++			    if (wc == wcbeg[j])
> ++			      continue;
> ++			  }
> ++			k = wctomb (p, wc);
> ++			if (k <= 0)
> ++			  goto fimb_fail;
> ++			p += k;
> ++		      }
> ++		    if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
> ++		      error (2, 0, err);
> ++		  }
> ++	      if (wclim < wcpattern + wcsize)
> ++		++wclim;
> ++	      wcbeg = wclim;
> ++	    }
> ++	  while (wcbeg < wcpattern + wcsize);
> ++	  f_i_multibyte = 1;
> ++	  kwset = fimb_kwset;
> ++	  free (starts);
> ++	  Fimb.match = xmalloc (Fimb.count);
> ++	  if ((err = kwsprep (kwset)) != 0)
> ++	    error (2, 0, err);
> ++	  return;
> ++	}
> ++    }
> ++#endif /* MBS_SUPPORT */
> ++
> ++
> +   kwsinit ();
> +   beg = pattern;
> +   do
> +@@ -499,6 +815,76 @@ Fcompile (char const *pattern, size_t size)
> +     error (2, 0, err);
> + }
> +
> ++#ifdef MBS_SUPPORT
> ++static int
> ++Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
> ++{
> ++  size_t len, letter, i;
> ++  int ret = -1;
> ++  mbstate_t mbs;
> ++  wchar_t wc;
> ++  int patterns_left;
> ++
> ++  assert (match_icase && f_i_multibyte == 1);
> ++  assert (MB_CUR_MAX > 1);
> ++
> ++  memset (&mbs, '\0', sizeof (mbs));
> ++  memset (Fimb.match, '\1', Fimb.count);
> ++  letter = len = 0;
> ++  patterns_left = 1;
> ++  while (patterns_left && len <= size)
> ++    {
> ++      size_t c;
> ++
> ++      patterns_left = 0;
> ++      if (len < size)
> ++	{
> ++	  c = mbrtowc (&wc, buf + len, size - len, &mbs);
> ++	  if (c + 2 <= 2)
> ++	    return ret;
> ++
> ++	  wc = towlower (wc);
> ++	}
> ++      else
> ++	{
> ++	  c = 1;
> ++	  wc = L'\0';
> ++	}
> ++
> ++      for (i = 0; i < Fimb.count; i++)
> ++	{
> ++	  if (Fimb.match[i])
> ++	    {
> ++	      if (Fimb.patterns[i][letter] == L'\0')
> ++		{
> ++		  /* Found a match. */
> ++		  *plen = len;
> ++		  if (!exact && !match_words)
> ++		    return 0;
> ++		  else
> ++		    {
> ++		      /* For -w or exact look for longest match.  */
> ++		      ret = 0;
> ++		      Fimb.match[i] = '\0';
> ++		      continue;
> ++		    }
> ++		}
> ++
> ++	      if (Fimb.patterns[i][letter] == wc)
> ++		patterns_left = 1;
> ++	      else
> ++		Fimb.match[i] = '\0';
> ++	    }
> ++	}
> ++
> ++      len += c;
> ++      letter++;
> ++    }
> ++
> ++  return ret;
> ++}
> ++#endif /* MBS_SUPPORT */
> ++
> + static size_t
> + Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
> + {
> +@@ -506,88 +892,268 @@ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
> +   register size_t len;
> +   char eol = eolbyte;
> +   struct kwsmatch kwsmatch;
> ++  size_t ret_val;
> + #ifdef MBS_SUPPORT
> +-  char *mb_properties;
> +-  if (MB_CUR_MAX > 1)
> +-    mb_properties = check_multibyte_string (buf, size);
> ++  int mb_cur_max = MB_CUR_MAX;
> ++  mbstate_t mbs;
> ++  memset (&mbs, '\0', sizeof (mbstate_t));
> ++  const char *last_char = NULL;
> + #endif /* MBS_SUPPORT */
> +
> +-  for (beg = buf; beg <= buf + size; ++beg)
> ++  for (beg = buf; beg < buf + size; ++beg)
> +     {
> +-      size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
> ++      size_t offset;
> ++      offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
> ++
> +       if (offset == (size_t) -1)
> +-	{
> ++	goto failure;
> + #ifdef MBS_SUPPORT
> +-	  if (MB_CUR_MAX > 1)
> +-	    free(mb_properties);
> +-#endif /* MBS_SUPPORT */
> +-	  return offset;
> ++      if (mb_cur_max > 1 && !using_utf8)
> ++	{
> ++	  size_t bytes_left = offset;
> ++	  while (bytes_left)
> ++	    {
> ++	      size_t mlen = mbrlen (beg, bytes_left, &mbs);
> ++
> ++	      last_char = beg;
> ++	      if (mlen == (size_t) -1 || mlen == 0)
> ++		{
> ++		  /* Incomplete character: treat as single-byte. */
> ++		  memset (&mbs, '\0', sizeof (mbstate_t));
> ++		  beg++;
> ++		  bytes_left--;
> ++		  continue;
> ++		}
> ++
> ++	      if (mlen == (size_t) -2)
> ++		/* Offset points inside multibyte character: no good. */
> ++		break;
> ++
> ++	      beg += mlen;
> ++	      bytes_left -= mlen;
> ++	    }
> ++
> ++	  if (bytes_left)
> ++	    continue;
> + 	}
> +-#ifdef MBS_SUPPORT
> +-      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
> +-	continue; /* It is a part of multibyte character.  */
> ++      else
> + #endif /* MBS_SUPPORT */
> +       beg += offset;
> +-      len = kwsmatch.size[0];
> +-      if (exact)
> +-	{
> +-	  *match_size = len;
> + #ifdef MBS_SUPPORT
> +-	  if (MB_CUR_MAX > 1)
> +-	    free (mb_properties);
> ++      /* For f_i_multibyte, the string at beg now matches first 3 chars of
> ++	 one of the search strings (less if there are shorter search strings).
> ++	 See if this is a real match.  */
> ++      if (f_i_multibyte
> ++	  && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
> ++	goto next_char;
> + #endif /* MBS_SUPPORT */
> +-	  return beg - buf;
> +-	}
> ++      len = kwsmatch.size[0];
> ++      if (exact && !match_words)
> ++	goto success_in_beg_and_len;
> +       if (match_lines)
> + 	{
> + 	  if (beg > buf && beg[-1] != eol)
> +-	    continue;
> ++	    goto next_char;
> + 	  if (beg + len < buf + size && beg[len] != eol)
> +-	    continue;
> ++	    goto next_char;
> + 	  goto success;
> + 	}
> +       else if (match_words)
> +-	for (try = beg; len; )
> +-	  {
> +-	    if (try > buf && WCHAR((unsigned char) try[-1]))
> +-	      break;
> +-	    if (try + len < buf + size && WCHAR((unsigned char) try[len]))
> +-	      {
> +-		offset = kwsexec (kwset, beg, --len, &kwsmatch);
> +-		if (offset == (size_t) -1)
> +-		  {
> ++	{
> ++	  while (len)
> ++	    {
> ++	      int word_match = 0;
> ++	      if (beg > buf)
> ++		{
> + #ifdef MBS_SUPPORT
> +-		    if (MB_CUR_MAX > 1)
> +-		      free (mb_properties);
> ++		  if (mb_cur_max > 1)
> ++		    {
> ++		      const char *s;
> ++		      int mr;
> ++		      wchar_t pwc;
> ++
> ++		      if (using_utf8)
> ++			{
> ++			  s = beg - 1;
> ++			  while (s > buf
> ++				 && (unsigned char) *s >= 0x80
> ++				 && (unsigned char) *s <= 0xbf)
> ++			    --s;
> ++			}
> ++		      else
> ++			s = last_char;
> ++		      mr = mbtowc (&pwc, s, beg - s);
> ++		      if (mr <= 0)
> ++			memset (&mbs, '\0', sizeof (mbstate_t));
> ++		      else if ((iswalnum (pwc) || pwc == L'_')
> ++			       && mr == (int) (beg - s))
> ++			goto next_char;
> ++		    }
> ++		  else
> + #endif /* MBS_SUPPORT */
> +-		    return offset;
> +-		  }
> +-		try = beg + offset;
> +-		len = kwsmatch.size[0];
> +-	      }
> +-	    else
> +-	      goto success;
> +-	  }
> ++		  if (WCHAR ((unsigned char) beg[-1]))
> ++		    goto next_char;
> ++		}
> ++#ifdef MBS_SUPPORT
> ++	      if (mb_cur_max > 1)
> ++		{
> ++		  wchar_t nwc;
> ++		  int mr;
> ++
> ++		  mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
> ++		  if (mr <= 0)
> ++		    {
> ++		      memset (&mbs, '\0', sizeof (mbstate_t));
> ++		      word_match = 1;
> ++		    }
> ++		  else if (!iswalnum (nwc) && nwc != L'_')
> ++		    word_match = 1;
> ++		}
> ++	      else
> ++#endif /* MBS_SUPPORT */
> ++		if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
> ++		  word_match = 1;
> ++	      if (word_match)
> ++		{
> ++		  if (!exact)
> ++		    /* Returns the whole line now we know there's a word match. */
> ++		    goto success;
> ++		  else
> ++		    /* Returns just this word match. */
> ++		    goto success_in_beg_and_len;
> ++		}
> ++	      if (len > 0)
> ++		{
> ++		  /* Try a shorter length anchored at the same place. */
> ++		  --len;
> ++		  offset = kwsexec (kwset, beg, len, &kwsmatch);
> ++
> ++		  if (offset == -1)
> ++		    goto next_char; /* Try a different anchor. */
> ++#ifdef MBS_SUPPORT
> ++		  if (mb_cur_max > 1 && !using_utf8)
> ++		    {
> ++		      size_t bytes_left = offset;
> ++		      while (bytes_left)
> ++			{
> ++			  size_t mlen = mbrlen (beg, bytes_left, &mbs);
> ++
> ++			  last_char = beg;
> ++			  if (mlen == (size_t) -1 || mlen == 0)
> ++			    {
> ++			      /* Incomplete character: treat as single-byte. */
> ++			      memset (&mbs, '\0', sizeof (mbstate_t));
> ++			      beg++;
> ++			      bytes_left--;
> ++			      continue;
> ++			    }
> ++
> ++			  if (mlen == (size_t) -2)
> ++			    {
> ++			      /* Offset points inside multibyte character:
> ++			       * no good. */
> ++			      break;
> ++			    }
> ++
> ++			  beg += mlen;
> ++			  bytes_left -= mlen;
> ++			}
> ++
> ++		      if (bytes_left)
> ++			{
> ++			  memset (&mbs, '\0', sizeof (mbstate_t));
> ++			  goto next_char; /* Try a different anchor. */
> ++			}
> ++		    }
> ++		  else
> ++#endif /* MBS_SUPPORT */
> ++		  beg += offset;
> ++#ifdef MBS_SUPPORT
> ++		  /* The string at beg now matches first 3 chars of one of
> ++		     the search strings (less if there are shorter search
> ++		     strings).  See if this is a real match.  */
> ++		  if (f_i_multibyte
> ++		      && Fimbexec (beg, len - offset, &kwsmatch.size[0],
> ++				   exact))
> ++		    goto next_char;
> ++#endif /* MBS_SUPPORT */
> ++		  len = kwsmatch.size[0];
> ++		}
> ++	    }
> ++	}
> +       else
> + 	goto success;
> +-    }
> +-
> ++next_char:;
> + #ifdef MBS_SUPPORT
> +-  if (MB_CUR_MAX > 1)
> +-    free (mb_properties);
> ++      /* Advance to next character.  For MB_CUR_MAX == 1 case this is handled
> ++	 by ++beg above.  */
> ++      if (mb_cur_max > 1)
> ++	{
> ++	  if (using_utf8)
> ++	    {
> ++	      unsigned char c = *beg;
> ++	      if (c >= 0xc2)
> ++		{
> ++		  if (c < 0xe0)
> ++		    ++beg;
> ++		  else if (c < 0xf0)
> ++		    beg += 2;
> ++		  else if (c < 0xf8)
> ++		    beg += 3;
> ++		  else if (c < 0xfc)
> ++		    beg += 4;
> ++		  else if (c < 0xfe)
> ++		    beg += 5;
> ++		}
> ++	    }
> ++	  else
> ++	    {
> ++	      size_t l = mbrlen (beg, buf + size - beg, &mbs);
> ++
> ++	      last_char = beg;
> ++	      if (l + 2 >= 2)
> ++		beg += l - 1;
> ++	      else
> ++		memset (&mbs, '\0', sizeof (mbstate_t));
> ++	    }
> ++	}
> + #endif /* MBS_SUPPORT */
> ++    }
> ++
> ++ failure:
> +   return -1;
> +
> +  success:
> ++#ifdef MBS_SUPPORT
> ++  if (mb_cur_max > 1 && !using_utf8)
> ++    {
> ++      end = beg + len;
> ++      while (end < buf + size)
> ++	{
> ++	  size_t mlen = mbrlen (end, buf + size - end, &mbs);
> ++	  if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
> ++	    {
> ++	      memset (&mbs, '\0', sizeof (mbstate_t));
> ++	      mlen = 1;
> ++	    }
> ++	  if (mlen == 1 && *end == eol)
> ++	    break;
> ++
> ++	  end += mlen;
> ++	}
> ++    }
> ++  else
> ++#endif /* MBS_SUPPORT */
> +   end = memchr (beg + len, eol, (buf + size) - (beg + len));
> ++
> +   end++;
> +   while (buf < beg && beg[-1] != eol)
> +     --beg;
> +-  *match_size = end - beg;
> +-#ifdef MBS_SUPPORT
> +-  if (MB_CUR_MAX > 1)
> +-    free (mb_properties);
> +-#endif /* MBS_SUPPORT */
> ++  len = end - beg;
> ++  /* FALLTHROUGH */
> ++
> ++ success_in_beg_and_len:
> ++  *match_size = len;
> +   return beg - buf;
> + }
> +
> +@@ -701,8 +1267,9 @@ Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
> +       char eol = eolbyte;
> +       if (!exact)
> + 	{
> +-	  end = memchr (end, eol, buflim - end);
> +-	  end++;
> ++	  while (end < buflim)
> ++	    if (*end++ == eol)
> ++	      break;
> + 	  while (buf < beg && beg[-1] != eol)
> + 	    --beg;
> + 	}
> +diff --git a/tests/Makefile.am b/tests/Makefile.am
> +index 599db67..002014a 100644
> +--- a/tests/Makefile.am
> ++++ b/tests/Makefile.am
> +@@ -3,7 +3,8 @@
> + AWK=@AWK@
> +
> + TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
> +-        status.sh empty.sh options.sh backref.sh file.sh
> ++        status.sh empty.sh options.sh backref.sh file.sh \
> ++        fmbtest.sh
> + EXTRA_DIST = $(TESTS) \
> +              khadafy.lines khadafy.regexp \
> +              spencer1.awk spencer1.tests \
> +diff --git a/tests/Makefile.in b/tests/Makefile.in
> +index db3d066..e5b3f47 100644
> +--- a/tests/Makefile.in
> ++++ b/tests/Makefile.in
> +@@ -97,7 +97,8 @@ install_sh = @install_sh@
> + AWK = @AWK@
> +
> + TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
> +-        status.sh empty.sh options.sh backref.sh file.sh
> ++        status.sh empty.sh options.sh backref.sh file.sh \
> ++	fmbtest.sh
> +
> + EXTRA_DIST = $(TESTS) \
> +              khadafy.lines khadafy.regexp \
> +--
> +1.8.4.2
> +
> diff --git a/meta/recipes-extended/grep/grep_2.5.1a.bb b/meta/recipes-extended/grep/grep_2.5.1a.bb
> index 178fc31..cc05678 100644
> --- a/meta/recipes-extended/grep/grep_2.5.1a.bb
> +++ b/meta/recipes-extended/grep/grep_2.5.1a.bb
> @@ -15,6 +15,7 @@ SRC_URI = "${GNU_MIRROR}/grep/grep-${PV}.tar.bz2 \
>             file://Makevars \
>             file://grep-CVE-2012-5667.patch \
>             file://fix-for-texinfo-5.1.patch \
> +           file://grep-fix-LSB-NG-Cases.patch \
>            "
> 
>  SRC_URI[md5sum] = "52202fe462770fa6be1bb667bd6cf30c"
> @@ -22,7 +23,8 @@ SRC_URI[sha256sum] = "38c8a2bb9223d1fb1b10bdd607cf44830afc92fd451ac4cd07619bf92b
> 
>  inherit autotools gettext texinfo
> 
> -EXTRA_OECONF = "--disable-perl-regexp"
> +EXTRA_OECONF = "--disable-perl-regexp \
> +                --without-included-regex"
> 
>  CFLAGS += "-D PROTOTYPES"
>  do_configure_prepend () {
> --
> 1.8.4.2




More information about the Openembedded-core mailing list