# # Patch name: regex_caseinsensitive.patch # Patch version: 1 # Author's name: Kurt Fitzner # Author's email: kfitzner@nexus.v-wave.com # Version of PennMUSH: 1.7.2p26 # Date patch made: Tue Aug 24 13:51:07 1999 # Author is willing to support (yes/no): yes # Patch format: diff -u5 # # # This is a contributed PennMUSH patch. Its use is subject to the # same restrictions found in PennMUSH's hdrs/copyrite.h file. # # No warranty is given for this patch. It is not necessarily going # to work on your system, with any version of PennMUSH other than # the one above, etc. # # If the author given above was willing to support the patch, you # should write to the author if you have any questions or problems. Do # *NOT* send email messages to Javelin or any PennMUSH mailing list about # this patch! # # Below this line is the author's description of the patch, # followed by the patch itself. If the patch is in context diff # format, you'll probably apply it by typing: patch < patchfile # in your top-level MUSH directory, unless instructed otherwise # below. # # regex_caseinsensitive.patch # As of PennMUSH 1.7.2p22 or 23 or so, the regex command became, # by default, case insensitive. However, this was done in a # rather unfortunate manner. The original regex and match string # are upper-cased before they are matched. This has the very # unfortunate side-effect of causing all the %0-%9 registers to # contain all upper-case text. The original case of the matched # text is totally lost. # # This would only happen, though, for case insensitive matches. If # you had a case-sensitive match, the original case was totally # preserved. # # This patch corrects that behavior. Changes are made directly to # regexp.c to cause it to support case-insensitive matching on regular # expressions. 
The resultant code is actually more CPU friendly than # the original, due to the elimination of one strdup(), and the # original case of the matched text is always preserved, for all types # of regex matching. # diff -u5r pennmush-vanilla/hdrs/externs.h project/hdrs/externs.h --- pennmush-vanilla/hdrs/externs.h Sun Jul 18 10:12:54 1999 +++ project/hdrs/externs.h Tue Aug 24 13:51:07 1999 @@ -238,11 +238,11 @@ extern dbref where_is _((dbref thing)); void charge_action _((dbref player, dbref thing, const char *awhat)); dbref first_visible _((dbref player, dbref thing)); /* From regexp.c (extract from Henry Spencer's package) */ -extern regexp *regcomp _((char *)); +extern regexp *regcomp _((char *, int caseinsensitive)); extern int regexec _((register regexp *, register char *)); extern char regexp_errbuf[]; #ifdef ALLOW_RPAGE /* From rpage.c */ diff -u5r pennmush-vanilla/hdrs/myregexp.h project/hdrs/myregexp.h --- pennmush-vanilla/hdrs/myregexp.h Sun Jul 18 10:13:05 1999 +++ project/hdrs/myregexp.h Tue Aug 24 12:55:05 1999 @@ -21,10 +21,11 @@ char *endp[NSUBEXP]; char regstart; /* Internal use only. */ char reganch; /* Internal use only. */ char *regmust; /* Internal use only. */ int regmlen; /* Internal use only. */ + int caseinsensitive; /* Flag to mark a case insensitive match */ char program[1]; /* Unwarranted chumminess with compiler. */ } regexp; /* * The first byte of the regexp internal "program" is actually this magic diff -u5r pennmush-vanilla/src/funlist.c project/src/funlist.c --- pennmush-vanilla/src/funlist.c Sun Jul 18 10:13:27 1999 +++ project/src/funlist.c Tue Aug 24 14:02:11 1999 @@ -1926,11 +1926,11 @@ int i, nqregs, curq, len; char *qregs[NSUBEXP]; regexp *re; int matched; - if ((re = regcomp(args[1])) == NULL) { + if ((re = regcomp(args[1],0)) == NULL) { /* Matching error. 
*/ safe_str("#-1 REGEXP ERROR: ", buff, bp); safe_str((const char *) regexp_errbuf, buff, bp); return; } @@ -1990,17 +1990,17 @@ int i, nqregs, curq, len; char *qregs[NSUBEXP]; regexp *re; int matched; - if ((re = regcomp(strupper(args[1]))) == NULL) { + if ((re = regcomp(args[1],1)) == NULL) { /* Matching error. */ safe_str("#-1 REGEXP ERROR: ", buff, bp); safe_str((const char *) regexp_errbuf, buff, bp); return; } - matched = (int) regexec(re, strupper(args[0])); + matched = (int) regexec(re, args[0]); safe_str(unparse_integer(matched), buff, bp); /* If we don't have a third argument, we're done. */ if (nargs < 3) { mush_free(re, "regexp"); diff -u5r pennmush-vanilla/src/regexp.c project/src/regexp.c --- pennmush-vanilla/src/regexp.c Sun Jul 18 10:13:44 1999 +++ project/src/regexp.c Tue Aug 24 14:24:14 1999 @@ -218,22 +218,26 @@ * * Beware that the optimization-preparation code in here knows about some * of the structure of the compiled regexp. */ regexp * -regcomp(exp) +regcomp(exp, caseinsensitive) char *exp; + int caseinsensitive; { register regexp *r; register char *scan; register char *longest; register int len; int flags; if (exp == NULL) FAIL("NULL argument"); + if (caseinsensitive) + upcasestr(exp); + /* First pass: determine size, legality. */ regparse = exp; regnpar = 1; regsize = 0L; regcode = &regdummy; @@ -257,10 +261,11 @@ regc(MAGIC); if (reg(0, &flags) == NULL) return (NULL); /* Dig out information for optimizations. */ + r->caseinsensitive = caseinsensitive; r->regstart = '\0'; /* Worst-case defaults. */ r->reganch = 0; r->regmust = NULL; r->regmlen = 0; scan = r->program + 1; /* First BRANCH. */ @@ -718,18 +723,20 @@ /* * Global work variables for regexec(). */ static char *reginput; /* String-input pointer. */ +static char *regbegin; /* Start of the string-input buffer */ +static char *regoriginal; /* Original string - for case insensitive */ static char *regbol; /* Beginning of input, for ^ check. 
*/ static char **regstartp; /* Pointer to startp array. */ static char **regendp; /* Ditto for endp. */ /* * Forwards. */ -STATIC int regtry _((regexp * prog, char *string)); +STATIC int regtry _((regexp * prog, char *string, char *ostring)); STATIC int regmatch _((char *prog)); STATIC int regrepeat _((char *p)); #ifdef DEBUG int regnarrate = 0; @@ -744,10 +751,12 @@ regexec(prog, string) register regexp *prog; register char *string; { register char *s; + register char *cstring; + int retval; /* Be paranoid... */ if (prog == NULL || string == NULL) { regerror("NULL parameter"); return (0); @@ -755,72 +764,95 @@ /* Check validity of program. */ if (UCHARAT(prog->program) != MAGIC) { regerror("corrupted program"); return (0); } + /* Check for a case insensitive match */ + cstring = strdup(string); + if (cstring == NULL) { + regerror("out of space"); + return (0); + } +#ifdef MEM_CHECK + add_check("regexec_cstring"); +#endif + if (prog->caseinsensitive) + upcasestr(cstring); + regbegin = cstring; /* If there is a "must appear" string, look for it. */ if (prog->regmust != NULL) { - s = string; + s = cstring; while ((s = strchr(s, prog->regmust[0])) != NULL) { if (strncmp(s, prog->regmust, prog->regmlen) == 0) break; /* Found it. */ s++; } - if (s == NULL) /* Not present. */ + if (s == NULL) { /* Not present. */ + mush_free(cstring, "regexec_cstring"); return (0); + } } /* Mark beginning of line for ^ . */ - regbol = string; + regbol = cstring; /* Simplest case: anchored match need be tried only once. */ - if (prog->reganch) - return (regtry(prog, string)); - + if (prog->reganch) { + retval = regtry(prog, cstring, string); + mush_free(cstring, "regexec_cstring"); + return (retval); + } /* Messy cases: unanchored match. */ - s = string; + s = cstring; if (prog->regstart != '\0') /* We know what char it must start with. 
*/ while ((s = strchr(s, prog->regstart)) != NULL) { - if (regtry(prog, s)) + if (regtry(prog, s, string + (s - cstring))) { + mush_free(cstring, "regexec_cstring"); return (1); + } s++; } else /* We don't -- general case. */ do { - if (regtry(prog, s)) + if (regtry(prog, s, string + (s - cstring))) { + mush_free(cstring, "regexec_cstring"); return (1); + } } while (*s++ != '\0'); /* Failure. */ + mush_free(cstring, "regexec_cstring"); return (0); } /* - regtry - try match at specific point */ static int /* 0 failure, 1 success */ -regtry(prog, string) +regtry(prog, string, ostring) regexp *prog; char *string; + char *ostring; { register int i; register char **sp; register char **ep; reginput = string; + regoriginal = ostring; regstartp = prog->startp; regendp = prog->endp; sp = prog->startp; ep = prog->endp; for (i = NSUBEXP; i > 0; i--) { *sp++ = NULL; *ep++ = NULL; } if (regmatch(prog->program + 1)) { - prog->startp[0] = string; - prog->endp[0] = reginput; + prog->startp[0] = ostring; + prog->endp[0] = ostring + (reginput - string); return (1); } else return (0); } @@ -915,11 +947,11 @@ * Don't set startp if some later * invocation of the same parentheses * already has. */ if (regstartp[no] == NULL) - regstartp[no] = save; + regstartp[no] = regoriginal + (save - regbegin); return (1); } else return (0); } break; @@ -943,11 +975,11 @@ * Don't set endp if some later * invocation of the same parentheses * already has. 
*/ if (regendp[no] == NULL) - regendp[no] = save; + regendp[no] = regoriginal + (save - regbegin); return (1); } else return (0); } break; diff -u5r pennmush-vanilla/src/wild.c project/src/wild.c --- pennmush-vanilla/src/wild.c Sun Jul 18 10:13:54 1999 +++ project/src/wild.c Tue Aug 24 14:11:46 1999 @@ -343,47 +343,25 @@ { int j; regexp *re; int i, len; static char wtmp[NUMARGS][BUFFER_LEN]; - char *news, *newd; - if (cs) { - news = (char *) s; - newd = (char *) d; - } else { - news = strdup(strupper(s)); - newd = strdup(strupper(d)); -#ifdef MEM_CHECK - add_check("regexp_upcase"); - add_check("regexp_upcase"); -#endif - } - - - if ((re = regcomp((char *) news)) == NULL) { + if ((re = regcomp((char *) s,cs?0:1)) == NULL) { /* * This is a matching error. We have an error message in * regexp_errbuf that we can ignore, since we're doing * command-matching. */ - if (!cs) { - mush_free(news, "regexp_upcase"); - mush_free(newd, "regexp_upcase"); - } return 0; } /* * Now we try to match the pattern. The relevant fields will * automatically be filled in by this. */ - if (!regexec(re, (char *) newd)) { + if (!regexec(re, (char *) d)) { mush_free(re, "regexp"); - if (!cs) { - mush_free(news, "regexp_upcase"); - mush_free(newd, "regexp_upcase"); - } return 0; } /* * Now we fill in our args vector. Note that in regexp matching, * 0 is the entire string matched, and the parenthesized strings @@ -412,14 +390,10 @@ wnxt[i] = wtmp[i]; } } mush_free(re, "regexp"); - if (!cs) { - mush_free(news, "regexp_upcase"); - mush_free(newd, "regexp_upcase"); - } return 1; } /* ---------------------------------------------------------------------------