head	1.2;
access;
symbols
	old_old_old_old_RELEASE_7_1_0:1.1
	old_old_old_old_RELEASE_6_4_0:1.1
	old_old_old_old_RELEASE_5_EOL:1.1
	old_old_old_old_RELEASE_7_0_0:1.1
	old_old_old_old_RELEASE_6_3_0:1.1;
locks; strict;
comment	@# @;


1.2
date	2009.01.13.21.38.50;	author skv;	state dead;
branches;
next	1.1;

1.1
date	2007.11.06.22.08.03;	author tobez;	state Exp;
branches;
next	;


desc
@@


1.2
log
@Update to 5.8.9

Also:

- add option SITECUSTOMIZE which enable -Dusesitecustomize [1]

- add patch against sv_dup() bug causes memory corruption in threaded perl [2]

Changes:	http://search.cpan.org/~nwclark/perl-5.8.9/pod/perl589delta.pod
PR:		111120 [1], 130033 [2]
Submitted by:	"Ian A. Tegebo" <yontege xx dev-mug.rescomp.berkeley.edu> [1],
		kevin brintnall <kbrint xx rufus.net> [2]
@
text
@--- regcomp.c.orig	2006-01-08 21:59:27.000000000 +0100
+++ regcomp.c	2007-11-06 22:48:26.000000000 +0100
@@@@ -135,7 +135,10 @@@@ typedef struct RExC_state_t {
     I32		extralen;
     I32		seen_zerolen;
     I32		seen_evals;
-    I32		utf8;
+    I32		utf8;		/* whether the pattern is utf8 or not */
+    I32		orig_utf8;	/* whether the pattern was originally in utf8 */
+				/* XXX use this for future optimisation of case
+				 * where pattern must be upgraded to utf8. */
 #if ADD_TO_REGEXEC
     char 	*starttry;		/* -Dr: where regtry was called. */
 #define RExC_starttry	(pRExC_state->starttry)
@@@@ -161,6 +164,7 @@@@ typedef struct RExC_state_t {
 #define RExC_seen_zerolen	(pRExC_state->seen_zerolen)
 #define RExC_seen_evals	(pRExC_state->seen_evals)
 #define RExC_utf8	(pRExC_state->utf8)
+#define RExC_orig_utf8	(pRExC_state->orig_utf8)
 
 #define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
 #define	ISMULT2(s)	((*s) == '*' || (*s) == '+' || (*s) == '?' || \
@@@@ -1749,15 +1753,16 @@@@ Perl_pregcomp(pTHX_ char *exp, char *xen
     if (exp == NULL)
 	FAIL("NULL regexp argument");
 
-    RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
+    RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
 
-    RExC_precomp = exp;
     DEBUG_r({
 	 if (!PL_colorset) reginitcolors();
 	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
 		       PL_colors[4],PL_colors[5],PL_colors[0],
-		       (int)(xend - exp), RExC_precomp, PL_colors[1]);
+		       (int)(xend - exp), exp, PL_colors[1]);
     });
+redo_first_pass:
+    RExC_precomp = exp;
     RExC_flags = pm->op_pmflags;
     RExC_sawback = 0;
 
@@@@ -1783,6 +1788,25 @@@@ Perl_pregcomp(pTHX_ char *exp, char *xen
 	RExC_precomp = Nullch;
 	return(NULL);
     }
+    if (RExC_utf8 && !RExC_orig_utf8) {
+        /* It's possible to write a regexp in ascii that represents unicode
+        codepoints outside of the byte range, such as via \x{100}. If we
+        detect such a sequence we have to convert the entire pattern to utf8
+        and then recompile, as our sizing calculation will have been based
+        on 1 byte == 1 character, but we will need to use utf8 to encode
+        at least some part of the pattern, and therefore must convert the whole
+        thing.
+        XXX: somehow figure out how to make this less expensive...
+        -- dmq */
+        STRLEN len = xend-exp;
+        DEBUG_r(PerlIO_printf(Perl_debug_log,
+	    "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
+        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
+        xend = exp + len;
+        RExC_orig_utf8 = RExC_utf8;
+        SAVEFREEPV(exp);
+        goto redo_first_pass;
+    }
     DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
 
     /* Small enough for pointer-storage convention?
--- t/op/pat.t.orig	2006-01-07 13:53:32.000000000 +0100
+++ t/op/pat.t	2007-11-06 21:52:30.000000000 +0100
@@@@ -6,7 +6,7 @@@@
 
 $| = 1;
 
-print "1..1187\n";
+print "1..1189\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@@@ -3395,5 +3395,14 @@@@ ok(("foba  ba$s" =~ qr/(foo|BaSS|bar)/i)
        "# assigning to original string should not corrupt match vars");
 }
 
-# last test 1187
+{
+    use warnings;
+    my @@w;
+    local $SIG{__WARN__}=sub{push @@w,"@@_"};
+    my $c=qq(\x{DF}); 
+    ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8");
+    ok(@@w==0, "ASCII pattern that really is utf8");
+}    
+
+# last test 1189
 
@


1.1
log
@Fix a possible buffer overflow with ASCII regexes that really are
Unicode regexes.

Obtained from:	perl5-porters (Nicholas Clark), with modifications
Approved by:	portmgr (marcus)
@
text
@@

