This patch makes it possible to hook into processing of constants in Perl. This is a very improved version of my previous patch. Note the following design decisions: we differentiate integers/octals+ --- ./op.c~ Tue Dec 9 00:57:24 1997 +++ ./op.c Mon Dec 15 19:36:24 1997 @@ -1496,11 +1496,39 @@ scope(OP *o) return o; } +static void +restore_hints(void *p) +{ + GV *gv = (GV*)p; + + if (!GvHV(gv)) + return; + SvREFCNT_dec((SV*)GvHV(gv)); + GvHV(gv) = NULL; +} + +void +save_hints(void) +{ + GV* gv = gv_fetchpv("\010", TRUE, SVt_PV); /* *^H */ + + SAVEI32(hints); + if (hints & HINT_LOCALIZE_HH) { + SAVESPTR(GvHV(gv)); + GvHV(gv) = newHVhv(GvHV(gv)); + SAVEFREESV(GvHV(gv)); + } else { + /* Common case, do minimal job possible */ + SAVEDESTRUCTOR(restore_hints,gv); + } +} + int block_start(int full) { dTHR; int retval = savestack_ix; + SAVEI32(comppad_name_floor); if (full) { if ((comppad_name_fill = AvFILL(comppad_name)) > 0) @@ -1515,7 +1543,7 @@ block_start(int full) SAVEI32(padix_floor); padix_floor = padix; pad_reset_pending = FALSE; - SAVEI32(hints); + save_hints(); hints &= ~HINT_BLOCK_SCOPE; return retval; } --- ./hv.c~ Thu Dec 4 01:00:18 1997 +++ ./hv.c Mon Dec 15 19:55:42 1997 @@ -734,6 +734,55 @@ newHV(void) return hv; } +HV * +newHVhv(HV *ohv) +{ + register HV *hv; + register XPVHV* xhv; + STRLEN hv_max = ohv ? HvMAX(ohv) : 0; + STRLEN hv_fill = ohv ? HvFILL(ohv) : 0; + + hv = (HV*)NEWSV(502,0); + sv_upgrade((SV *)hv, SVt_PVHV); + xhv = (XPVHV*)SvANY(hv); + SvPOK_off(hv); + SvNOK_off(hv); +#ifndef NODEFAULT_SHAREKEYS + HvSHAREKEYS_on(hv); /* key-sharing on by default */ +#endif + while (hv_max && hv_max + 1 >= hv_fill * 2) + hv_max = hv_max / 2; /* Is always 2^n-1 */ + xhv->xhv_max = hv_max; + xhv->xhv_fill = 0; + xhv->xhv_pmroot = 0; + (void)hv_iterinit(hv); /* so each() will start off right */ + if (!hv_fill) + return hv; + +#if 0 + if (!SvRMAGICAL(ohv) || !mg_find((SV*)ohv,'P')) { + /* Quick way ???*/ + } + else +#endif + { + HE *entry; + I32 hv_riter = HvRITER(ohv); /* current root of iterator */ + HE *hv_eiter = HvEITER(ohv); /* current entry of iterator */ + + /* Slow way */ + hv_iterinit(hv); + while (entry = hv_iternext(ohv)) { + hv_store(hv, HeKEY(entry), HeKLEN(entry), + SvREFCNT_inc(HeVAL(entry)), HeHASH(entry)); + } + HvRITER(ohv) = hv_riter; + HvEITER(ohv) = hv_eiter; + } + + return hv; +} + void hv_free_ent(HV *hv, register HE *entry) { --- ./t/pragma/overload.t~ Tue Nov 25 06:55:48 1997 +++ ./t/pragma/overload.t Tue Dec 16 15:32:48 1997 @@ -48,7 +48,20 @@ $| = 1; print "1..",&last,"\n"; sub test { - $test++; if (shift) {print "ok $test\n";1} else {print "not ok $test\n";0} + $test++; + if (@_ > 1) { + if ($_[0] eq $_[1]) { + print "ok $test\n"; + } else { + print "not ok $test: '$_[0]' ne '$_[1]'\n"; + } + } else { + if (shift) { + print "ok $test\n"; + } else { + print "not ok $test\n"; + } + } } $a = new Oscalar "087"; @@ -359,5 +372,70 @@ test(($aI | 3) eq '_<<_xx_<<_'); # 114 # warn $aII << 3; test(($aII << 3) eq '_<<_087_<<_'); # 115 +{ + BEGIN { $int = 7; overload::constant 'integer' => sub {$int++; shift}; } + $out = 2**10; +} +test($int, 9); # 116 +test($out, 1024); # 117 + +$foo = 'foo'; +$foo1 = 'f\'o\\o'; +{ + BEGIN { $q = $qr = 7; + overload::constant 'q' => sub {$q++; push @q, shift, ($_[1] || 'none'); shift}, + 'qr' => sub {$qr++; push @qr, shift, ($_[1] || 'none'); shift}; } + $out = 'foo'; + $out1 = 'f\'o\\o'; + $out2 = "a\a$foo,\,"; + /b\b$foo.\./; +} + +test($out, 'foo'); # 118 +test($out, $foo); # 119 +test($out1, 'f\'o\\o'); # 120 +test($out1, $foo1); # 121 +test($out2, "a\afoo,\,"); # 122 +test("@q", "foo q f'o\\\\o q a\\a qq ,\\, qq"); # 123 +test($q, 11); # 124 +test("@qr", "b\\b qq .\\. qq"); # 125 +test($qr, 9); # 126 + +{ + $_ = '!!foo!<-.>!'; + BEGIN { overload::constant 'q' => sub {push @q1, shift, ($_[1] || 'none'); "_<" . (shift) . ">_"}, + 'qr' => sub {push @qr1, shift, ($_[1] || 'none'); "!<" . (shift) . ">!"}; } + $out = 'foo'; + $out1 = 'f\'o\\o'; + $out2 = "a\a$foo,\,"; + $res = /b\b$foo.\./; + $a = <_'); # 117 +test($out1, '__'); # 128 +test($out2, "__foo_<,\,>_"); # 129 +test("@q1", "foo q f'o\\\\o q a\\a qq ,\\, qq oups + qq oups1 + q second part q tail here s z-Z tr z-Z tr"); # 130 +test("@qr1", "b\\b qq .\\. qq try it q first part q yet another qq"); # 131 +test($res, 1); # 132 +test($a, "__"); # 133 +test($b, "__"); # 134 +test($c, "bareword"); # 135 + + # Last test is: -sub last {115} +sub last {135} --- ./lib/overload.pm~ Tue Nov 25 06:52:52 1997 +++ ./lib/overload.pm Tue Dec 16 15:29:00 1997 @@ -100,6 +100,32 @@ sub mycan { # Real can would leave st return undef; } +%constants = ( + 'integer' => 0x1000, + 'float' => 0x2000, + 'binary' => 0x4000, + 'q' => 0x8000, + 'qr' => 0x10000, + ); + +sub constant { + # Arguments: what, sub + while (@_) { + $^H{$_[0]} = $_[1]; + $^H |= $constants{$_[0]} | 0x20000; + shift, shift; + } +} + +sub unconstant { + # Arguments: what, sub + while (@_) { + delete $^H{$_[0]}; + $^H &= ~ $constants{$_[0]}; + shift, shift; + } +} + 1; __END__ @@ -522,6 +548,72 @@ Returns C or a reference to the m =back +=head1 Overloading constants + +For some application Perl parser mangles constants too much. It is possible +to hook into this process via overload::constant() and overload::unconstant() +functions. + +These functions take a hash as an argument. The recognized keys of this hash +are + +=over 8 + +=item integer + +to overload integer constants, + +=item float + +to overload floating point constants, + +=item binary + +to overload octal and hexadecimal constants, + +=item q + +to overload C-quoted strings, constant pieces of C- and C-quoted +strings and here-documents, + +=item qr + +to overload constant pieces of regular expressions. + +=back + +The corresponding values are references to functions which take three arguments: +the first one is the I string form of the constant, the second one +is how Perl interprets this constant, the third one is how the constant is used. +Note that the initial string form does not +contain string delimiters, and has backslashes in backslash-delimiter +combinations stripped (thus the value of delimiter is not relevant for +processing of this string). The return value of this function is how this +constant is going to be interpreted by Perl. The third argument is undefined +unless for overloaded C- and C- constants, it is C in single-quote +context (comes from strings, regular expressions, and single-quote HERE +documents), it is C for arguments of C/C operators, +it is C for right-hand side of C-operator, and it is C otherwise. + +Since an expression C<"ab$cd,,"> is just a shortcut for C<'ab' . $cd . ',,'>, +it is expected that overloaded constant strings are equipped with reasonable +overloaded catenation operator, otherwise absurd results will result. +Similarly, negative numbers are considered as negations of positive constants. + +Note that it is probably meaningless to call the functions overload::constant() +and overload::unconstant() from anywhere but import() and unimport() methods. +From these methods they may be called as + + sub import { + shift; + return unless @_; + die "unknown import: @_" unless @_ == 1 and $_[0] eq ':constant'; + overload::constant integer => sub {Math::BigInt->new(shift)}; + } + +B Currently overloaded-ness of constants does not propagate +into C. + =head1 IMPLEMENTATION What follows is subject to change RSN. @@ -596,6 +688,8 @@ For the purpose of inheritance every ove C is present (possibly undefined). This may create interesting effects if some package is not overloaded, but inherits from two overloaded packages. + +Barewords are not covered by overloaded string constants. This document is confusing. --- ./lib/Math/BigInt.pm~ Tue Nov 25 06:52:32 1997 +++ ./lib/Math/BigInt.pm Tue Dec 16 03:02:00 1997 @@ -36,6 +36,12 @@ sub stringify { "${$_[0]}" } sub numify { 0 + "${$_[0]}" } # Not needed, additional overhead # comparing to direct compilation based on # stringify +sub import { + shift; + return unless @_; + die "unknown import: @_" unless @_ == 1 and $_[0] eq ':constant'; + overload::constant integer => sub {Math::BigInt->new(shift)}; +} $zero = 0; @@ -383,6 +389,19 @@ are not numbers, as well as the result o ' -123 123 123' canonical value '-123123123' '1 23 456 7890' canonical value '+1234567890' + +=head1 Autocreating constants + +After C all the integer decimal constants +in the given scope are converted to C. This convertion +happens at compile time. + +In particular + + perl -MMath::BigInt=:constant -e 'print 2**100' + +print the integer value of C<2**100>. Note that without convertion of +constants the expression 2**100 will be calculatted as floating point number. =head1 BUGS --- ./global.sym.orig Fri May 15 09:59:13 1998 +++ ./global.sym Wed May 27 18:43:26 1998 @@ -474,6 +474,7 @@ newGVREF newGVgen newHV newHVREF +newHVhv newIO newLISTOP newLOGOP @@ -921,6 +922,7 @@ save_freesv save_gp save_hash save_helem +save_hints save_hptr save_int save_item --- ./proto.h.orig Fri May 15 10:00:37 1998 +++ ./proto.h Wed May 27 18:44:32 1998 @@ -330,6 +330,7 @@ GV* newGVgen _((char* pack)); OP* newGVREF _((I32 type, OP* o)); OP* newHVREF _((OP* o)); HV* newHV _((void)); +HV* newHVhv _((HV *hv)); IO* newIO _((void)); OP* newLISTOP _((I32 type, I32 flags, OP* first, OP* last)); OP* newPMOP _((I32 type, I32 flags)); @@ -444,6 +445,7 @@ void save_freepv _((char* pv)); void save_gp _((GV* gv, I32 empty)); HV* save_hash _((GV* gv)); void save_helem _((HV* hv, SV *key, SV **sptr)); +void save_hints _((void)); void save_hptr _((HV** hptr)); void save_I16 _((I16* intp)); void save_I32 _((I32* intp)); --- ./pp_ctl.c.orig Fri May 15 10:00:33 1998 +++ ./pp_ctl.c Wed May 27 18:45:20 1998 @@ -2168,7 +2168,7 @@ sv_compile_2op(SV *sv, OP** startop, cha introduced within evals. See force_ident(). GSAR 96-10-12 */ safestr = savepv(tmpbuf); SAVEDELETE(defstash, safestr, strlen(safestr)); - SAVEI32(hints); + save_hints(); #ifdef OP_IN_REGISTER opsave = op; #else @@ -2496,7 +2496,7 @@ PP(pp_require) rsfp = tryrsfp; name = savepv(name); SAVEFREEPV(name); - SAVEI32(hints); + save_hints(); hints = 0; /* switch to eval mode */ @@ -2556,7 +2556,7 @@ PP(pp_entereval) introduced within evals. See force_ident(). GSAR 96-10-12 */ safestr = savepv(tmpbuf); SAVEDELETE(defstash, safestr, strlen(safestr)); - SAVEI32(hints); + save_hints(); hints = op->op_targ; push_return(op->op_next); --- ./perl.h~ Wed May 27 18:35:32 1998 +++ ./perl.h Wed May 27 18:46:22 1998 @@ -1644,6 +1644,13 @@ typedef enum { #define HINT_STRICT_VARS 0x00000400 #define HINT_LOCALE 0x00000800 +#define HINT_NEW_INTEGER 0x00001000 +#define HINT_NEW_FLOAT 0x00002000 +#define HINT_NEW_BINARY 0x00004000 +#define HINT_NEW_STRING 0x00008000 +#define HINT_NEW_RE 0x00010000 +#define HINT_LOCALIZE_HH 0x00020000 /* %^H needs to be copied */ + /* Various states of an input record separator SV (rs, nrs) */ #define RsSNARF(sv) (! SvOK(sv)) #define RsSIMPLE(sv) (SvOK(sv) && SvCUR(sv)) --- ./toke.c.orig Fri May 15 10:02:39 1998 +++ ./toke.c Wed May 27 18:50:15 1998 @@ -49,6 +49,7 @@ static int uni _((I32 f, char *s)); #endif static char * filter_gets _((SV *sv, PerlIO *fp, STRLEN append)); static void restore_rsfp _((void *f)); +static SV *new_constant _((char *s, STRLEN len, char *key, SV *sv, SV *pv, char *type)); static void restore_expect _((void *e)); static void restore_lex_expect _((void *e)); @@ -598,20 +599,23 @@ q(SV *sv) register char *s; register char *send; register char *d; - STRLEN len; + STRLEN len = 0; + SV *pv = sv; if (!SvLEN(sv)) - return sv; + goto finish; s = SvPV_force(sv, len); if (SvIVX(sv) == -1) - return sv; + goto finish; send = s + len; while (s < send && *s != '\\') s++; if (s == send) - return sv; + goto finish; d = s; + if ( hints & HINT_NEW_STRING ) + pv = sv_2mortal(newSVpv(SvPVX(pv), len)); while (s < send) { if (*s == '\\') { if (s + 1 < send && (s[1] == '\\')) @@ -621,7 +625,9 @@ q(SV *sv) } *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); - + finish: + if ( hints & HINT_NEW_STRING ) + return new_constant(NULL, 0, "q", sv, pv, "q"); return sv; } @@ -637,10 +643,19 @@ sublex_start(void) } if (op_type == OP_CONST || op_type == OP_READLINE) { SV *sv = q(lex_stuff); - STRLEN len; - char *p = SvPV(sv, len); - yylval.opval = (OP*)newSVOP(op_type, 0, newSVpv(p, len)); - SvREFCNT_dec(sv); + + if (SvTYPE(sv) == SVt_PVIV) { + /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ + STRLEN len; + char *p; + SV *nsv; + + p = SvPV(sv, len); + nsv = newSVpv(p, len); + SvREFCNT_dec(sv); + sv = nsv; + } + yylval.opval = (OP*)newSVOP(op_type, 0, sv); lex_stuff = Nullsv; return THING; } @@ -1033,9 +1048,17 @@ scan_const(char *start) } /* return the substring (via yylval) only if we parsed anything */ - if (s > bufptr) + if (s > bufptr) { + if ( hints & ( lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) + sv = new_constant(start, s - start, (lex_inpat ? "qr" : "q"), + sv, Nullsv, + ( lex_inwhat == OP_TRANS + ? "tr" + : ( (lex_inwhat == OP_SUBST && !lex_inpat) + ? "s" + : "qq"))); yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); - else + } else SvREFCNT_dec(sv); return s; } @@ -1667,6 +1690,8 @@ yylex(void) SV *sv = newSVsv(linestr); if (!lex_inpat) sv = q(sv); + else if ( hints & HINT_NEW_RE ) + sv = new_constant(NULL, 0, "qr", sv, sv, "q"); yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); s = bufend; } @@ -4700,6 +4725,74 @@ checkcomma(register char *s, char *name, } } +static SV * +new_constant(char *s, STRLEN len, char *key, SV *sv, SV *pv, char *type) +{ + HV *table = perl_get_hv("\10", FALSE); /* ^H */ + dTHR; + dSP; + BINOP myop; + SV *res; + bool oldcatch = CATCH_GET; + SV **cvp; + SV *cv, *typesv; + char buf[128]; + + if (!table) { + yyerror("%^H is not defined"); + return sv; + } + cvp = hv_fetch(table, key, strlen(key), FALSE); + if (!cvp || !SvOK(*cvp)) { + sprintf(buf,"$^H{%s} is not defined", key); + yyerror(buf); + return sv; + } + sv_2mortal(sv); /* Parent created it permanently */ + cv = *cvp; + if (!pv) + pv = sv_2mortal(newSVpv(s, len)); + if (type) + typesv = sv_2mortal(newSVpv(type, 0)); + else + typesv = &sv_undef; + CATCH_SET(TRUE); + Zero(&myop, 1, BINOP); + myop.op_last = (OP *) &myop; + myop.op_next = Nullop; + myop.op_flags = OPf_WANT_SCALAR | OPf_STACKED; + + ENTER; + SAVEOP(); + op = (OP *) &myop; + if (PERLDB_SUB && curstash != debstash) + op->op_private |= OPpENTERSUB_DB; + PUTBACK; + pp_pushmark(ARGS); + + EXTEND(sp, 3); + PUSHs(pv); + PUSHs(sv); + PUSHs(typesv); + PUSHs(cv); + PUTBACK; + + if (op = pp_entersub(ARGS)) + runops(); + LEAVE; + SPAGAIN; + + res = POPs; + PUTBACK; + CATCH_SET(oldcatch); + + if (!SvOK(res)) { + sprintf(buf,"Call to &{$^H{%s}} did not return a defined value", key); + yyerror(buf); + } + return SvREFCNT_inc(res); +} + static char * scan_word(register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) { @@ -5552,7 +5645,8 @@ scan_num(char *start) digit: n = u << shift; /* make room for the digit */ - if (!overflowed && (n >> shift) != u) { + if (!overflowed && (n >> shift) != u + && !(hints & HINT_NEW_BINARY)) { warn("Integer overflow in %s number", (shift == 4) ? "hex" : "octal"); overflowed = TRUE; @@ -5568,6 +5662,8 @@ scan_num(char *start) out: sv = NEWSV(92,0); sv_setuv(sv, u); + if ( hints & HINT_NEW_BINARY) + sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL); } break; @@ -5669,6 +5765,9 @@ scan_num(char *start) sv_setiv(sv, tryiv); else sv_setnv(sv, value); + if ( floatit ? (hints & HINT_NEW_FLOAT) : (hints & HINT_NEW_INTEGER) ) + sv = new_constant(tokenbuf, d - tokenbuf, + (floatit ? "float" : "integer"), sv, Nullsv, NULL); break; }