| File | /usr/local/lib/perl5/5.10.1/utf8_heavy.pl |
| Statements Executed | 104 |
| Statement Execution Time | 4.50ms |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 4.73ms | 4.77ms | utf8::SWASHNEW |
| 39 | 6 | 2 | 25µs | 25µs | utf8::CORE:match (opcode) |
| 12 | 2 | 3 | 24µs | 24µs | utf8::downgrade (xsub) |
| 23 | 1 | 2 | 22µs | 22µs | utf8::encode (xsub) |
| 1 | 1 | 1 | 12µs | 17µs | utf8::BEGIN@2 |
| 1 | 1 | 1 | 11µs | 28µs | utf8::BEGIN@211 |
| 4 | 4 | 2 | 11µs | 11µs | utf8::CORE:subst (opcode) |
| 1 | 1 | 2 | 10µs | 10µs | utf8::CORE:sort (opcode) |
| 1 | 1 | 1 | 10µs | 29µs | utf8::BEGIN@3 |
| 1 | 1 | 1 | 9µs | 20µs | utf8::BEGIN@155 |
| 1 | 1 | 1 | 8µs | 27µs | utf8::BEGIN@76 |
| 0 | 0 | 0 | 0s | 0s | utf8::DESTROY |
| 0 | 0 | 0 | 0s | 0s | utf8::croak |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | package utf8; | ||||
| 2 | 3 | 21µs | 2 | 22µs | # spent 17µs (12+5) within utf8::BEGIN@2 which was called
# once (12µs+5µs) by utf8::AUTOLOAD at line 2 # spent 17µs making 1 call to utf8::BEGIN@2
# spent 5µs making 1 call to strict::import |
| 3 | 3 | 230µs | 2 | 49µs | # spent 29µs (10+20) within utf8::BEGIN@3 which was called
# once (10µs+20µs) by utf8::AUTOLOAD at line 3 # spent 29µs making 1 call to utf8::BEGIN@3
# spent 20µs making 1 call to warnings::import |
| 4 | |||||
| 5 | sub DEBUG () { 0 } | ||||
| 6 | |||||
| 7 | sub DESTROY {} | ||||
| 8 | |||||
| 9 | 1 | 200ns | my %Cache; | ||
| 10 | |||||
| 11 | 1 | 500ns | our (%PropertyAlias, %PA_reverse, %PropValueAlias, %PVA_reverse, %PVA_abbr_map); | ||
| 12 | |||||
| 13 | sub croak { require Carp; Carp::croak(@_) } | ||||
| 14 | |||||
| 15 | ## | ||||
| 16 | ## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape. | ||||
| 17 | ## It's a data structure that encodes a set of Unicode characters. | ||||
| 18 | ## | ||||
| 19 | |||||
| 20 | # spent 4.77ms (4.73+46µs) within utf8::SWASHNEW which was called
# once (4.73ms+46µs) by XML::Simple::CORE:match at line 18 of utf8.pm | ||||
| 21 | 1 | 1µs | my ($class, $type, $list, $minbits, $none) = @_; | ||
| 22 | 1 | 600ns | local $^D = 0 if $^D; | ||
| 23 | |||||
| 24 | 1 | 0s | print STDERR "SWASHNEW @_\n" if DEBUG; | ||
| 25 | |||||
| 26 | ## | ||||
| 27 | ## Get the list of codepoints for the type. | ||||
| 28 | ## Called from swash_init (see utf8.c) or SWASHNEW itself. | ||||
| 29 | ## | ||||
| 30 | ## Callers of swash_init: | ||||
| 31 | ## op.c:pmtrans -- for tr/// and y/// | ||||
| 32 | ## regexec.c:regclass_swash -- for /[]/, \p, and \P | ||||
| 33 | ## utf8.c:is_utf8_common -- for common Unicode properties | ||||
| 34 | ## utf8.c:to_utf8_case -- for lc, uc, ucfirst, etc. and //i | ||||
| 35 | ## | ||||
| 36 | ## Given a $type, our goal is to fill $list with the set of codepoint | ||||
| 37 | ## ranges. If $type is false, $list passed is used. | ||||
| 38 | ## | ||||
| 39 | ## $minbits: | ||||
| 40 | ## For binary properties, $minbits must be 1. | ||||
| 41 | ## For character mappings (case and transliteration), $minbits must | ||||
| 42 | ## be a number other than 1. | ||||
| 43 | ## | ||||
| 44 | ## $list (or that filled according to $type): | ||||
| 45 | ## Refer to perlunicode.pod, "User-Defined Character Properties." | ||||
| 46 | ## | ||||
| 47 | ## For binary properties, only characters with the property value | ||||
| 48 | ## of True should be listed. The 3rd column, if any, will be ignored. | ||||
| 49 | ## | ||||
| 50 | ## To make the parsing of $type clear, this code takes a rather | ||||
| 51 | ## unorthodox approach of last'ing out of the block once we have the | ||||
| 52 | ## info we need. Were this to be a subroutine, the 'last' would just | ||||
| 53 | ## be a 'return'. | ||||
| 54 | ## | ||||
| 55 | 1 | 0s | my $file; ## file to load data from, and also part of the %Cache key. | ||
| 56 | 1 | 200ns | my $ListSorted = 0; | ||
| 57 | |||||
| 58 | 1 | 200ns | if ($type) | ||
| 59 | { | ||||
| 60 | 1 | 8µs | 1 | 3µs | $type =~ s/^\s+//; # spent 3µs making 1 call to utf8::CORE:subst |
| 61 | 1 | 5µs | 1 | 3µs | $type =~ s/\s+$//; # spent 3µs making 1 call to utf8::CORE:subst |
| 62 | |||||
| 63 | 1 | 0s | print STDERR "type = $type\n" if DEBUG; | ||
| 64 | |||||
| 65 | 1 | 3µs | 1 | 400ns | GETFILE: # spent 400ns making 1 call to utf8::CORE:subst |
| 66 | { | ||||
| 67 | ## | ||||
| 68 | ## It could be a user-defined property. | ||||
| 69 | ## | ||||
| 70 | |||||
| 71 | 1 | 100ns | my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1); | ||
| 72 | |||||
| 73 | 1 | 7µs | 1 | 3µs | if (defined $caller1 && $type =~ /^(?:\w+)$/) { # spent 3µs making 1 call to utf8::CORE:match |
| 74 | 1 | 1µs | my $prop = "${caller1}::$type"; | ||
| 75 | 1 | 1µs | if (exists &{$prop}) { | ||
| 76 | 3 | 373µs | 2 | 46µs | # spent 27µs (8+19) within utf8::BEGIN@76 which was called
# once (8µs+19µs) by utf8::AUTOLOAD at line 76 # spent 27µs making 1 call to utf8::BEGIN@76
# spent 19µs making 1 call to strict::unimport |
| 77 | |||||
| 78 | $list = &{$prop}; | ||||
| 79 | last GETFILE; | ||||
| 80 | } | ||||
| 81 | } | ||||
| 82 | |||||
| 83 | 1 | 100ns | my $wasIs; | ||
| 84 | |||||
| 85 | 1 | 7µs | 1 | 4µs | ($wasIs = $type =~ s/^Is(?:\s+|[-_])?//i) # spent 4µs making 1 call to utf8::CORE:subst |
| 86 | or | ||||
| 87 | $type =~ s/^(?:(?:General(?:\s+|_)?)?Category|gc)\s*[:=]\s*//i | ||||
| 88 | or | ||||
| 89 | $type =~ s/^(?:Script|sc)\s*[:=]\s*//i | ||||
| 90 | or | ||||
| 91 | $type =~ s/^Block\s*[:=]\s*/In/i; | ||||
| 92 | |||||
| 93 | |||||
| 94 | ## | ||||
| 95 | ## See if it's in some enumeration. | ||||
| 96 | ## | ||||
| 97 | 1 | 2.81ms | require "unicore/PVA.pl"; | ||
| 98 | 1 | 13µs | 1 | 5µs | if ($type =~ /^([\w\s]+)[:=]\s*(.*)/) { # spent 5µs making 1 call to utf8::CORE:match |
| 99 | my ($enum, $val) = (lc $1, lc $2); | ||||
| 100 | $enum =~ tr/ _-//d; | ||||
| 101 | $val =~ tr/ _-//d; | ||||
| 102 | |||||
| 103 | my $pa = $PropertyAlias{$enum} ? $enum : $PA_reverse{$enum}; | ||||
| 104 | my $f = $PropValueAlias{$pa}{$val} ? $val : $PVA_reverse{$pa}{lc $val}; | ||||
| 105 | |||||
| 106 | if ($pa and $f) { | ||||
| 107 | $pa = "gc_sc" if $pa eq "gc" or $pa eq "sc"; | ||||
| 108 | $file = "unicore/lib/$pa/$PVA_abbr_map{$pa}{lc $f}.pl"; | ||||
| 109 | last GETFILE; | ||||
| 110 | } | ||||
| 111 | } | ||||
| 112 | else { | ||||
| 113 | 1 | 1µs | my $t = lc $type; | ||
| 114 | 1 | 7µs | $t =~ tr/ _-//d; | ||
| 115 | |||||
| 116 | 1 | 2µs | if ($PropValueAlias{gc}{$t} or $PropValueAlias{sc}{$t}) { | ||
| 117 | $file = "unicore/lib/gc_sc/$PVA_abbr_map{gc_sc}{$t}.pl"; | ||||
| 118 | last GETFILE; | ||||
| 119 | } | ||||
| 120 | } | ||||
| 121 | |||||
| 122 | ## | ||||
| 123 | ## See if it's in the direct mapping table. | ||||
| 124 | ## | ||||
| 125 | 1 | 158µs | require "unicore/Exact.pl"; | ||
| 126 | 1 | 1µs | if (my $base = $utf8::Exact{$type}) { | ||
| 127 | 1 | 1µs | $file = "unicore/lib/gc_sc/$base.pl"; | ||
| 128 | 1 | 2µs | last GETFILE; | ||
| 129 | } | ||||
| 130 | |||||
| 131 | ## | ||||
| 132 | ## If not there exactly, try the canonical form. The canonical | ||||
| 133 | ## form is lowercased, with any separators (\s+|[-_]) removed. | ||||
| 134 | ## | ||||
| 135 | my $canonical = lc $type; | ||||
| 136 | $canonical =~ s/(?<=[a-z\d])(?:\s+|[-_])(?=[a-z\d])//g; | ||||
| 137 | print STDERR "canonical = $canonical\n" if DEBUG; | ||||
| 138 | |||||
| 139 | require "unicore/Canonical.pl"; | ||||
| 140 | if (my $base = ($utf8::Canonical{$canonical} || $utf8::Canonical{ lc $utf8::PropertyAlias{$canonical} })) { | ||||
| 141 | $file = "unicore/lib/gc_sc/$base.pl"; | ||||
| 142 | last GETFILE; | ||||
| 143 | } | ||||
| 144 | |||||
| 145 | ## | ||||
| 146 | ## See if it's a user-level "To". | ||||
| 147 | ## | ||||
| 148 | |||||
| 149 | my $caller0 = caller(0); | ||||
| 150 | |||||
| 151 | if (defined $caller0 && $type =~ /^To(?:\w+)$/) { | ||||
| 152 | my $map = $caller0 . "::" . $type; | ||||
| 153 | |||||
| 154 | if (exists &{$map}) { | ||||
| 155 | 3 | 171µs | 2 | 31µs | # spent 20µs (9+11) within utf8::BEGIN@155 which was called
# once (9µs+11µs) by utf8::AUTOLOAD at line 155 # spent 20µs making 1 call to utf8::BEGIN@155
# spent 11µs making 1 call to strict::unimport |
| 156 | |||||
| 157 | $list = &{$map}; | ||||
| 158 | last GETFILE; | ||||
| 159 | } | ||||
| 160 | } | ||||
| 161 | |||||
| 162 | ## | ||||
| 163 | ## Last attempt -- see if it's a standard "To" name | ||||
| 164 | ## (e.g. "ToLower") ToTitle is used by ucfirst(). | ||||
| 165 | ## The user-level way to access ToDigit() and ToFold() | ||||
| 166 | ## is to use Unicode::UCD. | ||||
| 167 | ## | ||||
| 168 | if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/) { | ||||
| 169 | $file = "unicore/To/$1.pl"; | ||||
| 170 | ## would like to test to see if $file actually exists.... | ||||
| 171 | last GETFILE; | ||||
| 172 | } | ||||
| 173 | |||||
| 174 | ## | ||||
| 175 | ## If we reach this line, it's because we couldn't figure | ||||
| 176 | ## out what to do with $type. Ouch. | ||||
| 177 | ## | ||||
| 178 | |||||
| 179 | return $type; | ||||
| 180 | } | ||||
| 181 | |||||
| 182 | 1 | 400ns | if (defined $file) { | ||
| 183 | 1 | 100ns | print STDERR "found it (file='$file')\n" if DEBUG; | ||
| 184 | |||||
| 185 | ## | ||||
| 186 | ## If we reach here, it was due to a 'last GETFILE' above | ||||
| 187 | ## (exception: user-defined properties and mappings), so we | ||||
| 188 | ## have a filename, so now we load it if we haven't already. | ||||
| 189 | ## If we have, return the cached results. The cache key is the | ||||
| 190 | ## class and file to load. | ||||
| 191 | ## | ||||
| 192 | 1 | 2µs | my $found = $Cache{$class, $file}; | ||
| 193 | 1 | 200ns | if ($found and ref($found) eq $class) { | ||
| 194 | print STDERR "Returning cached '$file' for \\p{$type}\n" if DEBUG; | ||||
| 195 | return $found; | ||||
| 196 | } | ||||
| 197 | |||||
| 198 | 2 | 55µs | $list = do $file; die $@ if $@; | ||
| 199 | } | ||||
| 200 | |||||
| 201 | 1 | 500ns | $ListSorted = 1; ## we know that these lists are sorted | ||
| 202 | } | ||||
| 203 | |||||
| 204 | 1 | 100ns | my $extras; | ||
| 205 | 1 | 100ns | my $bits = $minbits; | ||
| 206 | |||||
| 207 | 1 | 400ns | my $ORIG = $list; | ||
| 208 | 1 | 6µs | if ($list) { | ||
| 209 | 1 | 7µs | my @tmp = split(/^/m, $list); | ||
| 210 | 1 | 100ns | my %seen; | ||
| 211 | 3 | 455µs | 2 | 45µs | # spent 28µs (11+17) within utf8::BEGIN@211 which was called
# once (11µs+17µs) by utf8::AUTOLOAD at line 211 # spent 28µs making 1 call to utf8::BEGIN@211
# spent 17µs making 1 call to warnings::unimport |
| 212 | 1 | 21µs | 12 | 4µs | $extras = join '', grep /^[^0-9a-fA-F]/, @tmp; # spent 4µs making 12 calls to utf8::CORE:match, avg 333ns/call |
| 213 | $list = join '', | ||||
| 214 | map { $_->[1] } | ||||
| 215 | 12 | 20µs | 12 | 6µs | sort { $a->[0] <=> $b->[0] } # spent 6µs making 12 calls to utf8::CORE:match, avg 508ns/call |
| 216 | 24 | 47µs | 12 | 7µs | map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] } # spent 7µs making 12 calls to utf8::CORE:match, avg 592ns/call |
| 217 | 1 | 27µs | 1 | 10µs | grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right # spent 10µs making 1 call to utf8::CORE:sort |
| 218 | } | ||||
| 219 | |||||
| 220 | 1 | 100ns | if ($none) { | ||
| 221 | my $hextra = sprintf "%04x", $none + 1; | ||||
| 222 | $list =~ s/\tXXXX$/\t$hextra/mg; | ||||
| 223 | } | ||||
| 224 | |||||
| 225 | 1 | 900ns | if ($minbits != 1 && $minbits < 32) { # not binary property | ||
| 226 | my $top = 0; | ||||
| 227 | while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) { | ||||
| 228 | my $min = CORE::hex $1; | ||||
| 229 | my $max = defined $2 ? CORE::hex $2 : $min; | ||||
| 230 | my $val = defined $3 ? CORE::hex $3 : 0; | ||||
| 231 | $val += $max - $min if defined $3; | ||||
| 232 | $top = $val if $val > $top; | ||||
| 233 | } | ||||
| 234 | my $topbits = | ||||
| 235 | $top > 0xffff ? 32 : | ||||
| 236 | $top > 0xff ? 16 : 8; | ||||
| 237 | $bits = $topbits if $bits < $topbits; | ||||
| 238 | } | ||||
| 239 | |||||
| 240 | 1 | 100ns | my @extras; | ||
| 241 | 1 | 800ns | for my $x ($extras) { | ||
| 242 | 1 | 2µs | pos $x = 0; | ||
| 243 | 1 | 3µs | 1 | 200ns | while ($x =~ /^([^0-9a-fA-F\n])(.*)/mg) { # spent 200ns making 1 call to utf8::CORE:match |
| 244 | my $char = $1; | ||||
| 245 | my $name = $2; | ||||
| 246 | print STDERR "$1 => $2\n" if DEBUG; | ||||
| 247 | if ($char =~ /[-+!&]/) { | ||||
| 248 | my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really | ||||
| 249 | my $subobj; | ||||
| 250 | if ($c eq 'utf8') { | ||||
| 251 | $subobj = utf8->SWASHNEW($t, "", $minbits, 0); | ||||
| 252 | } | ||||
| 253 | elsif (exists &$name) { | ||||
| 254 | $subobj = utf8->SWASHNEW($name, "", $minbits, 0); | ||||
| 255 | } | ||||
| 256 | elsif ($c =~ /^([0-9a-fA-F]+)/) { | ||||
| 257 | $subobj = utf8->SWASHNEW("", $c, $minbits, 0); | ||||
| 258 | } | ||||
| 259 | return $subobj unless ref $subobj; | ||||
| 260 | push @extras, $name => $subobj; | ||||
| 261 | $bits = $subobj->{BITS} if $bits < $subobj->{BITS}; | ||||
| 262 | } | ||||
| 263 | } | ||||
| 264 | } | ||||
| 265 | |||||
| 266 | 1 | 0s | print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG; | ||
| 267 | |||||
| 268 | 1 | 8µs | my $SWASH = bless { | ||
| 269 | TYPE => $type, | ||||
| 270 | BITS => $bits, | ||||
| 271 | EXTRAS => $extras, | ||||
| 272 | LIST => $list, | ||||
| 273 | NONE => $none, | ||||
| 274 | @extras, | ||||
| 275 | } => $class; | ||||
| 276 | |||||
| 277 | 1 | 2µs | if ($file) { | ||
| 278 | $Cache{$class, $file} = $SWASH; | ||||
| 279 | } | ||||
| 280 | |||||
| 281 | 1 | 4µs | return $SWASH; | ||
| 282 | } | ||||
| 283 | |||||
| 284 | # SWASHGET has since been recast as a C function, S_swash_get (see utf8.c). | ||||
| 285 | |||||
| 286 | 1 | 9µs | 1; | ||
# spent 25µs within utf8::CORE:match which was called 39 times, avg 649ns/call:
# 12 times (7µs+0s) by utf8::SWASHNEW at line 216 of utf8_heavy.pl, avg 592ns/call
# 12 times (6µs+0s) by utf8::SWASHNEW at line 215 of utf8_heavy.pl, avg 508ns/call
# 12 times (4µs+0s) by utf8::SWASHNEW at line 212 of utf8_heavy.pl, avg 333ns/call
# once (5µs+0s) by utf8::SWASHNEW at line 98 of utf8_heavy.pl
# once (3µs+0s) by utf8::SWASHNEW at line 73 of utf8_heavy.pl
# once (200ns+0s) by utf8::SWASHNEW at line 243 of utf8_heavy.pl | |||||
# spent 10µs within utf8::CORE:sort which was called
# once (10µs+0s) by utf8::SWASHNEW at line 217 of utf8_heavy.pl | |||||
# spent 11µs within utf8::CORE:subst which was called 4 times, avg 3µs/call:
# once (4µs+0s) by utf8::SWASHNEW at line 85 of utf8_heavy.pl
# once (3µs+0s) by utf8::SWASHNEW at line 61 of utf8_heavy.pl
# once (3µs+0s) by utf8::SWASHNEW at line 60 of utf8_heavy.pl
# once (400ns+0s) by utf8::SWASHNEW at line 65 of utf8_heavy.pl | |||||
# spent 24µs within utf8::downgrade which was called 12 times, avg 2µs/call:
# 8 times (20µs+0s) by HTTP::Message::__ANON__[/usr/local/lib/perl5/site_perl/5.10.1/HTTP/Message.pm:18] at line 16 of HTTP/Message.pm, avg 2µs/call
# 4 times (5µs+0s) by Net::HTTP::Methods::__ANON__[/usr/local/lib/perl5/site_perl/5.10.1/Net/HTTP/Methods.pm:19] at line 14 of Net/HTTP/Methods.pm, avg 1µs/call | |||||
# spent 22µs within utf8::encode which was called 23 times, avg 974ns/call:
# 23 times (22µs+0s) by URI::Escape::uri_escape_utf8 at line 190 of URI/Escape.pm, avg 974ns/call |