Файловый менеджер - Редактировать - /var/www/xthruster/html/wp-content/uploads/flags/examples.tar
Назад
hstrip 0000755 00000003364 14722052773 0006022 0 ustar 00 #!/usr/bin/perl -w # This script cleans up an HTML document. # Specifically it removes deprecated styling and scripting tags. use strict; use HTML::Parser (); # configure these values my @ignore_attr = qw(bgcolor background color face style link alink vlink text onblur onchange onclick ondblclick onfocus onkeydown onkeyup onload onmousedown onmousemove onmouseout onmouseover onmouseup onreset onselect onunload ); my @ignore_tags = qw(font big small b i); my @ignore_elements = qw(script style); # make it easier to look up attributes my %ignore_attr = map { $_ => 1} @ignore_attr; sub tag { my($pos, $text) = @_; if (@$pos >= 4) { # kill some attributes my($k_offset, $k_len, $v_offset, $v_len) = @{$pos}[-4 .. -1]; my $next_attr = $v_offset ? $v_offset + $v_len : $k_offset + $k_len; my $edited; while (@$pos >= 4) { ($k_offset, $k_len, $v_offset, $v_len) = splice @$pos, -4; if ($ignore_attr{lc substr($text, $k_offset, $k_len)}) { substr($text, $k_offset, $next_attr - $k_offset) = ""; $edited++; } $next_attr = $k_offset; } # if we killed all attributed, kill any extra whitespace too $text =~ s/^(<\w+)\s+>$/$1>/ if $edited; } print $text; } sub decl { my $type = shift; print shift if $type eq "doctype"; } sub text { print shift; } HTML::Parser->new(api_version => 3, start_h => [\&tag, "tokenpos, text"], process_h => ["", ""], comment_h => ["", ""], declaration_h => [\&decl, "tagname, text"], default_h => [\&text, "text"], ignore_tags => \@ignore_tags, ignore_elements => \@ignore_elements, ) ->parse_file(shift) || die "Can't open file: $!\n"; htextsub 0000755 00000001673 14722052773 0006360 0 ustar 00 #!/usr/bin/perl -w # Shows how to mangle all plain text in an HTML document, using an arbitrary # Perl expression. Plain text is all text not within a tag declaration, i.e. # not in <p ...>, but possibly between <p> and </p> # Example (Reverse 'Debian' in all text) : # lynx -dump -source -raw http://www/debian.org > /tmp/a.txt # ./htextsub '$_ =~ s/Debian/Naibed/gi' /tmp/a.txt use strict; my $code = shift || usage(); $code = 'sub edit_print { local $_ = shift; ' . $code . '; print }'; #print $code; eval $code; die $@ if $@; use HTML::Parser 3.05; my $p = HTML::Parser->new(unbroken_text => 1, default_h => [ sub { print @_; }, "text" ], text_h => [ \&edit_print, "text" ], ); my $file = shift || usage(); $p->parse_file($file) || die "Can't open file $file: $!\n"; sub usage { my $progname = $0; $progname =~ s,^.*/,,; die "Usage: $progname <perlexpr> <filename>\n"; } hdump 0000755 00000001253 14722052773 0005621 0 ustar 00 #!/usr/bin/perl -w # This script will output event information as it parses the HTML document. # This gives the user a "Parser's eye view" of an HTML document. use HTML::Parser (); use Data::Dump (); sub h { my($event, $line, $column, $text, $tagname, $attr) = @_; my @d = (uc(substr($event,0,1)) . " L$line C$column"); substr($text, 40) = "..." if length($text) > 40; push(@d, $text); push(@d, $tagname) if defined $tagname; push(@d, $attr) if $attr; print Data::Dump::dump(@d), "\n"; } my $p = HTML::Parser->new(api_version => 3); $p->handler(default => \&h, "event, line, column, text, tagname, attr"); $p->parse_file(@ARGV ? shift : *STDIN); hanchors 0000755 00000002215 14722052773 0006310 0 ustar 00 #!/usr/bin/perl -w # This program will print out all <a href=".."> links in a # document together with the text that goes with it. # # See also HTML::LinkExtor use HTML::Parser; use Encode; my $p = HTML::Parser->new(api_version => 3, start_h => [\&a_start_handler, "self,tagname,attr"], report_tags => [qw(a img)], ); $p->parse_file(shift || die) || die $!; sub a_start_handler { my($self, $tag, $attr) = @_; return unless $tag eq "a"; return unless exists $attr->{href}; print "A $attr->{href}\n"; $self->handler(text => [], '@{dtext}' ); $self->handler(start => \&img_handler); $self->handler(end => \&a_end_handler, "self,tagname"); } sub img_handler { my($self, $tag, $attr) = @_; return unless $tag eq "img"; push(@{$self->handler("text")}, $attr->{alt} || "[IMG]"); } sub a_end_handler { my($self, $tag) = @_; my $text = encode('utf8',join("", @{$self->handler("text")})); $text =~ s/^\s+//; $text =~ s/\s+$//; $text =~ s/\s+/ /g; print "T $text\n"; $self->handler("text", undef); $self->handler("start", \&a_start_handler); $self->handler("end", undef); } hlc 0000755 00000001306 14722052773 0005251 0 ustar 00 #!/usr/bin/perl -w # This script will assume that the first command line argument # is a file containing HTML, and return a version # where all the tags are converted to lowercase. use strict; use HTML::Parser (); HTML::Parser->new(start_h => [ \&start_lc, "tokenpos, text" ], end_h => [ sub { print lc shift }, "text" ], default_h => [ sub { print shift }, "text" ], ) ->parse_file(shift) || die "Can't open file: $!\n"; sub start_lc { my($tpos, $text) = @_; for (my $i = 0; $i < @$tpos; $i += 2) { next if $i && ($i/2) % 2 == 0; # skip attribute values $_ = lc $_ for substr($text, $tpos->[$i], $tpos->[$i+1]); } print $text; } htext 0000755 00000001077 14722052773 0005644 0 ustar 00 #!/usr/bin/perl -w # Extract all plain text from an HTML file use strict; use HTML::Parser 3.00 (); use Encode; my %inside; sub tag { my($tag, $num) = @_; $inside{$tag} += $num; print " "; # not for all tags } sub text { return if $inside{script} || $inside{style}; print encode('utf8',$_[0]); } HTML::Parser->new(api_version => 3, handlers => [start => [\&tag, "tagname, '+1'"], end => [\&tag, "tagname, '-1'"], text => [\&text, "dtext"], ], marked_sections => 1, )->parse_file(shift) || die "Can't open file: $!\n";; README 0000644 00000002075 14722052773 0005441 0 ustar 00 For most of these scripts if you run them with a file argument, where the file contains some HTML, you should get some output. The 'h*sub' scripts take two arguments the first of which is a perl expression and the second an HTML file. In any case all of the files have an exlanatory comment. For example try running: lynx -dump -source -raw http://www.debian.org > /tmp/a.txt ./hanchors /tmp/a.txt Of course if http://www.debian.org is not your favourite web site you can make the appropriate substitution. hanchors - List all anchors in the HTML hlc - Correct any upper case tags to lower case hstrip - Removes deprecated scripting and styling tags and attributes htextsub - Apply arbirary perl expression to all text within HTML hrefsub - Apply arbirary perl expression to all hrefs within HTML htitle - Print title of the HTML document hdump - Output event information whilst parsing HTML document hform - Print analysis of form controls present in HTML htext - Print all the text from the HTML hform 0000755 00000003576 14722052773 0005631 0 ustar 00 #!/usr/bin/perl -w # Print information about forms and their controls present in the HTML. # See also HTML::Form module use HTML::PullParser (); use HTML::Entities qw(decode_entities); use Data::Dump qw(dump); my @FORM_TAGS = qw(form input textarea button select option); my $p = HTML::PullParser->new(file => shift || "xxx.html", start => 'tag, attr', end => 'tag', text => '@{text}', report_tags => \@FORM_TAGS, ) || die "$!"; # a little helper function sub get_text { my($p, $stop) = @_; my $text; while (defined(my $t = $p->get_token)) { if (ref $t) { $p->unget_token($t) unless $t->[0] eq $stop; last; } else { $text .= $t; } } return $text; } my @forms; while (defined(my $t = $p->get_token)) { next unless ref $t; # skip text if ($t->[0] eq "form") { shift @$t; push(@forms, $t); while (defined(my $t = $p->get_token)) { next unless ref $t; # skip text last if $t->[0] eq "/form"; if ($t->[0] eq "select") { my $sel = $t; push(@{$forms[-1]}, $t); while (defined(my $t = $p->get_token)) { next unless ref $t; # skip text last if $t->[0] eq "/select"; #print "select ", dump($t), "\n"; if ($t->[0] eq "option") { my $value = $t->[1]->{value}; my $text = get_text($p, "/option"); unless (defined $value) { $value = decode_entities($text); } push(@$sel, $value); } else { warn "$t->[0] inside select"; } } } elsif ($t->[0] =~ /^\/?option$/) { warn "option tag outside select"; } elsif ($t->[0] eq "textarea") { push(@{$forms[-1]}, $t); $t->[1]{value} = get_text($p, "/textarea"); } elsif ($t->[0] =~ m,^/,) { warn "stray $t->[0] tag"; } else { push(@{$forms[-1]}, $t); } } } else { warn "form tag $t->[0] outside form"; } } print dump(\@forms), "\n"; hrefsub 0000755 00000005336 14722052773 0006150 0 ustar 00 #!/usr/bin/perl # Perform transformations on link attributes in an HTML document. # Examples: # # $ hrefsub 's/foo/bar/g' index.html # $ hrefsub '$_=URI->new_abs($_, "http://foo")' index.html # # The first argument is a perl expression that might modify $_. # It is called for each link in the document with $_ set to # the original value of the link URI. The variables $tag and # $attr can be used to access the tagname and attributename # within the tag where the current link is found. # # The second argument is the name of a file to process. use strict; use HTML::Parser (); use URI; # Construct a hash of tag names that may have links. my %link_attr; { # To simplify things, reformat the %HTML::Tagset::linkElements # hash so that it is always a hash of hashes. require HTML::Tagset; while (my($k,$v) = each %HTML::Tagset::linkElements) { if (ref($v)) { $v = { map {$_ => 1} @$v }; } else { $v = { $v => 1}; } $link_attr{$k} = $v; } # Uncomment this to see what HTML::Tagset::linkElements thinks are # the tags with link attributes #use Data::Dump; Data::Dump::dump(\%link_attr); exit; } # Create a subroutine named 'edit' to perform the operation # passed in from the command line. The code should modify $_ # to change things. my $code = shift; my $code = 'sub edit { local $_ = shift; my($attr, $tag) = @_; no strict; ' . $code . '; $_; }'; #print $code; eval $code; die $@ if $@; # Set up the parser. my $p = HTML::Parser->new(api_version => 3); # The default is to print everything as is. $p->handler(default => sub { print @_ }, "text"); # All links are found in start tags. This handler will evaluate # &edit for each link attribute found. $p->handler(start => sub { my($tagname, $pos, $text) = @_; if (my $link_attr = $link_attr{$tagname}) { while (4 <= @$pos) { # use attribute sets from right to left # to avoid invalidating the offsets # when replacing the values my($k_offset, $k_len, $v_offset, $v_len) = splice(@$pos, -4); my $attrname = lc(substr($text, $k_offset, $k_len)); next unless $link_attr->{$attrname}; next unless $v_offset; # 0 v_offset means no value my $v = substr($text, $v_offset, $v_len); $v =~ s/^([\'\"])(.*)\1$/$2/; my $new_v = edit($v, $attrname, $tagname); next if $new_v eq $v; $new_v =~ s/\"/"/g; # since we quote with "" substr($text, $v_offset, $v_len) = qq("$new_v"); } } print $text; }, "tagname, tokenpos, text"); # Parse the file passed in from the command line my $file = shift || usage(); $p->parse_file($file) || die "Can't open file $file: $!\n"; sub usage { my $progname = $0; $progname =~ s,^.*/,,; die "Usage: $progname <perlexpr> <filename>\n"; } htitle 0000755 00000000662 14722052773 0006000 0 ustar 00 #!/usr/bin/perl # This program will print out the title of an HTML document. use strict; use HTML::Parser (); sub title_handler { my $self = shift; $self->handler(text => sub { print @_ }, "dtext"); $self->handler(end => "eof", "self"); } my $p = HTML::Parser->new(api_version => 3, start_h => [\&title_handler, "self"], report_tags => ['title'], ); $p->parse_file(shift || die) || die $!; print "\n";
| ver. 1.4 |
Github
|
.
| PHP 7.4.3-4ubuntu2.24 | Генерация страницы: 0 |
proxy
|
phpinfo
|
Настройка