NAME
Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".
SYNOPSIS
use Tag::Reader::Perl;
my $obj = Tag::Reader::Perl->new;
my @tokens = $obj->gettoken;
$obj->set_file($file, $force);
$obj->set_text($text, $force);
METHODS
"new()"
Constructor.
"gettoken()"
Get parsed token.
Returns a structure describing the parsed token in array context. See TOKEN STRUCTURE.
e.g. → ('<xml>', 'xml', 1, 1)
Returns the parsed token data in scalar context.
e.g. → '<xml>'
"set_file($file[, $force])"
Set file for parsing.
If $force is present, reset the file for parsing even if a previous text or file exists.
"set_text($text[, $force])"
Set text for parsing.
If $force is present, reset the text for parsing even if a previous text or file exists.
TOKEN STRUCTURE
Structure contains 4 fields in array:
- parsed data
- tag type
- number of line
- number of column in line
Tag types are:
- '[\w:]+' - element name.
- '/[\w:]+' - end of element name.
- '!data' - data
- '![cdata[' - cdata
- '!--' - comment
- '?\w+' - instruction
- '![\w+' - conditional
- '!attlist' - DTD attlist
- '!element' - DTD element
- '!entity' - DTD entity
- '!notation' - DTD notation
ERRORS
new():
From Class::Utils::set_params():
Unknown parameter '%s'.
set_text():
Bad tag.
Bad text.
Cannot set new data if exists data.
set_file():
Bad tag.
Bad file.
Cannot set new data if exists data.
Cannot open file '%s'.
EXAMPLE1
# Pragmas.
use strict;
use warnings;
# Modules.
use Encode qw(decode_utf8 encode_utf8);
use Tag::Reader::Perl;
# Object.
my $obj = Tag::Reader::Perl->new;
# Example data.
my $sgml = <<'END';
<dokument>
  <adresa stát="cs">
    <město>
    <ulice>Nová</ulice>
    <číslo>5</číslo>
  </adresa>
</dokument>
END
# Set data to object.
$obj->set_text(decode_utf8($sgml));
# Tokenize.
while (my @tag = $obj->gettoken) {
print "[\n";
print "\t[0]: '".encode_utf8($tag[0])."'\n";
print "\t[1]: ".encode_utf8($tag[1])."\n";
print "\t[2]: $tag[2]\n";
print "\t[3]: $tag[3]\n";
print "]\n";
}
# Output:
# [
# [0]: '<dokument>'
# [1]: dokument
# [2]: 1
# [3]: 1
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 1
# [3]: 11
# ]
# [
# [0]: '<adresa stát="cs">'
# [1]: adresa
# [2]: 2
# [3]: 3
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 2
# [3]: 21
# ]
# [
# [0]: '<město>'
# [1]: město
# [2]: 3
# [3]: 5
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 3
# [3]: 12
# ]
# [
# [0]: '<ulice>'
# [1]: ulice
# [2]: 4
# [3]: 5
# ]
# [
# [0]: 'Nová'
# [1]: !data
# [2]: 4
# [3]: 12
# ]
# [
# [0]: '</ulice>'
# [1]: /ulice
# [2]: 4
# [3]: 16
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 4
# [3]: 24
# ]
# [
# [0]: '<číslo>'
# [1]: číslo
# [2]: 5
# [3]: 5
# ]
# [
# [0]: '5'
# [1]: !data
# [2]: 5
# [3]: 12
# ]
# [
# [0]: '</číslo>'
# [1]: /číslo
# [2]: 5
# [3]: 13
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 5
# [3]: 21
# ]
# [
# [0]: '</adresa>'
# [1]: /adresa
# [2]: 6
# [3]: 3
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 6
# [3]: 12
# ]
# [
# [0]: '</dokument>'
# [1]: /dokument
# [2]: 7
# [3]: 1
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 7
# [3]: 12
# ]
DEPENDENCIES
Class::Utils, Error::Pure, Readonly.
SEE ALSO
Tag::Reader
Parse SGML/HTML/XML by each "tag".
HTML::TagReader
Perl extension module for reading html/sgml/xml files by tags.
AUTHOR
Michal Špaček
LICENSE AND COPYRIGHT
© Michal Špaček 2005-2016
BSD 2-Clause License
VERSION
0.01