Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
d7b37421
Commit
d7b37421
authored
May 29, 2020
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
931fd8de
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
252 additions
and
0 deletions
+252
-0
base.py
...ib/site-packages/pip/_vendor/html5lib/treewalkers/base.py
+252
-0
No files found.
stbi/Lib/site-packages/pip/_vendor/html5lib/treewalkers/base.py
0 → 100644
View file @
d7b37421
from
__future__
import
absolute_import
,
division
,
unicode_literals
from
xml.dom
import
Node
from
..constants
import
namespaces
,
voidElements
,
spaceCharacters
# Public names of this module: the node-type constants plus the two walker
# base classes.
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
           "TreeWalker", "NonRecursiveTreeWalker"]

# Node-type constants; each one is the matching node-type code from
# ``xml.dom.Node``.
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
ELEMENT = Node.ELEMENT_NODE
COMMENT = Node.COMMENT_NODE
ENTITY = Node.ENTITY_NODE
# String marker (not an ``xml.dom.Node`` code); presumably returned by
# concrete walkers for nodes of any other type -- confirm against subclasses.
UNKNOWN = "<#UNKNOWN#>"

# Join the imported whitespace characters into one string so the result can
# be handed directly to ``lstrip()`` / ``rstrip()`` in ``TreeWalker.text``.
spaceCharacters = "".join(spaceCharacters)
class TreeWalker(object):
    """Walks a tree yielding tokens

    Tokens are dicts that all have a ``type`` field specifying the type of the
    token.

    This base class only provides the token factories; ``__iter__`` must be
    implemented by a subclass.

    """
    def __init__(self, tree):
        """Creates a TreeWalker

        :arg tree: the tree to walk

        """
        self.tree = tree

    def __iter__(self):
        """Iterates over the tokens of the tree; subclasses must override"""
        raise NotImplementedError

    def error(self, msg):
        """Generates an error token with the given message

        :arg msg: the error message

        :returns: SerializeError token

        """
        return {"type": "SerializeError", "data": msg}

    def emptyTag(self, namespace, name, attrs, hasChildren=False):
        """Generates an EmptyTag token

        This is a generator: it always yields the EmptyTag token and, when
        ``hasChildren`` is true, a SerializeError token after it.

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :arg attrs: the attributes of the element as a dict

        :arg hasChildren: whether or not to yield a SerializeError token
            because this tag shouldn't have children

        :returns: generator of one EmptyTag token, optionally followed by a
            SerializeError token

        """
        yield {"type": "EmptyTag", "name": name,
               "namespace": namespace,
               "data": attrs}
        if hasChildren:
            yield self.error("Void element has children")

    def startTag(self, namespace, name, attrs):
        """Generates a StartTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :arg attrs: the attributes of the element as a dict

        :returns: StartTag token

        """
        return {"type": "StartTag",
                "name": name,
                "namespace": namespace,
                "data": attrs}

    def endTag(self, namespace, name):
        """Generates an EndTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :returns: EndTag token

        """
        return {"type": "EndTag",
                "name": name,
                "namespace": namespace}

    def text(self, data):
        """Generates SpaceCharacters and Characters tokens

        Depending on what's in the data, this generates zero or more
        ``SpaceCharacters`` and ``Characters`` tokens, in this order:

        1. a ``SpaceCharacters`` token holding the leading run of space
           characters, if there is one;
        2. a ``Characters`` token holding everything between the leading and
           trailing runs of space characters, if that part is non-empty;
        3. a ``SpaceCharacters`` token holding the trailing run of space
           characters, if there is one.

        Empty data yields no tokens; data made only of space characters
        yields a single ``SpaceCharacters`` token.

        :arg data: the text data

        :returns: generator of ``SpaceCharacters`` and ``Characters`` tokens

        """
        # Whatever lstrip() removed is the leading whitespace run.
        middle = data.lstrip(spaceCharacters)
        left = data[:len(data) - len(middle)]
        if left:
            yield {"type": "SpaceCharacters", "data": left}
        # Repeat on the remainder to split off the trailing whitespace run.
        data = middle
        middle = data.rstrip(spaceCharacters)
        right = data[len(middle):]
        if middle:
            yield {"type": "Characters", "data": middle}
        if right:
            yield {"type": "SpaceCharacters", "data": right}

    def comment(self, data):
        """Generates a Comment token

        :arg data: the comment

        :returns: Comment token

        """
        return {"type": "Comment", "data": data}

    def doctype(self, name, publicId=None, systemId=None):
        """Generates a Doctype token

        :arg name: stored under the ``name`` key of the token

        :arg publicId: stored under the ``publicId`` key of the token

        :arg systemId: stored under the ``systemId`` key of the token

        :returns: the Doctype token

        """
        return {"type": "Doctype",
                "name": name,
                "publicId": publicId,
                "systemId": systemId}

    def entity(self, name):
        """Generates an Entity token

        :arg name: the entity name

        :returns: an Entity token

        """
        return {"type": "Entity", "name": name}

    def unknown(self, nodeType):
        """Handles unknown node types

        :arg nodeType: the unrecognised node type; may be any object, for
            example an integer node-type code

        :returns: SerializeError token naming the node type

        """
        # Format instead of concatenating: node types are frequently integer
        # codes, and ``str + int`` would raise TypeError instead of producing
        # the error token.
        return self.error("Unknown node type: {0}".format(nodeType))
class NonRecursiveTreeWalker(TreeWalker):
    """Tree walker that traverses the tree with an explicit loop

    Subclasses describe their tree through four navigation hooks
    (``getNodeDetails``, ``getFirstChild``, ``getNextSibling`` and
    ``getParentNode``); ``__iter__`` uses them to emit tokens in document
    order without recursion.

    """
    def getNodeDetails(self, node):
        """Describes a node; subclasses must override

        ``__iter__`` expects a sequence whose first item is the node type and
        whose remaining items depend on that type:

        * ``DOCTYPE``: the arguments for ``doctype()``
        * ``TEXT``: the arguments for ``text()``
        * ``ELEMENT``: namespace, name, attributes, hasChildren
        * ``COMMENT``: the comment data
        * ``ENTITY``: the entity name
        * ``DOCUMENT``: nothing further is read
        * anything else: the value passed on to ``unknown()``

        :arg node: the node to describe

        """
        raise NotImplementedError

    def getFirstChild(self, node):
        """Returns the first child of ``node`` or ``None``; must be overridden

        :arg node: a node that reported having children

        """
        raise NotImplementedError

    def getNextSibling(self, node):
        """Returns the next sibling of ``node`` or ``None``; must be overridden

        :arg node: the node whose sibling is wanted

        """
        raise NotImplementedError

    def getParentNode(self, node):
        """Returns the parent of ``node``; subclasses must override

        :arg node: the node whose parent is wanted

        """
        raise NotImplementedError

    def __iter__(self):
        """Yields the tokens for ``self.tree`` in document order"""
        node = self.tree
        while node is not None:
            info = self.getNodeDetails(node)
            kind, payload = info[0], info[1:]
            descend = False

            if kind == DOCTYPE:
                yield self.doctype(*payload)

            elif kind == TEXT:
                for tok in self.text(*payload):
                    yield tok

            elif kind == ELEMENT:
                namespace, name, attributes, descend = payload
                in_html = not namespace or namespace == namespaces["html"]
                if in_html and name in voidElements:
                    for tok in self.emptyTag(namespace, name, attributes,
                                             descend):
                        yield tok
                    # Void elements never have their children walked.
                    descend = False
                else:
                    yield self.startTag(namespace, name, attributes)

            elif kind == COMMENT:
                yield self.comment(payload[0])

            elif kind == ENTITY:
                yield self.entity(payload[0])

            elif kind == DOCUMENT:
                descend = True

            else:
                yield self.unknown(payload[0])

            child = self.getFirstChild(node) if descend else None
            if child is not None:
                # Go down one level and handle the child on the next pass.
                node = child
                continue

            # Nothing below this node: close it, then move to the next
            # sibling, climbing (and closing ancestors) until one exists or
            # the root of the walk has been closed.
            while node is not None:
                info = self.getNodeDetails(node)
                kind, payload = info[0], info[1:]
                if kind == ELEMENT:
                    namespace, name, _attributes, _children = payload
                    foreign = namespace and namespace != namespaces["html"]
                    # Void HTML elements were emitted as EmptyTag tokens and
                    # therefore get no EndTag.
                    if foreign or name not in voidElements:
                        yield self.endTag(namespace, name)
                if self.tree is node:
                    # Back at the root: the walk is complete.
                    node = None
                    break
                sibling = self.getNextSibling(node)
                if sibling is not None:
                    node = sibling
                    break
                node = self.getParentNode(node)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment