Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
931fd8de
Commit
931fd8de
authored
May 29, 2020
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
6c55340d
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
154 additions
and
0 deletions
+154
-0
__init__.py
...ite-packages/pip/_vendor/html5lib/treewalkers/__init__.py
+154
-0
No files found.
stbi/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py
0 → 100644
View file @
931fd8de
"""A collection of modules for iterating through different kinds of
tree, generating tokens identical to those produced by the tokenizer
module.
To create a tree walker for a new type of tree, you need to
implement a tree walker object (called TreeWalker by convention) that
implements a 'serialize' method taking a tree as sole argument and
returning an iterator generating tokens.
"""
from
__future__
import
absolute_import
,
division
,
unicode_literals
from
..
import
constants
from
.._utils
import
default_etree
# Names exported by ``from <this package> import *``.
__all__ = ["getTreeWalker", "pprint"]
# Cache of already-resolved TreeWalker classes, keyed by lower-cased tree type.
# The "etree" type is never stored here by getTreeWalker (see the note there).
treeWalkerCache = {}


def getTreeWalker(treeType, implementation=None, **kwargs):
    """Get a TreeWalker class for various types of tree with built-in support

    :arg str treeType: the name of the tree type required (case-insensitive).
        Supported values are:

        * "dom": The xml.dom.minidom DOM implementation
        * "etree": A generic walker for tree implementations exposing an
          elementtree-like interface (known to work with ElementTree,
          cElementTree and lxml.etree).
        * "lxml": Optimized walker for lxml.etree
        * "genshi": a Genshi stream

    :arg implementation: A module implementing the tree type e.g.
        xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
        tree type only).

    :arg kwargs: keyword arguments passed to the etree walker--for other
        walkers, this has no effect

    :returns: a TreeWalker class, or ``None`` when ``treeType`` is not one of
        the supported values and has no entry in ``treeWalkerCache``

    """
    treeType = treeType.lower()
    if treeType not in treeWalkerCache:
        # The walker submodules are imported lazily so that only the backend
        # actually requested gets loaded.
        if treeType == "dom":
            from . import dom
            treeWalkerCache[treeType] = dom.TreeWalker
        elif treeType == "genshi":
            from . import genshi
            treeWalkerCache[treeType] = genshi.TreeWalker
        elif treeType == "lxml":
            from . import etree_lxml
            treeWalkerCache[treeType] = etree_lxml.TreeWalker
        elif treeType == "etree":
            from . import etree
            if implementation is None:
                implementation = default_etree
            # XXX: NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeWalker
    # Unknown tree types fall through to here and yield None.
    return treeWalkerCache.get(treeType)
def concatenateCharacterTokens(tokens):
    """Merge runs of adjacent character tokens into single tokens.

    Consecutive tokens whose "type" is "Characters" or "SpaceCharacters" are
    collapsed into one ``{"type": "Characters", "data": ...}`` token whose
    data is the concatenation of their "data" values. Every other token is
    yielded unchanged, in its original position.

    :arg tokens: an iterable of token dicts, each having a "type" key

    :returns: a generator of token dicts

    """
    pendingCharacters = []
    for token in tokens:
        token_type = token["type"]
        if token_type in ("Characters", "SpaceCharacters"):
            pendingCharacters.append(token["data"])
        else:
            # A non-character token ends the current run: flush it first so
            # the relative order of tokens is preserved.
            if pendingCharacters:
                yield {"type": "Characters", "data": "".join(pendingCharacters)}
                pendingCharacters = []
            yield token
    # Flush a run that extends to the end of the stream.
    if pendingCharacters:
        yield {"type": "Characters", "data": "".join(pendingCharacters)}
def _prefixedName(namespace, localname):
    """Return "<prefix> <localname>" for a namespaced tag or attribute name.

    The namespace is replaced by its entry in ``constants.prefixes`` when it
    has one; otherwise the namespace value itself is used as the prefix.
    """
    if namespace in constants.prefixes:
        ns = constants.prefixes[namespace]
    else:
        ns = namespace
    return "%s %s" % (ns, localname)


def pprint(walker):
    """Pretty printer for tree walkers

    Takes a TreeWalker instance and pretty prints the output of walking the tree.

    :arg walker: a TreeWalker instance

    :returns: the formatted lines joined with newlines, as a single string

    :raises ValueError: if a token has an unrecognised "type"

    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(walker):
        token_type = token["type"]
        if token_type in ("StartTag", "EmptyTag"):
            # tag name (only non-HTML namespaces are shown as a prefix)
            if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
                name = _prefixedName(token["namespace"], token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            # attributes (sorted for consistent ordering)
            attrs = token["data"]
            for (namespace, localname), value in sorted(attrs.items()):
                if namespace:
                    name = _prefixedName(namespace, localname)
                else:
                    name = localname
                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
            # self-closing: no matching EndTag will arrive, so undo the indent
            if token_type == "EmptyTag":
                indent -= 2

        elif token_type == "EndTag":
            indent -= 2

        elif token_type == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))

        elif token_type == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["publicId"],
                                   token["systemId"] if token["systemId"] else ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" " * indent,))

        elif token_type == "Characters":
            output.append("%s\"%s\"" % (" " * indent, token["data"]))

        elif token_type == "SpaceCharacters":
            # Raised explicitly rather than via ``assert False`` so the check
            # is not stripped when Python runs with -O.
            raise AssertionError(
                "concatenateCharacterTokens should have got rid of all Space tokens")

        else:
            raise ValueError("Unknown token type, %s" % token_type)

    return "\n".join(output)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment