Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
6f1fee36
Commit
6f1fee36
authored
May 29, 2020
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
990af9f9
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
417 additions
and
0 deletions
+417
-0
base.py
...b/site-packages/pip/_vendor/html5lib/treebuilders/base.py
+417
-0
No files found.
stbi/Lib/site-packages/pip/_vendor/html5lib/treebuilders/base.py
0 → 100644
View file @
6f1fee36
from
__future__
import
absolute_import
,
division
,
unicode_literals
from
pip._vendor.six
import
text_type
from
..constants
import
scopingElements
,
tableInsertModeElements
,
namespaces
# The scope markers are inserted when entering object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, object elements, and marquees.
Marker
=
None
listElementsMap
=
{
None
:
(
frozenset
(
scopingElements
),
False
),
"button"
:
(
frozenset
(
scopingElements
|
set
([(
namespaces
[
"html"
],
"button"
)])),
False
),
"list"
:
(
frozenset
(
scopingElements
|
set
([(
namespaces
[
"html"
],
"ol"
),
(
namespaces
[
"html"
],
"ul"
)])),
False
),
"table"
:
(
frozenset
([(
namespaces
[
"html"
],
"html"
),
(
namespaces
[
"html"
],
"table"
)]),
False
),
"select"
:
(
frozenset
([(
namespaces
[
"html"
],
"optgroup"
),
(
namespaces
[
"html"
],
"option"
)]),
True
)
}
class
Node
(
object
):
"""Represents an item in the tree"""
def
__init__
(
self
,
name
):
"""Creates a Node
:arg name: The tag name associated with the node
"""
# The tag name assocaited with the node
self
.
name
=
name
# The parent of the current node (or None for the document node)
self
.
parent
=
None
# The value of the current node (applies to text nodes and comments)
self
.
value
=
None
# A dict holding name -> value pairs for attributes of the node
self
.
attributes
=
{}
# A list of child nodes of the current node. This must include all
# elements but not necessarily other node types.
self
.
childNodes
=
[]
# A list of miscellaneous flags that can be set on the node.
self
.
_flags
=
[]
def
__str__
(
self
):
attributesStr
=
" "
.
join
([
"
%
s=
\"
%
s
\"
"
%
(
name
,
value
)
for
name
,
value
in
self
.
attributes
.
items
()])
if
attributesStr
:
return
"<
%
s
%
s>"
%
(
self
.
name
,
attributesStr
)
else
:
return
"<
%
s>"
%
(
self
.
name
)
def
__repr__
(
self
):
return
"<
%
s>"
%
(
self
.
name
)
def
appendChild
(
self
,
node
):
"""Insert node as a child of the current node
:arg node: the node to insert
"""
raise
NotImplementedError
def
insertText
(
self
,
data
,
insertBefore
=
None
):
"""Insert data as text in the current node, positioned before the
start of node insertBefore or to the end of the node's text.
:arg data: the data to insert
:arg insertBefore: True if you want to insert the text before the node
and False if you want to insert it after the node
"""
raise
NotImplementedError
def
insertBefore
(
self
,
node
,
refNode
):
"""Insert node as a child of the current node, before refNode in the
list of child nodes. Raises ValueError if refNode is not a child of
the current node
:arg node: the node to insert
:arg refNode: the child node to insert the node before
"""
raise
NotImplementedError
def
removeChild
(
self
,
node
):
"""Remove node from the children of the current node
:arg node: the child node to remove
"""
raise
NotImplementedError
def
reparentChildren
(
self
,
newParent
):
"""Move all the children of the current node to newParent.
This is needed so that trees that don't store text as nodes move the
text in the correct way
:arg newParent: the node to move all this node's children to
"""
# XXX - should this method be made more general?
for
child
in
self
.
childNodes
:
newParent
.
appendChild
(
child
)
self
.
childNodes
=
[]
def
cloneNode
(
self
):
"""Return a shallow copy of the current node i.e. a node with the same
name and attributes but with no parent or child nodes
"""
raise
NotImplementedError
def
hasContent
(
self
):
"""Return true if the node has children or text, false otherwise
"""
raise
NotImplementedError
class
ActiveFormattingElements
(
list
):
def
append
(
self
,
node
):
equalCount
=
0
if
node
!=
Marker
:
for
element
in
self
[::
-
1
]:
if
element
==
Marker
:
break
if
self
.
nodesEqual
(
element
,
node
):
equalCount
+=
1
if
equalCount
==
3
:
self
.
remove
(
element
)
break
list
.
append
(
self
,
node
)
def
nodesEqual
(
self
,
node1
,
node2
):
if
not
node1
.
nameTuple
==
node2
.
nameTuple
:
return
False
if
not
node1
.
attributes
==
node2
.
attributes
:
return
False
return
True
class
TreeBuilder
(
object
):
"""Base treebuilder implementation
* documentClass - the class to use for the bottommost node of a document
* elementClass - the class to use for HTML Elements
* commentClass - the class to use for comments
* doctypeClass - the class to use for doctypes
"""
# pylint:disable=not-callable
# Document class
documentClass
=
None
# The class to use for creating a node
elementClass
=
None
# The class to use for creating comments
commentClass
=
None
# The class to use for creating doctypes
doctypeClass
=
None
# Fragment class
fragmentClass
=
None
def
__init__
(
self
,
namespaceHTMLElements
):
"""Create a TreeBuilder
:arg namespaceHTMLElements: whether or not to namespace HTML elements
"""
if
namespaceHTMLElements
:
self
.
defaultNamespace
=
"http://www.w3.org/1999/xhtml"
else
:
self
.
defaultNamespace
=
None
self
.
reset
()
def
reset
(
self
):
self
.
openElements
=
[]
self
.
activeFormattingElements
=
ActiveFormattingElements
()
# XXX - rename these to headElement, formElement
self
.
headPointer
=
None
self
.
formPointer
=
None
self
.
insertFromTable
=
False
self
.
document
=
self
.
documentClass
()
def
elementInScope
(
self
,
target
,
variant
=
None
):
# If we pass a node in we match that. if we pass a string
# match any node with that name
exactNode
=
hasattr
(
target
,
"nameTuple"
)
if
not
exactNode
:
if
isinstance
(
target
,
text_type
):
target
=
(
namespaces
[
"html"
],
target
)
assert
isinstance
(
target
,
tuple
)
listElements
,
invert
=
listElementsMap
[
variant
]
for
node
in
reversed
(
self
.
openElements
):
if
exactNode
and
node
==
target
:
return
True
elif
not
exactNode
and
node
.
nameTuple
==
target
:
return
True
elif
(
invert
^
(
node
.
nameTuple
in
listElements
)):
return
False
assert
False
# We should never reach this point
def
reconstructActiveFormattingElements
(
self
):
# Within this algorithm the order of steps described in the
# specification is not quite the same as the order of steps in the
# code. It should still do the same though.
# Step 1: stop the algorithm when there's nothing to do.
if
not
self
.
activeFormattingElements
:
return
# Step 2 and step 3: we start with the last element. So i is -1.
i
=
len
(
self
.
activeFormattingElements
)
-
1
entry
=
self
.
activeFormattingElements
[
i
]
if
entry
==
Marker
or
entry
in
self
.
openElements
:
return
# Step 6
while
entry
!=
Marker
and
entry
not
in
self
.
openElements
:
if
i
==
0
:
# This will be reset to 0 below
i
=
-
1
break
i
-=
1
# Step 5: let entry be one earlier in the list.
entry
=
self
.
activeFormattingElements
[
i
]
while
True
:
# Step 7
i
+=
1
# Step 8
entry
=
self
.
activeFormattingElements
[
i
]
clone
=
entry
.
cloneNode
()
# Mainly to get a new copy of the attributes
# Step 9
element
=
self
.
insertElement
({
"type"
:
"StartTag"
,
"name"
:
clone
.
name
,
"namespace"
:
clone
.
namespace
,
"data"
:
clone
.
attributes
})
# Step 10
self
.
activeFormattingElements
[
i
]
=
element
# Step 11
if
element
==
self
.
activeFormattingElements
[
-
1
]:
break
def
clearActiveFormattingElements
(
self
):
entry
=
self
.
activeFormattingElements
.
pop
()
while
self
.
activeFormattingElements
and
entry
!=
Marker
:
entry
=
self
.
activeFormattingElements
.
pop
()
def
elementInActiveFormattingElements
(
self
,
name
):
"""Check if an element exists between the end of the active
formatting elements and the last marker. If it does, return it, else
return false"""
for
item
in
self
.
activeFormattingElements
[::
-
1
]:
# Check for Marker first because if it's a Marker it doesn't have a
# name attribute.
if
item
==
Marker
:
break
elif
item
.
name
==
name
:
return
item
return
False
def
insertRoot
(
self
,
token
):
element
=
self
.
createElement
(
token
)
self
.
openElements
.
append
(
element
)
self
.
document
.
appendChild
(
element
)
def
insertDoctype
(
self
,
token
):
name
=
token
[
"name"
]
publicId
=
token
[
"publicId"
]
systemId
=
token
[
"systemId"
]
doctype
=
self
.
doctypeClass
(
name
,
publicId
,
systemId
)
self
.
document
.
appendChild
(
doctype
)
def
insertComment
(
self
,
token
,
parent
=
None
):
if
parent
is
None
:
parent
=
self
.
openElements
[
-
1
]
parent
.
appendChild
(
self
.
commentClass
(
token
[
"data"
]))
def
createElement
(
self
,
token
):
"""Create an element but don't insert it anywhere"""
name
=
token
[
"name"
]
namespace
=
token
.
get
(
"namespace"
,
self
.
defaultNamespace
)
element
=
self
.
elementClass
(
name
,
namespace
)
element
.
attributes
=
token
[
"data"
]
return
element
def
_getInsertFromTable
(
self
):
return
self
.
_insertFromTable
def
_setInsertFromTable
(
self
,
value
):
"""Switch the function used to insert an element from the
normal one to the misnested table one and back again"""
self
.
_insertFromTable
=
value
if
value
:
self
.
insertElement
=
self
.
insertElementTable
else
:
self
.
insertElement
=
self
.
insertElementNormal
insertFromTable
=
property
(
_getInsertFromTable
,
_setInsertFromTable
)
def
insertElementNormal
(
self
,
token
):
name
=
token
[
"name"
]
assert
isinstance
(
name
,
text_type
),
"Element
%
s not unicode"
%
name
namespace
=
token
.
get
(
"namespace"
,
self
.
defaultNamespace
)
element
=
self
.
elementClass
(
name
,
namespace
)
element
.
attributes
=
token
[
"data"
]
self
.
openElements
[
-
1
]
.
appendChild
(
element
)
self
.
openElements
.
append
(
element
)
return
element
def
insertElementTable
(
self
,
token
):
"""Create an element and insert it into the tree"""
element
=
self
.
createElement
(
token
)
if
self
.
openElements
[
-
1
]
.
name
not
in
tableInsertModeElements
:
return
self
.
insertElementNormal
(
token
)
else
:
# We should be in the InTable mode. This means we want to do
# special magic element rearranging
parent
,
insertBefore
=
self
.
getTableMisnestedNodePosition
()
if
insertBefore
is
None
:
parent
.
appendChild
(
element
)
else
:
parent
.
insertBefore
(
element
,
insertBefore
)
self
.
openElements
.
append
(
element
)
return
element
def
insertText
(
self
,
data
,
parent
=
None
):
"""Insert text data."""
if
parent
is
None
:
parent
=
self
.
openElements
[
-
1
]
if
(
not
self
.
insertFromTable
or
(
self
.
insertFromTable
and
self
.
openElements
[
-
1
]
.
name
not
in
tableInsertModeElements
)):
parent
.
insertText
(
data
)
else
:
# We should be in the InTable mode. This means we want to do
# special magic element rearranging
parent
,
insertBefore
=
self
.
getTableMisnestedNodePosition
()
parent
.
insertText
(
data
,
insertBefore
)
def
getTableMisnestedNodePosition
(
self
):
"""Get the foster parent element, and sibling to insert before
(or None) when inserting a misnested table node"""
# The foster parent element is the one which comes before the most
# recently opened table element
# XXX - this is really inelegant
lastTable
=
None
fosterParent
=
None
insertBefore
=
None
for
elm
in
self
.
openElements
[::
-
1
]:
if
elm
.
name
==
"table"
:
lastTable
=
elm
break
if
lastTable
:
# XXX - we should really check that this parent is actually a
# node here
if
lastTable
.
parent
:
fosterParent
=
lastTable
.
parent
insertBefore
=
lastTable
else
:
fosterParent
=
self
.
openElements
[
self
.
openElements
.
index
(
lastTable
)
-
1
]
else
:
fosterParent
=
self
.
openElements
[
0
]
return
fosterParent
,
insertBefore
def
generateImpliedEndTags
(
self
,
exclude
=
None
):
name
=
self
.
openElements
[
-
1
]
.
name
# XXX td, th and tr are not actually needed
if
(
name
in
frozenset
((
"dd"
,
"dt"
,
"li"
,
"option"
,
"optgroup"
,
"p"
,
"rp"
,
"rt"
))
and
name
!=
exclude
):
self
.
openElements
.
pop
()
# XXX This is not entirely what the specification says. We should
# investigate it more closely.
self
.
generateImpliedEndTags
(
exclude
)
def
getDocument
(
self
):
"""Return the final tree"""
return
self
.
document
def
getFragment
(
self
):
"""Return the final fragment"""
# assert self.innerHTML
fragment
=
self
.
fragmentClass
()
self
.
openElements
[
0
]
.
reparentChildren
(
fragment
)
return
fragment
def
testSerializer
(
self
,
node
):
"""Serialize the subtree of node in the format required by unit tests
:arg node: the node from which to start serializing
"""
raise
NotImplementedError
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment