Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
324c06c4
Commit
324c06c4
authored
May 29, 2020
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
ad23c157
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
342 additions
and
0 deletions
+342
-0
__init__.py
stbi/Lib/site-packages/pip/_vendor/webencodings/__init__.py
+342
-0
No files found.
stbi/Lib/site-packages/pip/_vendor/webencodings/__init__.py
0 → 100644
View file @
324c06c4
# coding: utf-8
"""
webencodings
~~~~~~~~~~~~
This is a Python implementation of the `WHATWG Encoding standard
<http://encoding.spec.whatwg.org/>`_. See README for details.
:copyright: Copyright 2012 by Simon Sapin
:license: BSD, see LICENSE for details.
"""
from
__future__
import
unicode_literals
import
codecs
from
.labels
import
LABELS
# Version string of this package.
VERSION = '0.5.1'

# Some names in Encoding are not valid Python aliases. Remap these.
# Keys are names as used by the Encoding standard, values are the codec
# names handed to ``codecs.lookup``.
PYTHON_NAMES = {
    'iso-8859-8-i': 'iso-8859-8',
    'x-mac-cyrillic': 'mac-cyrillic',
    'macintosh': 'mac-roman',
    'windows-874': 'cp874',
}

# Filled lazily by ``lookup``: canonical encoding name -> Encoding object.
CACHE = {}
def ascii_lower(string):
    r"""Lower-case the ASCII letters of a string and nothing else.

    Only A-Z is mapped to a-z; every other character is returned unchanged.

    :param string: An Unicode string.
    :returns: A new Unicode string.

    This is used for `ASCII case-insensitive
    <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_
    matching of encoding labels.
    The same matching is also used, among other things,
    for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_.

    This is different from the :meth:`~py:str.lower` method of Unicode strings
    which also affect non-ASCII characters,
    sometimes mapping them into the ASCII range:

        >>> keyword = u'Bac\N{KELVIN SIGN}ground'
        >>> assert keyword.lower() == u'background'
        >>> assert ascii_lower(keyword) != keyword.lower()
        >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'

    """
    # Round-trip through UTF-8 and lower-case at the byte level.
    # This turns out to be faster than unicode.translate()
    utf8_bytes = string.encode('utf8')
    lowered_bytes = utf8_bytes.lower()
    return lowered_bytes.decode('utf8')
def lookup(label):
    """
    Look for an encoding by its label.

    This is the spec's `get an encoding
    <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
    Supported labels are listed there.

    The label is stripped of leading/trailing ASCII whitespace and matched
    ASCII case-insensitively against ``LABELS``. Resolved encodings are
    memoized in ``CACHE`` under their canonical name, so repeated lookups
    of the same encoding return the same object.

    :param label: A string.
    :returns:
        An :class:`Encoding` object, or :obj:`None` for an unknown label.

    """
    # Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020.
    # The trailing space in the literal is significant: it is U+0020.
    label = ascii_lower(label.strip('\t\n\f\r '))
    name = LABELS.get(label)
    if name is None:
        return None

    encoding = CACHE.get(name)
    if encoding is None:
        if name == 'x-user-defined':
            # Implemented by a sibling module rather than a registered codec.
            from .x_user_defined import codec_info
        else:
            python_name = PYTHON_NAMES.get(name, name)
            # Any python_name value that gets to here should be valid.
            codec_info = codecs.lookup(python_name)
        encoding = Encoding(name, codec_info)
        CACHE[name] = encoding
    return encoding
def _get_encoding(encoding_or_label):
    """
    Normalize an argument that may be an encoding object or a label.

    Anything exposing a ``codec_info`` attribute is treated as an encoding
    object and returned untouched; everything else is resolved with
    :func:`lookup`.

    :param encoding_or_label: An :class:`Encoding` object or a label string.
    :returns: An :class:`Encoding` object.
    :raises: :exc:`~exceptions.LookupError` for an unknown label.

    """
    is_label = not hasattr(encoding_or_label, 'codec_info')
    if not is_label:
        return encoding_or_label

    resolved = lookup(encoding_or_label)
    if resolved is not None:
        return resolved
    raise LookupError('Unknown encoding label: %r' % encoding_or_label)
class Encoding(object):
    """Represents a character encoding such as UTF-8,
    that can be used for decoding or encoding.

    Instances are plain value holders; both attributes are stored exactly
    as passed to the constructor.

    .. attribute:: name

        Canonical name of the encoding

    .. attribute:: codec_info

        The actual implementation of the encoding,
        a stdlib :class:`~codecs.CodecInfo` object.
        See :func:`codecs.register`.

    """

    def __init__(self, name, codec_info):
        # Assigned in the same order as the parameters.
        self.name = name
        self.codec_info = codec_info

    def __repr__(self):
        # Only the name is shown, e.g. ``<Encoding utf-8>``.
        template = '<Encoding %s>'
        return template % self.name
#: The UTF-8 encoding. Should be used for new content and formats.
UTF8 = lookup('utf-8')

# UTF-16 encodings, resolved once when the module is loaded. They are what
# _detect_bom() returns when the input starts with the matching byte order
# mark.
_UTF16LE = lookup('utf-16le')
_UTF16BE = lookup('utf-16be')
def decode(input, fallback_encoding, errors='replace'):
    """
    Decode a single byte string, honouring a leading BOM if there is one.

    :param input: A byte string
    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :return:
        A ``(output, encoding)`` tuple of an Unicode string
        and an :obj:`Encoding`.

    """
    # Resolve the fallback first so that an invalid label fails early,
    # even when the input carries a BOM and the fallback ends up unused.
    fallback = _get_encoding(fallback_encoding)
    bom_encoding, payload = _detect_bom(input)
    chosen = bom_encoding if bom_encoding is not None else fallback
    # The codec returns (text, length consumed); only the text is kept.
    decoded = chosen.codec_info.decode(payload, errors)
    return decoded[0], chosen
def _detect_bom(input):
    """Return (bom_encoding, input), with any BOM removed from the input.

    ``bom_encoding`` is :obj:`None` and the input is returned unchanged
    when it does not start with a UTF-16LE, UTF-16BE or UTF-8 byte order
    mark.
    """
    if input.startswith(b'\xFF\xFE'):
        bom_encoding, bom_length = _UTF16LE, 2
    elif input.startswith(b'\xFE\xFF'):
        bom_encoding, bom_length = _UTF16BE, 2
    elif input.startswith(b'\xEF\xBB\xBF'):
        bom_encoding, bom_length = UTF8, 3
    else:
        return None, input
    # Drop the mark itself; only the payload is handed back.
    return bom_encoding, input[bom_length:]
def encode(input, encoding=UTF8, errors='strict'):
    """
    Encode a single Unicode string to bytes.

    :param input: An Unicode string.
    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :return: A byte string.

    """
    resolved = _get_encoding(encoding)
    # The codec returns (bytes, length consumed); only the bytes are kept.
    encoded = resolved.codec_info.encode(input, errors)
    return encoded[0]
def iter_decode(input, fallback_encoding, errors='replace'):
    """
    "Pull"-based decoder.

    :param input:
        An iterable of byte strings.

        The input is first consumed just enough to determine the encoding
        based on the presence of a BOM,
        then consumed on demand when the return value is.
    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :returns:
        An ``(output, encoding)`` tuple.
        :obj:`output` is an iterable of Unicode strings,
        :obj:`encoding` is the :obj:`Encoding` that is being used.

    """
    push_decoder = IncrementalDecoder(fallback_encoding, errors)
    chunks = _iter_decode_generator(input, push_decoder)
    # The generator's first item is the encoding; pulling it here is what
    # consumes the start of the input. The remaining items are the output.
    detected = next(chunks)
    return chunks, detected
def _iter_decode_generator(input, decoder):
    """Return a generator that first yields the :obj:`Encoding`,
    then yields output chunks as Unicode strings.

    :param input: An iterable of byte strings.
    :param decoder:
        An object with a ``decode(bytes, final=False)`` method and an
        ``encoding`` attribute that is set once output has been produced
        or the final call has been made.

    Empty output chunks are never yielded.
    """
    feed = decoder.decode
    chunks = iter(input)

    # Phase 1: feed chunks until the decoder produces some output.
    exhausted = True
    for chunk in chunks:
        text = feed(chunk)
        if text:
            exhausted = False
            break
    if exhausted:
        # Input exhausted without determining the encoding
        text = feed(b'', final=True)

    assert decoder.encoding is not None
    yield decoder.encoding
    if text:
        yield text
    if exhausted:
        # The final call was already made above; nothing is left to decode.
        return

    # Phase 2: stream the remaining chunks, then flush the decoder.
    for chunk in chunks:
        text = feed(chunk)
        if text:
            yield text
    tail = feed(b'', final=True)
    if tail:
        yield tail
def iter_encode(input, encoding=UTF8, errors='strict'):
    """
    "Pull"-based encoder.

    :param input: An iterable of Unicode strings.
    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :returns: An iterable of byte strings.

    """
    # Build the encoder here, not inside the generator, so that an invalid
    # `encoding` label fails early instead of on first iteration.
    encoder = IncrementalEncoder(encoding, errors)
    return _iter_encode_generator(input, encoder.encode)
def _iter_encode_generator(input, encode):
    """Yield the non-empty byte strings produced by encoding ``input``.

    :param input: An iterable of Unicode strings.
    :param encode:
        A callable taking ``(string, final=False)`` and returning bytes.
        It is called once per chunk, then a last time with an empty string
        and ``final=True``.
    """
    for piece in input:
        encoded = encode(piece)
        if not encoded:
            # Empty results are skipped rather than yielded.
            continue
        yield encoded
    tail = encode('', final=True)
    if tail:
        yield tail
class IncrementalDecoder(object):
    """
    "Push"-based decoder.

    Input is fed chunk by chunk through :meth:`decode`. The first bytes are
    held back until it is known whether the input starts with a BOM.

    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

    """

    def __init__(self, fallback_encoding, errors='replace'):
        # Fail early if `encoding` is an invalid label.
        self._fallback_encoding = _get_encoding(fallback_encoding)
        self._errors = errors
        # Bytes received so far while the encoding is still undetermined.
        self._buffer = b''
        # Bound ``decode`` of the underlying codec, once one is chosen.
        self._decoder = None
        #: The actual :class:`Encoding` that is being used,
        #: or :obj:`None` if that is not determined yet.
        #: (Ie. if there is not enough input yet to determine
        #: if there is a BOM.)
        self.encoding = None  # Not known yet.

    def decode(self, input, final=False):
        """Decode one chunk of the input.

        :param input: A byte string.
        :param final:
            Indicate that no more input is available.
            Must be :obj:`True` if this is the last call.
        :returns: An Unicode string.

        """
        # Fast path: the encoding was settled by an earlier call.
        if self._decoder is not None:
            return self._decoder(input, final)

        pending = self._buffer + input
        encoding, pending = _detect_bom(pending)
        if encoding is None:
            if len(pending) < 3 and not final:
                # Not enough data yet.
                self._buffer = pending
                return ''
            # No BOM
            encoding = self._fallback_encoding

        bound_decode = encoding.codec_info.incrementaldecoder(
            self._errors).decode
        self._decoder = bound_decode
        self.encoding = encoding
        return bound_decode(pending, final)
class IncrementalEncoder(object):
    """
    "Push"-based encoder.

    The instance exposes a single ``encode`` attribute: the bound ``encode``
    method of the underlying codec's incremental encoder.

    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

    .. method:: encode(input, final=False)

        :param input: An Unicode string.
        :param final:
            Indicate that no more input is available.
            Must be :obj:`True` if this is the last call.
        :returns: A byte string.

    """

    def __init__(self, encoding=UTF8, errors='strict'):
        resolved = _get_encoding(encoding)
        codec_encoder = resolved.codec_info.incrementalencoder(errors)
        self.encode = codec_encoder.encode
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment