2026-1-6
This commit is contained in:
62
venv/Lib/site-packages/emoji/__init__.py
Normal file
62
venv/Lib/site-packages/emoji/__init__.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""
|
||||
emoji for Python
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
emoji terminal output for Python.
|
||||
|
||||
>>> import emoji
|
||||
>>> print(emoji.emojize('Python is :thumbsup:', language='alias'))
|
||||
Python is 👍
|
||||
>>> print(emoji.emojize('Python is :thumbs_up:'))
|
||||
Python is 👍
|
||||
"""
|
||||
|
||||
|
||||
from emoji.core import *
|
||||
from emoji.unicode_codes import *
|
||||
|
||||
__all__ = [
|
||||
# emoji.core
|
||||
'emojize', 'demojize', 'analyze', 'config',
|
||||
'emoji_list', 'distinct_emoji_list', 'emoji_count',
|
||||
'replace_emoji', 'is_emoji', 'purely_emoji', 'version',
|
||||
'Token', 'EmojiMatch', 'EmojiMatchZWJ', 'EmojiMatchZWJNonRGI',
|
||||
# emoji.unicode_codes
|
||||
'EMOJI_DATA', 'STATUS', 'LANGUAGES',
|
||||
]
|
||||
|
||||
__version__ = '2.10.0'
|
||||
__author__ = 'Taehoon Kim, Kevin Wurster'
|
||||
__email__ = 'carpedm20@gmail.com'
|
||||
# and wursterk@gmail.com, tahir.jalilov@gmail.com
|
||||
__source__ = 'https://github.com/carpedm20/emoji/'
|
||||
__license__ = '''
|
||||
New BSD License
|
||||
|
||||
Copyright (c) 2014-2023, Taehoon Kim, Kevin Wurster
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* The names of its contributors may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
37
venv/Lib/site-packages/emoji/__init__.pyi
Normal file
37
venv/Lib/site-packages/emoji/__init__.pyi
Normal file
@@ -0,0 +1,37 @@
|
||||
from .core import (
|
||||
demojize as demojize,
|
||||
distinct_emoji_list as distinct_emoji_list,
|
||||
emoji_count as emoji_count,
|
||||
emoji_list as emoji_list,
|
||||
emojize as emojize,
|
||||
is_emoji as is_emoji,
|
||||
replace_emoji as replace_emoji,
|
||||
version as version,
|
||||
analyze as analyze,
|
||||
config as config,
|
||||
)
|
||||
from .tokenizer import (
|
||||
Token as Token,
|
||||
EmojiMatch as EmojiMatch,
|
||||
EmojiMatchZWJ as EmojiMatchZWJ,
|
||||
EmojiMatchZWJNonRGI as EmojiMatchZWJNonRGI,
|
||||
)
|
||||
|
||||
|
||||
from .unicode_codes import EMOJI_DATA, LANGUAGES, STATUS
|
||||
|
||||
__all__ = [
|
||||
# emoji.core
|
||||
'emojize', 'demojize', 'analyze', 'config',
|
||||
'emoji_list', 'distinct_emoji_list', 'emoji_count',
|
||||
'replace_emoji', 'is_emoji', 'version',
|
||||
'Token', 'EmojiMatch', 'EmojiMatchZWJ', 'EmojiMatchZWJNonRGI',
|
||||
# emoji.unicode_codes
|
||||
'EMOJI_DATA', 'STATUS', 'LANGUAGES',
|
||||
]
|
||||
|
||||
__version__: str
|
||||
__author__: str
|
||||
__email__: str
|
||||
__source__: str
|
||||
__license__: str
|
||||
Binary file not shown.
BIN
venv/Lib/site-packages/emoji/__pycache__/core.cpython-312.pyc
Normal file
BIN
venv/Lib/site-packages/emoji/__pycache__/core.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
373
venv/Lib/site-packages/emoji/core.py
Normal file
373
venv/Lib/site-packages/emoji/core.py
Normal file
@@ -0,0 +1,373 @@
|
||||
"""
|
||||
emoji.core
|
||||
~~~~~~~~~~
|
||||
|
||||
Core components for emoji.
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
from typing import Iterator
|
||||
|
||||
from emoji import unicode_codes
|
||||
from emoji.tokenizer import Token, EmojiMatch, EmojiMatchZWJ, EmojiMatchZWJNonRGI, tokenize, filter_tokens
|
||||
|
||||
__all__ = [
|
||||
'emojize', 'demojize', 'analyze', 'config',
|
||||
'emoji_list', 'distinct_emoji_list', 'emoji_count',
|
||||
'replace_emoji', 'is_emoji', 'purely_emoji', 'version',
|
||||
'Token', 'EmojiMatch', 'EmojiMatchZWJ', 'EmojiMatchZWJNonRGI',
|
||||
]
|
||||
|
||||
_DEFAULT_DELIMITER = ':'
|
||||
# In Arabic language, the unicode character "\u0655" should be kept so we add it to the pattern below
|
||||
_EMOJI_NAME_PATTERN = '\\w\\-&.’”“()!#*+,/«»\u0300\u0301\u0302\u0303\u0306\u0308\u030a\u0327\u064b\u064e\u064f\u0650\u0653\u0654\u3099\u30fb\u309a\u0655'
|
||||
|
||||
|
||||
class config:
    """Module-wide configuration"""

    demojize_keep_zwj = True
    """Change the behavior of :func:`emoji.demojize()` regarding
    zero-width-joiners (ZWJ/``\\u200D``) in emoji that are not
    "recommended for general interchange" (non-RGI).
    It has no effect on RGI emoji.

    For example this family emoji with different skin tones "👨👩🏿👧🏻👦🏾" contains four
    person emoji that are joined together by three ZWJ characters:
    ``👨\\u200D👩🏿\\u200D👧🏻\\u200D👦🏾``

    If ``True``, the zero-width-joiners will be kept and :func:`emoji.emojize()` can
    reverse the :func:`emoji.demojize()` operation:
    ``emoji.emojize(emoji.demojize(s)) == s``

    The example emoji would be converted to
    ``:man:\\u200d:woman_dark_skin_tone:\\u200d:girl_light_skin_tone:\\u200d:boy_medium-dark_skin_tone:``

    If ``False``, the zero-width-joiners will be removed and :func:`emoji.emojize()`
    can only reverse the individual emoji: ``emoji.emojize(emoji.demojize(s)) != s``

    The example emoji would be converted to
    ``:man::woman_dark_skin_tone::girl_light_skin_tone::boy_medium-dark_skin_tone:``
    """

    replace_emoji_keep_zwj = False
    """Change the behavior of :func:`emoji.replace_emoji()` regarding
    zero-width-joiners (ZWJ/``\\u200D``) in emoji that are not
    "recommended for general interchange" (non-RGI).
    It has no effect on RGI emoji.

    See :attr:`config.demojize_keep_zwj` for more information.
    """
|
||||
|
||||
|
||||
def emojize(
        string,
        delimiters=(_DEFAULT_DELIMITER, _DEFAULT_DELIMITER),
        variant=None,
        language='en',
        version=None,
        handle_version=None
):
    """
    Replace emoji names in a string with Unicode codes.
        >>> import emoji
        >>> print(emoji.emojize("Python is fun :thumbsup:", language='alias'))
        Python is fun 👍
        >>> print(emoji.emojize("Python is fun :thumbs_up:"))
        Python is fun 👍
        >>> print(emoji.emojize("Python is fun {thumbs_up}", delimiters = ("{", "}")))
        Python is fun 👍
        >>> print(emoji.emojize("Python is fun :red_heart:", variant="text_type"))
        Python is fun ❤
        >>> print(emoji.emojize("Python is fun :red_heart:", variant="emoji_type"))
        Python is fun ❤️ # red heart, not black heart

    :param string: String contains emoji names.
    :param delimiters: (optional) Use delimiters other than _DEFAULT_DELIMITER. Each delimiter
        should contain at least one character that is not part of a-zA-Z0-9 and ``_-&.()!?#*+,``.
        See ``emoji.core._EMOJI_NAME_PATTERN`` for the regular expression of unsafe characters.
    :param variant: (optional) Choose variation selector between "base"(None), VS-15 ("text_type") and VS-16 ("emoji_type")
    :param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias'
        to use English aliases
    :param version: (optional) Max version. If set to an Emoji Version,
        all emoji above this version will be ignored.
    :param handle_version: (optional) Replace the emoji above ``version``
        instead of ignoring it. handle_version can be either a string or a
        callable; If it is a callable, it's passed the Unicode emoji and the
        data dict from :data:`EMOJI_DATA` and must return a replacement string
        to be used::

            handle_version('\\U0001F6EB', {
                'en' : ':airplane_departure:',
                'status' : fully_qualified,
                'E' : 1,
                'alias' : [':flight_departure:'],
                'de': ':abflug:',
                'es': ':avión_despegando:',
                ...
            })

    :raises ValueError: if ``variant`` is neither None, 'text_type' or 'emoji_type'
        (raised only when a matched emoji actually has a variant form)

    """

    if language == 'alias':
        language_pack = unicode_codes.get_aliases_unicode_dict()
    else:
        language_pack = unicode_codes.get_emoji_unicode_dict(language)

    open_delim = delimiters[0]
    close_delim = delimiters[1]

    pattern = re.compile('(%s[%s]+%s)' %
                         (re.escape(open_delim), _EMOJI_NAME_PATTERN, re.escape(close_delim)))

    def replace(match):
        matched = match.group(1)
        # Use an explicit positive end index: with an empty closing delimiter
        # a negative index would be ``-0`` and the slice would always be ''.
        name = matched[len(open_delim):len(matched) - len(close_delim)]
        emj = language_pack.get(
            _DEFAULT_DELIMITER +
            unicodedata.normalize('NFKC', name) +
            _DEFAULT_DELIMITER)
        if emj is None:
            # Unknown name: leave the text untouched
            return matched

        if version is not None and unicode_codes.EMOJI_DATA[emj]['E'] > version:
            if callable(handle_version):
                # Hand the callback a copy so it cannot mutate EMOJI_DATA
                emj_data = unicode_codes.EMOJI_DATA[emj].copy()
                emj_data['match_start'] = match.start()
                emj_data['match_end'] = match.end()
                return handle_version(emj, emj_data)

            elif handle_version is not None:
                return str(handle_version)
            else:
                return ''

        if variant is None or 'variant' not in unicode_codes.EMOJI_DATA[emj]:
            return emj

        if emj[-1] == '\uFE0E' or emj[-1] == '\uFE0F':
            # Remove an existing variant
            emj = emj[0:-1]
        if variant == "text_type":
            return emj + '\uFE0E'
        elif variant == "emoji_type":
            return emj + '\uFE0F'
        else:
            raise ValueError(
                "Parameter 'variant' must be either None, 'text_type' or 'emoji_type'")

    return pattern.sub(replace, string)
|
||||
|
||||
|
||||
def analyze(string: str, non_emoji: bool = False, join_emoji: bool = True) -> Iterator[Token]:
    """
    Find unicode emoji in a string. Yield each emoji as a named tuple
    :class:`Token` ``(chars, EmojiMatch)`` or :class:`Token` ``(chars, EmojiMatchZWJNonRGI)``.
    If ``non_emoji`` is True, also yield all other characters as
    :class:`Token` ``(char, char)`` .

    :param string: String to analyze
    :param non_emoji: If True also yield all non-emoji characters as Token(char, char)
    :param join_emoji: If True, multiple EmojiMatch are merged into a single
        EmojiMatchZWJNonRGI if they are separated only by a ZWJ.
    """

    # ZWJ characters are always kept here so that filter_tokens() can see
    # them and decide whether neighbouring emoji have to be merged.
    raw_tokens = tokenize(string, keep_zwj=True)
    wants_only_emoji = not non_emoji
    return filter_tokens(
        raw_tokens, emoji_only=wants_only_emoji, join_emoji=join_emoji)
|
||||
|
||||
|
||||
def demojize(
        string,
        delimiters=(_DEFAULT_DELIMITER, _DEFAULT_DELIMITER),
        language='en',
        version=None,
        handle_version=None
):
    """
    Replace Unicode emoji in a string with emoji shortcodes. Useful for storage.
        >>> import emoji
        >>> print(emoji.emojize("Python is fun :thumbs_up:"))
        Python is fun 👍
        >>> print(emoji.demojize("Python is fun 👍"))
        Python is fun :thumbs_up:
        >>> print(emoji.demojize("Unicode is tricky 😯", delimiters=("__", "__")))
        Unicode is tricky __hushed_face__

    :param string: String contains Unicode characters. MUST BE UNICODE.
    :param delimiters: (optional) User delimiters other than ``_DEFAULT_DELIMITER``
    :param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias'
        to use English aliases
    :param version: (optional) Max version. If set to an Emoji Version,
        all emoji above this version will be removed.
    :param handle_version: (optional) Replace the emoji above ``version``
        instead of removing it. handle_version can be either a string or a
        callable ``handle_version(emj: str, data: dict) -> str``; If it is
        a callable, it's passed the Unicode emoji and the data dict from
        :data:`EMOJI_DATA` and must return a replacement string to be used.
        The passed data is in the form of::

            handle_version('\\U0001F6EB', {
                'en' : ':airplane_departure:',
                'status' : fully_qualified,
                'E' : 1,
                'alias' : [':flight_departure:'],
                'de': ':abflug:',
                'es': ':avión_despegando:',
                ...
            })

    """

    # 'alias' is not a language of its own: aliases are looked up on top of
    # the English data.
    use_aliases = language == 'alias'
    if use_aliases:
        language = 'en'

    def handle(emoji_match):
        data = emoji_match.data

        if version is not None and data['E'] > version:
            # Emoji is newer than the requested maximum version
            if callable(handle_version):
                return handle_version(emoji_match.emoji, emoji_match.data_copy())
            if handle_version is not None:
                return handle_version
            return ''

        if language not in data:
            # The emoji exists, but it is not translated, so we keep the emoji
            return emoji_match.emoji

        if use_aliases and 'alias' in data:
            shortcode = data['alias'][0]
        else:
            shortcode = data[language]
        # Stored names are wrapped in ':' - swap those for the caller's delimiters
        return delimiters[0] + shortcode[1:-1] + delimiters[1]

    pieces = []
    for token in tokenize(string, keep_zwj=config.demojize_keep_zwj):
        if isinstance(token.value, EmojiMatch):
            pieces.append(str(handle(token.value)))
        else:
            pieces.append(token.value)
    return "".join(pieces)
|
||||
|
||||
|
||||
def replace_emoji(string, replace='', version=-1):
    """
    Replace Unicode emoji in a customizable string.

    :param string: String contains Unicode characters. MUST BE UNICODE.
    :param replace: (optional) replace can be either a string or a callable;
        If it is a callable, it's passed the Unicode emoji and the data dict from
        :data:`EMOJI_DATA` and must return a replacement string to be used.
        replace(str, dict) -> str
    :param version: (optional) Max version. If set to an Emoji Version,
        only emoji above this version will be replaced.
    """

    def match_version(emoji_match):
        """Return the Emoji Version to compare against ``version``."""
        data = emoji_match.data
        if data is not None:
            return data['E']
        # Joined non-RGI ZWJ sequences carry no data of their own (their
        # ``data`` is None); use the newest version among their sub-emoji.
        # If nothing is known, fall back to ``version`` so the match is kept.
        sub_versions = [
            sub.data['E']
            for sub in getattr(emoji_match, 'emojis', ())
            if sub.data is not None
        ]
        return max(sub_versions, default=version)

    def handle(emoji_match):
        if version > -1:
            if match_version(emoji_match) > version:
                if callable(replace):
                    return replace(emoji_match.emoji, emoji_match.data_copy())
                else:
                    return str(replace)
        elif callable(replace):
            return replace(emoji_match.emoji, emoji_match.data_copy())
        elif replace is not None:
            return replace
        # Emoji is not above ``version`` (or replace is None): keep it
        return emoji_match.emoji

    matches = tokenize(string, keep_zwj=config.replace_emoji_keep_zwj)
    if config.replace_emoji_keep_zwj:
        # Merge emoji that are separated only by a ZWJ into a single match
        matches = filter_tokens(
            matches, emoji_only=False, join_emoji=True)
    return "".join(str(handle(m.value)) if isinstance(
        m.value, EmojiMatch) else m.value for m in matches)
|
||||
|
||||
|
||||
def emoji_list(string):
    """
    Returns the location and emoji in list of dict format.
        >>> emoji.emoji_list("Hi, I am fine. 😁")
        [{'match_start': 15, 'match_end': 16, 'emoji': '😁'}]
    """

    found = []
    for token in tokenize(string, keep_zwj=False):
        match = token.value
        if not isinstance(match, EmojiMatch):
            # plain character, not an emoji
            continue
        found.append({
            'match_start': match.start,
            'match_end': match.end,
            'emoji': match.emoji,
        })
    return found
|
||||
|
||||
|
||||
def distinct_emoji_list(string):
    """
    Returns distinct list of emojis from the string.

    The emoji are returned in the order of their first appearance in
    ``string``, so the result is the same on every run.
    """
    # dict.fromkeys() removes duplicates while keeping insertion order;
    # a set would make the order of the result arbitrary.
    return list(dict.fromkeys(e['emoji'] for e in emoji_list(string)))
|
||||
|
||||
|
||||
def emoji_count(string, unique=False):
    """
    Returns the count of emojis in a string.

    :param unique: (optional) True if count only unique emojis
    """
    found = distinct_emoji_list(string) if unique else emoji_list(string)
    return len(found)
|
||||
|
||||
|
||||
def is_emoji(string):
    """
    Returns True if the string is a single emoji, and it is "recommended for
    general interchange" by Unicode.org.

    This is a plain membership test against :data:`EMOJI_DATA`.
    """
    known_emoji = unicode_codes.EMOJI_DATA
    return string in known_emoji
|
||||
|
||||
|
||||
def purely_emoji(string: str) -> bool:
    """
    Returns True if the string contains only emojis.
    This might not imply that `is_emoji` for all the characters, for example,
    if the string contains variation selectors.
    """
    for token in analyze(string, non_emoji=True):
        if not isinstance(token.value, EmojiMatch):
            # found a character that is not part of an emoji
            return False
    return True
|
||||
|
||||
|
||||
def version(string):
    """
    Returns the Emoji Version of the emoji.

    See https://www.unicode.org/reports/tr51/#Versioning for more information.
        >>> emoji.version("😁")
        0.6
        >>> emoji.version(":butterfly:")
        3

    :param string: An emoji or a text containing an emoji
    :raises ValueError: if ``string`` does not contain an emoji
    """
    emoji_data = unicode_codes.EMOJI_DATA

    # Try dictionary lookup: first as Unicode emoji, then as English name
    if string in emoji_data:
        return emoji_data[string]['E']

    english_names = unicode_codes.get_emoji_unicode_dict('en')
    if string in english_names:
        emj_code = english_names[string]
        if emj_code in emoji_data:
            return emoji_data[emj_code]['E']

    # Try to find first emoji in string
    found_versions = []

    def collect(_emj, data):
        # Used as replacement callback: record the version, drop the emoji
        found_versions.append(data['E'])
        return ''

    replace_emoji(string, replace=collect, version=-1)
    if found_versions:
        return found_versions[0]

    # No Unicode emoji in the text: look for emoji names, aliases first.
    # version=-1 makes every emoji "too new", so collect() sees all of them.
    emojize(string, language='alias', version=-1, handle_version=collect)
    if found_versions:
        return found_versions[0]

    for lang_code in unicode_codes._EMOJI_UNICODE:
        emojize(string, language=lang_code, version=-1, handle_version=collect)
        if found_versions:
            return found_versions[0]

    raise ValueError("No emoji found in string")
|
||||
47
venv/Lib/site-packages/emoji/core.pyi
Normal file
47
venv/Lib/site-packages/emoji/core.pyi
Normal file
@@ -0,0 +1,47 @@
|
||||
from collections.abc import Callable
|
||||
from typing_extensions import Literal, TypedDict
|
||||
from typing import Iterator
|
||||
from .tokenizer import Token
|
||||
|
||||
|
||||
class config:
|
||||
demojize_keep_zwj: bool
|
||||
replace_emoji_keep_zwj: bool
|
||||
|
||||
|
||||
class _EmojiListReturn(TypedDict):
|
||||
emoji: str
|
||||
match_start: int
|
||||
match_end: int
|
||||
|
||||
|
||||
def emojize(
|
||||
string: str,
|
||||
delimiters: tuple[str, str] = ...,
|
||||
variant: Literal["text_type", "emoji_type", None] = ...,
|
||||
language: str = ...,
|
||||
version: float | None = ...,
|
||||
handle_version: str | Callable[[str, dict[str, str]], str] | None = ...,
|
||||
) -> str: ...
|
||||
|
||||
|
||||
def demojize(
|
||||
string: str,
|
||||
delimiters: tuple[str, str] = ...,
|
||||
language: str = ...,
|
||||
version: float | None = ...,
|
||||
handle_version: str | Callable[[str, dict[str, str]], str] | None = ...,
|
||||
) -> str: ...
|
||||
|
||||
|
||||
# non_emoji and join_emoji have default values in the implementation
# (False and True), so they are optional for callers.
def analyze(string: str, non_emoji: bool = ...,
            join_emoji: bool = ...) -> Iterator[Token]: ...
|
||||
def replace_emoji(string: str, replace: str | Callable[[
|
||||
str, dict[str, str]], str] = ..., version: float = ...) -> str: ...
|
||||
|
||||
|
||||
def emoji_list(string: str) -> list[_EmojiListReturn]: ...
|
||||
def distinct_emoji_list(string: str) -> list[str]: ...
|
||||
def emoji_count(string: str, unique: bool = ...) -> int: ...
|
||||
def version(string: str) -> float: ...
|
||||
def is_emoji(string: str) -> bool: ...
|
||||
0
venv/Lib/site-packages/emoji/py.typed
Normal file
0
venv/Lib/site-packages/emoji/py.typed
Normal file
361
venv/Lib/site-packages/emoji/tokenizer.py
Normal file
361
venv/Lib/site-packages/emoji/tokenizer.py
Normal file
@@ -0,0 +1,361 @@
|
||||
"""
|
||||
emoji.tokenizer
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
Components for detecting and tokenizing emoji in strings.
|
||||
|
||||
"""
|
||||
from typing import NamedTuple, Dict, Union, Iterator, Any
|
||||
from emoji import unicode_codes
|
||||
|
||||
|
||||
__all__ = [
|
||||
'EmojiMatch', 'EmojiMatchZWJ', 'EmojiMatchZWJNonRGI', 'Token',
|
||||
'tokenize', 'filter_tokens',
|
||||
]
|
||||
|
||||
_ZWJ = '\u200D'
|
||||
_SEARCH_TREE = None
|
||||
|
||||
|
||||
class EmojiMatch:
    """
    Represents a match of a "recommended for general interchange" (RGI)
    emoji in a string.
    """

    __slots__ = ('emoji', 'start', 'end', 'data')

    def __init__(self, emoji: str, start: int,
                 end: int, data: Union[dict, None]):

        self.emoji = emoji
        """The emoji substring"""

        self.start = start
        """The start index of the match in the string"""

        self.end = end
        """The end index of the match in the string"""

        self.data = data
        """The entry from :data:`EMOJI_DATA` for this emoji or ``None`` if the emoji is non-RGI"""

    def data_copy(self) -> Dict[str, Any]:
        """
        Returns a copy of the data from :data:`EMOJI_DATA` for this match
        with the additional keys ``match_start`` and ``match_end``.
        """
        span = {
            'match_start': self.start,
            'match_end': self.end
        }
        if not self.data:
            # No data available: only the position is returned
            return span
        emj_data = self.data.copy()
        emj_data.update(span)
        return emj_data

    def is_zwj(self) -> bool:
        """
        Checks if this is a ZWJ-emoji.

        :returns: True if this is a ZWJ-emoji, False otherwise
        """
        return _ZWJ in self.emoji

    def split(self) -> Union['EmojiMatchZWJ', 'EmojiMatch']:
        """
        Splits a ZWJ-emoji into its constituents.

        :returns: An :class:`EmojiMatchZWJ` containing the "sub-emoji" if this is a ZWJ-emoji, otherwise self
        """
        return EmojiMatchZWJ(self) if self.is_zwj() else self

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}({self.emoji}, {self.start}:{self.end})'
|
||||
|
||||
|
||||
class EmojiMatchZWJ(EmojiMatch):
    """
    Represents a match of multiple emoji in a string that were joined by
    zero-width-joiners (ZWJ/``\\u200D``)."""

    __slots__ = ('emojis', )

    def __init__(self, match: EmojiMatch):
        super().__init__(match.emoji, match.start, match.end, match.data)

        self.emojis = []
        """List of sub emoji as EmojiMatch objects"""

        offset = match.start
        for part in match.emoji.split(_ZWJ):
            stop = offset + len(part)
            self.emojis.append(
                EmojiMatch(part, offset, stop,
                           unicode_codes.EMOJI_DATA.get(part, None)))
            # skip the ZWJ character that followed this part
            offset = stop + 1

    def join(self) -> str:
        """
        Joins a ZWJ-emoji into a string
        """
        parts = (sub.emoji for sub in self.emojis)
        return _ZWJ.join(parts)

    def is_zwj(self) -> bool:
        return True

    def split(self) -> 'EmojiMatchZWJ':
        return self

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}({self.join()}, {self.start}:{self.end})'
|
||||
|
||||
|
||||
class EmojiMatchZWJNonRGI(EmojiMatchZWJ):
    """
    Represents a match of multiple emoji in a string that were joined by
    zero-width-joiners (ZWJ/``\\u200D``). This class is only used for emoji
    that are not "recommended for general interchange" (non-RGI) by Unicode.org.
    The data property of this class is always None.
    """

    def __init__(self, first_emoji_match: EmojiMatch,
                 second_emoji_match: EmojiMatch):

        self.emojis = [first_emoji_match, second_emoji_match]
        """List of sub emoji as EmojiMatch objects"""

        self._update()

    def _update(self):
        # Recompute the combined emoji and its span from the sub emoji
        parts = self.emojis
        self.emoji = _ZWJ.join(sub.emoji for sub in parts)
        self.start = parts[0].start
        self.end = parts[-1].end
        self.data = None

    def _add(self, next_emoji_match: EmojiMatch):
        # Append one more sub emoji to the sequence
        self.emojis.append(next_emoji_match)
        self._update()
|
||||
|
||||
|
||||
class Token(NamedTuple):
    """
    A named tuple containing the matched string and its :class:`EmojiMatch` object if it is an emoji
    or a single character that is not a unicode emoji.
    """
    # the matched text
    chars: str
    # the EmojiMatch for an emoji, otherwise the character itself
    value: Union[str, EmojiMatch]
|
||||
|
||||
|
||||
def tokenize(string, keep_zwj: bool) -> Iterator[Token]:
    """
    Finds unicode emoji in a string. Yields all normal characters as a named
    tuple :class:`Token` ``(char, char)`` and all emoji as :class:`Token` ``(chars, EmojiMatch)``.

    :param string: String contains unicode characters. MUST BE UNICODE.
    :param keep_zwj: Should ZWJ-characters (``\\u200D``) that join non-RGI emoji be
        skipped or should be yielded as normal characters
    :return: An iterable of tuples :class:`Token` ``(char, char)`` or :class:`Token` ``(chars, EmojiMatch)``
    """

    tree = get_search_tree()
    EMOJI_DATA = unicode_codes.EMOJI_DATA
    # result: [ Token(oldsubstring0, EmojiMatch), Token(char1, char1), ... ]
    result = []
    i = 0
    length = len(string)
    # index of chars in string that are skipped, i.e. the ZWJ-char in
    # non-RGI-ZWJ-sequences. A set, because it is only used for membership
    # tests inside the scan loops.
    ignore = set()
    while i < length:
        consumed = False
        char = string[i]
        if i in ignore:
            i += 1
            if char == _ZWJ and keep_zwj:
                result.append(Token(char, char))
            continue

        elif char in tree:
            # Walk down the search tree as far as the string allows
            j = i + 1
            sub_tree = tree[char]
            while j < length and string[j] in sub_tree:
                if j in ignore:
                    break
                sub_tree = sub_tree[string[j]]
                j += 1
            if 'data' in sub_tree:
                emj_data = sub_tree['data']
                code_points = string[i:j]

                # We cannot yield the result here, we need to defer
                # the call until we are sure that the emoji is finished
                # i.e. we're not inside an ongoing ZWJ-sequence
                match_obj = EmojiMatch(code_points, i, j, emj_data)

                i = j - 1
                consumed = True
                result.append(Token(code_points, match_obj))

        elif char == _ZWJ and result and result[-1].chars in EMOJI_DATA and i > 0 and string[i - 1] in tree:
            # the current char is ZWJ and the last match was an emoji
            ignore.add(i)
            if EMOJI_DATA[result[-1].chars]["status"] == unicode_codes.STATUS["component"]:
                # last match was a component, it could be ZWJ+EMOJI+COMPONENT
                # or ZWJ+COMPONENT
                i = i - sum(len(t.chars) for t in result[-2:])
                if string[i] == _ZWJ:
                    # It's ZWJ+COMPONENT, move one back
                    i += 1
                    del result[-1]
                else:
                    # It's ZWJ+EMOJI+COMPONENT, move two back
                    del result[-2:]
            else:
                # last match result[-1] was a normal emoji, move cursor
                # before the emoji
                i = i - len(result[-1].chars)
                del result[-1]
            continue

        elif result:
            # Not inside a possible ZWJ-sequence anymore: flush the buffer
            yield from result
            result = []

        # Variation selectors (VS-15/VS-16) outside of a match are dropped
        if not consumed and char != '\uFE0E' and char != '\uFE0F':
            result.append(Token(char, char))
        i += 1

    yield from result
|
||||
|
||||
|
||||
def filter_tokens(matches: Iterator[Token], emoji_only: bool, join_emoji: bool) -> Iterator[Token]:
    """
    Filters the output of `tokenize()`

    :param matches: An iterable of tuples of the form ``(match_str, result)``
        where ``result`` is either an EmojiMatch or a string.
    :param emoji_only: If True, only EmojiMatch are returned in the output.
        If False all characters are returned
    :param join_emoji: If True, multiple EmojiMatch are merged into
        a single :class:`EmojiMatchZWJNonRGI` if they are separated only by a ZWJ.

    :return: An iterable of tuples :class:`Token` ``(char, char)``,
        :class:`Token` ``(chars, EmojiMatch)`` or :class:`Token` ``(chars, EmojiMatchZWJNonRGI)``
    """

    if not join_emoji:
        if not emoji_only:
            # Nothing to filter and nothing to merge
            yield from matches
            return

        # emoji_only without merging: drop every token that is not an
        # emoji match. This also drops the ZWJ characters, whose value is a
        # plain string.
        for token in matches:
            if isinstance(token.value, EmojiMatch):
                yield token
        return

    # Combine multiple EmojiMatch that are separated by ZWJs into
    # a single EmojiMatchZWJNonRGI
    previous_is_emoji = False
    previous_is_zwj = False
    pre_previous_is_emoji = False
    accumulator = []
    for token in matches:
        pre_previous_is_emoji = previous_is_emoji
        if previous_is_emoji and token.value == _ZWJ:
            previous_is_zwj = True
        elif isinstance(token.value, EmojiMatch):
            if pre_previous_is_emoji and previous_is_zwj:
                # EMOJI + ZWJ + EMOJI: merge into the pending match
                if isinstance(accumulator[-1].value, EmojiMatchZWJNonRGI):
                    accumulator[-1].value._add(token.value)
                    accumulator[-1] = Token(accumulator[-1].chars +
                                            _ZWJ + token.chars, accumulator[-1].value)
                else:
                    prev = accumulator.pop()
                    accumulator.append(
                        Token(prev.chars + _ZWJ + token.chars,
                              EmojiMatchZWJNonRGI(
                                  prev.value,
                                  token.value)))
            else:
                accumulator.append(token)
            previous_is_emoji = True
            previous_is_zwj = False
        else:
            # Other character, not an emoji
            previous_is_emoji = False
            previous_is_zwj = False
            yield from accumulator
            if not emoji_only:
                yield token
            accumulator = []
    yield from accumulator
|
||||
|
||||
|
||||
def get_search_tree() -> Dict[str, Any]:
    """
    Generate a search tree for demojize().
    Example of a search tree::

        EMOJI_DATA =
        {'a': {'en': ':Apple:'},
        'b': {'en': ':Bus:'},
        'ba': {'en': ':Bat:'},
        'band': {'en': ':Beatles:'},
        'bandit': {'en': ':Outlaw:'},
        'bank': {'en': ':BankOfEngland:'},
        'bb': {'en': ':BB-gun:'},
        'c': {'en': ':Car:'}}

        _SEARCH_TREE =
        {'a': {'data': {'en': ':Apple:'}},
        'b': {'a': {'data': {'en': ':Bat:'},
                    'n': {'d': {'data': {'en': ':Beatles:'},
                                'i': {'t': {'data': {'en': ':Outlaw:'}}}},
                        'k': {'data': {'en': ':BankOfEngland:'}}}},
            'b': {'data': {'en': ':BB-gun:'}},
            'data': {'en': ':Bus:'}},
        'c': {'data': {'en': ':Car:'}}}

                       _SEARCH_TREE
                      /     |        ⧵
                    /       |          ⧵
                 a          b             c
                 |        / |  ⧵          |
                 |       /  |    ⧵        |
             :Apple:   ba  :Bus:  bb     :Car:
                      /  ⧵         |
                     /    ⧵        |
                   :Bat:    ban     :BB-gun:
                          /     ⧵
                         /       ⧵
                      band       bank
                     /   ⧵         |
                    /     ⧵        |
                 bandi :Beatles:  :BankOfEngland:
                   |
                 bandit
                   |
               :Outlaw:

    """
    global _SEARCH_TREE
    if _SEARCH_TREE is not None:
        # Already built by an earlier call
        return _SEARCH_TREE

    # Publish the (still empty) tree first and fill it in place
    _SEARCH_TREE = root = {}
    emoji_data = unicode_codes.EMOJI_DATA
    for emj in emoji_data:
        node = root
        for char in emj:
            # descend one level, creating the node when it is missing
            node = node.setdefault(char, {})
        if emj:
            # the node reached by the last character holds the emoji's data
            node['data'] = emoji_data[emj]
    return _SEARCH_TREE
|
||||
47
venv/Lib/site-packages/emoji/tokenizer.pyi
Normal file
47
venv/Lib/site-packages/emoji/tokenizer.pyi
Normal file
@@ -0,0 +1,47 @@
|
||||
from typing import NamedTuple, Union, Dict, Iterator, Any
|
||||
|
||||
# Recursive alias for the search tree: each string key maps either to a
# nested subtree or to a plain dict.
# NOTE(review): get_search_tree() stores the EMOJI_DATA entry of an emoji
# under the key 'data'; confirm that the second Union member describes that
# payload as intended.
_SearchTree = Dict[str, Union['_SearchTree', dict[str, dict[str, Any]]]]

# Module-level cache that get_search_tree() fills and returns.
# NOTE(review): the implementation tests `_SEARCH_TREE is None` before
# building it, so the runtime value can be None before the first call.
_SEARCH_TREE: _SearchTree
|
||||
|
||||
|
||||
class EmojiMatch:
    # Type stub: declares the attributes and methods of EmojiMatch only; the
    # bodies are provided by the runtime module.

    # The matched emoji text.
    emoji: str
    # NOTE(review): presumably the start/end offsets of the match inside the
    # searched string - confirm against the implementation.
    start: int
    end: int
    # Data dict attached to the match; may be None.
    data: dict[str, Any] | None
    def __init__(self, emoji: str, start: int,
                 end: int, data: dict[str, Any] | None) -> None: ...

    def data_copy(self) -> Dict[str, Any]: ...
    def is_zwj(self) -> bool: ...
    # Returns either an EmojiMatchZWJ or an EmojiMatch.
    def split(self) -> EmojiMatchZWJ | EmojiMatch: ...
    def __repr__(self) -> str: ...
|
||||
|
||||
|
||||
class EmojiMatchZWJ(EmojiMatch):
    # Type stub for the EmojiMatch subclass that is constructed from an
    # existing EmojiMatch.
    # NOTE(review): presumably represents a zero-width-joiner sequence that
    # can be split into / joined from its parts - confirm against the
    # implementation.
    def __init__(self, match: EmojiMatch) -> None: ...
    def join(self) -> str: ...
    def is_zwj(self) -> bool: ...
    # Narrows the return type of EmojiMatch.split() to EmojiMatchZWJ.
    def split(self) -> EmojiMatchZWJ: ...
    def __repr__(self) -> str: ...
|
||||
|
||||
|
||||
class EmojiMatchZWJNonRGI(EmojiMatchZWJ):
    # Type stub for the EmojiMatchZWJ subclass that is constructed from two
    # EmojiMatch objects instead of one.
    # NOTE(review): presumably used for ZWJ sequences that are not a single
    # recommended (RGI) emoji - confirm against the implementation.
    def __init__(self, first_emoji_match: EmojiMatch,
                 second_emoji_match: EmojiMatch) -> None: ...
|
||||
|
||||
|
||||
class Token(NamedTuple):
    # Type stub for the two-field named tuple yielded by tokenize() and
    # filter_tokens().

    # The text covered by this token.
    chars: str
    # Either a plain string or an EmojiMatch.
    # NOTE(review): presumably an EmojiMatch when the token is an emoji and
    # the raw character otherwise - confirm against tokenize().
    value: str | EmojiMatch
|
||||
|
||||
|
||||
# Yields Token objects for `string`.
# NOTE(review): `keep_zwj` presumably controls whether zero-width-joiner
# sequences are kept as one match - confirm against the implementation.
def tokenize(string: str, keep_zwj: bool) -> Iterator[Token]: ...
|
||||
|
||||
|
||||
# Filters a stream of Token objects and yields Token objects again.
# Tokens that are not emoji are yielded only when `emoji_only` is False.
# NOTE(review): `join_emoji` presumably merges adjacent emoji joined by a
# zero-width joiner - confirm against the implementation.
def filter_tokens(matches: Iterator[Token], emoji_only: bool,
                  join_emoji: bool) -> Iterator[Token]: ...
|
||||
|
||||
|
||||
# Returns the module-level search tree, building it from EMOJI_DATA on the
# first call and reusing the cached tree afterwards.
def get_search_tree() -> _SearchTree: ...
|
||||
36
venv/Lib/site-packages/emoji/unicode_codes/__init__.py
Normal file
36
venv/Lib/site-packages/emoji/unicode_codes/__init__.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from emoji.unicode_codes.data_dict import *
|
||||
|
||||
# Names exported by ``from emoji.unicode_codes import *``.
__all__ = [
    'get_emoji_unicode_dict',
    'get_aliases_unicode_dict',
    'EMOJI_DATA',
    'STATUS',
    'LANGUAGES',
]

# Per-language cache for get_emoji_unicode_dict(); an entry stays None until
# the dict for that language has been generated.
_EMOJI_UNICODE = dict.fromkeys(LANGUAGES)

# Cache for get_aliases_unicode_dict(); empty until it has been generated.
_ALIASES_UNICODE = dict()
|
||||
|
||||
|
||||
def get_emoji_unicode_dict(lang):
    """Return the dict that maps emoji names in language ``lang`` to emoji.

    Only entries of EMOJI_DATA that have a name for ``lang`` and whose status
    is at most ``STATUS['fully_qualified']`` are included. The dict is
    generated on the first request for a language and cached in
    ``_EMOJI_UNICODE[lang]``; later calls return the cached dict."""

    cached = _EMOJI_UNICODE[lang]
    if cached is not None:
        return cached

    max_status = STATUS['fully_qualified']
    name_to_emoji = {}
    for emoji_chars, info in EMOJI_DATA.items():
        if lang in info and info['status'] <= max_status:
            name_to_emoji[info[lang]] = emoji_chars

    _EMOJI_UNICODE[lang] = name_to_emoji
    return name_to_emoji
|
||||
|
||||
|
||||
def get_aliases_unicode_dict():
    """Return the dict that maps English emoji names and aliases to emoji.

    The dict starts from ``get_emoji_unicode_dict('en')`` and then adds every
    alias of the EMOJI_DATA entries whose status is at most
    ``STATUS['fully_qualified']``. It is generated once and cached in
    ``_ALIASES_UNICODE``; later calls return the cached dict."""

    if not _ALIASES_UNICODE:
        # Assemble the complete mapping first and publish it with a single
        # update(): the emptiness check above doubles as the "already built"
        # flag, so a partially filled cache would never be completed.
        aliases = dict(get_emoji_unicode_dict('en'))
        max_status = STATUS['fully_qualified']
        for emj, data in EMOJI_DATA.items():
            if 'alias' in data and data['status'] <= max_status:
                for alias in data['alias']:
                    aliases[alias] = emj
        _ALIASES_UNICODE.update(aliases)

    return _ALIASES_UNICODE
|
||||
6
venv/Lib/site-packages/emoji/unicode_codes/__init__.pyi
Normal file
6
venv/Lib/site-packages/emoji/unicode_codes/__init__.pyi
Normal file
@@ -0,0 +1,6 @@
|
||||
from .data_dict import *
|
||||
|
||||
# Public names of the package: the two accessor functions declared below and
# the data constants re-exported from .data_dict.
__all__ = ["get_emoji_unicode_dict", "get_aliases_unicode_dict", "EMOJI_DATA", "STATUS", "LANGUAGES"]
|
||||
|
||||
# Returns the cached dict that maps each emoji name in language `lang` to its
# emoji string.
def get_emoji_unicode_dict(lang: str) -> dict[str, str]: ...
|
||||
# Returns the cached dict that maps English emoji names and aliases to their
# emoji string.
def get_aliases_unicode_dict() -> dict[str, str]: ...
|
||||
Binary file not shown.
Binary file not shown.
86978
venv/Lib/site-packages/emoji/unicode_codes/data_dict.py
Normal file
86978
venv/Lib/site-packages/emoji/unicode_codes/data_dict.py
Normal file
File diff suppressed because it is too large
Load Diff
7
venv/Lib/site-packages/emoji/unicode_codes/data_dict.pyi
Normal file
7
venv/Lib/site-packages/emoji/unicode_codes/data_dict.pyi
Normal file
@@ -0,0 +1,7 @@
|
||||
from typing import Any
|
||||
|
||||
# Public names of the data module.
__all__ = ["EMOJI_DATA", "STATUS", "LANGUAGES"]

# Maps a status name (e.g. 'fully_qualified') to an integer that is compared
# against the 'status' value of EMOJI_DATA entries.
STATUS: dict[str, int]
# Language keys; each one is used as a key into the per-language name cache
# and into the EMOJI_DATA entries.
LANGUAGES: list[str]
# Maps an emoji string to its data dict: a 'status' value, names keyed by
# language and, for some entries, an 'alias' collection.
EMOJI_DATA: dict[str, dict[str, Any]]
|
||||
0
venv/Lib/site-packages/emoji/unicode_codes/py.typed
Normal file
0
venv/Lib/site-packages/emoji/unicode_codes/py.typed
Normal file
Reference in New Issue
Block a user