Compare commits

...

4 commits

Author SHA1 Message Date
wulan17
4d1ae0b00f
pyrofork: Add offset_{date,id,topic} parameters to get_forum_topics method
Some checks are pending
Build-docs / build (push) Waiting to run
Pyrofork / build (macos-latest, 3.10) (push) Waiting to run
Pyrofork / build (macos-latest, 3.11) (push) Waiting to run
Pyrofork / build (macos-latest, 3.12) (push) Waiting to run
Pyrofork / build (macos-latest, 3.13) (push) Waiting to run
Pyrofork / build (macos-latest, 3.9) (push) Waiting to run
Pyrofork / build (ubuntu-latest, 3.10) (push) Waiting to run
Pyrofork / build (ubuntu-latest, 3.11) (push) Waiting to run
Pyrofork / build (ubuntu-latest, 3.12) (push) Waiting to run
Pyrofork / build (ubuntu-latest, 3.13) (push) Waiting to run
Pyrofork / build (ubuntu-latest, 3.9) (push) Waiting to run
Signed-off-by: wulan17 <wulan17@nusantararom.org>
2025-03-03 00:32:37 +07:00
wulan17
943a7e0342
pyrofork: Add support for multi-line blockquote in markdown unparser
Signed-off-by: wulan17 <wulan17@nusantararom.org>
2025-03-03 00:32:37 +07:00
wulan17
4a5af71d25
pyrofork: Adapt markdown unparser from telethon
* The problem with the current implementation is that when markdown is nested inside a URL, the delimiter order gets messed up.
For example, a link with bold text is unparsed like this: [**github](https://github.com**).

Signed-off-by: wulan17 <wulan17@nusantararom.org>
2025-03-03 00:31:57 +07:00
wulan17
984abd2008
pyrofork: Fix nested url markdown parsing
* The problem with the current implementation is that when we add other markdown inside a URL markdown, it is not parsed.
For example, if we add bold (**) markdown inside a URL markdown, the URL text is shown as `**text**` instead of being rendered in bold.

Signed-off-by: wulan17 <wulan17@nusantararom.org>
2025-03-03 00:01:20 +07:00
3 changed files with 109 additions and 80 deletions

View file

@ -32,7 +32,10 @@ class GetForumTopics:
async def get_forum_topics( async def get_forum_topics(
self: "pyrogram.Client", self: "pyrogram.Client",
chat_id: Union[int, str], chat_id: Union[int, str],
limit: int = 0 limit: int = 0,
offset_date: int = 0,
offset_id: int = 0,
offset_topic: int = 0
) -> Optional[AsyncGenerator["types.ForumTopic", None]]: ) -> Optional[AsyncGenerator["types.ForumTopic", None]]:
"""Get one or more topic from a chat. """Get one or more topic from a chat.
@ -46,6 +49,15 @@ class GetForumTopics:
limit (``int``, *optional*): limit (``int``, *optional*):
Limits the number of topics to be retrieved. Limits the number of topics to be retrieved.
offset_date (``int``, *optional*):
Date of the last message of the last found topic.
offset_id (``int``, *optional*):
ID of the last message of the last found topic.
offset_topic (``int``, *optional*):
ID of the last found topic.
Returns: Returns:
``Generator``: On success, a generator yielding :obj:`~pyrogram.types.ForumTopic` objects is returned. ``Generator``: On success, a generator yielding :obj:`~pyrogram.types.ForumTopic` objects is returned.
@ -62,7 +74,7 @@ class GetForumTopics:
peer = await self.resolve_peer(chat_id) peer = await self.resolve_peer(chat_id)
rpc = raw.functions.channels.GetForumTopics(channel=peer, offset_date=0, offset_id=0, offset_topic=0, limit=limit) rpc = raw.functions.channels.GetForumTopics(channel=peer, offset_date=offset_date, offset_id=offset_id, offset_topic=offset_topic, limit=limit)
r = await self.invoke(rpc, sleep_threshold=-1) r = await self.invoke(rpc, sleep_threshold=-1)

View file

@ -37,7 +37,7 @@ PRE_DELIM = "```"
BLOCKQUOTE_DELIM = ">" BLOCKQUOTE_DELIM = ">"
BLOCKQUOTE_EXPANDABLE_DELIM = "**>" BLOCKQUOTE_EXPANDABLE_DELIM = "**>"
MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( MARKDOWN_RE = re.compile(r"({d})".format(
d="|".join( d="|".join(
["".join(i) for i in [ ["".join(i) for i in [
[rf"\{j}" for j in i] [rf"\{j}" for j in i]
@ -52,6 +52,7 @@ MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format(
] ]
]] ]]
))) )))
URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)")
OPENING_TAG = "<{}>" OPENING_TAG = "<{}>"
CLOSING_TAG = "</{}>" CLOSING_TAG = "</{}>"
@ -118,7 +119,7 @@ class Markdown:
for i, match in enumerate(re.finditer(MARKDOWN_RE, text)): for i, match in enumerate(re.finditer(MARKDOWN_RE, text)):
start, _ = match.span() start, _ = match.span()
delim, is_emoji, text_url, url = match.groups() delim = match.group(1)
full = match.group(0) full = match.group(0)
if delim in FIXED_WIDTH_DELIMS: if delim in FIXED_WIDTH_DELIMS:
@ -127,16 +128,6 @@ class Markdown:
if is_fixed_width and delim not in FIXED_WIDTH_DELIMS: if is_fixed_width and delim not in FIXED_WIDTH_DELIMS:
continue continue
if not is_emoji and text_url:
text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
continue
if is_emoji:
emoji = text_url
emoji_id = url.lstrip("tg://emoji?id=")
text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
continue
if delim == BOLD_DELIM: if delim == BOLD_DELIM:
tag = "b" tag = "b"
elif delim == ITALIC_DELIM: elif delim == ITALIC_DELIM:
@ -169,6 +160,21 @@ class Markdown:
text = utils.replace_once(text, delim, tag, start) text = utils.replace_once(text, delim, tag, start)
for i, match in enumerate(re.finditer(URL_RE, text)):
start, _ = match.span()
is_emoji, text_url, url = match.groups()
full = match.group(0)
if not is_emoji and text_url:
text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
continue
if is_emoji:
emoji = text_url
emoji_id = url.lstrip("tg://emoji?id=")
text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
continue
for placeholder, code_section in placeholders.items(): for placeholder, code_section in placeholders.items():
text = text.replace(placeholder, code_section) text = text.replace(placeholder, code_section)
@ -176,78 +182,76 @@ class Markdown:
@staticmethod @staticmethod
def unparse(text: str, entities: list): def unparse(text: str, entities: list):
"""
Performs the reverse operation to .parse(), effectively returning
markdown-like syntax given a normal text and its MessageEntity's.
:param text: the text to be reconverted into markdown.
:param entities: list of MessageEntity's applied to the text.
:return: a markdown-like text representing the combination of both inputs.
"""
delimiters = {
MessageEntityType.BOLD: BOLD_DELIM,
MessageEntityType.ITALIC: ITALIC_DELIM,
MessageEntityType.UNDERLINE: UNDERLINE_DELIM,
MessageEntityType.STRIKETHROUGH: STRIKE_DELIM,
MessageEntityType.CODE: CODE_DELIM,
MessageEntityType.PRE: PRE_DELIM,
MessageEntityType.BLOCKQUOTE: BLOCKQUOTE_DELIM,
MessageEntityType.EXPANDABLE_BLOCKQUOTE: BLOCKQUOTE_EXPANDABLE_DELIM,
MessageEntityType.SPOILER: SPOILER_DELIM
}
text = utils.add_surrogates(text) text = utils.add_surrogates(text)
entities_offsets = [] insert_at = []
for i, entity in enumerate(entities):
for entity in entities: s = entity.offset
entity_type = entity.type e = entity.offset + entity.length
start = entity.offset delimiter = delimiters.get(entity.type, None)
end = start + entity.length if delimiter:
if entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
if entity_type == MessageEntityType.BOLD: open_delimiter = delimiter
start_tag = end_tag = BOLD_DELIM close_delimiter = delimiter
elif entity_type == MessageEntityType.ITALIC: if entity.type == MessageEntityType.PRE:
start_tag = end_tag = ITALIC_DELIM if entity.language:
elif entity_type == MessageEntityType.UNDERLINE: open_delimiter += entity.language + '\n'
start_tag = end_tag = UNDERLINE_DELIM else:
elif entity_type == MessageEntityType.STRIKETHROUGH: open_delimiter += entity + '\n'
start_tag = end_tag = STRIKE_DELIM insert_at.append((s, i, open_delimiter))
elif entity_type == MessageEntityType.CODE: insert_at.append((e, -i, close_delimiter))
start_tag = end_tag = CODE_DELIM else:
elif entity_type == MessageEntityType.PRE: # Handle multiline blockquotes
language = getattr(entity, "language", "") or "" text_subset = text[s:e]
start_tag = f"{PRE_DELIM}{language}\n" lines = text_subset.splitlines()
end_tag = f"\n{PRE_DELIM}" for line_num, line in enumerate(lines):
elif entity_type == MessageEntityType.BLOCKQUOTE: line_start = s + sum(len(l) + 1 for l in lines[:line_num])
if entity.collapsed: if entity.collapsed:
start_tag = BLOCKQUOTE_EXPANDABLE_DELIM + " " insert_at.append((line_start, i, BLOCKQUOTE_EXPANDABLE_DELIM))
else: else:
start_tag = BLOCKQUOTE_DELIM + " " insert_at.append((line_start, i, BLOCKQUOTE_DELIM))
end_tag = "" # No closing delimiter for blockquotes
blockquote_text = text[start:end] else:
lines = blockquote_text.split("\n") url = None
last_length = 0 if entity.type == MessageEntityType.TEXT_LINK:
for line in lines:
if len(line) == 0 and last_length == end:
continue
start_offset = start+last_length
last_length = last_length+len(line)
end_offset = start_offset+last_length
entities_offsets.append((start_tag, start_offset,))
entities_offsets.append((end_tag, end_offset,))
last_length = last_length+1
continue
elif entity_type == MessageEntityType.SPOILER:
start_tag = end_tag = SPOILER_DELIM
elif entity_type == MessageEntityType.TEXT_LINK:
url = entity.url url = entity.url
start_tag = "[" elif entity.type == MessageEntityType.TEXT_MENTION:
end_tag = f"]({url})" url = 'tg://user?id={}'.format(entity.user.id)
elif entity_type == MessageEntityType.TEXT_MENTION: if url:
user = entity.user insert_at.append((s, i, '['))
start_tag = "[" insert_at.append((e, -i, ']({})'.format(url)))
end_tag = f"](tg://user?id={user.id})"
elif entity_type == MessageEntityType.CUSTOM_EMOJI:
emoji_id = entity.custom_emoji_id
start_tag = "!["
end_tag = f"](tg://emoji?id={emoji_id})"
else:
continue
entities_offsets.append((start_tag, start,)) insert_at.sort(key=lambda t: (t[0], t[1]))
entities_offsets.append((end_tag, end,)) while insert_at:
at, _, what = insert_at.pop()
entities_offsets = map( # If we are in the middle of a surrogate nudge the position by -1.
lambda x: x[1], # Otherwise we would end up with malformed text and fail to encode.
sorted( # For example of bad input: "Hi \ud83d\ude1c"
enumerate(entities_offsets), # https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
key=lambda x: (x[1][1], x[0]), while utils.within_surrogate(text, at):
reverse=True at += 1
)
)
for entity, offset in entities_offsets: text = text[:at] + what + text[at:]
text = text[:offset] + entity + text[offset:]
return utils.remove_surrogates(text) return utils.remove_surrogates(text)

View file

@ -40,3 +40,16 @@ def remove_surrogates(text):
def replace_once(source: str, old: str, new: str, start: int): def replace_once(source: str, old: str, new: str, start: int):
return source[:start] + source[start:].replace(old, new, 1) return source[:start] + source[start:].replace(old, new, 1)
def within_surrogate(text, index, *, length=None):
    """
    `True` if ``index`` is within a surrogate (before and after it, not at!).

    ``text`` is expected to already contain UTF-16 surrogate code units.
    ``index`` is "within" a pair when the code unit just before it is a high
    surrogate (``\\ud800``-``\\udbff``) and the code unit at it is itself a
    surrogate, i.e. inserting at ``index`` would split the pair.

    Parameters:
        text (``str``):
            The text to inspect.

        index (``int``):
            The candidate insertion position.

        length (``int``, *optional*):
            Pre-computed length of ``text``. Defaults to ``len(text)``.
            Values larger than the real length are clamped so the check
            never indexes past the end of ``text``.

    Returns:
        ``bool``: Whether ``index`` falls between the two halves of a pair.
    """
    if length is None:
        length = len(text)

    # Never trust a caller-supplied length beyond the real end of the string.
    upper = min(length, len(text))

    return (
        # In bounds. The lower bound is 0 (not 1): a pair occupying positions
        # 0 and 1 makes index 1 "within" it, e.g. text starting with an emoji.
        0 < index < upper and
        '\ud800' <= text[index - 1] <= '\udbff' and  # previous is a high surrogate
        '\ud800' <= text[index] <= '\udfff'  # current is a surrogate
    )