pyrofork: Add support for custom emoji in markdown unparser

Signed-off-by: wulan17 <wulan17@nusantararom.org>
pyrofork: Add offset_{date,id,topic} parameters to get_forum_topics method
2025-12-29 12:04:51 +00:00 · 2025-03-03 05:17:06 +07:00 · 2025-03-03 05:17:06 +07:00 · 2025-03-03 05:17:05 +07:00 · 2025-03-03 05:16:37 +07:00
3 changed files with 99 additions and 68 deletions
--- a/pyrogram/methods/chats/get_forum_topics.py
+++ b/pyrogram/methods/chats/get_forum_topics.py
@ -32,7 +32,10 @@ class GetForumTopics:
    async def get_forum_topics(
        self: "pyrogram.Client",
        chat_id: Union[int, str],
-        limit: int = 0
+        limit: int = 0,
        offset_date: int = 0,
        offset_id: int = 0,
        offset_topic: int = 0
    ) -> Optional[AsyncGenerator["types.ForumTopic", None]]:
        """Get one or more topic from a chat.
@ -46,6 +49,15 @@ class GetForumTopics:
            limit (``int``, *optional*):
                Limits the number of topics to be retrieved.
            offset_date (``int``, *optional*):
                Date of the last message of the last found topic.
            offset_id (``int``, *optional*):
                ID of the last message of the last found topic.
            offset_topic (``int``, *optional*):
                ID of the last found topic.
        Returns:
            ``Generator``: On success, a generator yielding :obj:`~pyrogram.types.ForumTopic` objects is returned.
@ -62,7 +74,7 @@ class GetForumTopics:
        peer = await self.resolve_peer(chat_id)
-        rpc = raw.functions.channels.GetForumTopics(channel=peer, offset_date=0, offset_id=0, offset_topic=0, limit=limit)
+        rpc = raw.functions.channels.GetForumTopics(channel=peer, offset_date=offset_date, offset_id=offset_id, offset_topic=offset_topic, limit=limit)
        r = await self.invoke(rpc, sleep_threshold=-1)
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@ -182,78 +182,84 @@ class Markdown:
    @staticmethod
    def unparse(text: str, entities: list):
        """
        Performs the reverse operation to .parse(), effectively returning
        markdown-like syntax given a normal text and its MessageEntity's.
        :param text: the text to be reconverted into markdown.
        :param entities: list of MessageEntity's applied to the text.
        :return: a markdown-like text representing the combination of both inputs.
        """
        delimiters = {
            MessageEntityType.BOLD: BOLD_DELIM,
            MessageEntityType.ITALIC: ITALIC_DELIM,
            MessageEntityType.UNDERLINE: UNDERLINE_DELIM,
            MessageEntityType.STRIKETHROUGH: STRIKE_DELIM,
            MessageEntityType.CODE: CODE_DELIM,
            MessageEntityType.PRE: PRE_DELIM,
            MessageEntityType.BLOCKQUOTE: BLOCKQUOTE_DELIM,
            MessageEntityType.EXPANDABLE_BLOCKQUOTE: BLOCKQUOTE_EXPANDABLE_DELIM,
            MessageEntityType.SPOILER: SPOILER_DELIM
        }
        text = utils.add_surrogates(text)
-        entities_offsets = []
+        insert_at = []
-
+        for i, entity in enumerate(entities):
-        for entity in entities:
+            s = entity.offset
-            entity_type = entity.type
+            e = entity.offset + entity.length
-            start = entity.offset
+            delimiter = delimiters.get(entity.type, None)
-            end = start + entity.length
+            if delimiter:
-
+                if entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
-            if entity_type == MessageEntityType.BOLD:
+                    open_delimiter = delimiter
-                start_tag = end_tag = BOLD_DELIM
+                    close_delimiter = delimiter
-            elif entity_type == MessageEntityType.ITALIC:
+                    if entity.type == MessageEntityType.PRE:
-                start_tag = end_tag = ITALIC_DELIM
+                        close_delimiter = '\n' + delimiter
-            elif entity_type == MessageEntityType.UNDERLINE:
+                        if entity.language:
-                start_tag = end_tag = UNDERLINE_DELIM
+                            open_delimiter += entity.language + '\n'
-            elif entity_type == MessageEntityType.STRIKETHROUGH:
+                        else:
-                start_tag = end_tag = STRIKE_DELIM
+                            open_delimiter += '\n'
-            elif entity_type == MessageEntityType.CODE:
+                    insert_at.append((s, i, open_delimiter))
-                start_tag = end_tag = CODE_DELIM
+                    insert_at.append((e, -i, close_delimiter))
-            elif entity_type == MessageEntityType.PRE:
+                else:
-                language = getattr(entity, "language", "") or ""
+                    # Handle multiline blockquotes
-                start_tag = f"{PRE_DELIM}{language}\n"
+                    text_subset = text[s:e]
-                end_tag = f"\n{PRE_DELIM}"
+                    lines = text_subset.splitlines()
-            elif entity_type == MessageEntityType.BLOCKQUOTE:
+                    for line_num, line in enumerate(lines):
                        line_start = s + sum(len(l) + 1 for l in lines[:line_num])
                        if entity.collapsed:
-                    start_tag = BLOCKQUOTE_EXPANDABLE_DELIM + " "
+                            insert_at.append((line_start, i, BLOCKQUOTE_EXPANDABLE_DELIM))
                        else:
-                    start_tag = BLOCKQUOTE_DELIM + " "
+                            insert_at.append((line_start, i, BLOCKQUOTE_DELIM))
-                end_tag = ""
+                    # No closing delimiter for blockquotes
-                blockquote_text = text[start:end]
+            else:
-                lines = blockquote_text.split("\n")
+                url = None
-                last_length = 0
+                is_emoji = False
-                for line in lines:
+                if entity.type == MessageEntityType.TEXT_LINK:
                    if len(line) == 0 and last_length == end:
                        continue
                    start_offset = start+last_length
                    last_length = last_length+len(line)
                    end_offset = start_offset+last_length
                    entities_offsets.append((start_tag, start_offset,))
                    entities_offsets.append((end_tag, end_offset,))
                    last_length = last_length+1
                continue
            elif entity_type == MessageEntityType.SPOILER:
                start_tag = end_tag = SPOILER_DELIM
            elif entity_type == MessageEntityType.TEXT_LINK:
                    url = entity.url
-                start_tag = "["
+                elif entity.type == MessageEntityType.TEXT_MENTION:
-                end_tag = f"]({url})"
+                    url = f'tg://user?id={entity.user.id}'
-            elif entity_type == MessageEntityType.TEXT_MENTION:
+                elif entity.type == MessageEntityType.CUSTOM_EMOJI:
-                user = entity.user
+                    url = f"tg://emoji?id={entity.custom_emoji_id}"
-                start_tag = "["
+                    is_emoji = True
-                end_tag = f"](tg://user?id={user.id})"
+                if url:
-            elif entity_type == MessageEntityType.CUSTOM_EMOJI:
+                    if is_emoji:
-                emoji_id = entity.custom_emoji_id
+                        insert_at.append((s, i, '!['))
                start_tag = "!["
                end_tag = f"](tg://emoji?id={emoji_id})"
                    else:
-                continue
+                        insert_at.append((s, i, '['))
                    insert_at.append((e, -i, f']({url})'))
-            entities_offsets.append((start_tag, start,))
+        insert_at.sort(key=lambda t: (t[0], t[1]))
-            entities_offsets.append((end_tag, end,))
+        while insert_at:
            at, _, what = insert_at.pop()
-        entities_offsets = map(
+            # If we are in the middle of a surrogate nudge the position by -1.
-            lambda x: x[1],
+            # Otherwise we would end up with malformed text and fail to encode.
-            sorted(
+            # For example of bad input: "Hi \ud83d\ude1c"
-                enumerate(entities_offsets),
+            # https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
-                key=lambda x: (x[1][1], x[0]),
+            while utils.within_surrogate(text, at):
-                reverse=True
+                at += 1
            )
        )
-        for entity, offset in entities_offsets:
+            text = text[:at] + what + text[at:]
            text = text[:offset] + entity + text[offset:]
        return utils.remove_surrogates(text)
--- a/pyrogram/parser/utils.py
+++ b/pyrogram/parser/utils.py
@ -40,3 +40,16 @@ def remove_surrogates(text):
 def replace_once(source: str, old: str, new: str, start: int):
    """Replace the first occurrence of ``old`` located at or after ``start``.

    The part of ``source`` before ``start`` is returned unchanged; only the
    remainder is searched, and at most one occurrence of ``old`` is swapped
    for ``new``.

    :param source: the text to operate on.
    :param old: the substring to look for.
    :param new: the substring that replaces ``old``.
    :param start: slice index from which the search begins.
    :return: ``source`` with at most one replacement applied.
    """
    untouched = source[:start]
    remainder = source[start:]
    return untouched + remainder.replace(old, new, 1)
 def within_surrogate(text, index, *, length=None):
    """
    `True` if ``index`` is within a surrogate (before and after it, not at!).

    In other words, the character just before ``index`` is a high surrogate
    and the character at ``index`` is a surrogate too, so inserting text at
    ``index`` would split the pair.

    :param text: the text to inspect; callers pass text whose surrogate
        pairs have been expanded into separate characters.
    :param index: the candidate insertion position.
    :param length: optional pre-computed ``len(text)``, so callers that
        query many positions need not recompute it. It must not exceed the
        real length of ``text``.
    :return: ``True`` when ``index`` falls between the two halves of a
        surrogate pair, ``False`` otherwise (including out-of-range indexes).
    """
    if length is None:
        length = len(text)

    # The lower bound is 0, not 1: index 1 is a valid "inside the pair"
    # position when the text starts with a surrogate pair.
    if not 0 < index < length:
        return False

    return (
        '\ud800' <= text[index - 1] <= '\udbff'  # previous is a high surrogate
        and '\ud800' <= text[index] <= '\udfff'  # current is a surrogate
    )
Author	SHA1	Message	Date
wulan17	2de6d80905	pyrofork: Add support for custom emoji in markdown unparser Some checks failed Build-docs / build (push) Has been cancelled Details Pyrofork / build (macos-latest, 3.10) (push) Has been cancelled Details Pyrofork / build (macos-latest, 3.11) (push) Has been cancelled Details Pyrofork / build (macos-latest, 3.12) (push) Has been cancelled Details Pyrofork / build (macos-latest, 3.13) (push) Has been cancelled Details Pyrofork / build (macos-latest, 3.9) (push) Has been cancelled Details Pyrofork / build (ubuntu-latest, 3.10) (push) Has been cancelled Details Pyrofork / build (ubuntu-latest, 3.11) (push) Has been cancelled Details Pyrofork / build (ubuntu-latest, 3.12) (push) Has been cancelled Details Pyrofork / build (ubuntu-latest, 3.13) (push) Has been cancelled Details Pyrofork / build (ubuntu-latest, 3.9) (push) Has been cancelled Details Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 05:17:06 +07:00
wulan17	e58354c98a	pyrofork: Add offset_{date,id,topic} parameters to get_forum_topics method Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 05:17:06 +07:00
wulan17	a556504770	pyrofork: Add support for multi-line blockquote in markdown unparser Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 05:17:05 +07:00
wulan17	9105c1a9f3	pyrofork: Adapt markdown unparser from telethon * The problem with current implementation is when we have nested markdown inside a url the markdown order is messed up. for example link with bold text will be unparsed like this [github](https://github.com). Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 05:16:37 +07:00