Add parsing for messages with lxml

2022-05-27 12:19:22 +02:00 · 2022-05-27 12:19:22 +02:00 · 6b576f102a
parent c0c7aa772f
commit 6b576f102a
1 changed files with 29 additions and 2 deletions
--- a/main.py
+++ b/main.py
@@ -3,6 +3,7 @@ from settings import *
 from mastodon import Mastodon
 from os.path import exists
 import tweepy
+from lxml.html import document_fromstring
 def main():
    if not exists(app_path):
        Mastodon.create_app(
@@ -36,7 +37,33 @@ def main():
    timeline = mastodon_api.account_statuses(mastodon_user,exclude_replies=True)
    for toot in timeline:
        if(toot.visibility == 'public' and toot.account.id == mastodon_user.id and not toot.reblog):
-            print(toot)
+            tootfrm(toot.content)
    #print(timeline)
+def tootfrm(content):  # Parse one toot's HTML content string and print its plain text.
+    content = content.replace('<br />', "\n")  # turn literal '<br />' tags into newlines before parsing
+    res = document_fromstring(content)  # parse the HTML string with lxml.html
+    printit(res, "")  # called for its in-place rewrite of 'u-url' links; the return value is ignored
+    print(res.text_content())  # text of the parsed tree after printit's rewrite
+def printit(parent, body, block=" "):  # Recursively walk the elements under parent, rewriting 'u-url' links in place.
+    for el in parent:  # visit each child element of parent
+ #       print(block, el, el.text_content())  -- disabled debug trace of each visited element
+        if 'u-url' in el.classes:  # element has the 'u-url' class (the mention link in test()'s sample)
+            el.text = el.text_content() + "@grml.de"  # link text becomes its full text plus a hard-coded "@grml.de" suffix
+            el.find('.//span').text = ''  # blank the first descendant span, whose text is now part of el.text; NOTE(review): find() returns None when there is no span, which would raise AttributeError here
+            el.find('.//span').drop_tag()  # then remove the emptied span tag itself
+        # for classname in iter(el.classes):
+        #     print(block + " " + classname)
+        body = printit(el, body, block + " ")  # recurse into el; block only grows as an indent prefix for the disabled debug prints
+    return body  # body is never modified in this function, so the caller's value comes back unchanged
+def test():  # Offline check: run the parsing steps on a hard-coded sample toot and print the results.
+    content = '<p>Python Test <a href="https://mastodon.grml.de/tags/ignore" class="mention hashtag" rel="tag">#<span>ignore</span></a><br /><span class="h-card"><a href="https://mastodon.grml.de/@toot" class="u-url mention">@<span>toot</span></a></span><br /><a href="https://twitter.com/mattxiv/status/1529181072931659777" target="_blank" rel="nofollow noopener noreferrer"><span class="invisible">https://</span><span class="ellipsis">twitter.com/mattxiv/status/152</span><span class="invisible">9181072931659777</span></a><br /><a href="https://mastodon.grml.de/tags/python" class="mention hashtag" rel="tag">#<span>python</span></a> <a href="https://mastodon.grml.de/tags/test" class="mention hashtag" rel="tag">#<span>test</span></a></p>'
+ #   parser = MyHTMLParser()
+    content = content.replace('<br />', "\n")  # same '<br />' -> newline step that tootfrm applies
+    res = document_fromstring(content)  # parse the sample HTML with lxml.html
+    print(res.text_content())  # text before the 'u-url' rewrite
+    body = printit(res, "")  # rewrites the 'u-url' link in place; body comes back as the "" passed in
+    print(body)  # prints an empty line, since printit returns its body argument unchanged
+    print(res.text_content())  # text after the rewrite, for comparison with the first print
 if __name__ == "__main__":  # run only when executed as a script, not when imported
    main()
+    #test()  -- uncomment to also run the offline sample check defined in test()