From 6b576f102ab858e485d8980eabf4243cea3b8f9d Mon Sep 17 00:00:00 2001
From: robin <robin@amiganiac.net>
Date: Fri, 27 May 2022 12:19:22 +0200
Subject: [PATCH] Add parsing for messages with lxml

---
 main.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)
diff --git a/main.py b/main.py
index 069b414..6bb7283 100755
--- a/main.py
+++ b/main.py
@@ -3,6 +3,7 @@ from settings import *
 from mastodon import Mastodon
 from os.path import exists
 import tweepy
+from lxml.html import document_fromstring
 def main():
     if not exists(app_path):
         Mastodon.create_app(
@@ -36,7 +37,33 @@ def main():
     timeline = mastodon_api.account_statuses(mastodon_user,exclude_replies=True)
     for toot in timeline:
         if(toot.visibility == 'public' and toot.account.id == mastodon_user.id and not toot.reblog):
-            print(toot)
+            tootfrm(toot.content)
     #print(timeline)
+def tootfrm(content):
+    """Print a toot's HTML `content` as plain text after printit rewrites its mentions."""
+    doc = document_fromstring(content.replace('<br />', "\n") or "<p></p>")  # lxml rejects ""
+    printit(doc, "")
+    print(doc.text_content())
+def printit(parent, body, block=" "):
+    """Rewrite each 'u-url' mention under `parent` to '<handle>@grml.de', in place.
+
+    `body` is threaded through the recursion and returned unchanged."""
+    for el in parent:
+        if 'u-url' in el.classes:
+            handle = el.text_content()  # capture before the children are removed
+            del el[:]  # drop all children; also safe when the link has no <span>
+            el.text = handle + "@grml.de"
+        body = printit(el, body, block + " ")
+    return body
+def test():
+    """Run a sample toot through printit, printing its text before and after."""
+    # Sample toot HTML: hashtags, one mention and a shortened link.
+    sample_html = '<p>Python Test <a href="https://mastodon.grml.de/tags/ignore" class="mention hashtag" rel="tag">#<span>ignore</span></a><br /><span class="h-card"><a href="https://mastodon.grml.de/@toot" class="u-url mention">@<span>toot</span></a></span><br /><a href="https://twitter.com/mattxiv/status/1529181072931659777" target="_blank" rel="nofollow noopener noreferrer"><span class="invisible">https://</span><span class="ellipsis">twitter.com/mattxiv/status/152</span><span class="invisible">9181072931659777</span></a><br /><a href="https://mastodon.grml.de/tags/python" class="mention hashtag" rel="tag">#<span>python</span></a> <a href="https://mastodon.grml.de/tags/test" class="mention hashtag" rel="tag">#<span>test</span></a></p>'
+    doc = document_fromstring(sample_html.replace('<br />', "\n"))
+    print(doc.text_content())
+    result = printit(doc, "")
+    print(result)
+    print(doc.text_content())
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
+    #test()
\ No newline at end of file