Add parsing for messages with lxml
This commit is contained in:
parent
c0c7aa772f
commit
6b576f102a
29
main.py
29
main.py
|
@ -3,6 +3,7 @@ from settings import *
|
|||
from mastodon import Mastodon
|
||||
from os.path import exists
|
||||
import tweepy
|
||||
from lxml.html import document_fromstring
|
||||
def main():
|
||||
if not exists(app_path):
|
||||
Mastodon.create_app(
|
||||
|
@ -36,7 +37,33 @@ def main():
|
|||
timeline = mastodon_api.account_statuses(mastodon_user,exclude_replies=True)
|
||||
for toot in timeline:
|
||||
if(toot.visibility == 'public' and toot.account.id == mastodon_user.id and not toot.reblog):
|
||||
print(toot)
|
||||
tootfrm(toot.content)
|
||||
#print(timeline)
|
||||
def tootfrm(content):
    """Print the plain-text rendering of a toot's HTML ``content``.

    Each ``<br />`` is turned into a newline, the HTML is parsed with lxml,
    ``printit`` rewrites the mention elements in place, and the resulting
    text content is printed to stdout.

    Args:
        content: HTML fragment of a toot. Empty, whitespace-only or ``None``
            content is skipped silently.

    Returns:
        None.
    """
    # lxml's document_fromstring raises ParserError ("Document is empty") on
    # blank input; without this guard one such toot would abort the whole
    # timeline loop in the caller.
    # NOTE(review): presumably media-only toots arrive with empty content --
    # confirm against the Mastodon API.
    if not content or not content.strip():
        return

    content = content.replace('<br />', "\n")
    res = document_fromstring(content)
    # printit mutates the tree in place; its return value is not needed here.
    printit(res, "")
    print(res.text_content())
|
||||
def printit(parent, body, block=" "):
    """Recursively rewrite mention elements below ``parent`` in place.

    Every descendant element whose class list contains ``u-url`` gets its
    text replaced by its full text content followed by ``"@grml.de"``. Its
    first nested ``<span>``, if any, is then emptied and unwrapped so that
    the original text does not appear twice.

    Args:
        parent: Iterable of child elements (an lxml HTML element in practice).
        body: Value threaded through the recursion and returned unchanged.
        block: Indentation prefix that grows by one space per nesting level;
            only useful for debug output.

    Returns:
        ``body`` as passed in.
    """
    for el in parent:
        if 'u-url' in el.classes:
            # NOTE(review): the suffix is appended to every u-url element;
            # mentions of accounts on other instances may need their own
            # domain instead -- confirm.
            el.text = el.text_content() + "@grml.de"
            # Look the span up once and tolerate its absence; dereferencing
            # a missing span raised AttributeError before.
            span = el.find('.//span')
            if span is not None:
                span.text = ''
                span.drop_tag()
        body = printit(el, body, block + " ")
    return body
|
||||
def test():
    """Run the mention rewriting on a hard-coded sample toot and print it.

    Prints the text content before the rewrite, then the value returned by
    ``printit``, then the text content after the rewrite.
    """
    sample_html = '<p>Python Test <a href="https://mastodon.grml.de/tags/ignore" class="mention hashtag" rel="tag">#<span>ignore</span></a><br /><span class="h-card"><a href="https://mastodon.grml.de/@toot" class="u-url mention">@<span>toot</span></a></span><br /><a href="https://twitter.com/mattxiv/status/1529181072931659777" target="_blank" rel="nofollow noopener noreferrer"><span class="invisible">https://</span><span class="ellipsis">twitter.com/mattxiv/status/152</span><span class="invisible">9181072931659777</span></a><br /><a href="https://mastodon.grml.de/tags/python" class="mention hashtag" rel="tag">#<span>python</span></a> <a href="https://mastodon.grml.de/tags/test" class="mention hashtag" rel="tag">#<span>test</span></a></p>'

    # Line breaks become newlines before parsing, as in tootfrm.
    tree = document_fromstring(sample_html.replace('<br />', "\n"))

    # Text before the tree is modified.
    print(tree.text_content())

    # printit edits the tree in place and returns the accumulator it got.
    print(printit(tree, ""))

    # Text after the mention rewrite.
    print(tree.text_content())
|
||||
if __name__ == "__main__":
    # Script entry point: run main() when executed directly.
    main()
    # Uncomment to run test() on its hard-coded sample toot.
    #test()
|
Loading…
Reference in New Issue