Add parsing for messages with lxml
This commit is contained in:
parent
c0c7aa772f
commit
6b576f102a
31
main.py
31
main.py
|
@ -3,6 +3,7 @@ from settings import *
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
from os.path import exists
|
from os.path import exists
|
||||||
import tweepy
|
import tweepy
|
||||||
|
from lxml.html import document_fromstring
|
||||||
def main():
|
def main():
|
||||||
if not exists(app_path):
|
if not exists(app_path):
|
||||||
Mastodon.create_app(
|
Mastodon.create_app(
|
||||||
|
@ -36,7 +37,33 @@ def main():
|
||||||
timeline = mastodon_api.account_statuses(mastodon_user,exclude_replies=True)
|
timeline = mastodon_api.account_statuses(mastodon_user,exclude_replies=True)
|
||||||
for toot in timeline:
|
for toot in timeline:
|
||||||
if(toot.visibility == 'public' and toot.account.id == mastodon_user.id and not toot.reblog):
|
if(toot.visibility == 'public' and toot.account.id == mastodon_user.id and not toot.reblog):
|
||||||
print(toot)
|
tootfrm(toot.content)
|
||||||
#print(timeline)
|
#print(timeline)
|
||||||
|
def tootfrm(content):
    """Print the plain-text rendering of a toot's HTML content.

    ``<br />`` tags are turned into newlines, the HTML is parsed with lxml,
    mention links are rewritten in place by ``printit`` and the resulting
    text content of the document is printed.

    Args:
        content: HTML string of the toot body.
    """
    # lxml refuses to parse blank input (ParserError: "Document is empty").
    # A toot without any text must not abort processing of the timeline,
    # so emit an empty line for it and stop here.
    if not content or not content.strip():
        print()
        return

    html = content.replace('<br />', "\n")
    doc = document_fromstring(html)
    # Called for its side effect on the tree; the returned body is unused.
    printit(doc, "")
    print(doc.text_content())
|
||||||
|
def printit(parent, body, block=" ", domain="grml.de"):
    """Recursively walk the children of *parent* and qualify mention links.

    Every element carrying the CSS class ``u-url`` gets its text replaced by
    its full text content followed by ``@<domain>``. The first ``<span>``
    below it is then emptied and its tag dropped so the name is not repeated
    in the element's text content.

    Args:
        parent: Element whose children are visited (modified in place).
        body: Value threaded through the recursion; it is returned unchanged.
        block: Indentation prefix that grows by one space per nesting level;
            it is only passed along and not otherwise used.
        domain: Instance domain appended to mentions after an ``@``.

    Returns:
        The ``body`` value that was passed in.
    """
    suffix = "@" + domain
    for el in parent:
        if 'u-url' in el.classes:
            el.text = el.text_content() + suffix
            # Look the span up once; find() returns None when the link has
            # no <span>, which must not raise.
            span = el.find('.//span')
            if span is not None:
                span.text = ''
                span.drop_tag()
        body = printit(el, body, block + " ", domain)
    return body
|
||||||
|
def test():
    """Run the mention rewriting against a fixed sample toot and print it.

    Prints the text content of the parsed sample before the rewrite, then
    the value returned by ``printit``, then the text content afterwards.
    """
    sample_html = '<p>Python Test <a href="https://mastodon.grml.de/tags/ignore" class="mention hashtag" rel="tag">#<span>ignore</span></a><br /><span class="h-card"><a href="https://mastodon.grml.de/@toot" class="u-url mention">@<span>toot</span></a></span><br /><a href="https://twitter.com/mattxiv/status/1529181072931659777" target="_blank" rel="nofollow noopener noreferrer"><span class="invisible">https://</span><span class="ellipsis">twitter.com/mattxiv/status/152</span><span class="invisible">9181072931659777</span></a><br /><a href="https://mastodon.grml.de/tags/python" class="mention hashtag" rel="tag">#<span>python</span></a> <a href="https://mastodon.grml.de/tags/test" class="mention hashtag" rel="tag">#<span>test</span></a></p>'
    # Line breaks become newlines before parsing.
    doc = document_fromstring(sample_html.replace('<br />', "\n"))
    print(doc.text_content())
    result = printit(doc, "")
    print(result)
    print(doc.text_content())
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
#test()
|
Loading…
Reference in New Issue