79 lines
2.9 KiB
Python
79 lines
2.9 KiB
Python
|
from src.worker import Worker
|
||
|
from types import SimpleNamespace
|
||
|
from entry.email.inbox import connect, check_inbox
|
||
|
import re
|
||
|
from bs4 import BeautifulSoup
|
||
|
import email.utils
|
||
|
from src.communication.message import Message
|
||
|
import time
|
||
|
|
||
|
|
||
|
class Obtenedor(Worker):
    """
    Worker that fetches the list of emails and queues the new ones.

    On every polling cycle it connects to the configured mail server, reads
    the inbox, wraps each email whose uid has not been handled yet in a
    ``Message`` and puts it on the ``emails`` queue.

    ``self.logger``, ``self.diary`` and ``self.stop`` are used by ``run`` but
    are not defined in this class; presumably ``Worker`` provides them.
    """

    # Patterns used to collapse whitespace in the extracted text.
    _NEWLINES = re.compile(r'\n+')
    _SPACES = re.compile(' +')

    def __init__(self, configs, params):
        """
        Read the connection settings and wire up the output queue.

        :param configs: configuration object, queried via ``configs.get(key)``.
        :param params: mapping holding the target queue under
            ``params['queues']['emails']``.
        """
        super().__init__(configs, params)

        self.url = configs.get('email.server')
        self.port = configs.get('email.port')
        # Credentials namespace. The 'user' field is kept as in the original
        # code; the login name actually passed to connect() is 'name'.
        self.user = SimpleNamespace(user='', password='')
        self.user.name = configs.get('email.user.name')
        self.user.password = configs.get('email.user.password')
        self.ssl = configs.get('email.ssl')

        # uids of the emails that were already put on the queue.
        self.revisados = []

        self.queue = params['queues']['emails']
        # Pause between polling cycles; handed to time.sleep() in run().
        self.frec = configs.get('supervisor.wait')

    def is_revisado(self, uid):
        """Return True when ``uid`` has already been registered as handled."""
        return uid in self.revisados

    def add_revisado(self, uid):
        """Register ``uid`` as handled; registering it twice is a no-op."""
        if not self.is_revisado(uid):
            self.revisados.append(uid)

    def build_message(self, email_part):
        """
        Extract the text of an email, or of one of its parts.

        Multipart parts are walked recursively. Every leaf payload is parsed
        with BeautifulSoup, reduced to its text (the ``<body>`` text when a
        body tag exists) and has runs of newlines/spaces collapsed to a
        single space.

        :param email_part: message object exposing ``is_multipart()`` and
            ``get_payload()``.
        :return: flat list of strings, one per leaf part that had a payload.
        """
        output = []
        if email_part.is_multipart():
            for part in email_part.get_payload():
                # extend (not append): run() feeds this list to str.join,
                # which raises TypeError on nested lists.
                output.extend(self.build_message(part))
            return output

        payload = email_part.get_payload(decode=True)
        if payload is None:
            # No decodable payload in this part; contribute no text.
            return output

        soup = BeautifulSoup(payload, 'html.parser')
        node = soup.body if soup.body else soup
        text = node.get_text()
        text = self._SPACES.sub(' ', self._NEWLINES.sub(' ', text)).strip(' ')
        output.append(text)
        return output

    def _enqueue_new(self, emails):
        """Queue every not-yet-handled email in ``emails``; return how many."""
        count = 0
        for em in emails:
            if self.is_revisado(em.uid):
                continue
            sender = em.message['from']
            # A missing Subject header yields None; normalise it so the
            # concatenation below cannot fail.
            subject = em.message['subject']
            subject = '' if subject is None else str(subject)
            text = ' '.join([subject + '.'] + self.build_message(em.message))
            # NOTE(review): parsedate_to_datetime raises when the Date header
            # is missing or malformed - confirm whether such emails can occur.
            msg = Message('email', text=text, original=em, sender=sender,
                          datetime=email.utils.parsedate_to_datetime(em.message['Date']))
            self.queue.put(msg)
            self.add_revisado(em.uid)
            count += 1
        return count

    def run(self) -> None:
        """
        Poll the inbox until the stop flag is set.

        Each cycle connects, checks the inbox and queues the new emails. The
        number of queued emails is written to the diary, except when
        ``check_inbox`` returns ``None``. The worker then sleeps
        ``self.frec`` before the next cycle.
        """
        self.logger.log('Starting', type(self))
        self.diary.put({'action': 'Inicio de jornada de Obtenedor'})
        while not self.stop.is_set():
            nuevos = None
            with connect(self.url, self.port, self.user.name,
                         self.user.password, self.ssl) as imap:
                self.logger.log('Getting emails', type(self))
                emails = check_inbox(imap)
                if emails is not None:
                    nuevos = self._enqueue_new(emails)
            if nuevos is not None:
                self.diary.put({'action': 'Obtenidos {0} correos nuevos'.format(nuevos)})
            # Always pause, also when the inbox check returned None, so a
            # failing check does not turn into a tight reconnect loop.
            time.sleep(self.frec)
        self.logger.log('Exiting', type(self))
        self.diary.put({'action': 'Terminando el turno de Obtenedor'})
|