import email.utils
import re
import time
from types import SimpleNamespace

from bs4 import BeautifulSoup

from entry.email.inbox import connect, check_inbox
from src.communication.message import Message
from src.worker import Worker


class Obtenedor(Worker):
    """Worker that fetches the list of emails.

    On every polling cycle it opens a connection with ``connect``, asks
    ``check_inbox`` for the current emails, and puts one ``Message`` on the
    ``emails`` queue for each email whose ``uid`` has not been handled yet.
    """

    def __init__(self, configs, params):
        """Read the connection settings and grab the output queue.

        Args:
            configs: Object exposing ``get(key)``; the keys read here are
                ``email.server``, ``email.port``, ``email.user.name``,
                ``email.user.password``, ``email.ssl`` and
                ``supervisor.wait``.
            params: Mapping; ``params['queues']['emails']`` is the queue
                that receives the built messages.
        """
        super().__init__(configs, params)
        self.url = configs.get('email.server')
        self.port = configs.get('email.port')
        # The empty ``user`` attribute is kept so the namespace exposes the
        # same attributes it always did (``user``, ``password``, ``name``).
        self.user = SimpleNamespace(user='', password='')
        self.user.name = configs.get('email.user.name')
        self.user.password = configs.get('email.user.password')
        self.ssl = configs.get('email.ssl')
        # uids of the emails already queued during this worker's lifetime.
        self.revisados = []
        self.queue = params['queues']['emails']
        # Value handed to time.sleep() between two polling cycles.
        self.frec = configs.get('supervisor.wait')

    def is_revisado(self, uid):
        """Return True if ``uid`` was already recorded by ``add_revisado``."""
        return uid in self.revisados

    def add_revisado(self, uid):
        """Record ``uid`` as handled; recording the same uid twice is a no-op."""
        if self.is_revisado(uid):
            return
        self.revisados.append(uid)

    def build_message(self, email_part):
        """Extract the text of an email part as a flat list of strings.

        Multipart parts are walked recursively. Every leaf part is decoded,
        parsed with BeautifulSoup, reduced to its text (the ``<body>`` text
        when the parser finds a body), and whitespace-normalised.

        Args:
            email_part: Object providing ``is_multipart()`` and
                ``get_payload()`` (presumably an ``email.message.Message``;
                verify against ``check_inbox``).

        Returns:
            A flat list with one string per leaf part, in document order.
        """
        output = []
        if email_part.is_multipart():
            for part in email_part.get_payload():
                # extend, not append: appending the recursive result nested
                # a list inside the output and made ' '.join() in run()
                # raise TypeError for every multipart email.
                output.extend(self.build_message(part))
        else:
            # NOTE(review): every leaf is parsed as HTML regardless of its
            # content type, attachments included - confirm this is intended.
            html = email_part.get_payload(decode=True)
            bs = BeautifulSoup(html, 'html.parser')
            if bs.body:
                html = bs.body.get_text()
            else:
                html = bs.get_text()
            # Collapse newline runs, then space runs, into single spaces.
            html = re.sub(' +', ' ', re.sub("\n+", ' ', html)).strip(' ')
            output.append(html)
        return output

    def _poll_inbox(self):
        """Run one polling cycle: connect, queue the unseen emails.

        Returns:
            The number of emails queued, or ``None`` when ``check_inbox``
            returned ``None``.
        """
        with connect(self.url, self.port, self.user.name,
                     self.user.password, self.ssl) as imap:
            self.logger.log('Getting emails', type(self))
            emails = check_inbox(imap)
            if emails is None:
                return None

            nuevos = 0
            for em in emails:
                if self.is_revisado(em.uid):
                    continue
                sender = em.message['from']
                partes = self.build_message(em.message)
                subject = em.message['subject']
                # A missing Subject header is looked up as None; leave the
                # prefix out instead of failing on ``None + '.'``.
                if subject is not None:
                    partes.insert(0, subject + '.')
                text = ' '.join(partes)
                # NOTE(review): a missing or malformed Date header still
                # raises here; no fallback is applied because what Message
                # accepts for ``datetime`` is not visible from this file.
                msg = Message('email', text=text, original=em, sender=sender,
                              datetime=email.utils.parsedate_to_datetime(
                                  em.message['Date']))
                self.queue.put(msg)
                self.add_revisado(em.uid)
                nuevos += 1
            return nuevos

    def run(self) -> None:
        """Poll the inbox until ``self.stop`` is set.

        After every cycle that produced an email list, a diary entry with
        the number of newly queued emails is written. The worker then
        sleeps ``self.frec`` before polling again.
        """
        self.logger.log('Starting', type(self))
        self.diary.put({'action': 'Inicio de jornada de Obtenedor'})
        while not self.stop.is_set():
            nuevos = self._poll_inbox()
            if nuevos is not None:
                self.diary.put(
                    {'action': 'Obtenidos {0} correos nuevos'.format(nuevos)})
            # Always wait between cycles. Previously the ``emails is None``
            # path skipped the sleep and reconnected in a tight loop.
            time.sleep(self.frec)
        self.logger.log('Exiting', type(self))
        self.diary.put({'action': 'Terminando el turno de Obtenedor'})