From d67ffbced2d843596e93893fd1aee7f2a9551f47 Mon Sep 17 00:00:00 2001 From: Aldarien Date: Wed, 18 Nov 2020 01:41:51 -0300 Subject: [PATCH] Mejorar el uso de spacy para comandos --- Pipfile | 2 +- src/brain/brain.py | 5 ++++- src/email/definitions.py | 6 +++--- src/email/workers.py | 5 ++--- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Pipfile b/Pipfile index 0dd72a9..c9bb322 100644 --- a/Pipfile +++ b/Pipfile @@ -14,4 +14,4 @@ nltk = "*" spacy = "*" [requires] -python_version = "3.9" +python_version = "3.8" diff --git a/src/brain/brain.py b/src/brain/brain.py index d6f1934..7b8d0b6 100644 --- a/src/brain/brain.py +++ b/src/brain/brain.py @@ -1,5 +1,6 @@ import os import spacy +from pprint import pprint class Brain: @@ -9,4 +10,6 @@ class Brain: def get_command(self, phrase): doc = self.nlp(phrase) - return doc + verbs = [t for t in doc if t.pos_ == 'VERB'] + pprint(verbs) + return doc.ents[0] diff --git a/src/email/definitions.py b/src/email/definitions.py index 0ed85d1..6b0734d 100644 --- a/src/email/definitions.py +++ b/src/email/definitions.py @@ -1,4 +1,5 @@ -import email +from email.parser import BytesParser +from email.policy import default as DefaultPolicy class Email: @@ -10,8 +11,7 @@ class Email: status, raw_data = imap.uid('fetch', self.uid, '(RFC822)') if status != 'OK': raise Exception('Could not recover message {0}'.format(self.uid)) - - self.message = email.message_from_bytes(raw_data[0][1]) + self.message = BytesParser(policy=DefaultPolicy).parsebytes(text=raw_data[0][1]) def delete(self, imap): status, result = imap.uid('STORE', self.uid, '+FLAGS', '(\\Deleted)') diff --git a/src/email/workers.py b/src/email/workers.py index 4040802..54c0c94 100644 --- a/src/email/workers.py +++ b/src/email/workers.py @@ -70,7 +70,7 @@ class Obtenedor(Worker): for part in email_part.get_payload(): output += self.build_message(part) else: - html = email_part.get_payload(decode=True) + html = email_part.get_payload(decode=True).decode('utf-8') bs = BeautifulSoup(html, 'html.parser') if bs.body: html = bs.body.get_text() @@ -288,7 +288,6 @@ class Procesador(Worker): em = self.queue.get(timeout=self.frec) except queue.Empty: continue - print(em.text) command = self.brain.get_command(em.text) - [print(ent) for ent in command.ents] + self.end_turn()