From e2fd9b7daa40dfc6ad6239817ed369f6d63bdf86 Mon Sep 17 00:00:00 2001 From: Aldarien Date: Tue, 17 Nov 2020 18:44:32 -0300 Subject: [PATCH] Usando spaCy para NLP que permite diferenciar entidades en el texto --- Pipfile | 2 ++ src/brain/aiml.py | 3 +++ src/brain/brain.py | 12 ++++++++++ src/email/main.py | 2 ++ src/email/workers.py | 24 ++++++++++++++------ src/instrucciones.py | 52 +++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 85 insertions(+), 10 deletions(-) create mode 100644 src/brain/aiml.py create mode 100644 src/brain/brain.py diff --git a/Pipfile b/Pipfile index bcf6f42..0dd72a9 100644 --- a/Pipfile +++ b/Pipfile @@ -10,6 +10,8 @@ keyboard = "*" beautifulsoup4 = "*" pytz = "*" flask = "*" +nltk = "*" +spacy = "*" [requires] python_version = "3.9" diff --git a/src/brain/aiml.py b/src/brain/aiml.py new file mode 100644 index 0000000..cea7165 --- /dev/null +++ b/src/brain/aiml.py @@ -0,0 +1,3 @@ +class Aiml: + def __init__(self): + pass \ No newline at end of file diff --git a/src/brain/brain.py b/src/brain/brain.py new file mode 100644 index 0000000..d6f1934 --- /dev/null +++ b/src/brain/brain.py @@ -0,0 +1,12 @@ +import os +import spacy + + +class Brain: + def __init__(self, data_folder): + self.filename = os.path.join(data_folder, 'brain.json') + self.nlp = spacy.load('es_core_news_sm') + + def get_command(self, phrase): + doc = self.nlp(phrase) + return doc diff --git a/src/email/main.py b/src/email/main.py index 7f1c185..f9e6f55 100644 --- a/src/email/main.py +++ b/src/email/main.py @@ -6,6 +6,7 @@ import pytz from src.bosses import Bosses from src.instrucciones import Instrucciones from src.email.supervisor import Email +from src.brain.brain import Brain def set_params(args, configs): @@ -18,6 +19,7 @@ def set_params(args, configs): }, 'bosses': Bosses(args.data_folder), 'instrucciones': Instrucciones(args.data_folder), + 'brain': Brain(args.data_folder), 'logging': Logging(configs.get('timezone'), args.log_folder, log_name), 'logger': { 'name': log_name diff --git a/src/email/workers.py b/src/email/workers.py index 39edb6b..4040802 100644 --- a/src/email/workers.py +++ b/src/email/workers.py @@ -103,7 +103,7 @@ class Obtenedor(Worker): self.logger.log('{0} new emails found'.format(e), type(self)) self.diary.put({'message': 'Obtenidos {0} correos nuevos'.format(e)}) time.sleep(self.frec) - self.save_revisados() + # self.save_revisados() self.end_turn() @@ -141,7 +141,7 @@ class Validador(Worker): if self.validar_instrucciones(message): self.invalidos.put(message) return - self.borrar.put(message) + self.borrar.put(message.original.uid) def run(self): self.start_turn() @@ -245,11 +245,12 @@ class Borrador(Worker): return for uid in self.borrar: - status, ids = imap.uid('store', uid, '+FLAGS', b'\\Deleted') - if status != 'OK': - continue + print(uid) + # status, ids = imap.uid('store', uid, '+FLAGS', b'\\Deleted') + # if status != 'OK': + # continue - imap.expunge() + # imap.expunge() self.borrar = [] def run(self) -> None: @@ -276,9 +277,18 @@ class Procesador(Worker): super(Procesador, self).__init__(configs=configs, params=params) self.name = 'Email:Procesador' + self.queue = params['queues']['valid'] + self.frec = configs.get('supervisor.wait') + self.brain = params['brain'] def run(self) -> None: self.start_turn() while not self.stop.is_set(): - continue + try: + em = self.queue.get(timeout=self.frec) + except queue.Empty: + continue + print(em.text) + command = self.brain.get_command(em.text) + [print(ent) for ent in command.ents] self.end_turn() diff --git a/src/instrucciones.py b/src/instrucciones.py index f9e5d80..938db92 100644 --- a/src/instrucciones.py +++ b/src/instrucciones.py @@ -2,10 +2,43 @@ import os import json +class Command: + def __init__(self): + self.command = '' + + +class Commands: + def __init__(self, data_folder): + self.filename = os.path.join(data_folder, 'commands.json') + data = [] + try: + with open(self.filename, 'r') as f: + data = json.load(f) + except FileNotFoundError: + pass + + self.commands = [] + for c in data: + cmd = Command() + cmd.command = c + self.commands.append(cmd) + + def get(self, command): + for i, c in enumerate(self.commands): + if command == c.command: + return i + return None + + def find(self, command): + return self.commands[self.get(command=command)] + + class Instruccion: def __init__(self): self.instruccion = '' self.aliases = [] + self.command = None + self.params = {} class Instrucciones: @@ -18,12 +51,20 @@ class Instrucciones: except FileNotFoundError: pass + self.commands = Commands(data_folder) + self.instrucciones = [] for d in data: i = Instruccion() - i.instruccion = d['instruccion'] - for a in d['aliases']: - i.aliases.append(a) + i.instruccion = d['name'] + if 'aliases' in d: + for a in d['aliases']: + i.aliases.append(a) + if 'params' in d: + for param, val in d['params'].items(): + i.params[param] = val + if 'command' in d: + i.command = self.commands.find(d['command']) self.instrucciones.append(i) def get(self, instruccion): @@ -35,6 +76,11 @@ class Instrucciones: if instruccion in ins.aliases: return i + def find(self, instruccion): + if not self.is_valid(instruccion): + return None + return self.instrucciones[self.get(instruccion)] + def is_valid(self, instruccion): for i in self.instrucciones: if instruccion == i.instruccion: