Files
contabilidad/python/src/ai/network.py

127 lines
3.6 KiB
Python
Raw Normal View History

2021-12-06 22:13:06 -03:00
import json
import os
2021-12-22 21:53:30 -03:00
import time
import timeit
2021-12-06 22:13:06 -03:00
import tensorflow as tf
import sklearn
import numpy as np
from sklearn.preprocessing import LabelEncoder
import src.contabilidad.pdf as pdf
import src.contabilidad.text_handler as th
class Layer:
    """A dense layer kept as plain Python lists.

    The weights are a list of rows, one row per output value, each row
    holding one coefficient per input value. The bias is a list with one
    entry per output value.
    """

    def __init__(self):
        # Both stay None until set_size(), add_weight() or set_bias() runs.
        self.__weights = None
        self.__bias = None

    def set_size(self, inputs: int, size: int):
        """Reset the layer to all zeros.

        :param inputs: number of coefficients in every weight row.
        :param size: number of weight rows (and of bias entries).
        """
        self.__weights = [[0 for _ in range(inputs)] for _ in range(size)]
        self.__bias = [0 for _ in range(size)]

    def add_weight(self, vector: list, idx: int = None):
        """Add one weight row and return the layer for chaining.

        :param vector: the row of coefficients to add.
        :param idx: position to insert the row at; appended when None.
        :return: this layer.
        """
        # A freshly constructed layer has no weight list yet; create it
        # here so building a layer row by row does not fail on None.
        if self.__weights is None:
            self.__weights = []
        if idx is None:
            self.__weights.append(vector)
            return self
        self.__weights = self.__weights[:idx] + [vector] + self.__weights[idx:]
        return self

    def set_weight(self, value: float, weight_index: int, input_index: int):
        """Overwrite a single coefficient of an existing weight row.

        :param value: the new coefficient.
        :param weight_index: index of the weight row.
        :param input_index: index of the coefficient inside that row.
        """
        self.__weights[weight_index][input_index] = value

    def set_bias(self, value: list):
        """Replace the whole bias list with ``value``."""
        self.__bias = value

    def train(self, input_values: list, output_values: list):
        """Compute the relative error of each output against its target.

        NOTE(review): this method looks unfinished - the computed values
        are discarded and nothing in the layer is updated. It also raises
        ZeroDivisionError when a target value is 0.

        :param input_values: vector fed to get_output().
        :param output_values: expected value for every output.
        """
        output = self.get_output(input_values)
        errors = []  # never filled in; kept for the pending implementation
        for i, v in enumerate(output):
            error = (output_values[i] - v) / output_values[i]
            new_value = v * error

    def to_json(self):
        """Return the layer as a dict with the keys 'bias' and 'weights'.

        The values are the layer's own lists, not copies.
        """
        return {
            'bias': self.__bias,
            'weights': self.__weights
        }

    def get_output(self, vector: list):
        """Return ``weights . vector + bias`` as a list, one value per row.

        :param vector: input values; needs at least as many entries as the
            longest weight row, otherwise IndexError is raised.
        :return: list with one value per weight row.
        """
        output = []
        for i, row in enumerate(self.__weights):
            weighted_sum = sum(w * vector[j] for j, w in enumerate(row))
            # Append instead of assigning by index: the list starts empty,
            # so ``output[i] = ...`` would always raise IndexError.
            output.append(weighted_sum + self.__bias[i])
        return output
def layer_factory(layer_dict: dict):
    """Build a Layer from a dict holding the keys 'bias' and 'weights'.

    :param layer_dict: mapping with the bias list under 'bias' and the
        list of weight rows under 'weights'.
    :return: the populated Layer.
    """
    built = Layer()
    built.set_bias(layer_dict['bias'])
    # Rows are added one by one, in the order they appear in the dict.
    for row in layer_dict['weights']:
        built.add_weight(row)
    return built
class Network:
    """An ordered collection of layers that can be read from a JSON file."""

    def __init__(self, filename: str):
        """Remember the JSON file to read; nothing is loaded yet.

        :param filename: path of the JSON file used by load().
        """
        self._filename = filename
        self.__layers = None

    def load(self):
        """Read the JSON file and add every entry found under 'layers'.

        A file without a 'layers' key leaves the network unchanged.
        """
        with open(self._filename, encoding='utf-8') as f:
            data = json.load(f)
        if 'layers' in data:
            self.add_layers(data['layers'])

    def add_layers(self, layers: list):
        """Build a layer from each dict in ``layers`` and append it.

        :param layers: list of dicts in the format layer_factory() expects.
        """
        # The layer list starts out as None; create it on first use so
        # appending does not fail with AttributeError.
        if self.__layers is None:
            self.__layers = []
        for lr in layers:
            self.__layers.append(layer_factory(lr))
class AI:
    """Turns the text of password-protected PDFs into encoded phrase arrays."""

    def __init__(self, dictionary_filename, logger):
        """Create an empty instance.

        NOTE(review): ``dictionary_filename`` and ``logger`` are accepted
        but not stored or used here - confirm whether that is intended.
        """
        self.__dict = None
        self.__network = None
        self.__sources = None
        self._phrases = None
        self.filename = ''

    def add_source(self, text):
        """Queue a source for process_sources() and return self.

        :param text: the source to queue; process_sources() unpacks it as
            keyword arguments of process(), so it is presumably a dict with
            'filename' and 'password' keys - verify against callers.
        """
        if self.__sources is None:
            self.__sources = []
        self.__sources.append(text)
        return self

    def set_filename(self, filename: str):
        """Store ``filename`` on the instance and return self."""
        self.filename = filename
        return self

    def process_sources(self):
        """Run process() on every queued source, in insertion order."""
        # ``or []`` makes this a no-op when no source has been added yet,
        # instead of failing while iterating None.
        for source in self.__sources or []:
            self.process(**source)

    @staticmethod
    def _temp_pdf_path(filename):
        """Return the absolute path '<dir of filename>/<stem>-temp.pdf'."""
        directory, basename = os.path.split(filename)
        # Only the final extension of the base name is removed, so dots in
        # directory names or in the stem do not truncate the path.
        stem = os.path.splitext(basename)[0]
        return os.path.realpath(os.path.join(directory, stem + '-temp.pdf'))

    @staticmethod
    def _build_phrases(word_list, fits):
        """Encode every window of ``word_list`` as a zero-padded row.

        For each length from 1 to len(word_list) and each start index, the
        window ``word_list[start:start + length]`` is replaced by the codes
        of its words and padded with zeros up to len(word_list). Windows
        that run past the end are shorter, exactly as slicing yields them.

        :param word_list: sequence of hashable words.
        :param fits: code of each word, aligned with ``word_list``.
        :return: numpy array with one row per window.
        """
        total = len(word_list)
        # Map each word to the code at its first occurrence once, instead
        # of scanning the list with ``index`` for every word of every window.
        code_of = {}
        for word, code in zip(word_list, fits):
            code_of.setdefault(word, code)
        phrases = []
        for length in range(1, total + 1):
            for start in range(total):
                window = word_list[start:start + length]
                encoded = np.array([code_of[w] for w in window])
                padding = np.zeros([total - len(window)])
                phrases.append(np.append(encoded, padding))
        return np.array(phrases)

    def process(self, filename, password):
        """Decrypt a PDF, extract its words and store the encoded phrases.

        The decrypted copy is written next to the original as
        '<stem>-temp.pdf' and removed again once its text has been read.
        The result is stored in ``self._phrases``.

        :param filename: path of the encrypted PDF.
        :param password: password passed to pdf.remove_encryption().
        """
        temp = self._temp_pdf_path(filename)
        pdf.remove_encryption(filename, password, temp)
        try:
            obj = pdf.get_text(temp)
        finally:
            # Never leave the decrypted copy behind, even when the text
            # extraction fails.
            os.remove(temp)
        word_list = th.split_words(obj)
        fits = LabelEncoder().fit_transform(word_list)
        self._phrases = self._build_phrases(word_list, fits)

    def active_train(self):
        """Placeholder; not implemented yet."""
        pass