Compare commits
14 Commits
python
...
430e29eaec
Author | SHA1 | Date | |
---|---|---|---|
430e29eaec | |||
79c7d5ad63 | |||
e6ebb2c279 | |||
45952bb3ac | |||
6b03d62ce0 | |||
894cc26b21 | |||
64ffb53f0c | |||
42310ef0e4 | |||
a6362a6770 | |||
e9c63abc3a | |||
9f47c8a85f | |||
34eedb93d7 | |||
0e5714edc8 | |||
f33bddfbea |
@ -1,3 +1,4 @@
|
||||
COMPOSE_PROFILES=
|
||||
MYSQL_HOST=
|
||||
MYSQL_ROOT_PASSWORD=
|
||||
MYSQL_DATABASE=
|
||||
|
@ -24,4 +24,11 @@ class Base {
|
||||
$key = urlencode(base64_encode($signature));
|
||||
return $this->withJson($response, ['key' => $key]);
|
||||
}
|
||||
/**
 * Writes the HTML produced by phpinfo() into the response body.
 *
 * The output is captured through an output buffer so it ends up in the
 * PSR-7 body instead of being echoed directly.
 *
 * NOTE(review): phpinfo() prints the full PHP configuration and environment
 * variables; confirm the route that maps to this action is not reachable
 * without authentication in production.
 *
 * @param Request $request Incoming request (not read).
 * @param Response $response Response whose body receives the captured output.
 * @return Response The same response instance that was passed in.
 */
public function info(Request $request, Response $response): Response {
  ob_start();
  phpinfo();
  $response->getBody()->write(ob_get_clean());
  return $response;
}
|
||||
}
|
||||
|
@ -13,33 +13,35 @@ class Categorias {
|
||||
|
||||
public function __invoke(Request $request, Response $response, Factory $factory, Service $service): Response {
|
||||
$categorias = $factory->find(Categoria::class)->many();
|
||||
array_walk($categorias, function(&$item) use ($service) {
|
||||
$arr = $item->toArray();
|
||||
$arr['cuentas'] = array_map(function($item) {
|
||||
return $item->toArray();
|
||||
}, $item->cuentas());
|
||||
$maps = ['activo', 'pasivo', 'ganancia', 'perdida'];
|
||||
foreach ($maps as $m) {
|
||||
$p = $m . 's';
|
||||
$t = ucfirst($m);
|
||||
$cuentas = $item->getCuentasOf($t);
|
||||
if ($cuentas === false or $cuentas === null) {
|
||||
$arr[$p] = 0;
|
||||
continue;
|
||||
if ($categorias !== null) {
|
||||
array_walk($categorias, function(&$item) use ($service) {
|
||||
$arr = $item->toArray();
|
||||
if ($item->cuentas()) {
|
||||
$arr['cuentas'] = array_map(function($item) {
|
||||
return $item->toArray();
|
||||
}, $item->cuentas());
|
||||
}
|
||||
$arr[$p] = array_reduce($cuentas, function($sum, $item) use($service) {
|
||||
return $sum + $item->saldo($service, true);
|
||||
});
|
||||
}
|
||||
$item = $arr;
|
||||
});
|
||||
if ($categorias) {
|
||||
usort($categorias, function($a, $b) {
|
||||
return strcmp($a['nombre'], $b['nombre']);
|
||||
});
|
||||
$maps = ['activo', 'pasivo', 'ganancia', 'perdida'];
|
||||
foreach ($maps as $m) {
|
||||
$p = $m . 's';
|
||||
$t = ucfirst($m);
|
||||
$cuentas = $item->getCuentasOf($t);
|
||||
if ($cuentas === false or $cuentas === null) {
|
||||
$arr[$p] = 0;
|
||||
continue;
|
||||
}
|
||||
$arr[$p] = array_reduce($cuentas, function($sum, $item) use($service) {
|
||||
return $sum + $item->saldo($service, true);
|
||||
});
|
||||
}
|
||||
$item = $arr;
|
||||
});
|
||||
usort($categorias, function($a, $b) {
|
||||
return strcmp($a['nombre'], $b['nombre']);
|
||||
});
|
||||
}
|
||||
$output = [
|
||||
'categorias' => $categorias
|
||||
'categorias' => $categorias
|
||||
];
|
||||
return $this->withJson($response, $output);
|
||||
}
|
||||
|
@ -12,8 +12,13 @@ class Cuentas {
|
||||
use Json;
|
||||
|
||||
public function __invoke(Request $request, Response $response, Factory $factory): Response {
|
||||
$cuentas = $factory->find(Cuenta::class)->array();
|
||||
$cuentas = $factory->find(Cuenta::class)->many();
|
||||
if ($cuentas) {
|
||||
array_walk($cuentas, function (&$item) {
|
||||
$arr = $item->toArray();
|
||||
$arr['categoria'] = $item->categoria()->toArray();
|
||||
$item = $arr;
|
||||
});
|
||||
usort($cuentas, function($a, $b) {
|
||||
$t = strcmp($a['tipo']['descripcion'], $b['tipo']['descripcion']);
|
||||
if ($t != 0) {
|
||||
|
@ -3,19 +3,47 @@ namespace Contabilidad\Common\Controller;
|
||||
|
||||
use Psr\Http\Message\ServerRequestInterface as Request;
|
||||
use Psr\Http\Message\ResponseInterface as Response;
|
||||
use Psr\Container\ContainerInterface as Container;
|
||||
use ProVM\Common\Define\Controller\Json;
|
||||
use ProVM\Common\Factory\Model as Factory;
|
||||
use Contabilidad\Common\Service\DocumentHandler as Handler;
|
||||
use Contabilidad\Cuenta;
|
||||
|
||||
/**
 * Controller for importing account statement files.
 */
class Import {
  use Json;

  /**
   * Stores the uploaded file "archivo" under the uploads folder, in a
   * sub-folder chosen by its media type, renamed to
   * "<account> - <category> - <date>.<extension>".
   *
   * The JSON answer always contains the keys "input", "new_name" and
   * "uploaded"; "uploaded" is false and "new_name" is null whenever the file
   * was not stored (missing upload, upload error, unsupported media type or
   * unknown account).
   *
   * @param Request $request Request carrying the form fields "cuenta" and "fecha" and the file "archivo".
   * @param Response $response Response used to build the JSON answer.
   * @param Factory $factory Model factory used to look up the account.
   * @param Container $container Container exposing "folders" with an "uploads" property.
   * @return Response JSON response describing the upload.
   */
  public function __invoke(Request $request, Response $response, Factory $factory, Container $container): Response {
    $post = (array) $request->getParsedBody();
    $files = $request->getUploadedFiles();
    $file = $files['archivo'] ?? null;
    // Defaults so the answer is well-formed on every failure path.
    $new_name = null;
    $status = false;
    if ($file === null) {
      return $this->withJson($response, ['input' => null, 'new_name' => $new_name, 'uploaded' => $status]);
    }
    $valid_media = [
      'text/csv' => 'csvs',
      'application/pdf' => 'pdfs',
      'application/vnd.ms-excel' => 'xlss',
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlss',
      'application/json' => 'jsons'
    ];
    $media = $file->getClientMediaType();
    $cuenta = isset($post['cuenta']) ? $factory->find(Cuenta::class)->one($post['cuenta']) : null;
    // NOTE(review): the media type is reported by the client and is not
    // verified against the file contents.
    if ($file->getError() === UPLOAD_ERR_OK and isset($valid_media[$media]) and is_object($cuenta)) {
      $filenfo = new \SplFileInfo((string) $file->getClientFilename());
      $base = implode(' - ', [$cuenta->nombre, $cuenta->categoria()->nombre, $post['fecha'] ?? '']);
      // Every component comes from user-controlled data; strip anything that
      // could change the destination directory.
      $new_name = implode('.', [$this->sanitizeName($base), $this->sanitizeName($filenfo->getExtension())]);
      $to = implode(DIRECTORY_SEPARATOR, [$container->get('folders')->uploads, $valid_media[$media], $new_name]);
      $file->moveTo($to);
      $status = file_exists($to);
    }
    $output = [
      'input' => [
        'name' => $file->getClientFilename(),
        'type' => $media,
        'size' => $file->getSize(),
        'error' => $file->getError()
      ],
      'new_name' => $new_name,
      'uploaded' => $status
    ];
    return $this->withJson($response, $output);
  }

  /**
   * Returns the result of the document handler as JSON.
   *
   * @param Request $request Incoming request (not read).
   * @param Response $response Response used to build the JSON answer.
   * @param Handler $handler Service whose handle() result is returned.
   * @return Response JSON response with the handler output.
   */
  public function uploads(Request $request, Response $response, Handler $handler): Response {
    return $this->withJson($response, $handler->handle());
  }

  /**
   * Replaces path separators, control characters and other characters that
   * are unsafe in file names with an underscore.
   *
   * @param string $value Raw file name component.
   * @return string Component that cannot escape its directory.
   */
  private function sanitizeName(string $value): string {
    $clean = preg_replace('/[\/\\\\:*?"<>|\x00-\x1f]+/', '_', $value);
    return $clean === null ? '_' : $clean;
  }
}
|
||||
|
@ -13,31 +13,34 @@ class TiposCategorias {
|
||||
|
||||
public function __invoke(Request $request, Response $response, Factory $factory, Service $service): Response {
|
||||
$tipos = $factory->find(TipoCategoria::class)->many();
|
||||
array_walk($tipos, function(&$item) use ($service) {
|
||||
$arr = $item->toArray();
|
||||
$arr['categorias'] = array_map(function($item) {
|
||||
return $item->toArray();
|
||||
}, $item->categorias());
|
||||
$arr['saldo'] = abs($item->saldo($service));
|
||||
$maps = ['activo', 'pasivo', 'ganancia', 'perdida'];
|
||||
foreach ($maps as $m) {
|
||||
$p = $m . 's';
|
||||
$t = ucfirst($m);
|
||||
$cuentas = $item->getCuentasOf($t);
|
||||
if ($cuentas === false or $cuentas === null) {
|
||||
$arr[$p] = 0;
|
||||
continue;
|
||||
if ($tipos !== null) {
|
||||
array_walk($tipos, function(&$item) use ($service) {
|
||||
$arr = $item->toArray();
|
||||
$arr['categorias'] = $item->categorias();
|
||||
if ($arr['categorias'] !== null) {
|
||||
$arr['categorias'] = array_map(function($item) {
|
||||
return $item->toArray();
|
||||
}, $item->categorias());
|
||||
}
|
||||
$arr[$p] = array_reduce($cuentas, function($sum, $item) use($service) {
|
||||
return $sum + $item->saldo($service, true);
|
||||
});
|
||||
}
|
||||
$item = $arr;
|
||||
});
|
||||
if ($tipos) {
|
||||
usort($tipos, function($a, $b) {
|
||||
return strcmp($a['descripcion'], $b['descripcion']);
|
||||
});
|
||||
$arr['saldo'] = abs($item->saldo($service));
|
||||
$maps = ['activo', 'pasivo', 'ganancia', 'perdida'];
|
||||
foreach ($maps as $m) {
|
||||
$p = $m . 's';
|
||||
$t = ucfirst($m);
|
||||
$cuentas = $item->getCuentasOf($t);
|
||||
if ($cuentas === false or $cuentas === null) {
|
||||
$arr[$p] = 0;
|
||||
continue;
|
||||
}
|
||||
$arr[$p] = array_reduce($cuentas, function($sum, $item) use($service) {
|
||||
return $sum + $item->saldo($service, true);
|
||||
});
|
||||
}
|
||||
$item = $arr;
|
||||
});
|
||||
usort($tipos, function($a, $b) {
|
||||
return strcmp($a['descripcion'], $b['descripcion']);
|
||||
});
|
||||
}
|
||||
$output = [
|
||||
'tipos' => $tipos
|
||||
|
@ -20,7 +20,7 @@ final class TipoCuenta extends AbstractMigration
|
||||
{
|
||||
$this->table('tipos_cuenta')
|
||||
->addColumn('descripcion', 'string')
|
||||
->addColumn('color', 'string', ['length' => 6])
|
||||
->addColumn('color', 'string', ['length' => 6, 'default' => 'ffffff'])
|
||||
->create();
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,8 @@ server {
|
||||
access_log /var/log/nginx/access.log;
|
||||
root /app/public;
|
||||
|
||||
client_max_body_size 50M;
|
||||
|
||||
location / {
|
||||
try_files $uri $uri/ /index.php?$query_string;
|
||||
}
|
||||
|
@ -1,2 +1,4 @@
|
||||
log_errors = true
|
||||
error_log = /var/log/php/error.log
|
||||
error_log = /var/log/php/error.log
|
||||
upload_max_filesize = 50M
|
||||
max_input_vars = 5000
|
||||
|
@ -3,4 +3,5 @@ use Contabilidad\Common\Controller\Base;
|
||||
|
||||
$app->get('/key/generate[/]', [Base::class, 'generate_key']);
|
||||
$app->get('/balance[/]', [Contabilidad\Common\Controller\TiposCategorias::class, 'balance']);
|
||||
$app->get('/info', [Base::class, 'info']);
|
||||
$app->get('/', Base::class);
|
||||
|
@ -2,9 +2,9 @@
|
||||
use Psr\Container\ContainerInterface as Container;
|
||||
|
||||
return [
|
||||
GuzzleHttp\Client::class => function(Container $c) {
|
||||
return new GuzzleHttp\Client();
|
||||
},
|
||||
GuzzleHttp\Client::class => function(Container $c) {
|
||||
return new GuzzleHttp\Client();
|
||||
},
|
||||
Contabilidad\Common\Service\Auth::class => function(Container $c) {
|
||||
return new Contabilidad\Common\Service\Auth($c->get('api_key'));
|
||||
},
|
||||
|
@ -89,4 +89,10 @@ class Categoria extends Model {
|
||||
}
|
||||
return $this->saldo;
|
||||
}
|
||||
|
||||
/**
 * Converts the category to an array and embeds its type under "tipo".
 *
 * "tipo" is null when tipo() does not return an object, instead of failing
 * with a call on a non-object.
 * NOTE(review): assumes tipo() can return null/false for a category without
 * a type, as other relation accessors in this change are guarded that way;
 * confirm against the Model implementation.
 *
 * @return array Parent array representation plus the "tipo" key.
 */
public function toArray(): array {
  $arr = parent::toArray();
  $tipo = $this->tipo();
  $arr['tipo'] = is_object($tipo) ? $tipo->toArray() : null;
  return $arr;
}
|
||||
}
|
||||
|
@ -10,20 +10,20 @@ use Contabilidad\Common\Service\TiposCambios as Service;
|
||||
* @property int $activo
|
||||
*/
|
||||
class TipoCategoria extends Model {
|
||||
public static $_table = 'tipos_categoria';
|
||||
protected static $fields = ['descripcion', 'activo'];
|
||||
public static $_table = 'tipos_categoria';
|
||||
protected static $fields = ['descripcion', 'activo'];
|
||||
|
||||
protected $categorias;
|
||||
public function categorias() {
|
||||
if ($this->categorias === null) {
|
||||
$this->categorias = $this->parentOf(Categoria::class, [Model::CHILD_KEY => 'tipo_id']);
|
||||
protected $categorias;
|
||||
public function categorias() {
|
||||
if ($this->categorias === null) {
|
||||
$this->categorias = $this->parentOf(Categoria::class, [Model::CHILD_KEY => 'tipo_id']);
|
||||
}
|
||||
return $this->categorias;
|
||||
}
|
||||
return $this->categorias;
|
||||
}
|
||||
|
||||
public function getCuentasOf($tipo) {
|
||||
return $this->factory->find(Cuenta::class)
|
||||
->select([['cuentas', '*']])
|
||||
->select('cuentas.*')
|
||||
->join([
|
||||
['tipos_cuenta', 'tipos_cuenta.id', 'cuentas.tipo_id'],
|
||||
['categorias', 'categorias.id', 'cuentas.categoria_id']
|
||||
@ -37,7 +37,7 @@ class TipoCategoria extends Model {
|
||||
protected $saldo;
|
||||
public function saldo(Service $service = null) {
|
||||
if ($this->saldo === null) {
|
||||
$this->saldo = array_reduce($this->categorias(), function($sum, $item) use ($service) {
|
||||
$this->saldo = array_reduce($this->categorias() ?? [], function($sum, $item) use ($service) {
|
||||
return $sum + $item->saldo($service);
|
||||
});
|
||||
}
|
||||
|
@ -50,6 +50,7 @@ services:
|
||||
image: php-ui
|
||||
env_file:
|
||||
- .api.env
|
||||
- .env
|
||||
build:
|
||||
context: ui
|
||||
volumes:
|
||||
|
@ -2,7 +2,7 @@ FROM python
|
||||
|
||||
RUN apt-get update -y && apt-get install -y ghostscript python3-tk libgl-dev
|
||||
|
||||
RUN pip install flask pyyaml pypdf4 gunicorn camelot-py[cv] pikepdf httpx
|
||||
RUN pip install flask pyyaml pypdf4 gunicorn camelot-py[cv] pikepdf
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@ -12,5 +12,4 @@ EXPOSE 5000
|
||||
|
||||
WORKDIR /app/src
|
||||
|
||||
CMD ["python", "app.py"]
|
||||
#CMD ["gunicorn", "-b 0.0.0.0:5000", "app:app"]
|
||||
CMD ["gunicorn", "-b 0.0.0.0:5000", "app:app"]
|
||||
|
@ -1,4 +1,3 @@
|
||||
passwords:
|
||||
- 0839
|
||||
- 159608395
|
||||
- 15960839
|
||||
|
Binary file not shown.
@ -1,285 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import sklearn
|
||||
import enlighten
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
|
||||
import src.contabilidad.pdf as pdf
|
||||
import src.contabilidad.text_handler as th
|
||||
from src.ai.models import Phrase, phrase_factory, Word, word_factory
|
||||
from src.contabilidad.log import LOG_LEVEL
|
||||
|
||||
|
||||
class Dictionary:
    """Persistent collection of words and phrases extracted from PDF files.

    The dictionary is stored as JSON in ``filename`` with the keys
    ``processed`` (base names of files already handled), ``words`` and
    ``phrases``.
    """

    def __init__(self, filename, logger):
        """Remember the storage file and logger, then load existing data."""
        self.filename = filename
        self._logger = logger
        self.__processed = []
        self.__phrases = None
        self.__words = None
        self.load()

    def load(self):
        """Read words, phrases and processed file names from ``filename``.

        Does nothing when the file does not exist.
        """
        if not os.path.isfile(self.filename):
            return
        with open(self.filename, 'r') as file:
            data = json.load(file)
        if 'words' in data:
            self.__words = [word_factory(w) for w in data['words']]
        if 'phrases' in data:
            self.__phrases = [phrase_factory(ph) for ph in data['phrases']]
        if 'processed' in data:
            self.__processed = data['processed']

    def save(self):
        """Sort words and phrases and write the dictionary to ``filename``."""
        self.sort_words()
        self.sort_phrases()
        with open(self.filename, 'w') as file:
            json.dump(self.to_json(), file, indent=2)

    def to_data(self):
        """Label-encode the known words and print the encoded values.

        NOTE(review): calls ``set_fit`` on every word; confirm the Word model
        provides that method.
        """
        encoder = LabelEncoder()
        words = self.get_words()
        data = encoder.fit_transform([w.get_word() for w in words])
        for word, fit in zip(words, data):
            word.set_fit(fit)
        print(data)
        # return [ph.to_data() for ph in self.get_phrases()]

    def to_json(self):
        """Return the dictionary as a JSON-serialisable dict."""
        output = {
            'processed': [],
            'words': [],
            'phrases': []
        }
        if self.__processed:
            output['processed'] = self.__processed
        if self.__words:
            output['words'] = [w.to_json() for w in self.__words]
        if self.__phrases:
            output['phrases'] = [p.to_json() for p in self.__phrases]
        return output

    def find_phrase(self, phrase: Phrase = None, phrase_dict: dict = None, phrase_list: list = None):
        """Return the index of a stored phrase, or -1 when it is not found.

        Exactly one of the arguments is used, in the order ``phrase``,
        ``phrase_dict`` (its ``'words'`` entry), ``phrase_list``.
        """
        if not self.__phrases:
            return -1
        if phrase is not None:
            phrase_list = [w.get_word() for w in phrase.get_words()]
        elif phrase_dict is not None:
            phrase_list = phrase_dict['words']
        elif phrase_list is None:
            return -1
        return find_phrase(self.__phrases, phrase_list)

    def add_phrase(self, phrase: Phrase = None, phrase_dict: dict = None, phrase_list: list = None):
        """Add a phrase, or increase its frequency when already stored.

        Returns ``self`` so calls can be chained.
        """
        if self.__phrases is None:
            self.__phrases = []
        if phrase is None:
            if phrase_dict is not None:
                phrase = phrase_factory(phrase_dict)
            elif phrase_list is not None:
                phrase = phrase_factory({'words': phrase_list})
            else:
                return self
        i = self.find_phrase(phrase)
        if i > -1:
            self.__phrases[i].add_freq()
            return self
        self.__phrases.append(phrase)
        return self

    def add_phrases(self, phrase_list: list):
        """Add many phrases at once, showing a progress bar.

        Phrases are compared by their sorted word spellings; a phrase that is
        already stored only has its frequency increased.
        """
        if self.__phrases is None:
            self.__phrases = []
        phs = [sorted(w.get_word() for w in p) for p in self.__phrases]
        with enlighten.get_manager() as manager:
            with manager.counter(total=len(phrase_list), desc='Phrases', unit='phrases', color='green') as bar1:
                for phrase in phrase_list:
                    p2 = sorted([w.get_word() for w in phrase])
                    if p2 in phs:
                        self.__phrases[phs.index(p2)].add_freq()
                    else:
                        self.__phrases.append(phrase_factory({'words': phrase}))
                        phs.append(p2)
                    # Advance for every phrase, not only for new ones, so the
                    # bar reaches its total.
                    bar1.update()

    def get_phrases(self):
        """Return the stored phrases (``None`` when none were ever added)."""
        return self.__phrases

    @staticmethod
    def _phrase_sort_key(p):
        """Sort key for a phrase: (frequency, type description, word count)."""
        if p is None:
            return 0
        if isinstance(p, Phrase):
            return p.get_freq(), p.get_type().get_desc(), len(p.get_words())
        return p['frequency'], p['type']['description'], len(p['words'])

    def sort_phrases(self):
        """Sort phrases in ascending key order; failures are logged, not raised.

        Always returns ``self``.
        """
        if self.__phrases is None:
            return self
        try:
            self.__phrases = sorted(self.__phrases, key=self._phrase_sort_key)
        except Exception as e:
            self._logger.log(repr(self.__phrases), LOG_LEVEL.ERROR)
            self._logger.log(e)
        return self

    @staticmethod
    def _word_sort_key(w):
        """Sort key for a word: (frequency, type description, spelling)."""
        if w is None:
            return 0
        if isinstance(w, Word):
            return w.get_freq(), w.get_type().get_desc(), w.get_word()
        return w['frequency'], w['type']['description'], w['word']

    def sort_words(self):
        """Sort words in descending key order; failures are logged, not raised.

        Always returns ``self``.
        """
        if self.__words is None:
            return self
        try:
            self.__words = sorted(self.__words, key=self._word_sort_key, reverse=True)
        except Exception as e:
            self._logger.log(repr(self.__words))
            self._logger.log(e)
        return self

    def find_word(self, word: Word = None, word_dict: dict = None, word_str: str = None):
        """Return the index of a stored word, or -1 when it is not found.

        Exactly one of the arguments is used, in the order ``word``,
        ``word_dict`` (its ``'word'`` entry), ``word_str``.
        """
        if not self.__words:
            return -1
        if word is not None:
            word_str = word.get_word()
        elif word_dict is not None:
            word_str = word_dict['word']
        elif word_str is None:
            return -1
        return find_word(self.__words, word_str)

    def add_word(self, word: Word = None, word_dict: dict = None, word_str: str = None):
        """Add a word, or increase its frequency when already stored.

        Returns ``self`` so calls can be chained.
        """
        if self.__words is None:
            self.__words = []
        if word is None:
            if word_dict is not None:
                word = word_factory(word_dict)
            elif word_str is not None:
                word = word_factory({'word': word_str})
            else:
                return self
        i = self.find_word(word)
        if i > -1:
            self.__words[i].add_freq()
            return self
        self.__words.append(word)
        return self

    def add_words(self, words: list):
        """Add every Word, dict and str found in ``words``.

        Items are handled in three passes (Word objects, then dicts, then
        strings), which fixes the insertion order of new words.
        """
        for w in words:
            if isinstance(w, Word):
                self.add_word(word=w)
        for w in words:
            if isinstance(w, dict):
                self.add_word(word_dict=w)
        for w in words:
            if isinstance(w, str):
                self.add_word(word_str=w)
        return self

    def get_words(self):
        """Return the stored words without repeated entries."""
        return filter_unique_words(self.__words or [])

    def _word_index(self):
        """Map each spelling to the index of its first stored word."""
        index = {}
        for position, known in enumerate(self.__words or []):
            index.setdefault(known.get_word(), position)
        return index

    def _match_with(self, index, word_list):
        """Translate spellings to stored words using a prebuilt index.

        Spellings that are not in the dictionary are skipped.
        """
        matched = []
        for w in word_list:
            position = index.get(w) if isinstance(w, str) else None
            if position is not None:
                matched.append(self.__words[position])
        return matched

    def match_words(self, word_list: list):
        """Return the stored word for every spelling in ``word_list``.

        Unknown spellings are skipped; a lookup result of -1 used to be
        applied as a list index and silently yielded the last stored word.
        """
        return self._match_with(self._word_index(), word_list)

    def append_to_phrase(self, seed: list = None, length: int = 1):
        """Extend ``seed`` with the word ``length`` positions past its maximum.

        Returns a one-word list when ``seed`` is None and ``False`` when no
        word is left to append.
        NOTE(review): ``seed`` is treated as a list of indices (``max(seed)``)
        while the appended item is a word object; confirm the intended
        element type with the caller.
        """
        if seed is None:
            return [self.__words[0]]
        max_index = max(seed) + length
        # An index equal to the length is already out of range.
        if max_index >= len(self.__words):
            if length == 1:
                return False
            return self.append_to_phrase(seed, length - 1)
        return seed + [self.__words[max_index]]

    def get_possible_phrases(self, word_list):
        """Register every contiguous run of ``word_list`` as a phrase.

        For each length from 1 to ``len(word_list)`` and each start position
        the slice ``word_list[start:start + length]`` is turned into stored
        words and added through :meth:`add_phrases`. Returns the stored
        phrases.
        """
        print('Adding words.')
        self.add_words(word_list)

        print('Creating phrases.')
        # Built once: the stored words do not change while slices are matched.
        index = self._word_index()
        total = len(word_list)
        with enlighten.get_manager() as manager:
            with manager.counter(total=total**2, desc='Phrases', unit='words', color='red') as bar1:
                phrases = []
                for length in range(1, total + 1):
                    bar2 = bar1.add_subcounter(color='green')
                    for start in range(0, total):
                        phrase = build_phrase(word_list, start, start + length)
                        phrases.append(self._match_with(index, phrase))
                        bar2.update()
                    bar1.update()

        print(f'Created {len(phrases)} phrases.')
        phrases = sorted(phrases, key=len)

        print('Adding phrases.')
        # Really slow (~115000 phrases in one pdf)
        self.add_phrases(phrases)
        return self.__phrases

    def is_processed(self, filename: str):
        """Tell whether the base name of ``filename`` was already processed."""
        return os.path.basename(filename) in self.__processed

    @staticmethod
    def _temp_path(filename):
        """Return the absolute path of the decrypted copy next to ``filename``.

        Built from the base name without its last extension, so dots in
        directory names and relative paths no longer corrupt the result.
        """
        stem = os.path.splitext(os.path.basename(filename))[0]
        return os.path.realpath(os.path.join(os.path.dirname(filename), stem + '-temp.pdf'))

    def process(self, filename: str, password: str = None):
        """Extract the text of a PDF and register all phrases found in it.

        Returns the stored phrases, or ``None`` when the file was already
        processed.
        """
        if self.is_processed(filename):
            print('Already processed.')
            return
        temp = self._temp_path(filename)
        print('Removing PDF encryption.')
        pdf.remove_encryption(filename, password, temp)
        print('Getting text')
        try:
            obj = pdf.get_text(temp)
        finally:
            # Never leave the decrypted copy behind, even when parsing fails.
            if os.path.exists(temp):
                os.remove(temp)
        print('Getting possible phrases.')
        phrases = self.get_possible_phrases(th.split_words(obj))
        self.__processed.append(os.path.basename(filename))
        return phrases
|
||||
|
||||
|
||||
def build_phrase(word_list, start: int, end: int = None):
    """Return the slice of ``word_list`` from ``start`` up to ``end``.

    When ``end`` is omitted the slice runs to the end of the list.
    """
    # Slicing with a stop of None is the same as leaving the stop out.
    return word_list[start:end]
|
||||
|
||||
|
||||
def filter_unique_words(words):
    """Return ``words`` without repeated items, keeping first-seen order.

    Items are compared with ``==``. ``None`` is treated as an empty list so
    an empty dictionary does not raise.
    """
    new_list = []
    for w in words or []:
        if w not in new_list:
            new_list.append(w)
    return new_list
|
||||
|
||||
|
||||
def validate_phrase(phrase):
    """Placeholder validation hook: every phrase is reported as valid."""
    return True
|
||||
|
||||
|
||||
def find_phrase(phrases: list, phrase: list):
    """Return the index of the first stored phrase made of the same words.

    Word order is ignored: both sides are compared as sorted lists of
    spellings. Returns -1 when no stored phrase matches.
    """
    keys = []
    for stored in phrases:
        keys.append(sorted(w.get_word() for w in stored.get_words()))
    wanted = sorted(phrase)
    try:
        return keys.index(wanted)
    except ValueError:
        return -1
|
||||
|
||||
|
||||
def find_word(words: list, word: str):
    """Return the index of the first word spelled ``word``, or -1."""
    spellings = [candidate.get_word() for candidate in words]
    try:
        return spellings.index(word)
    except ValueError:
        return -1
|
@ -1,243 +0,0 @@
|
||||
import json
|
||||
|
||||
|
||||
class Type:
    """Base (id, description) pair used to classify words and phrases."""

    def __init__(self, _id, _description):
        # Single-underscore attributes on purpose: double-underscore names are
        # mangled per class, so assignments made in a subclass would create a
        # separate attribute that get_id()/get_desc() never read.
        self._id = _id
        self._description = _description

    def get_id(self):
        """Return the numeric identifier of the type."""
        return self._id

    def get_desc(self):
        """Return the textual description of the type."""
        return self._description

    def to_json(self):
        """Return the JSON representation of the type (its id)."""
        return self.get_id()

    def _apply(self, type_id, descriptions):
        """Switch to ``type_id`` when it is a key of ``descriptions``.

        Unknown ids leave the current id and description untouched.
        Returns ``self``.
        """
        if type_id in descriptions:
            self._id = type_id
            self._description = descriptions[type_id]
        return self

    def __repr__(self):
        return json.dumps({
            'id': self.get_id(),
            'description': self.get_desc()
        })


def type_factory(_type: str, _id: int):
    """Build a WordType or PhraseType loaded with ``_id``.

    ``_type`` selects the class: 'Word'/'WordType' or 'Phrase'/'PhraseType'.
    Returns ``None`` for any other name.
    """
    if _type == 'Word' or _type == 'WordType':
        t = WordType()
    elif _type == 'Phrase' or _type == 'PhraseType':
        t = PhraseType()
    else:
        return None
    t.load(_id)
    return t


class WordType(Type):
    """Classification of a single word; defaults to STRING."""

    STRING = 0
    NUMERIC = 1
    CURRENCY = 2
    DATE = 4

    _DESCRIPTIONS = {
        STRING: 'string',
        NUMERIC: 'numeric',
        CURRENCY: 'currency',
        DATE: 'date'
    }

    def __init__(self):
        super().__init__(0, 'string')

    def load(self, word_type: int):
        """Set id and description from a WordType constant; returns ``self``."""
        return self._apply(word_type, self._DESCRIPTIONS)


class PhraseType(Type):
    """Classification of a phrase; defaults to TEXT."""

    TEXT = 0
    TITLE = 1
    HEADER = 2
    MOVEMENT = 4
    INVALID = 99

    _DESCRIPTIONS = {
        TEXT: 'text',
        TITLE: 'title',
        HEADER: 'header',
        MOVEMENT: 'movement',
        INVALID: 'invalid'
    }

    def __init__(self):
        super(PhraseType, self).__init__(0, 'text')

    def load(self, phrase_type: int):
        """Set id and description from a PhraseType constant; returns ``self``."""
        return self._apply(phrase_type, self._DESCRIPTIONS)
|
||||
|
||||
|
||||
class Word:
|
||||
def __init__(self):
|
||||
self.__id = 0
|
||||
self.__word = None
|
||||
self.__type_id = 0
|
||||
self.__type = None
|
||||
self.__frequency = 1
|
||||
|
||||
def set_id(self, idx: int):
|
||||
self.__id = idx
|
||||
return self
|
||||
|
||||
def set_word(self, word: str):
|
||||
self.__word = word
|
||||
return self
|
||||
|
||||
def set_type(self, word_type):
|
||||
if isinstance(word_type, WordType):
|
||||
self.__type_id = word_type.get_id()
|
||||
# self.__type = word_type
|
||||
if isinstance(word_type, int):
|
||||
self.__type_id = word_type
|
||||
# self.__type = type_factory('Word', word_type)
|
||||
return self
|
||||
|
||||
def add_freq(self, amount: int = 1):
|
||||
self.__frequency += amount
|
||||
return self
|
||||
|
||||
def get_id(self) -> int:
|
||||
return self.__id
|
||||
|
||||
def get_word(self) -> str:
|
||||
return self.__word
|
||||
|
||||
def get_type_id(self) -> int:
|
||||
return self.__type_id
|
||||
|
||||
def get_type(self) -> WordType:
|
||||
if self.__type is None:
|
||||
self.__type = type_factory('Word', self.__type_id)
|
||||
return self.__type
|
||||
|
||||
def get_freq(self) -> int:
|
||||
return self.__frequency
|
||||
|
||||
def to_json(self) -> dict:
|
||||
output = {
|
||||
'id': self.get_id(),
|
||||
'word': self.get_word(),
|
||||
'type': self.get_type_id(),
|
||||
'freq': self.get_freq()
|
||||
}
|
||||
return output
|
||||
|
||||
def __repr__(self):
|
||||
return json.dumps(self.to_json())
|
||||
|
||||
|
||||
def word_factory(word: dict) -> Word:
|
||||
w = Word()
|
||||
w.set_id(word['id'])
|
||||
w.set_word(word['word'])
|
||||
if 'type' in word:
|
||||
w.set_type(word['type'])
|
||||
if 'freq' in word:
|
||||
w.add_freq(word['freq'] - 1)
|
||||
return w
|
||||
|
||||
|
||||
class Phrase:
|
||||
def __init__(self):
|
||||
self.__id = 0
|
||||
self.__words = None
|
||||
self.__type_id = 0
|
||||
self.__type = None
|
||||
self.__frequency = 1
|
||||
|
||||
def set_id(self, idx: int):
|
||||
self.__id = idx
|
||||
return self
|
||||
|
||||
def add_word(self, word):
|
||||
if isinstance(word, Word):
|
||||
self.__words.append(word.get_id())
|
||||
if isinstance(word, dict):
|
||||
if 'id' in word:
|
||||
self.__words.append(word['id'])
|
||||
if isinstance(word, int):
|
||||
self.__words.append(word)
|
||||
return self
|
||||
|
||||
def set_words(self, words: list):
|
||||
if self.__words is None:
|
||||
self.__words = []
|
||||
for w in words:
|
||||
if isinstance(w, Word):
|
||||
self.add_word(w)
|
||||
if isinstance(w, dict):
|
||||
self.add_word(w)
|
||||
if isinstance(w, int):
|
||||
self.add_word(w)
|
||||
return self
|
||||
|
||||
def set_type(self, phrase_type):
|
||||
if isinstance(phrase_type, PhraseType):
|
||||
self.__type_id = phrase_type.get_id()
|
||||
# self.__type = phrase_type
|
||||
if isinstance(phrase_type, int):
|
||||
self.__type_id = phrase_type
|
||||
# self.__type = type_factory('Phrase', phrase_type)
|
||||
return self
|
||||
|
||||
def add_freq(self, amount: int = 1):
|
||||
self.__frequency += amount
|
||||
return self
|
||||
|
||||
def get_id(self) -> int:
|
||||
return self.__id
|
||||
|
||||
def get_words(self) -> list:
|
||||
return self.__words
|
||||
|
||||
def get_type_id(self) -> int:
|
||||
return self.__type_id
|
||||
|
||||
def get_type(self) -> PhraseType:
|
||||
if self.__type is None:
|
||||
self.__type = type_factory('Phrase', self.__type_id)
|
||||
return self.__type
|
||||
|
||||
def get_freq(self) -> int:
|
||||
return self.__frequency
|
||||
|
||||
def match(self, word_list: list):
|
||||
if len(word_list) != len(self.__words):
|
||||
return False
|
||||
new_words = sorted(self.__words)
|
||||
new_list = sorted(word_list)
|
||||
if new_words == new_list:
|
||||
return True
|
||||
return False
|
||||
|
||||
def to_json(self):
|
||||
output = {
|
||||
'id': self.get_id(),
|
||||
'words': self.get_words(),
|
||||
'type': self.get_type_id(),
|
||||
'freq': self.get_freq()
|
||||
}
|
||||
return output
|
||||
|
||||
def __repr__(self):
|
||||
return json.dumps(self.to_json())
|
||||
|
||||
def __len__(self):
|
||||
return len(self.get_words())
|
||||
|
||||
|
||||
def phrase_factory(phrase: dict) -> Phrase:
|
||||
ph = Phrase()
|
||||
ph.set_id(phrase['id'])
|
||||
ph.set_words(phrase['words'])
|
||||
if 'type' in phrase:
|
||||
ph.set_type(phrase['type'])
|
||||
if 'freq' in phrase:
|
||||
ph.add_freq(phrase['freq'] - 1)
|
||||
return ph
|
@ -1,126 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import timeit
|
||||
|
||||
import tensorflow as tf
|
||||
import sklearn
|
||||
import numpy as np
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
|
||||
import src.contabilidad.pdf as pdf
|
||||
import src.contabilidad.text_handler as th
|
||||
|
||||
|
||||
class Layer:
    """A dense layer: one weight vector and one bias value per output."""

    def __init__(self):
        self.__weights = None
        self.__bias = None

    def set_size(self, inputs: int, size: int):
        """Reset the layer to ``size`` outputs of ``inputs`` zero weights each."""
        self.__weights = [[0 for _ in range(inputs)] for _ in range(size)]
        self.__bias = [0 for _ in range(size)]

    def add_weight(self, vector: list, idx: int = None):
        """Add a weight vector at ``idx`` (at the end when omitted).

        Returns ``self``.
        """
        if self.__weights is None:
            # A fresh layer has no weight list yet.
            self.__weights = []
        if idx is None:
            self.__weights.append(vector)
        else:
            self.__weights.insert(idx, vector)
        return self

    def set_weight(self, value: float, weight_index: int, input_index: int):
        """Set one weight of one output."""
        self.__weights[weight_index][input_index] = value

    def set_bias(self, value: list):
        """Replace the bias list."""
        self.__bias = value

    def train(self, input_values: list, output_values: list):
        """Return the relative error of each output for ``input_values``.

        The error of output ``i`` is ``(expected - actual) / expected``.
        Weights are not adjusted here.
        Raises ZeroDivisionError when an expected value is 0.
        """
        output = self.get_output(input_values)
        return [(output_values[i] - v) / output_values[i] for i, v in enumerate(output)]

    def to_json(self):
        """Return the layer as a dict with keys bias and weights."""
        return {
            'bias': self.__bias,
            'weights': self.__weights
        }

    def get_output(self, vector: list):
        """Return ``weights . vector + bias`` for every output.

        A missing bias entry counts as 0.
        """
        bias = self.__bias or []
        output = []
        for i, weight in enumerate(self.__weights or []):
            val = 0
            for j, v in enumerate(weight):
                val += v * vector[j]
            # Append: the result list starts empty, so it cannot be indexed.
            output.append(val + (bias[i] if i < len(bias) else 0))
        return output


def layer_factory(layer_dict: dict):
    """Build a Layer from a dict with the keys ``'bias'`` and ``'weights'``."""
    layer = Layer()
    layer.set_bias(layer_dict['bias'])
    for w in layer_dict['weights']:
        layer.add_weight(w)
    return layer
|
||||
|
||||
|
||||
class Network:
    """Ordered list of layers that can be loaded from a JSON file."""

    def __init__(self, filename: str):
        self._filename = filename
        self.__layers = None

    def load(self):
        """Read the file and add the layers listed under its ``'layers'`` key."""
        with open(self._filename) as f:
            data = json.load(f)
        if 'layers' in data:
            self.add_layers(data['layers'])

    def add_layers(self, layers: list):
        """Build a layer from every dict in ``layers`` and append it."""
        if self.__layers is None:
            # The layer list is created on first use.
            self.__layers = []
        for lr in layers:
            self.__layers.append(layer_factory(lr))
|
||||
|
||||
|
||||
class AI:
    """Turns the text of PDF sources into label-encoded phrase vectors."""

    def __init__(self, dictionary_filename, logger):
        # Kept for later use; the arguments were previously discarded.
        self._dictionary_filename = dictionary_filename
        self._logger = logger
        self.__dict = None
        self.__network = None
        self.__sources = None
        self._phrases = None
        self.filename = ''

    def add_source(self, text):
        """Register a source; returns ``self``.

        Each source is later expanded as keyword arguments of
        :meth:`process`, so it must be a mapping with the keys ``filename``
        and ``password``.
        """
        if self.__sources is None:
            self.__sources = []
        self.__sources.append(text)
        return self

    def set_filename(self, filename: str):
        """Set ``filename``; returns ``self``."""
        self.filename = filename
        return self

    def process_sources(self):
        """Run :meth:`process` on every registered source (none is allowed)."""
        for source in self.__sources or []:
            self.process(**source)

    @staticmethod
    def _temp_path(filename):
        """Return the absolute path of the decrypted copy next to ``filename``.

        Built from the base name without its last extension, so dots in
        directory names and relative paths no longer corrupt the result.
        """
        stem = os.path.splitext(os.path.basename(filename))[0]
        return os.path.realpath(os.path.join(os.path.dirname(filename), stem + '-temp.pdf'))

    def process(self, filename, password):
        """Encode every contiguous run of words of a PDF into ``_phrases``.

        The words are label-encoded; each run is stored as a vector of the
        encoded values padded with zeros to the total number of words.
        """
        encoder = LabelEncoder()
        temp = self._temp_path(filename)
        pdf.remove_encryption(filename, password, temp)
        try:
            obj = pdf.get_text(temp)
        finally:
            # Never leave the decrypted copy behind, even when parsing fails.
            if os.path.exists(temp):
                os.remove(temp)
        word_list = th.split_words(obj)
        fits = encoder.fit_transform(word_list)
        total = len(word_list)
        phrases = []
        for length in range(1, total + 1):
            for start in range(0, total):
                # fits[i] is the label of word_list[i], so the slice equals
                # looking every word up again by value.
                encoded = np.array(fits[start:(start + length)])
                phrases.append(np.append(encoded, np.zeros([total - len(encoded)])))
        self._phrases = np.array(phrases)

    def active_train(self):
        """Not implemented."""
        pass
|
@ -1,40 +1,22 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import httpx
|
||||
from flask import Flask, request, jsonify
|
||||
from flask import Flask, request
|
||||
|
||||
import contabilidad.pdf as pdf
|
||||
import contabilidad.passwords as passwords
|
||||
import contabilidad.log as log
|
||||
import contabilidad.text_handler as th
|
||||
from contabilidad.log import Log
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
log = Log('/var/log/python/contabilidad.log')
|
||||
api_key = os.environ.get('PYTHON_KEY')
|
||||
|
||||
|
||||
def validate_key(request_obj):
    """Tell whether the request carries the configured API key.

    The key is looked up, in order, in the ``Authorization`` header (plain or
    ``Bearer <key>``), then in the ``API_KEY`` and ``api_key`` request values.
    The first location that is present decides the result.

    :param request_obj: request-like object exposing ``headers`` and ``values``
        mappings.
    :return: ``True`` when the supplied key equals the module-level
        ``api_key``, ``False`` otherwise (including when no key is configured).
    """
    import hmac  # local import keeps this block self-contained

    def _matches(candidate):
        # Without a configured key nothing can be authorised.
        if api_key is None or candidate is None:
            return False
        # Constant-time comparison avoids leaking the key through timing.
        return hmac.compare_digest(str(candidate).encode('utf-8'), str(api_key).encode('utf-8'))

    if 'Authorization' in request_obj.headers:
        auth = request_obj.headers.get('Authorization')
        if isinstance(auth, list):
            auth = auth[0]
        if 'Bearer' in auth:
            parts = auth.split(' ')
            # "Bearer" with no token used to raise IndexError; treat as invalid.
            auth = parts[1] if len(parts) > 1 else None
        return _matches(auth)
    for name in ('API_KEY', 'api_key'):
        if name in request_obj.values:
            return _matches(request_obj.values.get(name))
    return False
|
||||
log.logging['filename'] = '/var/log/python/contabilidad.log'
|
||||
|
||||
|
||||
@app.route('/pdf/parse', methods=['POST'])
|
||||
def pdf_parse():
|
||||
if not validate_key(request):
|
||||
return jsonify({'message': 'Not Authorized'})
|
||||
data = request.get_json()
|
||||
if not isinstance(data['files'], list):
|
||||
data['files'] = [data['files']]
|
||||
@ -50,11 +32,6 @@ def pdf_parse():
|
||||
continue
|
||||
pdf.remove_encryption(filename, p, temp)
|
||||
obj = pdf.get_data(temp)
|
||||
try:
|
||||
text = th.text_cleanup(pdf.get_text(temp))
|
||||
except IndexError as ie:
|
||||
print(ie, file=sys.stderr)
|
||||
continue
|
||||
outputs = []
|
||||
for o in obj:
|
||||
out = json.loads(o.df.to_json(orient='records'))
|
||||
@ -71,35 +48,8 @@ def pdf_parse():
|
||||
out[i] = line
|
||||
outputs.append(out)
|
||||
os.remove(temp)
|
||||
output.append({'bank': text['bank'], 'filename': file['filename'], 'tables': outputs, 'text': text['text']})
|
||||
return jsonify(output)
|
||||
|
||||
|
||||
@app.route('/cambio/get', methods=['POST'])
def cambios():
    """Proxy an indicator lookup to mindicador.cl.

    Expects a JSON body with ``desde`` (one of the codes in ``valid``) and
    ``fecha`` (dash-separated date whose parts are reversed before being sent
    upstream). Returns the upstream JSON on success, or
    ``{'error': 'Valor no encontrado.'}`` when the request is malformed, the
    code is unknown, or the upstream call fails.
    """
    if not validate_key(request):
        return jsonify({'message': 'Not Authorized'})
    data = request.get_json()
    # Request code -> path segment of the mindicador.cl API.
    valid = {
        "CLF": "uf",
        "IVP": "ivp",
        "USD": "dolar",
        "USDo": "dolar_intercambio",
        "EUR": "euro",
        "IPC": "ipc",
        "UTM": "utm",
        "IMACEC": "imacec",
        "TPM": "tpm",
        "CUP": "libra_cobre",
        "TZD": "tasa_desempleo",
        "BTC": "bitcoin"
    }
    not_found = {'error': 'Valor no encontrado.'}
    # Reject malformed bodies instead of failing with KeyError/TypeError.
    if not isinstance(data, dict):
        return jsonify(not_found)
    desde = data.get('desde')
    fecha = data.get('fecha')
    indicator = valid.get(desde) if isinstance(desde, str) else None
    if indicator is None or not isinstance(fecha, str):
        return jsonify(not_found)
    base_url = 'https://mindicador.cl/api/'
    url = f"{base_url}{indicator}/{'-'.join(reversed(fecha.split('-')))}"
    try:
        res = httpx.get(url)
    except httpx.HTTPError:
        # Transport-level failure (DNS, timeout, connection reset, ...).
        return jsonify(not_found)
    if res.status_code != httpx.codes.OK:
        return jsonify(not_found)
    return jsonify(res.json())
|
||||
output.append({'filename': file['filename'], 'text': outputs})
|
||||
return json.dumps(output)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
BIN
python/src/contabilidad/__pycache__/log.cpython-39.pyc
Normal file
BIN
python/src/contabilidad/__pycache__/log.cpython-39.pyc
Normal file
Binary file not shown.
@ -1,65 +1,19 @@
|
||||
import os.path
|
||||
import time
|
||||
import traceback
|
||||
|
||||
|
||||
logging = {
|
||||
'filename': '/var/log/python/error.log'
|
||||
}
|
||||
|
||||
|
||||
class LOG_LEVEL:
|
||||
INFO = 0
|
||||
WARNING = 1
|
||||
DEBUG = 2
|
||||
ERROR = 4
|
||||
|
||||
@staticmethod
|
||||
def desc(level):
|
||||
mapping = {
|
||||
LOG_LEVEL.INFO: 'INFO',
|
||||
LOG_LEVEL.WARNING: 'WARNING',
|
||||
LOG_LEVEL.DEBUG: 'DEBUG',
|
||||
LOG_LEVEL.ERROR: 'ERROR'
|
||||
}
|
||||
return mapping[level]
|
||||
INFO = 'INFO'
|
||||
WARNING = 'WARNING'
|
||||
DEBUG = 'DEBUG'
|
||||
ERROR = 'ERROR'
|
||||
|
||||
|
||||
class Logger:
    """Fan-out logger that forwards messages to several ``Log`` files.

    Each registered file has a minimum level; a message is written to every
    file whose minimum level it reaches.
    """

    def __init__(self):
        self._logs = []

    def add_log(self, filename: str, min_level: int = LOG_LEVEL.INFO):
        """Register a log file.

        :param filename: path of the file to write to.
        :param min_level: lowest level this file accepts.
        :return: ``self`` so calls can be chained.
        """
        self._logs.append({'log': Log(filename), 'level': min_level})
        self._logs.sort(key=lambda e: e['level'])
        return self

    def log(self, message, level: int = LOG_LEVEL.INFO):
        """Write ``message`` to every registered file that accepts ``level``.

        :param message: text or exception to record.
        :param level: severity of the message.
        """
        # Log.log() promotes exceptions to ERROR; apply the same promotion here
        # so they are also routed to error-only files.
        if isinstance(message, Exception) and level < LOG_LEVEL.ERROR:
            level = LOG_LEVEL.ERROR
        for log in self._logs:
            # A file receives the message when the message is at least as
            # severe as the file's minimum (the test used to be inverted).
            if level >= log['level']:
                log['log'].log(message, level)
|
||||
|
||||
|
||||
class Log:
    """Append-only log file that moves to a numbered file once it is too big."""

    # Size in bytes above which a new numbered file is started.
    MAX_SIZE = 10 * 1024 * 1024

    def __init__(self, filename: str = '/var/log/python/error.log'):
        self._filename = filename

    def log(self, message, level: int = LOG_LEVEL.INFO):
        """Append one timestamped entry to the current file.

        :param message: text to record; an ``Exception`` is rendered as its
            traceback and logged at ``ERROR`` level at least.
        :param level: severity of the message.
        """
        if isinstance(message, Exception):
            # Format the exception that was passed in; format_exc() would only
            # describe the exception currently being handled, if any.
            message = ''.join(traceback.format_exception(type(message), message, message.__traceback__))
            if level < LOG_LEVEL.ERROR:
                level = LOG_LEVEL.ERROR
        self.rotate_file()
        with open(self._filename, 'a') as f:
            # str() keeps non-string messages from breaking the concatenation.
            f.write(time.strftime('[%Y-%m-%d %H:%M:%S] ') + ' - ' + LOG_LEVEL.desc(level=level) + ': '
                    + str(message) + "\n")

    def rotate_file(self):
        """Advance to the first numbered file that is missing or not over ``MAX_SIZE``."""
        while os.path.isfile(self._filename) and os.path.getsize(self._filename) > self.MAX_SIZE:
            self.next_file()

    def next_file(self):
        """Switch to the next numbered file name (``error.log`` -> ``error.1.log`` -> ``error.2.log``).

        Only the file name is inspected, so dots in directory names are left
        alone, and names without an extension get a plain numeric suffix.
        """
        directory, base = os.path.split(self._filename)
        stem, ext = os.path.splitext(base)
        if ext[1:].isdecimal():
            # Extension-less name whose counter looks like an extension,
            # e.g. "error.1" -> stem "error", counter "1".
            counter, ext = ext[1:], ''
        else:
            prefix, dot, counter = stem.rpartition('.')
            if dot and counter.isdecimal():
                stem = prefix
            else:
                counter = ''
        n = int(counter) + 1 if counter else 1
        self._filename = os.path.join(directory, f'{stem}.{n}{ext}')
|
||||
def log(message, level=LOG_LEVEL.INFO):
|
||||
filename = logging['filename']
|
||||
with open(filename, 'a') as f:
|
||||
f.write(time.strftime('[%Y-%m-%d %H:%M:%S] ') + ' - ' + level + ': ' + message)
|
||||
|
@ -1,112 +1,48 @@
|
||||
def text_cleanup(text: str):
|
||||
def text_cleanup(text, filename: str = None):
|
||||
if isinstance(text, list):
|
||||
text = "\n\n\n".join(text)
|
||||
if 'bice' in text.lower():
|
||||
return {'bank': 'BICE', 'text': bice(text)}
|
||||
if 'scotiabank' in text.lower():
|
||||
return {'bank': 'Scotiabank', 'text': scotiabank(text)}
|
||||
if 'TARJETA' in text:
|
||||
return {'bank': 'Scotiabank', 'text': tarjeta(text)}
|
||||
return {'bank': 'unknown', 'text': basic(text)}
|
||||
output = []
|
||||
for t in text:
|
||||
output.append(text_cleanup(t, filename=filename))
|
||||
return output
|
||||
if filename is None:
|
||||
return text
|
||||
if 'bice' in filename.lower():
|
||||
return bice(text)
|
||||
if 'scotiabank' in filename.lower():
|
||||
return scotiabank(text)
|
||||
return text
|
||||
|
||||
|
||||
def bice(text):
|
||||
lines = [t2.strip() for t in text.split("\n\n\n")
|
||||
for t1 in t.split("\n\n") for t2 in t1.split("\n") if t2.strip() != '']
|
||||
output = []
|
||||
output += extract_from_to(lines, 'NOMBRE DEL CLIENTE', end='LAS CONDES', line_length=3)
|
||||
ti = [t for t in lines if 'MOVIMIENTOS DE LA CUENTA CORRIENTE' in t][0]
|
||||
output += extract_from_to(lines, 'LAS CONDES', end=ti, line_length=3)
|
||||
output += [ti]
|
||||
ti = [i for i, t in enumerate(lines) if 'FECHA' in t]
|
||||
output += extract_from_to(lines, ti[0], end=ti[1], line_length=4)
|
||||
output += extract_from_to(lines, 'RESUMEN DEL PERIODO', end='SALDO INICIAL', line_length=1)
|
||||
output += extract_from_to(lines, 'SALDO INICIAL', end='LINEA SOBREGIRO AUTORIZADA', line_length=4)
|
||||
output += extract_from_to(lines, 'LINEA SOBREGIRO AUTORIZADA', end='OBSERVACIONES', line_length=3)
|
||||
output += extract_from_to(lines, 'OBSERVACIONES', line_length=1)
|
||||
return output
|
||||
lines = text.split("\n\n\n")
|
||||
print(lines)
|
||||
return text
|
||||
|
||||
|
||||
def scotiabank(text):
|
||||
words = split_words(text)
|
||||
words = text.split("\n")
|
||||
output = [words[0]]
|
||||
output += extract_from_to(words, 'No. CTA.', end='VENCIMIENTO LINEA DE CREDITO', line_length=3)
|
||||
output += extract_from_to(words, 'VENCIMIENTO LINEA DE CREDITO',
|
||||
end='NOMBRE EJECUTIVO: LILIAN AVILA MANRIQUEZ', line_length=2)
|
||||
output += extract_from_to(words, 'NOMBRE EJECUTIVO: LILIAN AVILA MANRIQUEZ', end='SALDO ANTERIOR',
|
||||
line_length=1)
|
||||
output += extract_from_to(words, 'SALDO ANTERIOR', end='FECHA', line_length=4)
|
||||
output += extract_data(words, 'FECHA', end='ACTUALICE SIEMPRE ANTECEDENTES LEGALES, ', line_length=6,
|
||||
merge_list=[['DOCTO', 'No.'], ['SALDO', 'DIARIO']])
|
||||
output += extract_from_to(words, 'ACTUALICE SIEMPRE ANTECEDENTES LEGALES, ', 1)
|
||||
return output
|
||||
output = output + extract_from_to(words, 'No. CTA.', end='VENCIMIENTO LINEA DE CREDITO', line_length=3)
|
||||
output = output + extract_from_to(words, 'VENCIMIENTO LINEA DE CREDITO',
|
||||
end='NOMBRE EJECUTIVO: LILIAN AVILA MANRIQUEZ', line_length=2)
|
||||
output = output + extract_from_to(words, 'NOMBRE EJECUTIVO: LILIAN AVILA MANRIQUEZ', end='SALDO ANTERIOR',
|
||||
line_length=1)
|
||||
output = output + extract_from_to(words, 'SALDO ANTERIOR', end='FECHA', line_length=4)
|
||||
output = output + extract_data(words, 'FECHA', end='ACTUALICE SIEMPRE ANTECEDENTES LEGALES, ', line_length=6,
|
||||
merge_list=[['DOCTO', 'No.'], ['SALDO', 'DIARIO']])
|
||||
[print(li) for li in output]
|
||||
return text
|
||||
|
||||
|
||||
def tarjeta(text):
    """Rearrange the words of a credit-card statement into grouped output.

    The text is split into non-empty lines with ``split_words`` and then
    consumed section by section: each ``extract_from_to`` call takes the words
    between two markers (a literal word or an index) and appends the result to
    ``output``. Fixed section titles are inserted between the extracted parts.

    NOTE(review): the markers are literal strings of one specific statement
    layout; a missing marker raises ``ValueError`` (``list.index``) or
    ``IndexError`` (``[0]`` / ``[1]`` on an empty match list).
    NOTE(review): presumably ``line_length`` is the number of words grouped per
    output row — confirm against ``extract_by_line``.

    :param text: statement text (string, or list of strings joined by
        ``split_words``).
    :return: list built from the fixed titles and the extracted sections.
    """
    words = split_words(text)
    output = ['ESTADO DE CUENTA NACIONAL DE TARJETA DE CRÉDITO']
    # Header: runs up to two words past the first line containing the date label.
    i = [i for i, w in enumerate(words) if 'FECHA ESTADO DE CUENTA' in w][0] + 2
    output += extract_from_to(words, 'NOMBRE DEL TITULAR', end=i, line_length=2)
    output += ['I. INFORMACIóN GENERAL']
    # The second line containing 'CUPO TOTAL' ends the first block.
    i = [i for i, w in enumerate(words) if 'CUPO TOTAL' in w][1]
    output += extract_from_to(words, 'CUPO TOTAL', end=i, line_length=3)
    output += extract_from_to(words, i, end='ROTATIVO', line_length=4)
    output += extract_from_to(words, 'ROTATIVO', end='TASA INTERÉS VIGENTE', line_length=3)
    output += extract_from_to(words, 'TASA INTERÉS VIGENTE',
                              end='CAE se calcula sobre un supuesto de gasto mensual de UF 20 y pagadero en 12 cuotas.',
                              line_length=4)
    output += extract_from_to(words, 'DESDE', end='PERÍODO FACTURADO', line_length=2)
    output += extract_from_to(words, 'PERÍODO FACTURADO', end='II.', line_length=3)
    output += ['II. DETALLE']
    output += extract_from_to(words, '1. PERÍODO ANTERIOR', end='SALDO ADEUDADO INICIO PERÍODO ANTERIOR', line_length=3)
    i = words.index('2. PERÍODO ACTUAL')
    # Stops one word before the '2. PERÍODO ACTUAL' title; the listed pair is merged with a space.
    output += extract_from_to(words, 'SALDO ADEUDADO INICIO PERÍODO ANTERIOR', end=i - 1, line_length=2,
                              merge_list=[['MONTO FACTURADO A PAGAR (PERÍODO ANTERIOR)', '(A)']], merge_character=" ")
    output += ['2. PERÍODO ACTUAL']
    # Column titles that span two words are merged before grouping.
    output += extract_from_to(words, 'LUGAR DE', end='1.TOTAL OPERACIONES', line_length=7,
                              merge_list=[['OPERACIÓN', 'O COBRO'], ['TOTAL A', 'PAGAR'], ['VALOR CUOTA', 'MENSUAL']])
    i = words.index('1.TOTAL OPERACIONES') + 3
    output += extract_from_to(words, '1.TOTAL OPERACIONES', end=i, line_length=3)
    output += extract_from_to(words, i, end='TOTAL PAGOS A LA CUENTA', line_length=7)
    i = words.index('TOTAL PAGOS A LA CUENTA') + 2
    output += extract_from_to(words, 'TOTAL PAGOS A LA CUENTA', end=i, line_length=2)
    output += extract_from_to(words, i, end='TOTAL PAT A LA CUENTA', line_length=8)
    i = words.index('TOTAL PAT A LA CUENTA') + 2
    output += extract_from_to(words, 'TOTAL PAT A LA CUENTA', end=i, line_length=2)
    output += extract_from_to(words, i, end=i + 3, line_length=2,
                              merge_list=[
                                  ['2.PRODUCTOS O SERVICIOS VOLUNTARIAMENTE CONTRATADOS SIN MOVIMIENTOS', '(C)']],
                              merge_character=" ")
    # Section 3 is optional: only extracted when its title is present.
    if '3.CARGOS, COMISIONES, IMPUESTOS Y ABONOS' in words:
        i = words.index('3.CARGOS, COMISIONES, IMPUESTOS Y ABONOS') + 3
        output += extract_from_to(words, '3.CARGOS, COMISIONES, IMPUESTOS Y ABONOS', end=i, line_length=3)
    return output
|
||||
|
||||
|
||||
def basic(text):
    """Fallback cleanup: return the text as its list of non-empty, stripped lines."""
    words = split_words(text)
    return words
|
||||
|
||||
|
||||
def split_words(text):
    """Split text into its non-empty lines, stripped of surrounding whitespace.

    :param text: a string, or a list of strings that is first joined with
        three newlines.
    :return: list of stripped, non-empty lines in their original order.
    """
    if isinstance(text, list):
        text = "\n\n\n".join(text)
    stripped_lines = (line.strip() for line in text.split("\n"))
    return [line for line in stripped_lines if line != '']
|
||||
|
||||
|
||||
def extract_from_to(word_list, start, line_length, end=None, merge_list=None, merge_character="\n"):
|
||||
if not isinstance(start, int):
|
||||
start = word_list.index(start)
|
||||
def extract_from_to(word_list, start, line_length, end: str = None, merge_list=None):
|
||||
if end is not None:
|
||||
if not isinstance(end, int):
|
||||
end = word_list.index(end)
|
||||
return extract_by_line(word_list[start:end], line_length, merge_list, merge_character)
|
||||
return extract_by_line(word_list[start:], line_length, merge_list, merge_character)
|
||||
return extract_by_line(word_list[word_list.index(start):word_list.index(end)], line_length, merge_list)
|
||||
return extract_by_line(word_list[word_list.index(start):], line_length, merge_list)
|
||||
|
||||
|
||||
def extract_by_line(word_list, line_length, merge_list=None, merge_character="\n"):
|
||||
def extract_by_line(word_list, line_length, merge_list=None):
|
||||
if merge_list is not None:
|
||||
word_list = merge_words(word_list, merge_list, merge_character)
|
||||
word_list = merge_words(word_list, merge_list)
|
||||
output = []
|
||||
line = []
|
||||
for k, w in enumerate(word_list):
|
||||
@ -118,39 +54,22 @@ def extract_by_line(word_list, line_length, merge_list=None, merge_character="\n
|
||||
return output
|
||||
|
||||
|
||||
def merge_words(word_list, merge_list, merge_character):
|
||||
def merge_words(word_list, merge_list):
|
||||
for m in merge_list:
|
||||
ixs = find_words(word_list, m)
|
||||
if ixs is None:
|
||||
continue
|
||||
for i in ixs:
|
||||
word_list = word_list[:i] + [merge_character.join(m)] + word_list[i + len(m):]
|
||||
i = word_list.index(m[0])
|
||||
word_list = word_list[:i] + ["\n".join(m)] + word_list[i+len(m):]
|
||||
return word_list
|
||||
|
||||
|
||||
def find_words(word_list, find_list):
    """Return every index where ``find_list`` occurs as a consecutive run in ``word_list``.

    :param word_list: sequence of words to search in.
    :param find_list: sequence of words that must appear one after another.
    :return: list of start indexes, in ascending order; empty when there is no
        match or ``find_list`` is empty.
    """
    size = len(find_list)
    if size == 0:
        return []
    target = list(find_list)
    # Only start positions that leave room for the whole run are considered,
    # so a partial match at the end of the list can no longer raise IndexError.
    return [i for i in range(len(word_list) - size + 1) if list(word_list[i:i + size]) == target]
|
||||
|
||||
|
||||
def extract_data(word_list, start, line_length, end=None, merge_list=None, merge_character="\n", date_sep='/'):
|
||||
def extract_data(word_list, start, line_length, end=None, merge_list=None, date_sep='/'):
|
||||
word_list = word_list[word_list.index(start):]
|
||||
if end is not None:
|
||||
word_list = word_list[:word_list.index(end)]
|
||||
if merge_list is not None:
|
||||
word_list = merge_words(word_list, merge_list, merge_character)
|
||||
word_list = merge_words(word_list, merge_list)
|
||||
output = []
|
||||
line = []
|
||||
line_num = 0
|
||||
col = 0
|
||||
for k, w in enumerate(word_list):
|
||||
if col > 0 and col % line_length == 0:
|
||||
@ -168,5 +87,4 @@ def extract_data(word_list, start, line_length, end=None, merge_list=None, merge
|
||||
continue
|
||||
line.append(w)
|
||||
col += 1
|
||||
output.append(line)
|
||||
return output
|
||||
|
@ -3,51 +3,22 @@ import os
|
||||
|
||||
import contabilidad.pdf as pdf
|
||||
import contabilidad.text_handler as th
|
||||
from contabilidad.log import Logger, LOG_LEVEL
|
||||
import ai.dictionary as dictionary
|
||||
from ai.network import AI
|
||||
|
||||
|
||||
def parse_settings(args):
    """Derive every path and the logger used by the run from the CLI arguments.

    :param args: object with ``filename`` and ``log_file`` attributes.
    :return: dict with the keys ``filename``, ``temp``, ``dictionary``,
        ``network``, ``log_file``, ``error_log_file`` and ``logger``.
    """
    output = {'filename': args.filename}
    if not os.path.isfile(output['filename']):
        # Not found as given: look for it in the ../data directory of this script.
        output['filename'] = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', 'data', args.filename))
    source_dir = os.path.dirname(output['filename'])
    # Build the temp name from the file's own base name: splitting the raw
    # argument on '.' broke on directories and on names with several dots.
    stem = os.path.splitext(os.path.basename(output['filename']))[0]
    output['temp'] = os.path.realpath(os.path.join(source_dir, stem + '-temp.pdf'))
    output['dictionary'] = os.path.join(source_dir, 'dictionary.json')
    output['network'] = os.path.join(source_dir, 'network.json')
    # NOTE(review): log_file is used as a path without a None check — confirm
    # the caller always supplies it.
    output['log_file'] = args.log_file
    if not os.path.isfile(output['log_file']):
        # Not an existing file: place it in the parent of the source directory.
        output['log_file'] = os.path.join(os.path.dirname(source_dir), output['log_file'])
    output['error_log_file'] = os.path.join(os.path.dirname(output['log_file']), 'error.log')
    output['logger'] = Logger()
    output['logger'].add_log(output['log_file']).add_log(output['error_log_file'], LOG_LEVEL.ERROR)
    return output
|
||||
|
||||
|
||||
def main(args):
|
||||
settings = parse_settings(args)
|
||||
|
||||
print('Loading AI')
|
||||
network = AI(settings['dictionary'], settings['logger'])
|
||||
network.set_filename(settings['network'])
|
||||
network.add_source({'filename': settings['filename'], 'password': args.password})
|
||||
network.process_sources()
|
||||
exit()
|
||||
|
||||
print('Loading dictionary.')
|
||||
dictio = dictionary.Dictionary(settings['dictionary'], settings['logger'])
|
||||
print('Getting possible phrases.')
|
||||
dictio.process(settings['filename'], args.password)
|
||||
dictio.to_data()
|
||||
# print('Saving dictionary.')
|
||||
# dictio.save()
|
||||
filename = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', 'data', args.filename))
|
||||
temp = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', 'data', args.temp_filename))
|
||||
pdf.remove_encryption(filename, args.password, temp)
|
||||
obj = pdf.get_data(temp)
|
||||
obj = pdf.get_text(filename, args.password)
|
||||
text = th.text_cleanup(obj, filename=str(args.filename))
|
||||
os.remove(temp)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-f', '--filename', type=str)
|
||||
parser.add_argument('-p', '--password', type=str, default='')
|
||||
parser.add_argument('-l', '--log_file', type=str, default=None)
|
||||
parser.add_argument('-t', '--temp_filename', type=str)
|
||||
_args = parser.parse_args()
|
||||
main(_args)
|
||||
|
@ -1,3 +1,5 @@
|
||||
FROM php:8-fpm
|
||||
|
||||
COPY --from=composer /usr/bin/composer /usr/bin/composer
|
||||
|
||||
WORKDIR /app
|
||||
|
12
ui/common/Controller/Importar.php
Normal file
12
ui/common/Controller/Importar.php
Normal file
@ -0,0 +1,12 @@
|
||||
<?php
|
||||
namespace Contabilidad\Common\Controller;
|
||||
|
||||
use Psr\Http\Message\ServerRequestInterface as Request;
|
||||
use Psr\Http\Message\ResponseInterface as Response;
|
||||
use Slim\Views\Blade as View;
|
||||
|
||||
/**
 * Invokable controller for the import page.
 */
class Importar {
    /**
     * Render the 'importar' view into the given response.
     *
     * @param Request  $request  Incoming request (not used by this action).
     * @param Response $response Response the view is rendered into.
     * @param View     $view     View renderer.
     *
     * @return Response Whatever the view renderer returns for the template.
     */
    public function __invoke(Request $request, Response $response, View $view): Response {
        $template = 'importar';
        $rendered = $view->render($response, $template);
        return $rendered;
    }
}
|
4
ui/resources/routes/importar.php
Normal file
4
ui/resources/routes/importar.php
Normal file
@ -0,0 +1,4 @@
|
||||
<?php
|
||||
use Contabilidad\Common\Controller\Importar;
|
||||
|
||||
$app->get('/importar[/]', Importar::class);
|
92
ui/resources/views/importar.blade.php
Normal file
92
ui/resources/views/importar.blade.php
Normal file
@ -0,0 +1,92 @@
|
||||
@extends('layout.base')
|
||||
|
||||
@section('page_title')
|
||||
Importar
|
||||
@endsection
|
||||
|
||||
@section('page_content')
|
||||
<h1>Importar</h1>
|
||||
<form class="ui form" action="#" method="post" id="importar_form" enctype="multipart/form-data">
|
||||
<div class="two wide field">
|
||||
<label>Fecha</label>
|
||||
<div class="ui date calendar">
|
||||
<div class="ui icon input">
|
||||
<input type="text" name="fecha" />
|
||||
<i class="calendar outline icon"></i>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="six wide field">
|
||||
<label>Cuenta</label>
|
||||
<select name="cuenta"></select>
|
||||
</div>
|
||||
<div class="inline field">
|
||||
<input type="file" name="archivo" style="display: none;" />
|
||||
<div class="ui labeled icon input" id="archivo_btn">
|
||||
<div class="ui label">Archivo</div>
|
||||
<input type="text" readonly="" />
|
||||
<i class="search icon"></i>
|
||||
</div>
|
||||
</div>
|
||||
<button class="ui button">Importar</button>
|
||||
</form>
|
||||
@endsection
|
||||
|
||||
@push('scripts')
|
||||
<script type="text/javascript">
|
||||
/**
 * Load the accounts from the API and fill the "cuenta" dropdown.
 *
 * Each option is labelled "<account name> - <category name>" and carries the
 * account id as its value. Nothing is done when the API returns no accounts.
 */
function getCuentas() {
    sendGet(_urls.api + '/cuentas').then((data) => {
        const cuentas = data ? data.cuentas : null
        // Covers null, a missing key and an empty list.
        if (!cuentas || cuentas.length === 0) {
            return
        }
        const select = $("select[name='cuenta']")
        let values = []
        $.each(cuentas, (i, el) => {
            // An account without a category is labelled with its own name only
            // instead of throwing on el.categoria.nombre.
            const parts = el.categoria ? [el.nombre, el.categoria.nombre] : [el.nombre]
            const nombre = parts.join(' - ')
            values.push({
                name: nombre,
                value: el.id,
                text: nombre
            })
        })
        select.dropdown({values})
    })
}
|
||||
// Page setup: load the accounts, configure the month picker, wire the custom
// file button and submit the form through the API.
$(document).ready(() => {
    getCuentas()
    const today = new Date()
    // First day of the previous month: used as initial and latest selectable date.
    const start = new Date(today.getFullYear(), today.getMonth() - 1)
    $('.ui.calendar').calendar({
        type: 'month',
        initialDate: start,
        maxDate: start,
        months: ['Enero', 'Febrero', 'Marzo', 'Abril', 'Mayo', 'Junio', 'Julio', 'Agosto', 'Septiembre', 'Octubre', 'Noviembre', 'Diciembre'],
        monthsShort: ['Ene', 'Feb', 'Mar', 'Abr', 'May', 'Jun', 'Jul', 'Ago', 'Sep', 'Oct', 'Nov', 'Dic'],
        formatter: {
            // Shown and submitted as "<year>-<month>" (month is not zero-padded).
            date: function(date, settings) {
                if (!date) return ''
                const year = date.getFullYear()
                const month = date.getMonth() + 1
                return [year, month].join('-')
            }
        }
    })
    // The real file input is hidden; clicking the styled field opens its dialog.
    $('#archivo_btn').css('cursor', 'pointer').click(() => {
        $("[name='archivo']").trigger('click')
    })
    $("[name='archivo']").change((e) => {
        const files = e.currentTarget.files
        // Cancelling the dialog leaves the selection empty: clear the label
        // instead of throwing on files[0].
        const filename = (files && files.length > 0) ? files[0].name : ''
        $('#archivo_btn').find('input').val(filename)
    })
    $('#importar_form').submit((e) => {
        e.preventDefault()
        const data = new FormData(e.currentTarget)
        // Third argument asks sendPost for a file upload request.
        sendPost(_urls.api + '/import', data, true).then((resp) => {
            console.debug(resp)
        })
        return false
    })
})
|
||||
</script>
|
||||
@endpush
|
@ -2,6 +2,7 @@
|
||||
<a class="item" href="{{$urls->base}}">Inicio</a>
|
||||
@include('layout.body.menu.cuentas')
|
||||
@include('layout.body.menu.categorias')
|
||||
<a class="item" href="{{$urls->base}}importar">Importar</a>
|
||||
<div class="right menu">
|
||||
<a class="item" href="{{$urls->base}}config">Config</a>
|
||||
</div>
|
||||
|
@ -7,7 +7,18 @@
|
||||
base: '{{$urls->base}}',
|
||||
api: '{{$urls->api}}'
|
||||
}
|
||||
function buildAjax(url, method) {
|
||||
function buildAjax(url, method, files=false) {
|
||||
if (files) {
|
||||
return {
|
||||
url: url,
|
||||
headers: {
|
||||
'Authorization': 'Bearer ' + API_KEY
|
||||
},
|
||||
method: method,
|
||||
processData: false,
|
||||
contentType: false
|
||||
}
|
||||
}
|
||||
return {
|
||||
url: url,
|
||||
headers: {
|
||||
@ -21,8 +32,8 @@
|
||||
let ajax_obj = buildAjax(url, 'GET')
|
||||
return $.ajax(ajax_obj)
|
||||
}
|
||||
function sendPost(url, data) {
|
||||
let ajax_obj = buildAjax(url, 'POST')
|
||||
function sendPost(url, data, files=false) {
|
||||
let ajax_obj = buildAjax(url, 'POST', files)
|
||||
ajax_obj['data'] = data
|
||||
return $.ajax(ajax_obj)
|
||||
}
|
||||
|
Reference in New Issue
Block a user