2021-11-02 22:12:25 -03:00
|
|
|
import io
|
2021-11-01 11:00:59 -03:00
|
|
|
import json
|
|
|
|
import os
|
2021-11-02 22:12:25 -03:00
|
|
|
import sys
|
|
|
|
|
2021-11-01 11:00:59 -03:00
|
|
|
from flask import Flask, request
|
|
|
|
|
|
|
|
import contabilidad.pdf as pdf
|
|
|
|
import contabilidad.passwords as passwords
|
|
|
|
import contabilidad.log as log
|
2021-11-02 15:37:36 -03:00
|
|
|
import contabilidad.text_handler as th
|
2021-11-01 11:00:59 -03:00
|
|
|
|
|
|
|
|
|
|
|
# Flask application object; the view functions in this module register
# themselves on it through the @app.route decorator.
app = Flask(__name__)
|
|
|
|
# Overrides the 'filename' entry of the settings mapping exposed by
# contabilidad.log. NOTE(review): presumably this is the file the project
# logger writes to -- confirm against contabilidad.log.
log.logging['filename'] = '/var/log/python/contabilidad.log'
|
|
|
|
|
|
|
|
|
|
|
|
# Directory that holds the PDFs a client is allowed to reference.
_DATA_DIR = '/app/data'


def _bad_request(message):
    """Return a ``(body, status)`` pair Flask turns into a 400 response."""
    return json.dumps({'error': message}), 400


def _resolve_data_path(relative):
    """Resolve *relative* under the data directory, or return ``None``.

    ``None`` is returned when the resolved path falls outside the data
    directory (absolute paths, ``..`` segments, symlinks pointing elsewhere)
    or when the name cannot be resolved at all.
    """
    root = os.path.realpath(_DATA_DIR)
    try:
        candidate = os.path.realpath(os.path.join(root, relative))
    except ValueError:
        # e.g. an embedded NUL byte in the client-supplied name.
        return None
    if candidate.startswith(root + os.sep):
        return candidate
    return None


def _split_leading_fields(rows):
    """Re-split the first two columns of every data row, in place.

    Rows whose column ``'0'`` contains ``'FECHA'`` or ``'ACTUALICE'`` are
    left untouched. For every other row the text of column ``'0'`` (or of
    column ``'1'`` when column ``'0'`` is empty) is split on single spaces:
    the first three tokens become column ``'0'`` and the remainder becomes
    column ``'1'``.
    NOTE(review): presumably this undoes cells that were merged during table
    extraction -- confirm against a sample statement.
    """
    for row in rows:
        first = row['0']
        if 'FECHA' in first or 'ACTUALICE' in first:
            continue
        source = row['1'] if first == '' else first
        parts = source.split(' ')
        row['0'] = ' '.join(parts[:3])
        row['1'] = ' '.join(parts[3:])


@app.route('/pdf/parse', methods=['POST'])
def pdf_parse():
    """Decrypt the requested PDFs and return their extracted tables as JSON.

    The JSON request body must be an object with a ``files`` member that is
    either one ``{"filename": ...}`` object or a list of them. Every
    filename is resolved relative to the data directory and must stay
    inside it.

    For each file the configured passwords are tried in order; the first one
    accepted by ``pdf.check_password`` is used to write a decrypted copy to a
    temporary ``<stem>-temp.pdf`` next to the data files, the tables returned
    by ``pdf.get_data`` are converted to lists of records, and the temporary
    copy is removed again. Files that no password opens are omitted from the
    result.

    Returns:
        A JSON string holding a list of ``{"filename": ..., "text": [...]}``
        objects, or a ``(body, 400)`` pair when the request is malformed or
        names a path outside the data directory.
    """
    data = request.get_json()
    if not isinstance(data, dict) or 'files' not in data:
        return _bad_request('request body must be a JSON object with a "files" member')

    files = data['files']
    if not isinstance(files, list):
        files = [files]

    # Validate every entry before touching the filesystem: the names come
    # straight from the client and are used to read, write and delete files.
    targets = []
    for entry in files:
        name = entry.get('filename') if isinstance(entry, dict) else None
        if not isinstance(name, str) or not name:
            return _bad_request('every file entry needs a non-empty "filename" string')
        source = _resolve_data_path(name)
        temp = _resolve_data_path(name.split('.')[0] + '-temp.pdf')
        if source is None or temp is None:
            return _bad_request('filename must stay inside the data directory')
        targets.append((name, source, temp))

    password_file = '/app/config/.passwords.yml'
    pwds = passwords.get_passwords(password_file)

    output = []
    for name, source, temp in targets:
        for pwd in pwds:
            if not pdf.check_password(source, pwd):
                continue
            try:
                pdf.remove_encryption(source, pwd, temp)
                tables = []
                for table in pdf.get_data(temp):
                    rows = json.loads(table.df.to_json(orient='records'))
                    # Only tables whose first cell is the 'FECHA' header get
                    # their rows re-split; an empty table has no first row.
                    if rows and rows[0]['0'] == 'FECHA':
                        _split_leading_fields(rows)
                    tables.append(rows)
            finally:
                # Never leave the decrypted copy behind, even when
                # decryption or extraction fails half-way.
                try:
                    os.remove(temp)
                except FileNotFoundError:
                    pass
            output.append({'filename': name, 'text': tables})
            # The file is decrypted; trying further passwords would only
            # produce duplicate entries for it.
            break

    return json.dumps(output)
|
2021-11-01 11:00:59 -03:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # The server listens on every interface, so the interactive debugger
    # (which lets a client run arbitrary Python) must never be on by default.
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', debug=debug)
|