Files
contabilidad/python/src/app.py

57 lines
1.8 KiB
Python
Raw Normal View History

2021-11-02 22:12:25 -03:00
import io
2021-11-01 11:00:59 -03:00
import json
import os
2021-11-02 22:12:25 -03:00
import sys
2021-11-01 11:00:59 -03:00
from flask import Flask, request
import contabilidad.pdf as pdf
import contabilidad.passwords as passwords
import contabilidad.log as log
2021-11-02 15:37:36 -03:00
import contabilidad.text_handler as th
2021-11-01 11:00:59 -03:00
# Flask application object; the route decorator below registers its handler on it.
app = Flask(__name__)
# Set the 'filename' entry of the project's logging settings to the service log
# file (presumably read by contabilidad.log when it configures logging — confirm).
log.logging['filename'] = '/var/log/python/contabilidad.log'
def _split_statement_rows(rows):
    """Re-split the rows of a 'FECHA' table in place and return them.

    Rows whose column '0' contains 'FECHA' or 'ACTUALICE' are left untouched.
    For every other row the text of column '0' (or of column '1' when column
    '0' is empty) is split on single spaces; the first three tokens are
    written back to column '0' and the remaining tokens to column '1'.
    """
    for row in rows:
        first = row['0']
        if 'FECHA' in first or 'ACTUALICE' in first:
            continue
        source = row['1'] if first == '' else first
        tokens = source.split(' ')
        row['0'] = ' '.join(tokens[:3])
        row['1'] = ' '.join(tokens[3:])
    return rows


def _is_inside(path, root):
    """Return True when the resolved *path* is *root* itself or lies below it."""
    return os.path.commonpath([root, path]) == root


@app.route('/pdf/parse', methods=['POST'])
def pdf_parse():
    """Decrypt the requested PDFs and return their extracted tables as JSON.

    The request body is a JSON object with a 'files' entry holding either a
    single file description or a list of them; each description carries a
    'filename' that is resolved relative to /app/data.

    For every file, the passwords loaded from /app/config/.passwords.yml are
    tried in order. The first one accepted by ``pdf.check_password`` is used
    to write a decrypted temporary copy, from which ``pdf.get_data`` extracts
    the tables. Tables whose first row has 'FECHA' in column '0' are
    normalised by ``_split_statement_rows``; all tables are returned. Files
    that no password opens are omitted from the result.

    Returns:
        A JSON string: a list of ``{'filename': ..., 'text': [table, ...]}``
        objects, where each table is a list of row records. For a malformed
        body or a filename that resolves outside /app/data, a JSON error
        object with HTTP status 400 is returned instead.
    """
    data = request.get_json()
    if not isinstance(data, dict) or 'files' not in data:
        return json.dumps({'error': "request body must be a JSON object with a 'files' entry"}), 400

    files = data['files']
    if not isinstance(files, list):
        files = [files]

    password_file = '/app/config/.passwords.yml'
    pwds = passwords.get_passwords(password_file)
    data_root = os.path.realpath('/app/data')

    output = []
    for entry in files:
        name = entry['filename']
        filename = os.path.realpath(os.path.join(data_root, name))
        stem = name.split('.')[0]
        temp = os.path.realpath(os.path.join(data_root, stem + '-temp.pdf'))
        # The filename comes straight from the client: refuse anything that
        # resolves outside the data directory (e.g. '../../etc/passwd').
        if not (_is_inside(filename, data_root) and _is_inside(temp, data_root)):
            return json.dumps({'error': 'invalid filename', 'filename': name}), 400

        for password in pwds:
            if not pdf.check_password(filename, password):
                continue
            try:
                pdf.remove_encryption(filename, password, temp)
                tables = []
                # Each extracted table exposes its content as ``.df``
                # (presumably a pandas DataFrame — confirm in contabilidad.pdf).
                for table in pdf.get_data(temp):
                    rows = json.loads(table.df.to_json(orient='records'))
                    # Guard against empty tables before peeking at the header row.
                    if rows and rows[0]['0'] == 'FECHA':
                        _split_statement_rows(rows)
                    tables.append(rows)
            finally:
                # Never leave the decrypted copy behind, even when parsing fails.
                if os.path.exists(temp):
                    os.remove(temp)
            output.append({'filename': name, 'text': tables})
            # The file is decrypted and parsed; trying further passwords could
            # only produce duplicate entries.
            break

    return json.dumps(output)
2021-11-01 11:00:59 -03:00
if __name__ == '__main__':
    # Listening on 0.0.0.0 makes the development server reachable from outside
    # the host/container. The Werkzeug debugger lets anyone who can reach it
    # execute arbitrary Python, so debug mode is opt-in via FLASK_DEBUG rather
    # than always on.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', debug=debug)