Files
contabilidad/python/src/app.py
2021-11-02 22:12:25 -03:00

57 lines
1.8 KiB
Python

import io
import json
import os
import sys
from flask import Flask, request
import contabilidad.pdf as pdf
import contabilidad.passwords as passwords
import contabilidad.log as log
import contabilidad.text_handler as th
# Flask application object; the route decorator below registers its handler on it.
app = Flask(__name__)
# Set the 'filename' entry of the project's log configuration object.
# NOTE(review): presumably this is the file the contabilidad logger writes
# to — confirm against contabilidad.log.
log.logging['filename'] = '/var/log/python/contabilidad.log'
def _split_leading_tokens(rows):
    """Re-split the first two columns of a 'FECHA' table in place.

    ``rows`` is a list of dicts keyed by column index as a string ('0',
    '1', ...), as produced by ``DataFrame.to_json(orient='records')``.
    Only tables whose first row has ``'FECHA'`` in column '0' are touched.
    For every row whose column '0' contains neither 'FECHA' nor
    'ACTUALICE', the text of column '0' (or of column '1' when column '0'
    is empty) is split on single spaces: the first three tokens become
    column '0' and the remaining tokens become column '1'.

    Returns the same list object; an empty table is returned unchanged.
    """
    if not rows or rows[0]['0'] != 'FECHA':
        return rows
    for row in rows:
        if 'FECHA' in row['0'] or 'ACTUALICE' in row['0']:
            continue
        source = row['1'] if row['0'] == '' else row['0']
        tokens = source.split(' ')
        row['0'] = ' '.join(tokens[:3])
        row['1'] = ' '.join(tokens[3:])
    return rows


@app.route('/pdf/parse', methods=['POST'])
def pdf_parse():
    """Decrypt and parse the PDF files named in the JSON request body.

    The body must be a JSON object with a ``files`` entry holding either
    one ``{"filename": ...}`` object or a list of them. Filenames are
    resolved relative to ``/app/data`` and must stay inside it.

    For each file, the passwords loaded from ``/app/config/.passwords.yml``
    are tried in order. With the first one accepted by
    ``pdf.check_password`` the file is decrypted to a sibling
    ``<stem>-temp.pdf``, parsed with ``pdf.get_data``, and the temporary
    file is removed even if parsing fails.

    Returns:
        A JSON string: a list of ``{"filename": ..., "text": [...]}``
        entries, where ``text`` holds one list of row dicts per parsed
        table. On a malformed body or a filename that escapes the data
        directory, a ``(json_error, 400)`` tuple is returned instead.

    NOTE(review): the reviewed source had lost its indentation, so the
    nesting was reconstructed. Files for which no password matches are
    omitted from the response — confirm this is the intended behaviour.
    """
    data_dir = os.path.realpath('/app/data')

    data = request.get_json()
    if not isinstance(data, dict) or 'files' not in data:
        return json.dumps({'error': "request body must contain 'files'"}), 400
    files = data['files']
    if not isinstance(files, list):
        files = [files]

    # Validate every entry up front so no file is processed for a request
    # that will be rejected.
    resolved = []
    for file in files:
        if not isinstance(file, dict) or not isinstance(file.get('filename'), str):
            return json.dumps({'error': "each file needs a 'filename'"}), 400
        path = os.path.realpath(os.path.join(data_dir, file['filename']))
        # The filename is client-controlled: refuse anything that resolves
        # outside the data directory ('../', absolute paths, symlinks).
        if path == data_dir or os.path.commonpath([data_dir, path]) != data_dir:
            return json.dumps({'error': 'invalid filename'}), 400
        resolved.append((file['filename'], path))

    password_file = '/app/config/.passwords.yml'
    pwds = passwords.get_passwords(password_file)

    output = []
    for name, filename in resolved:
        # Keep the temporary copy next to the source file; splitext copes
        # with dots elsewhere in the path, unlike split('.')[0].
        temp = os.path.splitext(filename)[0] + '-temp.pdf'
        for p in pwds:
            if not pdf.check_password(filename, p):
                continue
            try:
                pdf.remove_encryption(filename, p, temp)
                outputs = [
                    _split_leading_tokens(
                        json.loads(table.df.to_json(orient='records'))
                    )
                    for table in pdf.get_data(temp)
                ]
            finally:
                # Never leave the decrypted copy on disk, even on failure.
                if os.path.exists(temp):
                    os.remove(temp)
            output.append({'filename': name, 'text': outputs})
            # One matching password is enough; do not process the file twice.
            break
    return json.dumps(output)
if __name__ == '__main__':
    # The Werkzeug debugger allows arbitrary code execution, so it must not
    # be switched on unconditionally while listening on every interface.
    # Enable it explicitly for local development with FLASK_DEBUG=1.
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(host='0.0.0.0', debug=debug)