"""Flask application exposing ``POST /pdf/parse``.

The endpoint decrypts PDF files stored under ``DATA_DIR`` using the passwords
loaded from ``PASSWORD_FILE`` and returns the tables found in them as JSON.
"""
import io
import json
import os
import sys

from flask import Flask, request

import contabilidad.pdf as pdf
import contabilidad.passwords as passwords
import contabilidad.log as log
import contabilidad.text_handler as th

# Directory that holds the PDFs to parse; request filenames are relative to it.
DATA_DIR = '/app/data'
# File handed to ``passwords.get_passwords`` to obtain the candidate passwords.
PASSWORD_FILE = '/app/config/.passwords.yml'

app = Flask(__name__)
log.logging['filename'] = '/var/log/python/contabilidad.log'


def _resolve_in_data_dir(name):
    """Return the real path of *name* inside ``DATA_DIR``.

    Returns ``None`` when the resolved path is the data directory itself or
    lies outside of it (for example ``../../etc/passwd`` or an absolute path),
    so that client-supplied names cannot reach arbitrary files.
    """
    root = os.path.realpath(DATA_DIR)
    path = os.path.realpath(os.path.join(root, name))
    if path == root or os.path.commonpath([root, path]) != root:
        return None
    return path


def _split_date_rows(rows):
    """Split the leading three tokens of each row into column ``'0'``.

    Rows whose column ``'0'`` contains ``FECHA`` or ``ACTUALICE`` are left
    untouched. For every other row the text is taken from column ``'0'`` (or
    from column ``'1'`` when column ``'0'`` is empty), split on single spaces,
    and written back as: first three tokens -> ``'0'``, the rest -> ``'1'``.
    The rows are modified in place and also returned.

    NOTE(review): the first three tokens are presumably a date; confirm
    against real statements.
    """
    for row in rows:
        # ``None`` cells (JSON null) are treated like empty strings.
        first = row.get('0') or ''
        if 'FECHA' in first or 'ACTUALICE' in first:
            continue
        text = (row.get('1') or '') if first == '' else first
        tokens = text.split(' ')
        row['0'] = ' '.join(tokens[:3])
        row['1'] = ' '.join(tokens[3:])
    return rows


def _extract_tables(pdf_path):
    """Return every table of *pdf_path* as a list of row dictionaries.

    Each object returned by ``pdf.get_data`` is converted through its ``df``
    attribute to JSON records. Tables whose first row has ``'FECHA'`` in
    column ``'0'`` are additionally passed through ``_split_date_rows``.
    """
    tables = []
    for table in pdf.get_data(pdf_path):
        rows = json.loads(table.df.to_json(orient='records'))
        # Guard against empty tables before looking at the header row.
        if rows and rows[0].get('0') == 'FECHA':
            _split_date_rows(rows)
        tables.append(rows)
    return tables


def _parse_pdf(name, pwds):
    """Decrypt and parse the PDF called *name*.

    Returns the list of tables, or ``None`` when *name* does not resolve to a
    path inside ``DATA_DIR`` or none of *pwds* opens the file. The decrypted
    temporary copy is always removed, even when parsing fails.
    """
    source = _resolve_in_data_dir(name)
    if source is None:
        return None
    # Sits next to the source file, so it is inside DATA_DIR as well.
    temp = os.path.splitext(source)[0] + '-temp.pdf'
    for pwd in pwds:
        if not pdf.check_password(source, pwd):
            continue
        try:
            pdf.remove_encryption(source, pwd, temp)
            # Stop at the first password that works.
            return _extract_tables(temp)
        finally:
            if os.path.exists(temp):
                os.remove(temp)
    return None


@app.route('/pdf/parse', methods=['POST'])
def pdf_parse():
    """Parse the PDFs named in the JSON request body.

    The body must be an object with a ``files`` key holding either one
    ``{"filename": ...}`` object or a list of them. The response is a JSON
    list with one ``{"filename": ..., "text": [tables]}`` entry per file that
    could be decrypted; entries that are malformed, fall outside the data
    directory, or match no known password are skipped. A body without
    ``files`` yields a 400 response.
    """
    data = request.get_json()
    if not isinstance(data, dict) or 'files' not in data:
        return json.dumps({'error': "request body must contain 'files'"}), 400

    files = data['files']
    if not isinstance(files, list):
        files = [files]

    pwds = passwords.get_passwords(PASSWORD_FILE)
    output = []
    for entry in files:
        name = entry.get('filename') if isinstance(entry, dict) else None
        if not isinstance(name, str):
            continue
        tables = _parse_pdf(name, pwds)
        if tables is None:
            continue
        output.append({'filename': name, 'text': tables})
    return json.dumps(output)


if __name__ == '__main__':
    # NOTE(review): debug=True together with host='0.0.0.0' exposes the
    # interactive debugger on every network interface; use only in a trusted
    # development environment.
    app.run(host='0.0.0.0', debug=True)