Files
contabilidad/python/src/app.py

40 lines
1.1 KiB
Python
Raw Normal View History

2021-11-01 11:00:59 -03:00
import json
import os
from flask import Flask, request
import contabilidad.pdf as pdf
import contabilidad.passwords as passwords
import contabilidad.log as log
2021-11-02 15:37:36 -03:00
import contabilidad.text_handler as th
2021-11-01 11:00:59 -03:00
# Flask application object; the route decorator below registers its view on it.
app = Flask(__name__)
# Override the 'filename' entry of the project log module's settings mapping.
# NOTE(review): presumably this is the file contabilidad.log writes to — confirm
# in that module; the directory /var/log/python must exist and be writable.
log.logging['filename'] = '/var/log/python/contabilidad.log'
def _resolve_data_path(name, base_dir='/app/data'):
    """Return the real path of *name* inside *base_dir*, or None if it escapes.

    Symlinks and ``..`` components are resolved before the containment check,
    so neither an absolute *name* nor a relative one that climbs out of
    *base_dir* is accepted.
    """
    base = os.path.realpath(base_dir)
    try:
        candidate = os.path.realpath(os.path.join(base, name))
    except ValueError:
        # e.g. an embedded NUL byte in the client-supplied name
        return None
    if os.path.commonpath([base, candidate]) != base:
        return None
    return candidate


@app.route('/pdf/parse', methods=['POST'])
def pdf_parse():
    """Extract the text of the PDFs named in the JSON request body.

    The body must be a JSON object with a ``files`` key holding either one
    ``{"filename": ...}`` object or a list of them. File names are resolved
    relative to ``/app/data``; names that resolve outside that directory are
    rejected.

    Every password returned by ``passwords.get_passwords`` is tried on every
    file; each attempt for which ``pdf.get_text`` returns something other than
    ``None`` contributes one cleaned-up text to that file's result.

    Returns:
        On success, a JSON array of ``{"filename": <name>, "text": [<str>...]}``
        objects in request order. On a malformed request, a JSON object with
        an ``error`` message and HTTP status 400.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so every malformed request goes through the same 400 path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'files' not in data:
        return json.dumps(
            {'error': "expected a JSON object with a 'files' key"}), 400

    files = data['files']
    if not isinstance(files, list):
        # A single file object is accepted as shorthand for a one-item list.
        files = [files]

    # Validate everything up front so a bad entry cannot leave the request
    # half processed.
    targets = []
    for entry in files:
        name = entry.get('filename') if isinstance(entry, dict) else None
        if not isinstance(name, str) or not name:
            return json.dumps(
                {'error': "every file needs a non-empty 'filename' string"}
            ), 400
        path = _resolve_data_path(name)
        if path is None:
            # The name is client-controlled: refuse anything outside the
            # data directory (path traversal).
            return json.dumps({'error': 'invalid filename', 'filename': name}), 400
        targets.append((name, path))

    pwds = passwords.get_passwords('/app/config/.passwords.yml')

    output = []
    for name, path in targets:
        texts = []
        for pwd in pwds:
            raw = pdf.get_text(path, pwd)
            if raw is None:
                # presumably the password did not open the file — try the next
                continue
            texts.append(th.text_cleanup(raw, name))
        output.append({'filename': name, 'text': texts})
    return json.dumps(output)
2021-11-01 11:00:59 -03:00
if __name__ == '__main__':
    # The server listens on every interface, so the Werkzeug debugger (which
    # allows arbitrary code execution from the browser) must not be on by
    # default. Opt in explicitly with FLASK_DEBUG=1 during development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(host='0.0.0.0', debug=debug_enabled)