extract and analyze pdf file
This commit is contained in:
@@ -1,11 +1,6 @@
|
||||
<metal:block use-macro="main_template">
|
||||
<div metal:fill-slot="content">
|
||||
|
||||
<p>
|
||||
<a href="${request.application_url}/" class="btn btn-default" role="button">
|
||||
<span class="glyphicon glyphicon-chevron-left"></span> Retour</a>
|
||||
</p>
|
||||
|
||||
<table id="users_list" class="table table-striped table-bordered">
|
||||
<thead>
|
||||
<tr>
|
||||
@@ -19,6 +14,20 @@
|
||||
</table>
|
||||
|
||||
<br />
|
||||
<form id="change-dossier-details-form" action="${url}" method="post" tal:condition="dt_data"
|
||||
data-fv-framework="bootstrap"
|
||||
data-fv-icon-valid="glyphicon glyphicon-ok"
|
||||
data-fv-icon-invalid="glyphicon glyphicon-remove"
|
||||
data-fv-icon-validating="glyphicon glyphicon-refresh">
|
||||
|
||||
<div class="form-group">
|
||||
<a href="${request.application_url}/" class="btn btn-default" role="button">
|
||||
<span class="glyphicon glyphicon-chevron-left"></span> Retour</a>
|
||||
<button class="btn btn-success" type="submit" name="form.submitted">
|
||||
<span class="glyphicon glyphicon-download-alt"></span> Générer les dossiers</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<br />
|
||||
|
||||
<script type="text/javascript">
|
||||
|
||||
@@ -82,7 +82,7 @@
|
||||
<a class="btn btn-default" href="${request.application_url}/dossier_view/${nodossier}">
|
||||
<span class="glyphicon glyphicon-chevron-left"></span> Annuler</a>
|
||||
<button class="btn btn-primary" type="submit" name="form.submitted">
|
||||
<span class="glyphicon glyphicon-ok"></span> Enregistrer</button>
|
||||
<span class="glyphicon glyphicon-ok"></span> Enregistrer</button>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@@ -22,12 +22,21 @@ from sqlalchemy.exc import DBAPIError
|
||||
from ..security import groupfinder
|
||||
|
||||
import os
|
||||
import io
|
||||
import shutil
|
||||
import pdfkit
|
||||
import imaplib
|
||||
import base64
|
||||
import email
|
||||
|
||||
from pdfminer3.layout import LAParams, LTTextBox, LTTextLine
|
||||
from pdfminer3.pdfpage import PDFPage
|
||||
from pdfminer3.pdfparser import PDFParser
|
||||
from pdfminer3.pdfdocument import PDFDocument
|
||||
from pdfminer3.pdfdevice import PDFDevice
|
||||
from pdfminer3.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||
from pdfminer3.converter import PDFPageAggregator
|
||||
|
||||
from ..views.default import *
|
||||
from ..models.default import *
|
||||
from ..models.dossier import *
|
||||
@@ -714,36 +723,7 @@ def rdf_bill(request):
|
||||
|
||||
@view_config(route_name='demandes', renderer='../templates/dossier/demandes.pt', permission='view')
|
||||
def demandes(request):
|
||||
|
||||
def process_messages(data, search_criteria, liste):
|
||||
# créer la liste des entêtes des messages à afficher
|
||||
rv, data = conn.search(None, search_criteria)
|
||||
if rv != 'OK':
|
||||
request.session.flash("ERREUR de lecture de la boîte de réception", 'danger')
|
||||
return HTTPFound(location=request.route_url('home'))
|
||||
|
||||
mail_ids = data[0]
|
||||
for num in mail_ids.split():
|
||||
rv, msg_data = conn.fetch(num, '(RFC822)')
|
||||
if rv != 'OK':
|
||||
request.session.flash("ERREUR de lecture du message %s" % num, 'danger')
|
||||
return HTTPFound(location=request.route_url('home'))
|
||||
|
||||
msg = email.message_from_bytes(msg_data[0][1])
|
||||
hdr = email.header.make_header(email.header.decode_header(msg['Subject']))
|
||||
email_subject = str(hdr)
|
||||
email_from = email.utils.parseaddr(msg['from'])[1]
|
||||
import pdb;pdb.set_trace()
|
||||
# Now convert to local date-time
|
||||
date_tuple = email.utils.parsedate_tz(msg['Date'])
|
||||
if date_tuple:
|
||||
email_date = datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
|
||||
else:
|
||||
email_date = datetime.now()
|
||||
|
||||
d = (str(int(num)), email_date.strftime('%d-%m-%Y %H:%M:%S'), email_from, mbx_name.replace('entreprise-dumas.com', ''), email_subject)
|
||||
liste.append(d)
|
||||
return liste
|
||||
url = request.route_url('demandes')
|
||||
|
||||
# lire les demandes d'interventions arrivées par email
|
||||
mbx_name = 'peinture-dumas@entreprise-dumas.com'
|
||||
@@ -761,16 +741,167 @@ def demandes(request):
|
||||
|
||||
liste=[]
|
||||
# lire demandes de la MAIF
|
||||
mbx_subject = 'FROM gestionsinistre@maif.fr SUBJECT "Missionnement r"'
|
||||
process_messages(data, mbx_subject, liste)
|
||||
mbx_search = 'FROM gestionsinistre@maif.fr SUBJECT "Missionnement r"'
|
||||
if 'form.submitted' in request.params:
|
||||
demandes_generer(conn, mbx_name, mbx_search, liste)
|
||||
demandes_afficher(conn, mbx_name, mbx_search, liste)
|
||||
|
||||
# lire demandes de DOMUS
|
||||
mbx_subject = 'FROM service.sinistres@domus-services.fr SUBJECT "Ordre de mission DOMUS - Dossier"'
|
||||
process_messages(data, mbx_subject, liste)
|
||||
mbx_search = 'FROM service.sinistres@domus-services.fr SUBJECT "Ordre de mission DOMUS - Dossier"'
|
||||
demandes_afficher(conn, mbx_name, mbx_search, liste)
|
||||
|
||||
conn.logout()
|
||||
|
||||
return {
|
||||
'page_title': 'Liste des demandes pour la PEINTURE',
|
||||
'url': url,
|
||||
'dt_data': json.dumps(liste),
|
||||
}
|
||||
|
||||
def demandes_afficher(conn, mbx_name, search_criteria, liste, request=None):
    """Append one summary row per matching e-mail to ``liste``.

    Runs ``conn.search`` with ``search_criteria``, fetches every message
    number it returns and appends, for each message, the tuple
    ``(number, 'dd-mm-YYYY HH:MM:SS' date, sender address, mailbox label,
    subject)``.

    Args:
        conn: object exposing ``search(charset, criteria)`` and
            ``fetch(num, parts)``, each returning ``(status, data)``.
        mbx_name: mailbox address; the ``'entreprise-dumas.com'`` part is
            removed to build the label shown in the row.
        search_criteria: criteria string passed as-is to ``conn.search``.
        liste: list extended in place with the rows.
        request: optional object with a ``session.flash`` method. When
            given, read errors are flashed to the user; otherwise they are
            logged. The previous code read an undefined ``request`` name
            and crashed with ``NameError`` on every error path.

    Returns:
        ``liste`` (the same object), holding the rows collected so far
        even when a read error interrupts the scan.
    """
    import logging

    def report(message):
        # Surface the error to the user when a request is available,
        # otherwise keep a trace in the application log.
        if request is not None:
            request.session.flash(message, 'danger')
        else:
            logging.getLogger(__name__).error(message)

    rv, data = conn.search(None, search_criteria)
    if rv != 'OK':
        report("ERREUR de lecture de la boîte de réception")
        return liste

    # An empty result may come back as b'' or None; both mean "no message".
    mail_ids = data[0] or b''
    mbx_label = mbx_name.replace('entreprise-dumas.com', '')

    for num in mail_ids.split():
        rv, msg_data = conn.fetch(num, '(RFC822)')
        if rv != 'OK':
            report("ERREUR de lecture du message %s" % int(num))
            return liste

        msg = email.message_from_bytes(msg_data[0][1])

        # Missing headers come back as None; fall back to '' so that the
        # header decoding helpers do not raise.
        hdr = email.header.make_header(
            email.header.decode_header(msg['Subject'] or ''))
        email_subject = str(hdr)
        email_from = email.utils.parseaddr(msg['from'] or '')[1]

        # Convert the Date header to a local date-time; use "now" when the
        # header is absent or cannot be parsed.
        date_tuple = email.utils.parsedate_tz(msg['Date'])
        if date_tuple:
            email_date = datetime.fromtimestamp(
                email.utils.mktime_tz(date_tuple))
        else:
            email_date = datetime.now()

        liste.append((
            str(int(num)),
            email_date.strftime('%d-%m-%Y %H:%M:%S'),
            email_from,
            mbx_label,
            email_subject,
        ))

    return liste
|
||||
|
||||
def demandes_generer(conn, mbx_name, search_criteria, liste, request=None):
    """Download and analyse the PDF attached to each matching e-mail.

    For every message returned by ``conn.search(None, search_criteria)``
    whose subject does not contain ``'Annulation '``: the attachment is
    saved under ``/tmp``, converted to text, and either deleted (when the
    text contains ``'Objet : ANNULATION MISSION'``) or parsed for the
    fields needed to create a dossier.

    Args:
        conn: object exposing ``search(charset, criteria)`` and
            ``fetch(num, parts)``, each returning ``(status, data)``.
        mbx_name: mailbox address, forwarded to the dossier parser.
        search_criteria: criteria string passed as-is to ``conn.search``.
        liste: unused here; kept so the call signature matches
            ``demandes_afficher``.
        request: optional object with a ``session.flash`` method. When
            given, read errors are flashed to the user; otherwise they are
            logged. The previous code read an undefined ``request`` name
            and crashed with ``NameError`` on every error path.

    Returns:
        None.
    """
    import logging

    def report(message):
        # Surface the error to the user when a request is available,
        # otherwise keep a trace in the application log.
        if request is not None:
            request.session.flash(message, 'danger')
        else:
            logging.getLogger(__name__).error(message)

    def download_pdf_to_tmp(email_message):
        """Save the named attachments under /tmp.

        Returns the path of the last attachment seen, or None when the
        message carries no named attachment.
        """
        saved_path = None
        for part in email_message.walk():
            # Multipart containers only wrap other parts.
            if part.get_content_maintype() == 'multipart':
                continue
            # Parts without a Content-Disposition header are not treated
            # as attachments.
            if part.get('Content-Disposition') is None:
                continue
            file_name = part.get_filename()
            if not file_name:
                continue
            # The file name comes from the e-mail itself: keep only its
            # last path component so it cannot point outside /tmp.
            safe_name = os.path.basename(file_name.replace('\\', '/'))
            if safe_name in ('', '.', '..'):
                continue
            saved_path = os.path.join('/tmp/', safe_name)
            # A file already present under that name is left untouched.
            if not os.path.isfile(saved_path):
                with open(saved_path, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
        return saved_path

    def convert_pdf_to_txt(path):
        """Extract the text of a PDF.

        Returns ``(text, text_file_path)``; the text is also written to
        ``/tmp/log_file.txt``.
        """
        resource_manager = PDFResourceManager()
        converter = PDFPageAggregator(resource_manager, laparams=LAParams())
        page_interpreter = PDFPageInterpreter(resource_manager, converter)

        chunks = []
        try:
            with open(path, 'rb') as fh:
                for page in PDFPage.get_pages(fh,
                                              caching=True,
                                              check_extractable=True):
                    page_interpreter.process_page(page)
                    # The converter renders the layout produced by the
                    # interpreter for the page just processed.
                    layout = converter.get_result()
                    # Only text boxes and text lines carry text.
                    for lt_obj in layout:
                        if isinstance(lt_obj, (LTTextBox, LTTextLine)):
                            chunks.append(lt_obj.get_text())
        finally:
            # Release the converter even when a page fails to parse.
            converter.close()

        extracted_text = ''.join(chunks)

        # Write the text to a file for the line-oriented parser below.
        extracted_file = '/tmp/log_file.txt'
        with open(extracted_file, 'w', encoding='utf-8') as my_log:
            my_log.write(extracted_text)

        return extracted_text, extracted_file

    def generer_dossier(mbx_name, filepath):
        """Scan the extracted text for the fields of a dossier.

        Returns a dict holding whichever of ``no_sinistre``, ``nom``,
        ``adr``, ``cp``, ``ville`` and ``tel`` were found. The line
        offsets below are those of the original parser; they presumably
        match the layout of the insurer's PDF - to be confirmed against
        real documents.
        """
        infos = {}
        with open(filepath, encoding='utf-8') as fp:
            line = fp.readline()
            while line:
                if line.startswith('Nos références'):
                    # The value sits two lines below the label.
                    fp.readline()
                    line = fp.readline()
                    infos['no_sinistre'] = line.rstrip('\n')
                if line.startswith('Bénéficiaire des travaux :'):
                    infos['nom'] = line.partition(' : ')[2].rstrip('\n')
                    # Address is three lines below, postcode and town on
                    # the line after it.
                    fp.readline()
                    fp.readline()
                    line = fp.readline()
                    infos['adr'] = line.rstrip('\n')
                    line = fp.readline()
                    infos['cp'] = line[0:5]
                    infos['ville'] = line[6:].rstrip('\n')
                if line.startswith('N° de téléphone :'):
                    # The last 10 characters of the line.
                    infos['tel'] = line.rstrip('\n')[-10:]

                # Read the next line.
                line = fp.readline()

        # TODO: create the quote request (dem_devis) from these fields.
        return infos

    # Look up the request e-mails in the mailbox.
    rv, data = conn.search(None, search_criteria)
    if rv != 'OK':
        report("ERREUR de lecture de la boîte de réception")
        return

    # An empty result may come back as b'' or None; both mean "no message".
    mail_ids = data[0] or b''
    for num in mail_ids.split():
        rv, msg_data = conn.fetch(num, '(RFC822)')
        if rv != 'OK':
            report("ERREUR de lecture du message %s" % int(num))
            return

        # Parse the raw bytes directly: decoding the whole message as
        # UTF-8 first fails on mails using another charset.
        email_message = email.message_from_bytes(msg_data[0][1])

        # Decode the subject so that MIME-encoded words are compared as
        # text; a missing subject is treated as empty.
        email_subject = str(email.header.make_header(
            email.header.decode_header(email_message['subject'] or '')))

        # Cancelled request: nothing to generate.
        if 'Annulation ' in email_subject:
            continue

        file_path = download_pdf_to_tmp(email_message)
        if file_path is None:
            # No attachment to analyse in this message.
            continue

        # Convert the PDF to text.
        texte, extracted_file = convert_pdf_to_txt(file_path)
        if 'Objet : ANNULATION MISSION' in texte:
            # Cancelled mission: delete the PDF.
            os.remove(file_path)
        else:
            # Build the dossier data from the extracted text.
            generer_dossier(mbx_name, extracted_file)

    return
|
||||
|
||||
Reference in New Issue
Block a user