extract and analyze pdf file
This commit is contained in:
@@ -1,11 +1,6 @@
|
|||||||
<metal:block use-macro="main_template">
|
<metal:block use-macro="main_template">
|
||||||
<div metal:fill-slot="content">
|
<div metal:fill-slot="content">
|
||||||
|
|
||||||
<p>
|
|
||||||
<a href="${request.application_url}/" class="btn btn-default" role="button">
|
|
||||||
<span class="glyphicon glyphicon-chevron-left"></span> Retour</a>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table id="users_list" class="table table-striped table-bordered">
|
<table id="users_list" class="table table-striped table-bordered">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
@@ -19,6 +14,20 @@
|
|||||||
</table>
|
</table>
|
||||||
|
|
||||||
<br />
|
<br />
|
||||||
|
<form id="change-dossier-details-form" action="${url}" method="post" tal:condition="dt_data"
|
||||||
|
data-fv-framework="bootstrap"
|
||||||
|
data-fv-icon-valid="glyphicon glyphicon-ok"
|
||||||
|
data-fv-icon-invalid="glyphicon glyphicon-remove"
|
||||||
|
data-fv-icon-validating="glyphicon glyphicon-refresh">
|
||||||
|
|
||||||
|
<div class="form-group">
|
||||||
|
<a href="${request.application_url}/" class="btn btn-default" role="button">
|
||||||
|
<span class="glyphicon glyphicon-chevron-left"></span> Retour</a>
|
||||||
|
<button class="btn btn-success" type="submit" name="form.submitted">
|
||||||
|
<span class="glyphicon glyphicon-download-alt"></span> Générer les dossiers</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
<br />
|
<br />
|
||||||
|
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
|
|||||||
@@ -82,7 +82,7 @@
|
|||||||
<a class="btn btn-default" href="${request.application_url}/dossier_view/${nodossier}">
|
<a class="btn btn-default" href="${request.application_url}/dossier_view/${nodossier}">
|
||||||
<span class="glyphicon glyphicon-chevron-left"></span> Annuler</a>
|
<span class="glyphicon glyphicon-chevron-left"></span> Annuler</a>
|
||||||
<button class="btn btn-primary" type="submit" name="form.submitted">
|
<button class="btn btn-primary" type="submit" name="form.submitted">
|
||||||
<span class="glyphicon glyphicon-ok"></span> Enregistrer</button>
|
<span class="glyphicon glyphicon-ok"></span> Enregistrer</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
|
|||||||
@@ -22,12 +22,21 @@ from sqlalchemy.exc import DBAPIError
|
|||||||
from ..security import groupfinder
|
from ..security import groupfinder
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import io
|
||||||
import shutil
|
import shutil
|
||||||
import pdfkit
|
import pdfkit
|
||||||
import imaplib
|
import imaplib
|
||||||
import base64
|
import base64
|
||||||
import email
|
import email
|
||||||
|
|
||||||
|
from pdfminer3.layout import LAParams, LTTextBox, LTTextLine
|
||||||
|
from pdfminer3.pdfpage import PDFPage
|
||||||
|
from pdfminer3.pdfparser import PDFParser
|
||||||
|
from pdfminer3.pdfdocument import PDFDocument
|
||||||
|
from pdfminer3.pdfdevice import PDFDevice
|
||||||
|
from pdfminer3.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||||
|
from pdfminer3.converter import PDFPageAggregator
|
||||||
|
|
||||||
from ..views.default import *
|
from ..views.default import *
|
||||||
from ..models.default import *
|
from ..models.default import *
|
||||||
from ..models.dossier import *
|
from ..models.dossier import *
|
||||||
@@ -714,36 +723,7 @@ def rdf_bill(request):
|
|||||||
|
|
||||||
@view_config(route_name='demandes', renderer='../templates/dossier/demandes.pt', permission='view')
|
@view_config(route_name='demandes', renderer='../templates/dossier/demandes.pt', permission='view')
|
||||||
def demandes(request):
|
def demandes(request):
|
||||||
|
url = request.route_url('demandes')
|
||||||
def process_messages(data, search_criteria, liste):
|
|
||||||
# créer la liste des entêtes des messages à afficher
|
|
||||||
rv, data = conn.search(None, search_criteria)
|
|
||||||
if rv != 'OK':
|
|
||||||
request.session.flash("ERREUR de lecture de la boîte de réception", 'danger')
|
|
||||||
return HTTPFound(location=request.route_url('home'))
|
|
||||||
|
|
||||||
mail_ids = data[0]
|
|
||||||
for num in mail_ids.split():
|
|
||||||
rv, msg_data = conn.fetch(num, '(RFC822)')
|
|
||||||
if rv != 'OK':
|
|
||||||
request.session.flash("ERREUR de lecture du message %s" % num, 'danger')
|
|
||||||
return HTTPFound(location=request.route_url('home'))
|
|
||||||
|
|
||||||
msg = email.message_from_bytes(msg_data[0][1])
|
|
||||||
hdr = email.header.make_header(email.header.decode_header(msg['Subject']))
|
|
||||||
email_subject = str(hdr)
|
|
||||||
email_from = email.utils.parseaddr(msg['from'])[1]
|
|
||||||
import pdb;pdb.set_trace()
|
|
||||||
# Now convert to local date-time
|
|
||||||
date_tuple = email.utils.parsedate_tz(msg['Date'])
|
|
||||||
if date_tuple:
|
|
||||||
email_date = datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
|
|
||||||
else:
|
|
||||||
email_date = datetime.now()
|
|
||||||
|
|
||||||
d = (str(int(num)), email_date.strftime('%d-%m-%Y %H:%M:%S'), email_from, mbx_name.replace('entreprise-dumas.com', ''), email_subject)
|
|
||||||
liste.append(d)
|
|
||||||
return liste
|
|
||||||
|
|
||||||
# lire les demandes d'interventions arrivées par email
|
# lire les demandes d'interventions arrivées par email
|
||||||
mbx_name = 'peinture-dumas@entreprise-dumas.com'
|
mbx_name = 'peinture-dumas@entreprise-dumas.com'
|
||||||
@@ -761,16 +741,167 @@ def demandes(request):
|
|||||||
|
|
||||||
liste=[]
|
liste=[]
|
||||||
# lire demandes de la MAIF
|
# lire demandes de la MAIF
|
||||||
mbx_subject = 'FROM gestionsinistre@maif.fr SUBJECT "Missionnement r"'
|
mbx_search = 'FROM gestionsinistre@maif.fr SUBJECT "Missionnement r"'
|
||||||
process_messages(data, mbx_subject, liste)
|
if 'form.submitted' in request.params:
|
||||||
|
demandes_generer(conn, mbx_name, mbx_search, liste)
|
||||||
|
demandes_afficher(conn, mbx_name, mbx_search, liste)
|
||||||
|
|
||||||
# lire demandes de DOMUS
|
# lire demandes de DOMUS
|
||||||
mbx_subject = 'FROM service.sinistres@domus-services.fr SUBJECT "Ordre de mission DOMUS - Dossier"'
|
mbx_search = 'FROM service.sinistres@domus-services.fr SUBJECT "Ordre de mission DOMUS - Dossier"'
|
||||||
process_messages(data, mbx_subject, liste)
|
demandes_afficher(conn, mbx_name, mbx_search, liste)
|
||||||
|
|
||||||
conn.logout()
|
conn.logout()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'page_title': 'Liste des demandes pour la PEINTURE',
|
'page_title': 'Liste des demandes pour la PEINTURE',
|
||||||
|
'url': url,
|
||||||
'dt_data': json.dumps(liste),
|
'dt_data': json.dumps(liste),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def demandes_afficher(conn, mbx_name, search_criteria, liste, request=None):
    """Append one summary row per matching e-mail to ``liste``.

    Runs ``conn.search(None, search_criteria)``, fetches every message
    number returned and appends a tuple of strings
    ``(number, date, sender, mailbox, subject)`` to ``liste``.

    Args:
        conn: Connection object exposing ``search`` and ``fetch`` in the
            ``imaplib`` call style (status string first, payload second).
        mbx_name: Mailbox address; the 'entreprise-dumas.com' part is
            removed before it is stored in the row.
        search_criteria: Criteria string passed to ``conn.search``.
        liste: List receiving the rows; it is modified in place.
        request: Optional request object used to flash the error message
            and build the redirect when a mailbox command fails.

    Returns:
        ``liste`` on success, or an ``HTTPFound`` redirect to the 'home'
        route when a command fails and ``request`` was supplied.

    Raises:
        RuntimeError: A mailbox command failed and no ``request`` was
            supplied, so the error cannot be reported through the session.
    """

    def fail(message):
        # This function is defined at module level, so no enclosing
        # `request` exists: only use it when the caller passed one in,
        # and fail loudly (instead of with a NameError) otherwise.
        if request is None:
            raise RuntimeError(message)
        request.session.flash(message, 'danger')
        return HTTPFound(location=request.route_url('home'))

    # Build the list of message headers to display.
    rv, data = conn.search(None, search_criteria)
    if rv != 'OK':
        return fail("ERREUR de lecture de la boîte de réception")

    mailbox_label = mbx_name.replace('entreprise-dumas.com', '')

    for num in data[0].split():
        rv, msg_data = conn.fetch(num, '(RFC822)')
        if rv != 'OK':
            # int() turns the raw id (bytes) into a readable number.
            return fail("ERREUR de lecture du message %s" % int(num))

        msg = email.message_from_bytes(msg_data[0][1])

        # A message may lack Subject/From; default to '' because the
        # header helpers do not accept None.
        raw_subject = msg.get('Subject', '')
        email_subject = str(
            email.header.make_header(email.header.decode_header(raw_subject))
        )
        email_from = email.utils.parseaddr(msg.get('from', ''))[1]

        # Convert the Date header to a local date-time; fall back to the
        # current time when the header is missing or unparseable.
        date_tuple = email.utils.parsedate_tz(msg['Date'])
        if date_tuple:
            email_date = datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
        else:
            email_date = datetime.now()

        liste.append((
            str(int(num)),
            email_date.strftime('%d-%m-%Y %H:%M:%S'),
            email_from,
            mailbox_label,
            email_subject,
        ))

    return liste
|
||||||
|
|
||||||
|
def demandes_generer(conn, mbx_name, search_criteria, liste, request=None):
    """Download and parse the attachment of every matching e-mail.

    Runs ``conn.search(None, search_criteria)`` and, for each message whose
    subject does not contain 'Annulation ', saves its attachment under
    /tmp, converts it to text with pdfminer and either deletes the PDF
    (when the text contains 'Objet : ANNULATION MISSION') or parses the
    text for the request details.

    Args:
        conn: Connection object exposing ``search`` and ``fetch`` in the
            ``imaplib`` call style.
        mbx_name: Mailbox address, forwarded to the parsing helper.
        search_criteria: Criteria string passed to ``conn.search``.
        liste: Unused here; kept so the signature matches
            ``demandes_afficher``.
        request: Optional request object used to flash the error message
            and build the redirect when a mailbox command fails.

    Returns:
        None on success, or an ``HTTPFound`` redirect to the 'home' route
        when a command fails and ``request`` was supplied.

    Raises:
        RuntimeError: A mailbox command failed and no ``request`` was
            supplied.
    """

    def fail(message):
        # Module-level function: there is no enclosing `request`, so only
        # use it when the caller passed one in.
        if request is None:
            raise RuntimeError(message)
        request.session.flash(message, 'danger')
        return HTTPFound(location=request.route_url('home'))

    def download_pdf_to_tmp(email_message):
        """Save the named attachments under /tmp.

        Returns the path of the last named attachment, or None when the
        message has none. An existing file of the same name is reused.
        """
        file_path = None
        for part in email_message.walk():
            # Multipart containers only group other parts.
            if part.get_content_maintype() == 'multipart':
                continue
            # Parts without Content-Disposition are not attachments.
            if part.get('Content-Disposition') is None:
                continue
            # The name comes from the e-mail itself: keep only its last
            # path component so it cannot point outside /tmp.
            raw_name = part.get_filename() or ''
            file_name = os.path.basename(raw_name.replace('\\', '/'))
            if file_name in ('', '.', '..'):
                continue
            file_path = os.path.join('/tmp/', file_name)
            if not os.path.isfile(file_path):
                with open(file_path, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
        return file_path

    def convert_pdf_to_txt(path):
        """Extract the text of the PDF at ``path``.

        Returns ``(text, text_file_path)``; the text is also written to
        /tmp/log_file.txt, which the parsing helper reads back.
        """
        resource_manager = PDFResourceManager()
        converter = PDFPageAggregator(resource_manager, laparams=LAParams())
        page_interpreter = PDFPageInterpreter(resource_manager, converter)

        chunks = []
        try:
            with open(path, 'rb') as fh:
                for page in PDFPage.get_pages(fh,
                                              caching=True,
                                              check_extractable=True):
                    page_interpreter.process_page(page)
                    # The converter renders the layout from the interpreter.
                    layout = converter.get_result()
                    # Only text boxes and text lines carry text we want.
                    for lt_obj in layout:
                        if isinstance(lt_obj, (LTTextBox, LTTextLine)):
                            chunks.append(lt_obj.get_text())
        finally:
            # Release the converter even when a page fails to parse.
            converter.close()
        extracted_text = ''.join(chunks)

        # Write the text to a file; explicit UTF-8 so the accented
        # keywords searched later survive any server locale.
        extracted_file = '/tmp/log_file.txt'
        with open(extracted_file, 'w', encoding='utf-8') as my_log:
            my_log.write(extracted_text)

        return extracted_text, extracted_file

    def generer_dossier(mbx_name, filepath):
        """Scan the extracted text and return the fields found as a dict."""
        infos = {}
        with open(filepath, encoding='utf-8') as fp:
            line = fp.readline()
            while line:
                if line.startswith('Nos références'):
                    # The value is read two lines below the heading.
                    fp.readline()
                    line = fp.readline()
                    infos['no_sinistre'] = line.rstrip('\n')
                if line.startswith('Bénéficiaire des travaux :'):
                    elt = line.split(' : ')
                    infos['nom'] = elt[1].rstrip('\n') if len(elt) > 1 else ''
                    # Address is three lines below, then postcode + town.
                    fp.readline()
                    fp.readline()
                    line = fp.readline()
                    infos['adr'] = line.rstrip('\n')
                    line = fp.readline()
                    stripped = line.rstrip('\n')
                    infos['cp'] = stripped[:5]
                    infos['ville'] = stripped[6:]
                if line.startswith('N° de téléphone :'):
                    # The last 10 characters of the line.
                    infos['tel'] = line.rstrip('\n')[-10:]

                # Read the next line.
                line = fp.readline()

        # TODO: create the dem_devis record from `infos`.
        return infos

    # Search the mailbox for request e-mails.
    rv, data = conn.search(None, search_criteria)
    if rv != 'OK':
        return fail("ERREUR de lecture de la boîte de réception")

    for num in data[0].split():
        rv, msg_data = conn.fetch(num, '(RFC822)')
        if rv != 'OK':
            return fail("ERREUR de lecture du message %s" % int(num))

        # Parse the raw bytes directly: a message is not guaranteed to be
        # valid UTF-8, so decoding it first can fail.
        email_message = email.message_from_bytes(msg_data[0][1])

        # Decode RFC 2047 encoded words so the keyword test sees real text.
        email_subject = str(email.header.make_header(
            email.header.decode_header(email_message.get('subject', ''))
        ))

        # Cancelled request: nothing to generate.
        if 'Annulation ' in email_subject:
            continue

        file_path = download_pdf_to_tmp(email_message)
        if file_path is None:
            # No attachment to analyse.
            continue

        # Convert the PDF file to text.
        texte, extracted_file = convert_pdf_to_txt(file_path)
        if 'Objet : ANNULATION MISSION' in texte:
            # Cancelled mission: delete the PDF.
            os.remove(file_path)
        else:
            # Build the request data from the extracted text.
            generer_dossier(mbx_name, extracted_file)

    return
|
||||||
|
|||||||
Reference in New Issue
Block a user