test upload ok
This commit is contained in:
@@ -14,9 +14,8 @@ pyramid.default_locale_name = en
|
||||
pyramid.includes =
|
||||
pyramid_debugtoolbar
|
||||
|
||||
# By default, the toolbar only appears for clients from IP addresses
|
||||
# '127.0.0.1' and '::1'.
|
||||
# debugtoolbar.hosts = 127.0.0.1 ::1
|
||||
# temp folder location
|
||||
temp_folder = /Users/phuoc/pyramid/html_cleanup/html_cleanup/static/temp/
|
||||
|
||||
###
|
||||
# wsgi server configuration
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<title>untitled</title>
|
||||
</head>
|
||||
5
html_cleanup/static/temp/clean_www.icloud.com.html
Normal file
5
html_cleanup/static/temp/clean_www.icloud.com.html
Normal file
@@ -0,0 +1,5 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<title>untitled</title>
|
||||
</head>
|
||||
496
html_cleanup/static/temp/www.icloud.com.html
Normal file
496
html_cleanup/static/temp/www.icloud.com.html
Normal file
File diff suppressed because one or more lines are too long
@@ -2,7 +2,7 @@
|
||||
|
||||
{% block content %}
|
||||
<div class="content">
|
||||
<h1><span class="font-semi-bold">Pyramid</span> <span class="smaller">Starter project</span></h1>
|
||||
<h1>HTML Cleanup</h1>
|
||||
<p class="lead"><span class="font-semi-bold">404</span> Page Not Found</p>
|
||||
</div>
|
||||
{% endblock content %}
|
||||
|
||||
39
html_cleanup/templates/home.jinja2
Normal file
39
html_cleanup/templates/home.jinja2
Normal file
@@ -0,0 +1,39 @@
|
||||
{% extends "layout.jinja2" %}
|
||||
|
||||
{% block content %}
|
||||
|
||||
{% if message %}
|
||||
<div class="alert alert-danger">
|
||||
{{ message }}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<form id="uploadfile-form" action="{{ request.route_url('home') }}" method="post"
|
||||
accept-charset="utf-8" enctype="multipart/form-data">
|
||||
<div class="form-group">
|
||||
<label for="uploadfile">Veuillez sélectionner un fichier :</label>
|
||||
<input class="file" id="uploadfile" name="uploadfile" type="file" />
|
||||
</div>
|
||||
<br />
|
||||
<div class="form-group">
|
||||
<button id="uploadButton" class="btn btn-success" type="submit" name="form.submitted">
|
||||
<i class="glyphicon glyphicon-arrow-up"></i> Clean up</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<div class="panel-footer">
|
||||
<ul>
|
||||
<li>Seuls les fichiers au format <b>HTML</b> seront acceptés.</li>
|
||||
<li>La taille du fichier ne doit <b>pas dépasser 10 Mo</b>.</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{% if file_url %}
|
||||
<h2 class="text-info font-semi-bold">CLEANUP réussi</h2>
|
||||
Votre fichier nettoyé : <b>{{ file_name }}</b>: <a href="{{ file_url }}"></a>
|
||||
<a class="btn btn-primary" href="{{ file_url }}" download="{{ file_name }}"><span class="glyphicon glyphicon-arrow-up"></span> Télécharger</a>
|
||||
{% endif %}
|
||||
|
||||
{% endblock content %}
|
||||
|
||||
|
||||
@@ -24,19 +24,23 @@
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<!-- Container (Above content Section) -->
|
||||
<div class="container">
|
||||
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<!-- Display Page Title -->
|
||||
{% if page_title %}
|
||||
<h1 class="font-semi-bold">{{ page_title }}</h1>
|
||||
<br />
|
||||
{% endif %}
|
||||
|
||||
<!-- display page content-->
|
||||
<div class="container row">
|
||||
{% block content %}
|
||||
<p>No content</p>
|
||||
{% endblock content %}
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="copyright">
|
||||
Copyright © Pylons Project
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Container (Above content Section) -->
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
{% extends "layout.jinja2" %}
|
||||
|
||||
{% block content %}
|
||||
<div class="content">
|
||||
<h1><span class="font-semi-bold">Pyramid</span> <span class="smaller">Starter project</span></h1>
|
||||
<p class="lead">Welcome to <span class="font-normal">{{project}}</span>, a Pyramid application generated by<br><span class="font-normal">Cookiecutter</span>.</p>
|
||||
</div>
|
||||
{% endblock content %}
|
||||
@@ -1,6 +1,150 @@
|
||||
from pyramid.view import view_config
|
||||
from pyramid.httpexceptions import HTTPFound
|
||||
import os
|
||||
import shutil
|
||||
import magic
|
||||
import sys
|
||||
|
||||
@view_config(route_name='home', renderer='html_cleanup:templates/home.jinja2')
def home(request):
    """Render the upload form and handle a submitted HTML file.

    When the form has been submitted with a file, the upload is handed to
    ``process_file``. An empty message from it means success, in which case
    the name and static URL of the cleaned file are exposed to the template.

    :param request: the current Pyramid request.
    :returns: the template context with the keys ``page_title``,
        ``message``, ``file_url`` and ``file_name``; the last three are
        empty strings when there is nothing to report.
    """
    message = ''
    file_name = ''
    file_url = ''

    if 'form.submitted' in request.params:
        # Only a real upload exposes ``.file``/``.filename``. A missing field
        # (None) or an empty placeholder value must be ignored instead of
        # raising KeyError/AttributeError.
        upload = request.POST.get('uploadfile')
        if hasattr(upload, 'file'):
            input_file = upload.file
            input_name = upload.filename
            # Fetch the uploaded file, run the checks and process it.
            message = process_file(request, input_file, input_name)
            if message == '':
                file_name = "clean_" + input_name
                file_url = request.static_url('html_cleanup:static/temp/') + file_name

    return {
        'page_title': "HTML cleanup",
        'message': message,
        'file_url': file_url,
        'file_name': file_name,
    }
|
||||
|
||||
# Tags that are kept together with all of their attributes.
_TAGS_KEPT_WITH_ATTRIBUTES = frozenset(["table", "div", "img", "a"])

# Tags that are kept but stripped of every attribute.
_TAGS_KEPT_BARE = frozenset([
    "b", "/b", "em", "/em", "i", "/i", "li", "li/", "/li",
    "ol", "/ol", "ul", "/ul", "strong", "/strong", "sub", "/sub",
    "sup", "/sup", "u", "/u",
    "h1", "/h1", "h2", "/h2", "h3", "/h3", "h4", "/h4", "h5",
    "/h5", "h6", "/h6", "/p", "body", "/body", "/html", "/div",
    "/img", "/a",
    "/table", "td", "/td", "th", "/th", "tr", "/tr",
    "br", "br/",
])


def _render_tag(raw_tag):
    """Return the cleaned markup for one tag, or "" when the tag is dropped.

    ``raw_tag`` is the text found between ``<`` and ``>`` (or up to the end
    of the line when the tag is not closed on that line).
    """
    name = raw_tag.split(" ")[0].lower()
    if name in _TAGS_KEPT_WITH_ATTRIBUTES:
        return "<{}>".format(raw_tag)
    if name == "p":
        return "<p align=\"justify\">"
    if name in _TAGS_KEPT_BARE:
        return "<{}>".format(name)
    return ""


def _clean_line(line_in, start_from):
    """Filter the tags of one line, scanning from index ``start_from``.

    Returns ``(line_out, skip_tag)`` where ``skip_tag`` is True when the line
    ends inside a tag that is only closed on a later line.
    """
    pieces = []
    while True:
        next_tag = line_in.find("<", start_from)
        if next_tag == -1:
            # No more tags: keep the rest of the line (newline included).
            pieces.append(line_in[start_from:])
            return "".join(pieces), False

        pieces.append(line_in[start_from:next_tag])
        end_tag = line_in.find(">", next_tag + 1)
        if end_tag == -1:
            # The tag continues on a following line.
            pieces.append(_render_tag(line_in[next_tag + 1:]))
            return "".join(pieces), True

        pieces.append(_render_tag(line_in[next_tag + 1:end_tag]))
        start_from = end_tag + 1


def clean_html(input_file, output_file):
    """Write a copy of an HTML document with undesirable tags removed.

    A fixed ``<html>``/``<head>`` header is written first. Everything before
    the first line that starts with ``<body`` is discarded; from that line
    on, text is copied and each tag is either kept with its attributes, kept
    without attributes, replaced (``<p ...>`` becomes a justified ``<p>``)
    or dropped.

    :param input_file: path of the HTML file to read, or an already open
        file object (text or binary); a file object is rewound when possible
        and is left open for the caller to close.
    :param output_file: path of the cleaned file to create.
    :returns: an empty string on success, otherwise ``"Error: ..."``
        describing the failure.
    """
    encoding = "utf-8"
    message = ""

    try:
        if hasattr(input_file, "read"):
            # Uploaded file object: callers typically have already read it to
            # sniff its type, so go back to the beginning when possible.
            try:
                input_file.seek(0)
            except (AttributeError, OSError, ValueError):
                pass
            fi = input_file
            close_input = False
        else:
            fi = open(input_file, 'r', encoding=encoding)
            close_input = True

        try:
            with open(output_file, 'w', encoding=encoding) as fo:
                fo.write(
                    "<html>\n<head>\n<meta http-equiv=\"Content-Type\" "
                    "content=\"text/html; charset={}\">\n"
                    "<title>untitled</title>\n</head>\n".format(encoding)
                )

                body = False
                skip_tag = False

                for raw_line in fi:
                    if isinstance(raw_line, bytes):
                        # Binary stream: decode and normalise Windows line
                        # endings the way text-mode reading would.
                        line_in = raw_line.decode(encoding).replace("\r\n", "\n")
                    else:
                        line_in = raw_line

                    if not body:
                        # The body only starts on a line beginning with "<body".
                        if line_in.find("<body") != 0:
                            continue
                        body = True
                        start_from = 0
                    elif skip_tag:
                        # Still inside a tag opened on a previous line: resume
                        # right after its closing ">", or skip the whole line.
                        closing = line_in.find(">")
                        if closing == -1:
                            continue
                        start_from = closing + 1
                    else:
                        start_from = 0

                    line_out, skip_tag = _clean_line(line_in, start_from)
                    if line_out != "":
                        fo.write(line_out + "\n")
        finally:
            if close_input:
                fi.close()

    except Exception as e:
        # Callers expect failures to be reported through the returned
        # message rather than raised.
        message = "Error: {}".format(str(e))

    return message
|
||||
|
||||
def process_file(request, input_file, input_name):
    """Validate an uploaded file and, when it is acceptable, clean it.

    The upload is checked for a plain file name, an HTML MIME type and a
    maximum size. A valid upload is stored in the configured temp folder
    under its own name and cleaned into ``clean_<input_name>`` in the same
    folder.

    :param request: the current request; its registry settings must provide
        ``temp_folder``.
    :param input_file: the uploaded file object (seekable, binary).
    :param input_name: the file name supplied by the client.
    :returns: an empty string on success, otherwise the error message to
        show to the user.
    """
    # clean_html only understands HTML text, so nothing else is accepted.
    allowed_mimes = ('text/html',)
    max_size = 10 * (1024 ** 2)  # 10 MiB
    temp_folder = request.registry.settings['temp_folder']

    # The name comes from the client: refuse anything carrying directory
    # components so the files can only be created inside temp_folder.
    if not input_name or os.path.basename(input_name) != input_name:
        return "ERREUR: Le nom du fichier n'est pas valide. Téléchargement refusé."

    # Is the file type allowed? Only the head of the file is needed to
    # detect it, which avoids loading a huge upload into memory.
    input_file.seek(0)
    mime = magic.from_buffer(input_file.read(2048), mime=True)
    if mime not in allowed_mimes:
        return "ERREUR: Le format du fichier n'est pas valide. Téléchargement refusé."

    # Measure the file size, then rewind to the beginning.
    input_file.seek(0, os.SEEK_END)
    filesize = input_file.tell()
    input_file.seek(0)
    if filesize > max_size:
        return "ERREUR: La taille du fichier dépasse la limite autorisée. Téléchargement refusé."

    # Checks passed: store the upload on disk, because clean_html opens its
    # input by path, then process it.
    input_path = os.path.join(temp_folder, input_name)
    output_path = os.path.join(temp_folder, "clean_" + input_name)
    try:
        with open(input_path, 'wb') as saved:
            shutil.copyfileobj(input_file, saved)
    except OSError as e:
        return "Error: {}".format(str(e))

    return clean_html(input_path, output_path)
|
||||
@@ -1,6 +1,5 @@
|
||||
from pyramid.view import notfound_view_config
|
||||
|
||||
|
||||
@notfound_view_config(renderer='html_cleanup:templates/404.jinja2')
|
||||
def notfound_view(request):
|
||||
request.response.status = 404
|
||||
|
||||
@@ -12,6 +12,9 @@ pyramid.debug_notfound = false
|
||||
pyramid.debug_routematch = false
|
||||
pyramid.default_locale_name = en
|
||||
|
||||
# temp folder location
|
||||
temp_folder = /pyramid/html_cleanup/html_cleanup/static/temp/
|
||||
|
||||
###
|
||||
# wsgi server configuration
|
||||
###
|
||||
|
||||
Reference in New Issue
Block a user