diff --git a/development.ini b/development.ini
index 8a33c74..edb6096 100644
--- a/development.ini
+++ b/development.ini
@@ -14,9 +14,8 @@ pyramid.default_locale_name = en
pyramid.includes =
pyramid_debugtoolbar
-# By default, the toolbar only appears for clients from IP addresses
-# '127.0.0.1' and '::1'.
-# debugtoolbar.hosts = 127.0.0.1 ::1
+# temp folder location
+temp_folder = /Users/phuoc/pyramid/html_cleanup/html_cleanup/static/temp/
###
# wsgi server configuration
diff --git a/html_cleanup/static/temp/clean_Nantes_anthologie_cinema_viet.htm b/html_cleanup/static/temp/clean_Nantes_anthologie_cinema_viet.htm
new file mode 100644
index 0000000..23b4f73
--- /dev/null
+++ b/html_cleanup/static/temp/clean_Nantes_anthologie_cinema_viet.htm
@@ -0,0 +1,5 @@
+
+
+
+untitled
+
diff --git a/html_cleanup/static/temp/clean_www.icloud.com.html b/html_cleanup/static/temp/clean_www.icloud.com.html
new file mode 100644
index 0000000..23b4f73
--- /dev/null
+++ b/html_cleanup/static/temp/clean_www.icloud.com.html
@@ -0,0 +1,5 @@
+
+
+
+untitled
+
diff --git a/html_cleanup/static/temp/www.icloud.com.html b/html_cleanup/static/temp/www.icloud.com.html
new file mode 100644
index 0000000..f7a55ae
--- /dev/null
+++ b/html_cleanup/static/temp/www.icloud.com.html
@@ -0,0 +1,496 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ iCloud
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/html_cleanup/templates/404.jinja2 b/html_cleanup/templates/404.jinja2
index aaf1241..a9c7cce 100644
--- a/html_cleanup/templates/404.jinja2
+++ b/html_cleanup/templates/404.jinja2
@@ -2,7 +2,7 @@
{% block content %}
-
Pyramid Starter project
+
HTML Cleanup
404 Page Not Found
{% endblock content %}
diff --git a/html_cleanup/templates/home.jinja2 b/html_cleanup/templates/home.jinja2
new file mode 100644
index 0000000..2bcb185
--- /dev/null
+++ b/html_cleanup/templates/home.jinja2
@@ -0,0 +1,39 @@
+{% extends "layout.jinja2" %}
+
+{% block content %}
+
+ {% if message %}
+
+ {{ message }}
+
+ {% endif %}
+
+
+
+
+
+ {% if file_url %}
+ CLEANUP réussi
+ Votre fichier nettoyé : {{ file_name }}:
+ Télécharger
+ {% endif %}
+
+{% endblock content %}
+
+
diff --git a/html_cleanup/templates/layout.jinja2 b/html_cleanup/templates/layout.jinja2
index ad1a574..63f50b3 100644
--- a/html_cleanup/templates/layout.jinja2
+++ b/html_cleanup/templates/layout.jinja2
@@ -24,19 +24,23 @@
+
+
-
-
- {% block content %}
-
No content
- {% endblock content %}
-
-
-
- Copyright © Pylons Project
-
-
-
+
+ {% if page_title %}
+
{{ page_title }}
+
+ {% endif %}
+
+
+
+ {% block content %}
+
No content
+ {% endblock content %}
+
+
+
diff --git a/html_cleanup/templates/mytemplate.jinja2 b/html_cleanup/templates/mytemplate.jinja2
deleted file mode 100644
index f2e7283..0000000
--- a/html_cleanup/templates/mytemplate.jinja2
+++ /dev/null
@@ -1,8 +0,0 @@
-{% extends "layout.jinja2" %}
-
-{% block content %}
-
-
Pyramid Starter project
-
Welcome to {{project}}, a Pyramid application generated by
Cookiecutter.
-
-{% endblock content %}
diff --git a/html_cleanup/views/default.py b/html_cleanup/views/default.py
index b2a1790..1ef91d0 100644
--- a/html_cleanup/views/default.py
+++ b/html_cleanup/views/default.py
@@ -1,6 +1,150 @@
from pyramid.view import view_config
+from pyramid.httpexceptions import HTTPFound
+import os
+import shutil
+import magic
+import sys
+@view_config(route_name='home', renderer='html_cleanup:templates/home.jinja2')
+def home(request):
-@view_config(route_name='home', renderer='html_cleanup:templates/mytemplate.jinja2')
-def my_view(request):
- return {'project': 'html_cleanup'}
+    """Render the upload form and, on submission, clean the uploaded file.
+
+    When the form was submitted with a file, the upload is handed to
+    ``process_file``. An empty message from it means success; the template
+    then receives the name and the static URL of the cleaned file.
+
+    :param request: the current request.
+    :returns: dict of template variables ``page_title``, ``message``,
+        ``file_url`` and ``file_name``.
+    """
+    message = ''
+    file_name = ''
+    file_url = ''
+
+    if 'form.submitted' in request.params:
+        # .get() avoids a KeyError when the field is missing from the POST.
+        upload = request.POST.get('uploadfile')
+        # A file input left empty is posted as b''; only a real upload
+        # carries the .file / .filename attributes, so test for those
+        # instead of comparing against b''.
+        if hasattr(upload, 'file'):
+            input_name = upload.filename
+            # Take the uploaded file, run the checks and process it.
+            message = process_file(request, upload.file, input_name)
+            if message == '':
+                file_name = "clean_" + input_name
+                # Make the file name part of the asset path so that
+                # static_url() builds (and URL-escapes) the whole URL,
+                # instead of appending the raw client name afterwards.
+                file_url = request.static_url(
+                    'html_cleanup:static/temp/' + file_name)
+
+    return {
+        'page_title': "HTML cleanup",
+        'message': message,
+        'file_url': file_url,
+        'file_name': file_name,
+    }
+
+
+def clean_html(input_file, output_file):
+ # cleanup undesirable tags in html file
+ #
+ # Opens `input_file` for reading and `output_file` for writing (both as
+ # UTF-8 text), writes a fixed header to the output, then scans the input
+ # line by line and writes a filtered version of each line.
+ # Returns "" on success, or "Error: <exception text>" if anything raised
+ # inside the try block.
+ # `input_file` is passed straight to open(), so it has to be something
+ # open() accepts (a path), not an already-open file object.
+ #
+ # NOTE(review): `encoding` is assigned but never used; both open() calls
+ # hard-code 'utf-8'.
+ encoding = "utf-8"
+ message = ""
+ # NOTE(review): leftover debugger breakpoint - this stops every call in
+ # pdb and sits outside the try block. Remove before running as a server.
+ import pdb;pdb.set_trace()
+
+ try:
+ with open(input_file, 'r', encoding='utf-8') as fi, open(output_file, 'w', encoding='utf-8') as fo:
+ # Fixed header written before any cleaned content.
+ # NOTE(review): the literal looks like it lost its markup in this
+ # view (only newlines and "untitled" remain) - confirm against the
+ # real file.
+ fo.write("\n\n\nuntitled\n\n")
+
+ body = False
+ skip_tag = False
+ nb_lines = 0
+
+ for line_in in fi:
+ line_out = ""
+
+ if not body:
+ # NOTE(review): as shown here, find("") searches for an empty
+ # string and `start_from` is read on the next line before any
+ # visible assignment; `body` and `nb_lines` are never updated
+ # either. Lines appear to be missing from this view - confirm
+ # against the real file before relying on this section.
+ init = line_in.find("")
+ if start_from != -1 and start_from >= 0:
+ skip_tag = False
+ start_from = start_from + 1 if start_from < len(line_in) else 0
+ else:
+ start_from = init if nb_lines == 1 else 0
+
+ if start_from >= 0:
+ done = False
+ while not done:
+ # Look for the next '<' at or after start_from. With none
+ # left, the rest of the line is copied to the output.
+ next_tag = line_in.find("<", start_from)
+ if next_tag == -1:
+ line_out += line_in[start_from:]
+ done = True
+ skip_tag = False
+ else:
+ # Text in front of the tag is copied through unchanged.
+ if next_tag > start_from:
+ line_out += line_in[start_from:next_tag]
+
+ # `s` is the text between '<' and the next '>' on this line.
+ # If no '>' is found on this line, `s` is the rest of the
+ # line and skip_tag is set.
+ end_tag = line_in.find(">", next_tag + 1)
+ if end_tag == -1:
+ s = line_in[next_tag + 1:]
+ done = True
+ skip_tag = True
+ else:
+ s = line_in[next_tag + 1:end_tag]
+
+ if end_tag < len(line_in):
+ start_from = end_tag + 1
+ done = False
+ skip_tag = False
+ else:
+ done = True
+ skip_tag = False
+
+ # The first space-separated word of the tag text, lower-cased,
+ # selects the handling below. split(" ") never returns an
+ # empty list, so `if tag:` is always true.
+ tag = s.split(" ")
+
+ if tag:
+ tag[0] = tag[0].lower()
+
+ # These tags are emitted with their full original text `s`,
+ # i.e. attributes are kept.
+ if tag[0] in ["table", "div", "img", "a"]:
+ line_out += "<{}>".format(s)
+ # NOTE(review): as shown, an opening "p" adds an empty string;
+ # the literal may have lost its content in this view - confirm.
+ elif tag[0] in ["p"]:
+ line_out += ""
+ # The remaining listed tags are emitted as the bare tag name
+ # only, which drops any attributes.
+ elif tag[0] in ["b", "/b", "em", "/em", "i", "/i", "li", "li/", "/li",
+ "ol", "/ol", "ul", "/ul", "strong", "/strong", "sub", "/sub",
+ "sup", "/sup", "u", "/u"]:
+ line_out += "<{}>".format(tag[0])
+ elif tag[0] in ["h1", "/h1", "h2", "/h2", "h3", "/h3", "h4", "/h4", "h5",
+ "/h5", "h6", "/h6", "/p", "body", "/body", "/html", "/div",
+ "/img", "/a"]:
+ line_out += "<{}>".format(tag[0])
+ # NOTE(review): "table" can never match here; it is already
+ # handled by the first branch of this chain.
+ elif tag[0] in ["table", "/table", "td", "/td", "th", "/th", "tr", "/tr"]:
+ line_out += "<{}>".format(tag[0])
+ elif tag[0] in ["br", "br/"]:
+ line_out += "<{}>".format(tag[0])
+ # A tag matching none of the lists adds nothing to line_out.
+
+ # Output that ended up empty is not written.
+ if line_out != "":
+ fo.write(line_out + "\n")
+
+ # Any exception is turned into a message for the caller, not re-raised.
+ except Exception as e:
+ message = "Error: {}".format(str(e))
+
+ return message
+
+def process_file(request, input_file, input_name):
+    """Validate an uploaded file and, if it is accepted, clean it.
+
+    The upload is checked against the allowed MIME types and the maximum
+    size. If both checks pass, it is run through ``clean_html`` and the
+    result is written to ``"clean_" + input_name`` inside the folder named
+    by the ``temp_folder`` setting.
+
+    :param request: current request; ``request.registry.settings`` must
+        contain ``temp_folder``.
+    :param input_file: seekable binary file object holding the upload.
+    :param input_name: file name supplied with the upload.
+    :returns: ``''`` on success, otherwise an error message.
+    """
+    import tempfile
+
+    ext_allowed = ['text/html', 'image/png', 'image/jpeg']
+    max_size = 10 * (1024 ** 2)  # 10 MiB
+    sniff_size = 2048  # leading bytes handed to libmagic
+    temp_folder = request.registry.settings['temp_folder']
+
+    # Is the file type allowed? Only the head of the upload is read for
+    # the MIME check, so an oversized upload is not pulled into memory
+    # before its size has been checked.
+    input_file.seek(0)
+    mime = magic.from_buffer(input_file.read(sniff_size), mime=True)
+    if mime not in ext_allowed:
+        return "ERREUR: Le format du fichier n'est pas valide. Téléchargement refusé."
+
+    # Measure the file size, then rewind to the start so that the whole
+    # upload is available for the cleanup step.
+    input_file.seek(0, os.SEEK_END)
+    filesize = input_file.tell()
+    input_file.seek(0)
+    if filesize > max_size:
+        return "ERREUR: La taille du fichier dépasse la limite autorisée. Téléchargement refusé."
+
+    # Checks passed: process the file.
+    output_name = "clean_" + input_name
+    output_file = os.path.join(temp_folder, output_name)
+
+    # clean_html() opens its input with open(), so it needs a path rather
+    # than the upload's file object. Spool the upload to a temporary file
+    # and remove it once the cleanup has run.
+    with tempfile.NamedTemporaryFile(suffix='.html', delete=False) as spool:
+        shutil.copyfileobj(input_file, spool)
+    try:
+        message = clean_html(spool.name, output_file)
+    finally:
+        os.remove(spool.name)
+
+    return message
\ No newline at end of file
diff --git a/html_cleanup/views/notfound.py b/html_cleanup/views/notfound.py
index f96ca7b..59817e8 100644
--- a/html_cleanup/views/notfound.py
+++ b/html_cleanup/views/notfound.py
@@ -1,6 +1,5 @@
from pyramid.view import notfound_view_config
-
@notfound_view_config(renderer='html_cleanup:templates/404.jinja2')
def notfound_view(request):
request.response.status = 404
diff --git a/production.ini b/production.ini
index 1e4e85e..7db9660 100644
--- a/production.ini
+++ b/production.ini
@@ -12,6 +12,9 @@ pyramid.debug_notfound = false
pyramid.debug_routematch = false
pyramid.default_locale_name = en
+# temp folder location
+temp_folder = /pyramid/html_cleanup/html_cleanup/static/temp/
+
###
# wsgi server configuration
###
diff --git a/setup.py b/setup.py
index c288a4e..24eabe2 100644
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,7 @@ requires = [
'pyramid',
'pyramid_jinja2',
'pyramid_debugtoolbar',
+ 'python-magic',
'waitress',
]