diff --git a/.gitignore b/.gitignore index b24d71e..1de985c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,50 +1,22 @@ -# These are some examples of commonly ignored file patterns. -# You should customize this list as applicable to your project. -# Learn more about .gitignore: -# https://www.atlassian.com/git/tutorials/saving-changes/gitignore - -# Node artifact files -node_modules/ +*.egg +*.egg-info +*.pyc +*$py.class +*~ +*.sqlite +.coverage +coverage.xml +build/ dist/ - -# Compiled Java class files -*.class - -# Compiled Python bytecode -*.py[cod] - -# Log files -*.log - -# Package files -*.jar - -# Maven -target/ -dist/ - -# JetBrains IDE -.idea/ - -# Unit test reports -TEST*.xml - -# Generated by MacOS +.tox/ +nosetests.xml +env*/ +tmp/ +Data.fs* +*.sublime-project +*.sublime-workspace +.*.sw? +.sw? .DS_Store - -# Generated by Windows -Thumbs.db - -# Applications -*.app -*.exe -*.war - -# Large media files -*.mp4 -*.tiff -*.avi -*.flv -*.mov -*.wmv - +coverage +test diff --git a/cleanup_html/__init__.py b/cleanup_html/__init__.py deleted file mode 100644 index a3d5a64..0000000 --- a/cleanup_html/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from pyramid.config import Configurator - - -def main(global_config, **settings): - """ This function returns a Pyramid WSGI application. 
- """ - with Configurator(settings=settings) as config: - config.include('pyramid_jinja2') - config.include('.routes') - config.scan() - return config.make_wsgi_app() diff --git a/cleanup_html/routes.py b/cleanup_html/routes.py deleted file mode 100644 index 25504ad..0000000 --- a/cleanup_html/routes.py +++ /dev/null @@ -1,3 +0,0 @@ -def includeme(config): - config.add_static_view('static', 'static', cache_max_age=3600) - config.add_route('home', '/') diff --git a/cleanup_html/static/pyramid-16x16.png b/cleanup_html/static/pyramid-16x16.png deleted file mode 100644 index 9792031..0000000 Binary files a/cleanup_html/static/pyramid-16x16.png and /dev/null differ diff --git a/cleanup_html/static/pyramid.png b/cleanup_html/static/pyramid.png deleted file mode 100644 index 4ab837b..0000000 Binary files a/cleanup_html/static/pyramid.png and /dev/null differ diff --git a/cleanup_html/static/theme.css b/cleanup_html/static/theme.css deleted file mode 100644 index 6d03c6e..0000000 --- a/cleanup_html/static/theme.css +++ /dev/null @@ -1,32 +0,0 @@ -@import url(//fonts.googleapis.com/css?family=Open+Sans:300,400,600,700); -body { - font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; - font-weight: 300; - color: #1c1b1b; - background: #ffffff; -} -h1, -h2, -h3, -h4, -h5, -h6 { - font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; - font-weight: 300; -} -p { - font-weight: 300; -} -button, input, optgroup, select, textarea { - color: black; -} -.font-normal { - font-weight: 400; -} -.font-semi-bold { - font-weight: 600; -} -.font-bold { - font-weight: 700; -} - diff --git a/cleanup_html/templates/404.jinja2 b/cleanup_html/templates/404.jinja2 deleted file mode 100644 index aaf1241..0000000 --- a/cleanup_html/templates/404.jinja2 +++ /dev/null @@ -1,8 +0,0 @@ -{% extends "layout.jinja2" %} - -{% block content %} -
-

Pyramid Starter project

-

404 Page Not Found

-
-{% endblock content %} diff --git a/cleanup_html/templates/layout.jinja2 b/cleanup_html/templates/layout.jinja2 deleted file mode 100644 index 4aadb90..0000000 --- a/cleanup_html/templates/layout.jinja2 +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - Cookiecutter Starter project for the Pyramid Web Framework - - - - - - - - - - - - - -
-
- {% block content %} -

No content

- {% endblock content %} -
-
- -
-
- - - - - - - - - diff --git a/cleanup_html/templates/mytemplate.jinja2 b/cleanup_html/templates/mytemplate.jinja2 deleted file mode 100644 index f2e7283..0000000 --- a/cleanup_html/templates/mytemplate.jinja2 +++ /dev/null @@ -1,8 +0,0 @@ -{% extends "layout.jinja2" %} - -{% block content %} -
-

Pyramid Starter project

-

Welcome to {{project}}, a Pyramid application generated by
Cookiecutter.

-
-{% endblock content %} diff --git a/cleanup_html/views/__init__.py b/cleanup_html/views/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cleanup_html/views/default.py b/cleanup_html/views/default.py deleted file mode 100644 index a765fc8..0000000 --- a/cleanup_html/views/default.py +++ /dev/null @@ -1,6 +0,0 @@ -from pyramid.view import view_config - - -@view_config(route_name='home', renderer='cleanup_html:templates/mytemplate.jinja2') -def my_view(request): - return {'project': 'cleanup_html'} diff --git a/cleanup_html/views/notfound.py b/cleanup_html/views/notfound.py deleted file mode 100644 index 2a8fcd2..0000000 --- a/cleanup_html/views/notfound.py +++ /dev/null @@ -1,7 +0,0 @@ -from pyramid.view import notfound_view_config - - -@notfound_view_config(renderer='cleanup_html:templates/404.jinja2') -def notfound_view(request): - request.response.status = 404 - return {} diff --git a/html_cleanup/routes.py b/html_cleanup/routes.py index 25504ad..f33cc30 100644 --- a/html_cleanup/routes.py +++ b/html_cleanup/routes.py @@ -1,3 +1,4 @@ def includeme(config): config.add_static_view('static', 'static', cache_max_age=3600) config.add_route('home', '/') + config.add_route('view_log', '/view_log') diff --git a/html_cleanup/static/temp/clean_all_gender.png b/html_cleanup/static/temp/clean_all_gender.png new file mode 100644 index 0000000..23b4f73 --- /dev/null +++ b/html_cleanup/static/temp/clean_all_gender.png @@ -0,0 +1,5 @@ + + + +untitled + diff --git a/html_cleanup/static/temp/clean_clean_Nantes_anthologie_cinema_viet.htm b/html_cleanup/static/temp/clean_clean_Nantes_anthologie_cinema_viet.htm new file mode 100644 index 0000000..6a6db1b --- /dev/null +++ b/html_cleanup/static/temp/clean_clean_Nantes_anthologie_cinema_viet.htm @@ -0,0 +1,769 @@ + + + +untitled + + + + + + + + + +
+ + + + + + + +

+ +Festival des 3 Continents, Nantes 24.11 - 3.12 2023

+ + + + + + + +

+ +Anthologie du cinéma vietnamien

+ + + + + + + +

 

+ + + + + + + +

19 films vietnamiens + + + +de 1974 à 2022

+ + + + + + + +

 

+ + + + + + + +We will meet again

+ + + + + + + +

We will meet + + + +again / Đến hẹn lại lên - Tran Vu, 1974, 108’

+ + + + + + + +

Dimanche 26.11, + + + +18:30 - Vendredi 1.12, 13:45

+ + + + + + + +

 

+ + + + + + + +

Premier amour + + / Mối tình đầu - Hai Ninh, 1977, 112’

+ + + + + + + +

Samedi 25.11, 13:45 + + + +- Jeudi 30.11, 20:30

+ + + + + + + +

 

+ + + + + + + +

The faces of May + + / Tháng năm, những gương mặt - Dang Nhat Minh, 1975, 37’

+ + + + + + + +Nostalgie de la campagne

+ + + + + + + +

Nostalgie de la + + + +campagne / Thương nhớ đồng quê - Dang Nhat Minh, + + + +1995, 116’

+ + + + + + + +

Mercredi 29.11, + + + +20:30 - Vendredi 1.12, 17:00

+ + + + + + + +

 

+ + + + + + + +

Chom et Sa + + / Chom và Sa - Pham Ky Nam, 1979, 70’

+ + + + + + + +

Dimanche 26.11, + + + +10:00 - Lundi 27.11, 16:15

+ + + + + + + +

 

+ + + + + + + +

Hanoi through + + + +whose eyes ? / Hà Nội trong mắt ai - Tran Van + + + +Thuy, 1982, 45’

+ + + + + + + +

The story of kindness + + + +or How to behave / Chuyện tử tế - Tran Van + + + +Thuy, 1987, 43’

+ + + + + + + +

Mardi 28.11, 16:00 - + + + +Dimanche 3.12, 10:30

+ + + + + + + +

 

+ + + + + + + +

Brothers + + / Anh và em - Nguyen Huu Luyen & Tran Vu, 1986, 87’

+ + + + + + + +

Dimanche 26.11, 20:45 + + + +- Jeudi 30.11, 18:30

+ + + + + + + +

 

+ + + + + + + +

La fille du + + + +fleuve / Cô gái trên sông - Dang Nhat Minh, 1987, 100’

+ + + + + + + +

Samedi 25.11, 20:45 + + + +- Jeudi 30.11, 10:15

+ + + + + + + +

 

+ + + + + + + +

Troupe de cirque + + + +ambulant / Gánh xiếc rong - Viet Linh, 1988, 80’

+ + + + + + + +

Lundi 27.11, 17:30 - + + + +Dimanche 3.12, 17:30

+ + + + + + + +

 

+ + + + + + + +

Fairytale for a + + + +17-year-old girl /  Chuyện cổ tích cho tuổi 17 - + + + +Nguyen Xuan Son, 1988, 77’

+ + + + + + + +

Mercredi 29.11, + + + +13:00 - Samedi 2.12, 13:00

+ + + + + + + +

 

+ + + + + + + +

La lampe dans le + + + +rêve / Ngọn đèn trong mơ - Do Minh Tuan, 1988, 75’

+ + + + + + + +

Mardi 28.11, 10:15 - + + + +Vendredi 1.12, 18:45

+ + + + + + + +

 

+ + + + + + + +

Money, + + + +Money ! / Tiền ơi ! - Tran Vu & Nguyen + + + +Huu Luyen, 1989, 93’

+ + + + + + + +

Dimanche 26.11, + + + +15:30 - Samedi 2.12, 15:45

+ + + + + + + +

 

+ + + + + + + +

Enfance orageuse + + / Tuổi thơ dữ dội - Nguyen Vinh Son, 1990, 135’

+ + + + + + + +

Lundi 27.11, 13:30 - + + + +Samedi 2.12, 20:30

+ + + + + + + +

 

+ + + + + + + +

Please forgive me + + / Hãy tha thứ cho em - Luu Trong Ninh, 1992, 90’

+ + + + + + + +

Lundi 27.11, 18:30 - + + + +Mercredi 29.11, 18:15

+ + + + + + + +

 

+ + + + + + + +

Piège d’amour + + / Cạm bảy tình yêu - Pham Loc, 1992, 82’

+ + + + + + + +

Lundi 27.11, 10:30 - + + + +Mercredi 29.11, 14:45

+ + + + + + + +

 

+ + + + + + + +

In the lane + + / Ngõ hẹp - Bach Diep, 1993, 98’

+ + + + + + + +

Samedi 25.11, 10:15 + + + +- Mardi 28.11, 14:00

+ + + + + + + +

 

+ + + + + + + +L’Immeuble

+ + + + + + + +

L’Immeuble + + / Chung cư - Viet Linh, 1999, 90’

+ + + + + + + +

Samedi 25.11, 16:15 + + + +- Mercredi 29.11, 15:00

+ + + + + + + +

 

+ + + + + + + +Dust & metal

+ + + +

Dust & metal + + / Cát bụi và kim loại - Esther Johnson, 2022, 83’

+ + + + + + + +

Vendredi 1.12, 20:30 + + + +- Dimanche 3.12, 18:00

+ + + + + + + +

 

+ + + + + + + +

Anthologie du cinéma vietnamien :

+ + + + + + + +

+ +https://www.3continents.com/fr/programme/2023/anthologie-du-cinema-vietnamien/

+ + + + + + + +

Programme 2023, horaires et lieux :

+ + + + + + + +

+ +https://www.3continents.com/wp-content/uploads/f3c-prog-2023-40p-net-planche.pdf

+ + + + + + + +

 

+ + + + + + + +

 

+ + + + + + + +

 

+ + + + + + + +

 

+ + + + + + + +
+ + + + + + + + + + + +
+ +
+ + + +
+ +
+ + + + + + + + + + + + + diff --git a/html_cleanup/static/temp/clean_proposed_file_name.html b/html_cleanup/static/temp/clean_proposed_file_name.html new file mode 100644 index 0000000..23b4f73 --- /dev/null +++ b/html_cleanup/static/temp/clean_proposed_file_name.html @@ -0,0 +1,5 @@ + + + +untitled + diff --git a/html_cleanup/static/temp/errors_log b/html_cleanup/static/temp/errors_log index 8d30082..8cbe55c 100644 --- a/html_cleanup/static/temp/errors_log +++ b/html_cleanup/static/temp/errors_log @@ -1,3 +1,2 @@ -2023-11-29 11:40:58 - DSC03776.JPGERREUR: La taille du fichier dépasse la limite autorisée. Téléchargement refusé. -2023-11-29 11:44:29 - DSC03777.JPG - ERREUR: La taille du fichier dépasse la limite autorisée. Téléchargement refusé. -2023-11-29 11:45:44 - devis prothèse dentaire CKT.pdf - ERREUR: Le format du fichier n'est pas valide. Téléchargement refusé. +2023-11-29 17:22:16 [ Nantes_anthologie_cinema_viet.htm ] *** Cleanup réussi. *** +2023-11-29 17:23:56 [ Screenshot 2023-11-20 at 12.07.54.png ] Le format du fichier n'est pas valide. Téléchargement refusé. diff --git a/html_cleanup/templates/home.jinja2 b/html_cleanup/templates/home.jinja2 index f17c821..159d1b8 100644 --- a/html_cleanup/templates/home.jinja2 +++ b/html_cleanup/templates/home.jinja2 @@ -7,7 +7,14 @@ {{ message }} {% endif %} - + +
+ +
+
@@ -21,12 +28,6 @@
- {% if file_url %}

Cleanup réussi

diff --git a/html_cleanup/templates/layout.jinja2 b/html_cleanup/templates/layout.jinja2 index 63f50b3..5584c23 100644 --- a/html_cleanup/templates/layout.jinja2 +++ b/html_cleanup/templates/layout.jinja2 @@ -8,7 +8,7 @@ - Cookiecutter Starter project for the Pyramid Web Framework + HTML cleanup diff --git a/html_cleanup/templates/view_log.jinja2 b/html_cleanup/templates/view_log.jinja2 new file mode 100644 index 0000000..e65443c --- /dev/null +++ b/html_cleanup/templates/view_log.jinja2 @@ -0,0 +1,19 @@ +{% extends "layout.jinja2" %} + +{% block content %} + +
+ {% for line in lines %} +
{{ line }}
+ {% endfor %} +
+ + {%if lines %} +
+
+ +
+
+ {% endif %} + +{% endblock content %} diff --git a/html_cleanup/views/default.py b/html_cleanup/views/default.py index 84abc52..5e16392 100644 --- a/html_cleanup/views/default.py +++ b/html_cleanup/views/default.py @@ -30,6 +30,43 @@ def home(request): 'file_name': file_name, } +def process_file(request, input_file, input_name): + # Check file mime type and size + # and if OK, process file + message = '' + ext_allowed = ['text/html'] + max_size = 5 * (1024 ** 2) # 10 Mb + temp_folder = request.registry.settings['temp_folder'] + logfile_name = os.path.join(temp_folder, 'errors_log') + + mime = magic.from_buffer(input_file.read(), mime=True) + # types de fichiers autorisés ? + if mime not in ext_allowed: + message = "Le format du fichier n'est pas valide. Téléchargement refusé." + add_error2log(logfile_name, input_name, message) + else: + # lire la taille du fichier + input_file.seek(0, 2) #seek to end + filesize = input_file.tell() + input_file.seek(0) # back to original position + # controler la taille du fichier + if filesize > max_size: + message = "La taille du fichier dépasse la limite autorisée. Téléchargement refusé." + add_error2log(logfile_name, input_name, message) + else: + # controle OK, traiter le fichier + output_name = "clean_" + input_name + output_file = os.path.join(temp_folder, output_name) + message = clean_html(input_file, output_file) + if message: + add_error2log(logfile_name, input_name, message) + else: + add_error2log(logfile_name, input_name, "*** Cleanup réussi. 
***") + + + + return message + def clean_html(input_file, output_file): # cleanup undesirable tags in html file encoding = "utf-8" @@ -122,46 +159,37 @@ def clean_html(input_file, output_file): return message -def process_file(request, input_file, input_name): - # Check file mime type and size - # and if OK, process file - message = '' - ext_allowed = ['text/html', 'image/png', 'image/jpeg'] - max_size = 5 * (1024 ** 2) # 10 Mb - temp_folder = request.registry.settings['temp_folder'] - logfile_name = os.path.join(temp_folder, 'errors_log') - - mime = magic.from_buffer(input_file.read(), mime=True) - # types de fichiers autorisés ? - if mime not in ext_allowed: - message = "Le format du fichier n'est pas valide. Téléchargement refusé." - add_error2log(logfile_name, input_name, message) - else: - # lire la taille du fichier - input_file.seek(0, 2) #seek to end - filesize = input_file.tell() - input_file.seek(0) # back to original position - # controler la taille du fichier - if filesize > max_size: - message = "La taille du fichier dépasse la limite autorisée. Téléchargement refusé." 
- add_error2log(logfile_name, input_name, message) - else: - # controle OK, traiter le fichier - output_name = "clean_" + input_name - output_file = os.path.join(temp_folder, output_name) - message = clean_html(input_file, output_file) - if message: - add_error2log(logfile_name, input_name, message) - - - return message - def add_error2log(logfile_name, input_name, message): # ecrire message d'erreur dans le log - # Get the current date and time now = datetime.datetime.now() with open(logfile_name, 'a', encoding='utf-8') as file: - line = now.strftime("%Y-%m-%d %H:%M:%S") + ' [' + input_name + '] ' + f'{message}\n' + line = now.strftime("%Y-%m-%d %H:%M:%S") + ' [ ' + input_name + ' ] ' + f'{message}\n' file.write(line) + +@view_config(route_name='view_log', renderer='html_cleanup:templates/view_log.jinja2') +def view_log(request): + + message = '' + temp_folder = request.registry.settings['temp_folder'] + logfile_name = os.path.join(temp_folder, 'errors_log') + lines = [] + + if 'form.submitted' in request.params: + if os.path.exists(logfile_name): + os.remove(logfile_name) + + # Ouvrir le fichier en lecture seule + try: + file = open(logfile_name, "r") + lines = file.readlines() + file.close() + except Exception as e: + pass + + return { + 'page_title': "HTML cleanup log", + 'message': message, + 'lines': lines, + } diff --git a/setup.py b/setup.py index 24eabe2..a4b939c 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ tests_require = [ setup( name='html_cleanup', - version='0.0', + version='1.0', description='html_cleanup', long_description=README + '\n\n' + CHANGES, classifiers=[