added id in tag

This commit is contained in:
Phuoc Cao
2025-12-20 11:38:47 +01:00
parent 76f840b48c
commit 785f69b59b
2 changed files with 23 additions and 10 deletions

View File

@@ -8,6 +8,7 @@ import datetime
import time
import charset_normalizer
import html
import re
@view_config(route_name='home', renderer='html_cleanup:templates/home.jinja2')
def home(request):
@@ -182,6 +183,7 @@ def clean_html(input_file, output_file):
if tag:
tag[0] = tag[0].lower()
id_match_regex=r'.*(id=\"\w+\").*'
if tag[0] in ["table", "div", "img", "a"]:
line_out += "<{}".format(s)
@@ -191,18 +193,29 @@ def clean_html(input_file, output_file):
else:
# otherwise keep copying the line
skip_tag = False
elif tag[0] in ["p"]:
line_out += "<p align=\"justify\">"
elif tag[0] in ["b", "/b", "em", "/em", "i", "/i", "li", "li/", "/li",
"ol", "/ol", "ul", "/ul", "strong", "/strong", "sub", "/sub",
"sup", "/sup", "u", "/u"]:
elif tag[0] in ["span", "p", "h1", "h2", "h3", "h4", "h5", "h6"]:
line_out += "<{}".format(tag[0])
if tag[0] == "p":
# add text-align to the <p> tag
line_out += ' style="text-align:justify;"'
if len(tag) > 1:
matched = re.match(id_match_regex, line_in)
if matched:
line_out += ' '+ matched.group(1)
line_out += '>'
elif tag[0] in ["b", "/b", "em", "/em", "i", "/i", "li", "li/", "/li", "ol", "/ol",
"ul", "/ul", "strong", "/strong", "sub", "/sub", "sup", "/sup", "u", "/u"]:
line_out += "<{}>".format(tag[0])
elif tag[0] in ["h1", "/h1", "h2", "/h2", "h3", "/h3", "h4", "/h4", "h5",
"/h5", "h6", "/h6", "/p", "body", "/body", "/html", "/div",
"/img", "/a"]:
elif tag[0] in ["/span", "/h1", "/h2", "/h3", "/h4", "/h5", "/h6", "/p", "body", "/body",
"/html", "/div", "/img", "/a"]:
line_out += "<{}>".format(tag[0])
elif tag[0] in ["table", "/table", "td", "/td", "th", "/th", "tr", "/tr"]:
line_out += "<{}>".format(tag[0])
elif tag[0] in ["br", "br/"]:
line_out += "<{}>".format(tag[0])

View File

@@ -14,7 +14,7 @@ requires = [
'pyramid_jinja2',
'pyramid_debugtoolbar',
'python-magic',
'html',
'html3',
'charset_normalizer',
'waitress',
]
@@ -27,7 +27,7 @@ tests_require = [
setup(
name='html_cleanup',
version='1.0',
version='1.1',
description='html_cleanup',
long_description=README + '\n\n' + CHANGES,
classifiers=[