removed <span>

This commit is contained in:
Phuoc Cao
2026-01-28 17:08:00 +01:00
parent 92352450f6
commit ce7e0c6624

View File

@@ -133,7 +133,8 @@ def clean_html(input_file, output_file):
#20260104: treated "div" the same as "span", "h1"..."h6"
#20260104: removed "span", </span>
#20260122: moved the function check_input_name into the function clean_html as an inner function
#version: 3_5_3_2 (26.01)
#20260128: removed text-align:justify in <p>
#version: 3_5_3_3
def check_input_name(attribute):
ret_flag=False
@@ -211,7 +212,7 @@ def clean_html(input_file, output_file):
tag[0] = tag[0].lower()
id_match_regex=r'.*(id=\"\w+\").*'
if tag[0] in ["table", "div", "img", "a"]:
if tag[0] in ["table", "img", "a"]:
line_out += "<{}".format(s)
if end_tag > 0:
# if the end tag was found, close it
@@ -220,7 +221,7 @@ def clean_html(input_file, output_file):
# otherwise keep copying the line
skip_tag = False
elif tag[0] in ["span", "p", "h1", "h2", "h3", "h4", "h5", "h6"]:
elif tag[0] in ["div", "p", "h1", "h2", "h3", "h4", "h5", "h6"]:
line_out += "<{}".format(tag[0])
# 26.01: no longer add text-align in <p>
# if tag[0] == "p":