Replace re.match with re.search when applying id_match_regex in clean_html

This commit is contained in:
Phuoc Cao
2025-12-21 12:38:52 +01:00
parent 25e1bd020a
commit f92b700dff
3 changed files with 1 addition and 643 deletions

View File

@@ -1,429 +0,0 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>untitled</title>
</head>
<body>
<p style="text-align:justify;">Empty p</p>
<p style="text-align:justify;" id="id_in_p">An id in p</p>
<h2 id="id_in_h2">An id in h2</h2>
<div style:"text-align:justify;"> Quá trình tiểu sử của nhà học giả lỗi
trinh thám.<span id="index_1"></span><a href="#endnote_1"><sup>1</sup></a>
</div>
<p style="text-align:justify;" id="index_2"> Quá trình tiểu sử của nhà học giả lỗi
trinh thám.<a href="#endnote_2"><sup>2</sup></a>
</p>
<p style="text-align:justify;"> Quá trình tiểu sử của nhà học giả lỗi
trinh thám.<a name="index_3" href="#endnote_3"><sup>3</sup></a>
</p>
<p style="text-align:justify;"> Quá trình tiểu sử của nhà học giả lỗi
trinh thám.<a id="index_4" href="#endnote_4"><sup>4</sup></a>
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;">
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p style="text-align:justify;" id="endnote_1"><a href="#index_1">1</a>. Báo mạng của
Trường Bách Khoa:</p>
<h1 id="endnote_2"><a href="#index_2">2</a>. Báo mạng của
Trường Bách Khoa:</h1>
<p style="text-align:justify;"><a name="endnote_3" href="#index_3">3</a>. Báo mạng của
Trường Bách Khoa:</p>
<p style="text-align:justify;"><a id="endnote_4" href="#index_4">4</a>. Báo mạng của Trường Bách Khoa:</p>
</body>
</html>

View File

@@ -1,213 +0,0 @@
<html>
<body>
<p>Empty p</p>
<p id="id_in_p">An id in p</p>
<h2 id="id_in_h2">An id in h2</h2>
<div style:"text-align:justify;"> Quá trình tiểu sử của nhà học giả lỗi
trinh thám.<span id="index_1"></span><a href="#endnote_1"><sup>1</sup></a>
</div>
<p id="index_2"> Quá trình tiểu sử của nhà học giả lỗi
trinh thám.<a href="#endnote_2"><sup>2</sup></a>
</p>
<p> Quá trình tiểu sử của nhà học giả lỗi
trinh thám.<a name="index_3" href="#endnote_3"><sup>3</sup></a>
</p>
<p> Quá trình tiểu sử của nhà học giả lỗi
trinh thám.<a id="index_4" href="#endnote_4"><sup>4</sup></a>
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of lines
in the source code,
but the browser
ignores it.
</p>
<p>
This paragraph
contains a lot of spaces
in the source code,
but the browser
ignores it.
</p>
<p id="endnote_1"><a href="#index_1">1</a>. Báo mạng của
Trường Bách Khoa:</p>
<h1 id="endnote_2"><a href="#index_2">2</a>. Báo mạng của
Trường Bách Khoa:</h1>
<p><a name="endnote_3" href="#index_3">3</a>. Báo mạng của
Trường Bách Khoa:</p>
<p><a id="endnote_4" href="#index_4">4</a>. Báo mạng của Trường Bách Khoa:</p>
</body>
</html>

View File

@@ -200,7 +200,7 @@ def clean_html(input_file, output_file):
# ajouter text-align dans p # ajouter text-align dans p
line_out += ' style="text-align:justify;"' line_out += ' style="text-align:justify;"'
if len(tag) > 1: if len(tag) > 1:
matched = re.match(id_match_regex, line_in) matched = re.search(id_match_regex, line_in)
if matched: if matched:
line_out += ' '+ matched.group(1) line_out += ' '+ matched.group(1)
line_out += '>' line_out += '>'