diff --git a/html_cleanup/static/temp/tien-clean.html b/html_cleanup/static/temp/tien-clean.html deleted file mode 100644 index 85d8050..0000000 --- a/html_cleanup/static/temp/tien-clean.html +++ /dev/null @@ -1,429 +0,0 @@ - - - -untitled - - - -

Empty p

- -

An id in p

- -

An id in h2

- -
Quá trình tiểu sử của nhà học giả lỗi - - trinh thám.1 - -
- -

Quá trình tiểu sử của nhà học giả lỗi - - trinh thám.2 - -

- -

Quá trình tiểu sử của nhà học giả lỗi - - trinh thám.3 - -

- -

Quá trình tiểu sử của nhà học giả lỗi - - trinh thám.4 - -

- -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- -

- - This paragraph - - contains a lot of lines - - in the source code, - - but the browser - - ignores it. - -

- - - -

- - This paragraph - - contains a lot of spaces - - in the source code, - - but the browser - - ignores it. - -

- -

1. Báo mạng của - - Trường Bách Khoa:

- -

2. Báo mạng của - - Trường Bách Khoa:

- -

3. Báo mạng của - - Trường Bách Khoa:

- -

4. Báo mạng của Trường Bách Khoa:

- - - - - diff --git a/html_cleanup/static/temp/tien.html b/html_cleanup/static/temp/tien.html deleted file mode 100644 index 1d03215..0000000 --- a/html_cleanup/static/temp/tien.html +++ /dev/null @@ -1,213 +0,0 @@ - - -

Empty p

-

An id in p

-

An id in h2

-
Quá trình tiểu sử của nhà học giả lỗi - trinh thám.1 -
-

Quá trình tiểu sử của nhà học giả lỗi - trinh thám.2 -

-

Quá trình tiểu sử của nhà học giả lỗi - trinh thám.3 -

-

Quá trình tiểu sử của nhà học giả lỗi - trinh thám.4 -

-

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

-

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

-

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

-

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

-

- This paragraph - contains a lot of lines - in the source code, - but the browser - ignores it. -

- -

- This paragraph - contains a lot of spaces - in the source code, - but the browser - ignores it. -

-

1. Báo mạng của - Trường Bách Khoa:

-

2. Báo mạng của - Trường Bách Khoa:

-

3. Báo mạng của - Trường Bách Khoa:

-

4. Báo mạng của Trường Bách Khoa:

- - diff --git a/html_cleanup/views/default.py b/html_cleanup/views/default.py index 6e7a886..9d0d294 100644 --- a/html_cleanup/views/default.py +++ b/html_cleanup/views/default.py @@ -200,7 +200,7 @@ def clean_html(input_file, output_file): # ajouter text-align dans p line_out += ' style="text-align:justify;"' if len(tag) > 1: - matched = re.match(id_match_regex, line_in) + matched = re.search(id_match_regex, line_in) if matched: line_out += ' '+ matched.group(1) line_out += '>'