suivi et exclusion de pages fr des réunions
This commit is contained in:
parent
471eab4cd0
commit
466f9c773b
5 changed files with 231 additions and 4 deletions
|
@@ -135,6 +135,11 @@ def extract_pages_from_category(html_content, current_url):
|
|||
title = link.get_text()
|
||||
url = WIKI_BASE_URL + link.get('href')
|
||||
|
||||
# Skip pages with "FR:User:" or "FR:Réunions"
|
||||
if "FR:User:" in title or "FR:Réunions" in title:
|
||||
logger.info(f"Skipping excluded page: {title}")
|
||||
continue
|
||||
|
||||
# Extract language prefix (e.g., "En:", "De:", etc.)
|
||||
language_prefix = "Other"
|
||||
match = re.match(r'^([A-Za-z]{2}):', title)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue