wiki illustrations et team osm fr
This commit is contained in:
parent
d7a54458dc
commit
77ad76cc7e
13 changed files with 78859 additions and 13414 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because one or more lines are too long
|
@ -103,6 +103,7 @@ class WikiController extends AbstractController
|
|||
|
||||
return $alignedSections;
|
||||
}
|
||||
|
||||
#[Route('/wiki/recent-changes', name: 'app_admin_wiki_recent_changes')]
|
||||
public function recentChanges(): Response
|
||||
{
|
||||
|
@ -874,6 +875,7 @@ class WikiController extends AbstractController
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Calculate differences between English and French versions
|
||||
foreach ($wikiPages as $key => $languages) {
|
||||
if (isset($languages['en']) && isset($languages['fr'])) {
|
||||
|
@ -1032,21 +1034,21 @@ class WikiController extends AbstractController
|
|||
if ($linkComparison) {
|
||||
// Sort English-only links
|
||||
if (isset($linkComparison['en_only']) && is_array($linkComparison['en_only'])) {
|
||||
usort($linkComparison['en_only'], function($a, $b) {
|
||||
usort($linkComparison['en_only'], function ($a, $b) {
|
||||
return strcmp($a['href'], $b['href']);
|
||||
});
|
||||
}
|
||||
|
||||
// Sort French-only links
|
||||
if (isset($linkComparison['fr_only']) && is_array($linkComparison['fr_only'])) {
|
||||
usort($linkComparison['fr_only'], function($a, $b) {
|
||||
usort($linkComparison['fr_only'], function ($a, $b) {
|
||||
return strcmp($a['href'], $b['href']);
|
||||
});
|
||||
}
|
||||
|
||||
// Sort common links
|
||||
if (isset($linkComparison['common']) && is_array($linkComparison['common'])) {
|
||||
usort($linkComparison['common'], function($a, $b) {
|
||||
usort($linkComparison['common'], function ($a, $b) {
|
||||
return strcmp($a['en']['href'], $b['en']['href']);
|
||||
});
|
||||
}
|
||||
|
@ -1065,7 +1067,7 @@ class WikiController extends AbstractController
|
|||
if ($sectionComparison) {
|
||||
// Filter common sections
|
||||
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
|
||||
$sectionComparison['common'] = array_filter($sectionComparison['common'], function($section) use ($excludedSections) {
|
||||
$sectionComparison['common'] = array_filter($sectionComparison['common'], function ($section) use ($excludedSections) {
|
||||
// Skip if either English or French title is in the excluded list
|
||||
return !(in_array($section['en']['title'], $excludedSections) || in_array($section['fr']['title'], $excludedSections));
|
||||
});
|
||||
|
@ -1075,7 +1077,7 @@ class WikiController extends AbstractController
|
|||
|
||||
// Filter English-only sections
|
||||
if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
|
||||
$sectionComparison['en_only'] = array_filter($sectionComparison['en_only'], function($section) use ($excludedSections) {
|
||||
$sectionComparison['en_only'] = array_filter($sectionComparison['en_only'], function ($section) use ($excludedSections) {
|
||||
return !in_array($section['title'], $excludedSections);
|
||||
});
|
||||
// Re-index array
|
||||
|
@ -1084,7 +1086,7 @@ class WikiController extends AbstractController
|
|||
|
||||
// Filter French-only sections
|
||||
if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
|
||||
$sectionComparison['fr_only'] = array_filter($sectionComparison['fr_only'], function($section) use ($excludedSections) {
|
||||
$sectionComparison['fr_only'] = array_filter($sectionComparison['fr_only'], function ($section) use ($excludedSections) {
|
||||
return !in_array($section['title'], $excludedSections);
|
||||
});
|
||||
// Re-index array
|
||||
|
@ -1144,7 +1146,7 @@ class WikiController extends AbstractController
|
|||
|
||||
// Also check common sections (English side)
|
||||
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
|
||||
$commonEnSections = array_map(function($section) {
|
||||
$commonEnSections = array_map(function ($section) {
|
||||
return $section['en'];
|
||||
}, $sectionComparison['common']);
|
||||
|
||||
|
@ -1158,7 +1160,7 @@ class WikiController extends AbstractController
|
|||
|
||||
// Also check common sections (French side)
|
||||
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
|
||||
$commonFrSections = array_map(function($section) {
|
||||
$commonFrSections = array_map(function ($section) {
|
||||
return $section['fr'];
|
||||
}, $sectionComparison['common']);
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
<table class="table table-striped table-hover">
|
||||
<thead class="thead-dark">
|
||||
<tr>
|
||||
<th rowspan="2">Image</th>
|
||||
<th rowspan="2">Clé</th>
|
||||
<th colspan="4" class="text-center">Différences FR vs EN</th>
|
||||
<th rowspan="2" class="text-center">Score de<br>décrépitude</th>
|
||||
|
@ -34,6 +35,11 @@
|
|||
{% for key, languages in wiki_pages %}
|
||||
{% if languages['en'] is defined and languages['fr'] is defined %}
|
||||
<tr>
|
||||
|
||||
<td>
|
||||
<img src="{{ languages['en'].description_img_url }}" alt="image" style="height: 2rem;">
|
||||
|
||||
</td>
|
||||
<td>
|
||||
<strong>{{ key }}</strong>
|
||||
</td>
|
||||
|
@ -147,7 +153,8 @@
|
|||
<i class="bi bi-flag-fill"></i> EN
|
||||
</a>
|
||||
<a href="{{ path('app_admin_wiki_create_french', {'key': key}) }}"
|
||||
class="btn btn-sm btn-outline-primary" title="Créer une traduction française">
|
||||
class="btn btn-sm btn-outline-primary"
|
||||
title="Créer une traduction française">
|
||||
<i class="bi bi-translate"></i> créer FR
|
||||
</a>
|
||||
{# <a href="{{ path('app_admin_wiki_compare', {'key': key}) }}" #}
|
||||
|
@ -187,27 +194,27 @@
|
|||
{{ parent() }}
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
document.addEventListener('DOMContentLoaded', function () {
|
||||
// Collect data from the table
|
||||
const labels = [];
|
||||
const scores = [];
|
||||
const colors = [];
|
||||
|
||||
{% for key, languages in wiki_pages %}
|
||||
{% if languages['en'] is defined and languages['fr'] is defined %}
|
||||
labels.push("{{ key }}");
|
||||
{% set score = languages['en'].staleness_score|default(0) %}
|
||||
scores.push({{ score }});
|
||||
{% if languages['en'] is defined and languages['fr'] is defined %}
|
||||
labels.push("{{ key }}");
|
||||
{% set score = languages['en'].staleness_score|default(0) %}
|
||||
scores.push({{ score }});
|
||||
|
||||
// Set color based on score
|
||||
{% if score > 50 %}
|
||||
colors.push('rgba(220, 53, 69, 0.7)'); // danger
|
||||
{% elseif score > 20 %}
|
||||
colors.push('rgba(255, 193, 7, 0.7)'); // warning
|
||||
{% else %}
|
||||
colors.push('rgba(25, 135, 84, 0.7)'); // success
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
// Set color based on score
|
||||
{% if score > 50 %}
|
||||
colors.push('rgba(220, 53, 69, 0.7)'); // danger
|
||||
{% elseif score > 20 %}
|
||||
colors.push('rgba(255, 193, 7, 0.7)'); // warning
|
||||
{% else %}
|
||||
colors.push('rgba(25, 135, 84, 0.7)'); // success
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
// Sort data by score (descending)
|
||||
|
@ -247,7 +254,7 @@
|
|||
},
|
||||
tooltip: {
|
||||
callbacks: {
|
||||
label: function(context) {
|
||||
label: function (context) {
|
||||
return `Score: ${context.raw}`;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -59,6 +59,37 @@
|
|||
</h1>
|
||||
<p class="lead">Comparaison détaillée des pages wiki en français et en anglais pour la clé OSM "{{ key }}".</p>
|
||||
|
||||
<div class="row mb-4">
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-header bg-primary text-white">
|
||||
<h3>Page anglaise</h3>
|
||||
</div>
|
||||
<div class="card-body text-center">
|
||||
{% if en_page.description_img_url is defined and en_page.description_img_url %}
|
||||
<img src="{{ en_page.description_img_url }}" alt="{{ key }}" class="img-fluid" style="max-height: 200px; object-fit: contain;">
|
||||
{% else %}
|
||||
<div class="alert alert-secondary">Pas d'image d'illustration</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-header bg-info text-white">
|
||||
<h3>Page française</h3>
|
||||
</div>
|
||||
<div class="card-body text-center">
|
||||
{% if fr_page is defined and fr_page is not null and fr_page.description_img_url is defined and fr_page.description_img_url %}
|
||||
<img src="{{ fr_page.description_img_url }}" alt="{{ key }}" class="img-fluid" style="max-height: 200px; object-fit: contain;">
|
||||
{% else %}
|
||||
<div class="alert alert-secondary">Pas d'image d'illustration</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if fr_page is defined and fr_page is not null %}
|
||||
|
||||
|
||||
|
@ -212,88 +243,87 @@
|
|||
</div>
|
||||
{% endif %}
|
||||
|
||||
{# {% if detailed_comparison and detailed_comparison.media_comparison %} #}
|
||||
{# <div class="card mb-4"> #}
|
||||
{# <div class="card-header"> #}
|
||||
{# <h2>Comparaison des médias</h2> #}
|
||||
{# </div> #}
|
||||
{# <div class="card-body"> #}
|
||||
{# <div class="row"> #}
|
||||
{# <div class="col-md-6"> #}
|
||||
{# <div class="card h-100"> #}
|
||||
{# <div class="card-header bg-primary text-white"> #}
|
||||
{# <h3>Images en anglais</h3> #}
|
||||
{# <span class="badge bg-light text-dark">{{ en_page.media_count|default(0) }} images</span> #}
|
||||
{# </div> #}
|
||||
{# <div class="card-body"> #}
|
||||
{% if detailed_comparison and detailed_comparison.media_comparison %}
|
||||
<div class="card mb-4">
|
||||
<div class="card-header">
|
||||
<h2>Comparaison des médias</h2>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<div class="card h-100">
|
||||
<div class="card-header bg-primary text-white">
|
||||
<h3>Images en anglais</h3>
|
||||
<span class="badge bg-light text-dark">{{ en_page.media_count|default(0) }} images</span>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<h4>Images uniquement en anglais
|
||||
({{ detailed_comparison.media_comparison.en_only|length }} uniques
|
||||
sur {{ detailed_comparison.media_comparison.en_only_count }} total)</h4>
|
||||
<div class="row">
|
||||
{% for media in detailed_comparison.media_comparison.en_only %}
|
||||
<div class="col-12 mb-2">
|
||||
<div class="card border-warning">
|
||||
<img src="{{ media.src }}" class="card-img-top"
|
||||
alt="{{ media.alt }}"
|
||||
style="max-height: 150px; object-fit: contain;">
|
||||
<div class="card-body p-2">
|
||||
<p class="card-text small">{{ media.alt }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="card h-100">
|
||||
<div class="card-header bg-info text-white">
|
||||
<h3>Images en français</h3>
|
||||
<span class="badge bg-light text-dark">{{ fr_page.media_count|default(0) }} images</span>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<h4>Images communes ({{ detailed_comparison.media_comparison.common|length }})</h4>
|
||||
<div class="row mb-3">
|
||||
{% for media in detailed_comparison.media_comparison.common %}
|
||||
<div class="col-md-6 mb-2">
|
||||
<div class="card">
|
||||
<img src="{{ media.fr.src }}" class="card-img-top"
|
||||
alt="{{ media.fr.alt }}"
|
||||
style="max-height: 150px; object-fit: contain;">
|
||||
<div class="card-body p-2">
|
||||
<p class="card-text small">{{ media.fr.alt }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
{# <h4>Images uniquement en anglais #}
|
||||
{# ({{ detailed_comparison.media_comparison.en_only|length }} uniques #}
|
||||
{# sur {{ detailed_comparison.media_comparison.en_only_count }} total)</h4> #}
|
||||
{# <div class="row"> #}
|
||||
{# {% for media in detailed_comparison.media_comparison.en_only %} #}
|
||||
{# <div class="col-12 mb-2"> #}
|
||||
{# <div class="card border-warning"> #}
|
||||
{# <img src="{{ media.src }}" class="card-img-top" #}
|
||||
{# alt="{{ media.alt }}" #}
|
||||
{# style="max-height: 150px; object-fit: contain;"> #}
|
||||
{# <div class="card-body p-2"> #}
|
||||
{# <p class="card-text small">{{ media.alt }}</p> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# {% endfor %} #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# <div class="col-md-6"> #}
|
||||
{# <div class="card h-100"> #}
|
||||
{# <div class="card-header bg-info text-white"> #}
|
||||
{# <h3>Images en français</h3> #}
|
||||
{# <span class="badge bg-light text-dark">{{ fr_page.media_count|default(0) }} images</span> #}
|
||||
{# </div> #}
|
||||
{# <div class="card-body"> #}
|
||||
{# <h4>Images communes ({{ detailed_comparison.media_comparison.common|length }})</h4> #}
|
||||
{# <div class="row mb-3"> #}
|
||||
{# {% for media in detailed_comparison.media_comparison.common %} #}
|
||||
{# <div class="col-md-6 mb-2"> #}
|
||||
{# <div class="card"> #}
|
||||
{# <img src="{{ media.fr.src }}" class="card-img-top" #}
|
||||
{# alt="{{ media.fr.alt }}" #}
|
||||
{# style="max-height: 150px; object-fit: contain;"> #}
|
||||
{# <div class="card-body p-2"> #}
|
||||
{# <p class="card-text small">{{ media.fr.alt }}</p> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# {% endfor %} #}
|
||||
{# </div> #}
|
||||
|
||||
{# <h4>Images uniquement en français #}
|
||||
{# ({{ detailed_comparison.media_comparison.fr_only|length }} uniques #}
|
||||
{# sur {{ detailed_comparison.media_comparison.fr_only_count }} total)</h4> #}
|
||||
{# <div class="row"> #}
|
||||
{# {% for media in detailed_comparison.media_comparison.fr_only %} #}
|
||||
{# <div class="col-12 mb-2"> #}
|
||||
{# <div class="card border-info"> #}
|
||||
{# <img src="{{ media.src }}" class="card-img-top" #}
|
||||
{# alt="{{ media.alt }}" #}
|
||||
{# style="max-height: 150px; object-fit: contain;"> #}
|
||||
{# <div class="card-body p-2"> #}
|
||||
{# <p class="card-text small">{{ media.alt }}</p> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# {% endfor %} #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# </div> #}
|
||||
{# {% endif %} #}
|
||||
<h4>Images uniquement en français
|
||||
({{ detailed_comparison.media_comparison.fr_only|length }} uniques
|
||||
sur {{ detailed_comparison.media_comparison.fr_only_count }} total)</h4>
|
||||
<div class="row">
|
||||
{% for media in detailed_comparison.media_comparison.fr_only %}
|
||||
<div class="col-12 mb-2">
|
||||
<div class="card border-info">
|
||||
<img src="{{ media.src }}" class="card-img-top"
|
||||
alt="{{ media.alt }}"
|
||||
style="max-height: 150px; object-fit: contain;">
|
||||
<div class="card-body p-2">
|
||||
<p class="card-text small">{{ media.alt }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{# {% if detailed_comparison and detailed_comparison.link_comparison %} #}
|
||||
{# <div class="card mb-4"> #}
|
||||
|
|
|
@ -89,10 +89,20 @@
|
|||
{% for page in pages %}
|
||||
<tr>
|
||||
<td>
|
||||
<strong>{{ page.title }}</strong>
|
||||
{% if page.is_english %}
|
||||
<span class="badge bg-success">Priorité</span>
|
||||
{% endif %}
|
||||
<div class="d-flex align-items-center">
|
||||
{% if page.description_img_url is defined and page.description_img_url %}
|
||||
<div class="me-3">
|
||||
<img src="{{ page.description_img_url }}" alt="{{ page.title }}"
|
||||
style="max-width: 80px; max-height: 60px; object-fit: contain;">
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
<strong>{{ page.title }}</strong>
|
||||
{% if page.is_english %}
|
||||
<span class="badge bg-success">Priorité</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
<td>
|
||||
{% if page.outdatedness_score is defined %}
|
||||
|
@ -171,9 +181,18 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||
|
||||
// Format titles in MediaWiki format
|
||||
let mediawikiText = '';
|
||||
titleElements.forEach(function(element) {
|
||||
const title = element.textContent.trim();
|
||||
mediawikiText += '* [[' + title + ']]\n';
|
||||
const rows = englishSection.querySelectorAll('tbody tr');
|
||||
|
||||
rows.forEach(function(row) {
|
||||
const title = row.querySelector('td:first-child strong').textContent.trim();
|
||||
const imgElement = row.querySelector('td:first-child img');
|
||||
|
||||
if (imgElement) {
|
||||
const imgSrc = imgElement.getAttribute('src');
|
||||
mediawikiText += '* [[' + title + ']] - Image: ' + imgSrc + '\n';
|
||||
} else {
|
||||
mediawikiText += '* [[' + title + ']]\n';
|
||||
}
|
||||
});
|
||||
|
||||
// Copy to clipboard
|
||||
|
|
|
@ -27,6 +27,7 @@ import os
|
|||
import re
|
||||
import random
|
||||
import hashlib
|
||||
import csv
|
||||
from datetime import datetime, timedelta
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
@ -41,10 +42,33 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
# Constants
|
||||
OUTPUT_FILE = "pages_unavailable_in_french.json"
|
||||
WIKI_PAGES_CSV = "wiki_pages.csv"
|
||||
BASE_URL = "https://wiki.openstreetmap.org/wiki/Category:Pages_unavailable_in_French"
|
||||
WIKI_BASE_URL = "https://wiki.openstreetmap.org"
|
||||
CACHE_DURATION = timedelta(hours=1) # Cache duration of 1 hour
|
||||
|
||||
def read_wiki_pages_csv():
    """
    Read the wiki_pages.csv file and create a mapping of URLs to description_img_url values

    Rows that lack a URL or an image URL are skipped. If the file cannot be
    opened, decoded as UTF-8 or parsed as CSV, the error is logged and an
    empty mapping is returned instead of raising.

    Returns:
        dict: Dictionary mapping URLs to description_img_url values
    """
    url_to_img_map = {}

    try:
        with open(WIKI_PAGES_CSV, 'r', newline='', encoding='utf-8') as f:
            for row in csv.DictReader(f):
                # DictReader fills columns missing from a short row with None,
                # so test the values themselves rather than key membership.
                url = row.get('url')
                img_url = row.get('description_img_url')
                if url and img_url:
                    url_to_img_map[url] = img_url
    except (OSError, UnicodeDecodeError, csv.Error) as e:
        # UnicodeDecodeError derives from ValueError, not OSError, so it has to
        # be listed explicitly for a badly encoded file to take this path.
        logger.error(f"Error reading {WIKI_PAGES_CSV}: {e}")
        return {}

    logger.info(f"Read {len(url_to_img_map)} image URLs from {WIKI_PAGES_CSV}")
    return url_to_img_map
|
||||
|
||||
def is_cache_fresh():
|
||||
"""
|
||||
Check if the cache file exists and is less than CACHE_DURATION old
|
||||
|
@ -273,6 +297,9 @@ def main():
|
|||
logger.info(f"Use --force to update anyway")
|
||||
return
|
||||
|
||||
# Read image URLs from wiki_pages.csv
|
||||
url_to_img_map = read_wiki_pages_csv()
|
||||
|
||||
# Scrape pages
|
||||
pages = scrape_all_pages()
|
||||
|
||||
|
@ -280,6 +307,11 @@ def main():
|
|||
logger.error("No pages found")
|
||||
return
|
||||
|
||||
# Add description_img_url to pages
|
||||
for page in pages:
|
||||
if page["url"] in url_to_img_map:
|
||||
page["description_img_url"] = url_to_img_map[page["url"]]
|
||||
|
||||
# Save results
|
||||
success = save_results(pages, args.dry_run)
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Binary file not shown.
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 52 KiB |
|
@ -18,5 +18,65 @@
|
|||
{
|
||||
"key": "addr:street",
|
||||
"count": 161005721
|
||||
},
|
||||
{
|
||||
"key": "addr:city",
|
||||
"count": 123355107
|
||||
},
|
||||
{
|
||||
"key": "name",
|
||||
"count": 109342549
|
||||
},
|
||||
{
|
||||
"key": "addr:postcode",
|
||||
"count": 107014659
|
||||
},
|
||||
{
|
||||
"key": "natural",
|
||||
"count": 84723029
|
||||
},
|
||||
{
|
||||
"key": "surface",
|
||||
"count": 72309071
|
||||
},
|
||||
{
|
||||
"key": "addr:country",
|
||||
"count": 50567842
|
||||
},
|
||||
{
|
||||
"key": "landuse",
|
||||
"count": 48196369
|
||||
},
|
||||
{
|
||||
"key": "power",
|
||||
"count": 44787307
|
||||
},
|
||||
{
|
||||
"key": "waterway",
|
||||
"count": 37279458
|
||||
},
|
||||
{
|
||||
"key": "building:levels",
|
||||
"count": 36502866
|
||||
},
|
||||
{
|
||||
"key": "amenity",
|
||||
"count": 30994353
|
||||
},
|
||||
{
|
||||
"key": "barrier",
|
||||
"count": 30164354
|
||||
},
|
||||
{
|
||||
"key": "source:date",
|
||||
"count": 29112775
|
||||
},
|
||||
{
|
||||
"key": "service",
|
||||
"count": 28396250
|
||||
},
|
||||
{
|
||||
"key": "addr:state",
|
||||
"count": 25367076
|
||||
}
|
||||
]
|
|
@ -211,6 +211,145 @@ def fetch_wiki_page(key, language='en'):
|
|||
|
||||
# Get media details (src and alt text)
|
||||
media_details = []
|
||||
|
||||
# Extract description image specifically
|
||||
# Try multiple selectors to find the description image
|
||||
description_img = None
|
||||
|
||||
# Debug: Log the key we're processing
|
||||
logger.info(f"Looking for description image for key '{key}' in {language}")
|
||||
|
||||
# Function to filter out OSM logo and small icons
|
||||
def is_relevant_image(img):
    """
    Decide whether an image element is worth considering as an illustration.

    An image is rejected when its ``src`` contains ``osm_logo`` or when its
    declared ``width`` or ``height`` is below 30. A dimension that is missing
    or is not a plain integer (for example ``"100%"``) is treated as unknown
    and does not disqualify the image.

    Args:
        img: Element whose attributes are read through ``get(name, default)``.

    Returns:
        bool: True if the image should be kept, False otherwise.
    """
    def declared_size(attr):
        # HTML width/height attributes are free-form strings; a bare int()
        # raises ValueError on values such as "100%" or "auto".
        value = img.get(attr)
        if not value:
            return None
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    # Skip OSM logo
    if 'osm_logo' in img.get('src', ''):
        return False

    # Skip small icons (either declared dimension under 30)
    for attr in ('width', 'height'):
        size = declared_size(attr)
        if size is not None and size < 30:
            return False

    return True
|
||||
|
||||
# Special case for highway key - directly target the image we want
|
||||
if key == 'highway':
|
||||
# Try to find the specific image in figure elements
|
||||
highway_img_elements = content.select('figure.mw-halign-center img')
|
||||
logger.info(f" Highway specific selector 'figure.mw-halign-center img' found {len(highway_img_elements)} elements")
|
||||
|
||||
# Filter for relevant images
|
||||
relevant_images = [img for img in highway_img_elements if is_relevant_image(img)]
|
||||
logger.info(f" Found {len(relevant_images)} relevant images for highway")
|
||||
|
||||
if relevant_images:
|
||||
description_img = relevant_images[0]
|
||||
logger.info(f" Using highway-specific image: {description_img.get('src', '')}")
|
||||
|
||||
# If not found with highway-specific selector, try the td.d_image selector
|
||||
if not description_img:
|
||||
description_img_elements = content.select('td.d_image img')
|
||||
logger.info(f" Selector 'td.d_image img' found {len(description_img_elements)} elements")
|
||||
|
||||
# Filter for relevant images
|
||||
relevant_images = [img for img in description_img_elements if is_relevant_image(img)]
|
||||
logger.info(f" Found {len(relevant_images)} relevant images in td.d_image")
|
||||
|
||||
if relevant_images:
|
||||
description_img = relevant_images[0]
|
||||
logger.info(f" Using image from 'td.d_image img': {description_img.get('src', '')}")
|
||||
|
||||
# If still not found, try the specific selector for .description img.mw-file-element
|
||||
if not description_img:
|
||||
description_img_elements = content.select('.description img.mw-file-element')
|
||||
logger.info(f" Selector '.description img.mw-file-element' found {len(description_img_elements)} elements")
|
||||
|
||||
# Filter for relevant images
|
||||
relevant_images = [img for img in description_img_elements if is_relevant_image(img)]
|
||||
logger.info(f" Found {len(relevant_images)} relevant images in .description")
|
||||
|
||||
if relevant_images:
|
||||
description_img = relevant_images[0]
|
||||
logger.info(f" Using image from '.description img.mw-file-element': {description_img.get('src', '')}")
|
||||
|
||||
# If still not found, try images in figures within the description box
|
||||
if not description_img:
|
||||
description_img_elements = content.select('.description figure img')
|
||||
logger.info(f" Selector '.description figure img' found {len(description_img_elements)} elements")
|
||||
|
||||
# Filter for relevant images
|
||||
relevant_images = [img for img in description_img_elements if is_relevant_image(img)]
|
||||
logger.info(f" Found {len(relevant_images)} relevant images in .description figure")
|
||||
|
||||
if relevant_images:
|
||||
description_img = relevant_images[0]
|
||||
logger.info(f" Using image from '.description figure img': {description_img.get('src', '')}")
|
||||
|
||||
# If still not found, try any image in the description box
|
||||
if not description_img:
|
||||
description_img_elements = content.select('.description img')
|
||||
logger.info(f" Selector '.description img' found {len(description_img_elements)} elements")
|
||||
|
||||
# Filter for relevant images
|
||||
relevant_images = [img for img in description_img_elements if is_relevant_image(img)]
|
||||
logger.info(f" Found {len(relevant_images)} relevant images in .description general")
|
||||
|
||||
if relevant_images:
|
||||
description_img = relevant_images[0]
|
||||
logger.info(f" Using image from '.description img': {description_img.get('src', '')}")
|
||||
|
||||
# If still not found, try images in the DescriptionBox table
|
||||
if not description_img:
|
||||
description_img_elements = content.select('table.DescriptionBox img')
|
||||
logger.info(f" Selector 'table.DescriptionBox img' found {len(description_img_elements)} elements")
|
||||
|
||||
# Filter for relevant images
|
||||
relevant_images = [img for img in description_img_elements if is_relevant_image(img)]
|
||||
logger.info(f" Found {len(relevant_images)} relevant images in DescriptionBox")
|
||||
|
||||
if relevant_images:
|
||||
description_img = relevant_images[0]
|
||||
logger.info(f" Using image from 'table.DescriptionBox img': {description_img.get('src', '')}")
|
||||
|
||||
# If still not found, try images in figure elements anywhere in the content
|
||||
if not description_img:
|
||||
description_img_elements = content.select('figure img')
|
||||
logger.info(f" Selector 'figure img' found {len(description_img_elements)} elements")
|
||||
|
||||
# Filter for relevant images
|
||||
relevant_images = [img for img in description_img_elements if is_relevant_image(img)]
|
||||
logger.info(f" Found {len(relevant_images)} relevant images in figure elements")
|
||||
|
||||
if relevant_images:
|
||||
description_img = relevant_images[0]
|
||||
logger.info(f" Using image from 'figure img': {description_img.get('src', '')}")
|
||||
|
||||
# If we still don't have an image, use any image that's not the OSM logo
|
||||
if not description_img:
|
||||
all_images = content.select('img')
|
||||
relevant_images = [img for img in all_images if is_relevant_image(img)]
|
||||
logger.info(f" Found {len(relevant_images)} relevant images in the entire page")
|
||||
|
||||
if relevant_images:
|
||||
description_img = relevant_images[0]
|
||||
logger.info(f" Using fallback image: {description_img.get('src', '')}")
|
||||
|
||||
# Process the found image
|
||||
description_img_url = None
|
||||
if description_img:
|
||||
src = description_img.get('src', '')
|
||||
if src:
|
||||
# Make relative URLs absolute
|
||||
if src.startswith('//'):
|
||||
src = 'https:' + src
|
||||
elif src.startswith('/'):
|
||||
src = 'https://wiki.openstreetmap.org' + src
|
||||
|
||||
description_img_url = src
|
||||
|
||||
# Process all images
|
||||
for img in media_elements:
|
||||
src = img.get('src', '')
|
||||
if src:
|
||||
|
@ -251,7 +390,8 @@ def fetch_wiki_page(key, language='en'):
|
|||
'link_details': link_details,
|
||||
'media_count': media_count,
|
||||
'media_details': media_details,
|
||||
'categories': categories
|
||||
'categories': categories,
|
||||
'description_img_url': description_img_url
|
||||
}
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
|
@ -692,7 +832,7 @@ def main():
|
|||
try:
|
||||
with open(WIKI_PAGES_CSV, 'w', newline='', encoding='utf-8') as f:
|
||||
# Basic fields for CSV (detailed content will be in JSON only)
|
||||
fieldnames = ['key', 'language', 'url', 'last_modified', 'sections', 'word_count', 'link_count', 'media_count', 'staleness_score']
|
||||
fieldnames = ['key', 'language', 'url', 'last_modified', 'sections', 'word_count', 'link_count', 'media_count', 'staleness_score', 'description_img_url']
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
|
||||
writer.writeheader()
|
||||
|
|
|
@ -1,11 +1,40 @@
|
|||
key,language,url,last_modified,sections,word_count,link_count,media_count,staleness_score
|
||||
building,en,https://wiki.openstreetmap.org/wiki/Key:building,2025-06-10,31,3774,627,158,8.91
|
||||
building,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building,2025-05-22,25,3181,544,155,8.91
|
||||
source,en,https://wiki.openstreetmap.org/wiki/Key:source,2025-08-12,27,2752,314,42,113.06
|
||||
source,fr,https://wiki.openstreetmap.org/wiki/FR:Key:source,2024-02-07,23,2593,230,35,113.06
|
||||
highway,en,https://wiki.openstreetmap.org/wiki/Key:highway,2025-04-10,30,4126,780,314,20.35
|
||||
highway,fr,https://wiki.openstreetmap.org/wiki/FR:Key:highway,2025-01-05,30,4141,695,313,20.35
|
||||
addr:housenumber,en,https://wiki.openstreetmap.org/wiki/Key:addr:housenumber,2025-07-24,11,330,97,20,14.01
|
||||
addr:housenumber,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:housenumber,2025-08-23,15,1653,150,77,14.01
|
||||
addr:street,en,https://wiki.openstreetmap.org/wiki/Key:addr:street,2024-10-29,12,602,101,16,66.04
|
||||
addr:street,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:street,2025-08-23,15,1653,150,77,66.04
|
||||
key,language,url,last_modified,sections,word_count,link_count,media_count,staleness_score,description_img_url
|
||||
building,en,https://wiki.openstreetmap.org/wiki/Key:building,2025-06-10,31,3774,627,158,8.91,https://wiki.openstreetmap.org/w/images/thumb/6/61/Emptyhouse.jpg/200px-Emptyhouse.jpg
|
||||
building,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building,2025-05-22,25,3181,544,155,8.91,https://wiki.openstreetmap.org/w/images/thumb/6/61/Emptyhouse.jpg/200px-Emptyhouse.jpg
|
||||
source,en,https://wiki.openstreetmap.org/wiki/Key:source,2025-08-12,27,2752,314,42,113.06,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
source,fr,https://wiki.openstreetmap.org/wiki/FR:Key:source,2024-02-07,23,2593,230,35,113.06,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
highway,en,https://wiki.openstreetmap.org/wiki/Key:highway,2025-04-10,30,4126,780,314,20.35,https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Roads_in_Switzerland_%2827965437018%29.jpg/200px-Roads_in_Switzerland_%2827965437018%29.jpg
|
||||
highway,fr,https://wiki.openstreetmap.org/wiki/FR:Key:highway,2025-01-05,30,4141,695,313,20.35,https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Roads_in_Switzerland_%2827965437018%29.jpg/200px-Roads_in_Switzerland_%2827965437018%29.jpg
|
||||
addr:housenumber,en,https://wiki.openstreetmap.org/wiki/Key:addr:housenumber,2025-07-24,11,330,97,20,14.01,https://upload.wikimedia.org/wikipedia/commons/thumb/1/16/Ferry_Street%2C_Portaferry_%2809%29%2C_October_2009.JPG/200px-Ferry_Street%2C_Portaferry_%2809%29%2C_October_2009.JPG
|
||||
addr:housenumber,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:housenumber,2025-08-23,15,1653,150,77,14.01,https://wiki.openstreetmap.org/w/images/thumb/e/e9/Housenumber-karlsruhe-de.png/200px-Housenumber-karlsruhe-de.png
|
||||
addr:street,en,https://wiki.openstreetmap.org/wiki/Key:addr:street,2024-10-29,12,602,101,16,66.04,https://upload.wikimedia.org/wikipedia/commons/thumb/6/64/UK_-_London_%2830474933636%29.jpg/200px-UK_-_London_%2830474933636%29.jpg
|
||||
addr:street,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:street,2025-08-23,15,1653,150,77,66.04,https://wiki.openstreetmap.org/w/images/thumb/e/e9/Housenumber-karlsruhe-de.png/200px-Housenumber-karlsruhe-de.png
|
||||
addr:city,en,https://wiki.openstreetmap.org/wiki/Key:addr:city,2025-07-29,15,802,105,17,9.93,https://upload.wikimedia.org/wikipedia/commons/thumb/1/18/Lillerod.jpg/200px-Lillerod.jpg
|
||||
addr:city,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:city,2025-08-23,15,1653,150,77,9.93,https://wiki.openstreetmap.org/w/images/thumb/e/e9/Housenumber-karlsruhe-de.png/200px-Housenumber-karlsruhe-de.png
|
||||
name,en,https://wiki.openstreetmap.org/wiki/Key:name,2025-07-25,17,2196,281,82,42.39,https://upload.wikimedia.org/wikipedia/commons/thumb/6/61/Helena%2C_Montana.jpg/200px-Helena%2C_Montana.jpg
|
||||
name,fr,https://wiki.openstreetmap.org/wiki/FR:Key:name,2025-01-16,21,1720,187,60,42.39,https://wiki.openstreetmap.org/w/images/3/37/Strakers.jpg
|
||||
addr:postcode,en,https://wiki.openstreetmap.org/wiki/Key:addr:postcode,2024-10-29,14,382,83,11,67.11,https://upload.wikimedia.org/wikipedia/commons/thumb/0/04/Farrer_post_code.jpg/200px-Farrer_post_code.jpg
|
||||
addr:postcode,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:postcode,2025-08-23,15,1653,150,77,67.11,https://wiki.openstreetmap.org/w/images/thumb/e/e9/Housenumber-karlsruhe-de.png/200px-Housenumber-karlsruhe-de.png
|
||||
natural,en,https://wiki.openstreetmap.org/wiki/Key:natural,2025-07-17,17,2070,535,189,22.06,https://upload.wikimedia.org/wikipedia/commons/thumb/0/0e/VocaDi-Nature%2CGeneral.jpeg/200px-VocaDi-Nature%2CGeneral.jpeg
|
||||
natural,fr,https://wiki.openstreetmap.org/wiki/FR:Key:natural,2025-04-21,13,1499,455,174,22.06,https://upload.wikimedia.org/wikipedia/commons/thumb/0/0e/VocaDi-Nature%2CGeneral.jpeg/200px-VocaDi-Nature%2CGeneral.jpeg
|
||||
surface,en,https://wiki.openstreetmap.org/wiki/Key:surface,2025-08-28,24,3475,591,238,264.64,https://upload.wikimedia.org/wikipedia/commons/thumb/a/a2/Transportation_in_Tanzania_Traffic_problems.JPG/200px-Transportation_in_Tanzania_Traffic_problems.JPG
|
||||
surface,fr,https://wiki.openstreetmap.org/wiki/FR:Key:surface,2022-02-22,13,2587,461,232,264.64,https://upload.wikimedia.org/wikipedia/commons/thumb/a/a2/Transportation_in_Tanzania_Traffic_problems.JPG/200px-Transportation_in_Tanzania_Traffic_problems.JPG
|
||||
addr:country,en,https://wiki.openstreetmap.org/wiki/Key:addr:country,2024-12-01,9,184,65,11,22.96,https://upload.wikimedia.org/wikipedia/commons/thumb/8/86/Europe_ISO_3166-1.svg/200px-Europe_ISO_3166-1.svg.png
|
||||
addr:country,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:country,2025-03-25,8,187,65,11,22.96,https://upload.wikimedia.org/wikipedia/commons/thumb/8/86/Europe_ISO_3166-1.svg/200px-Europe_ISO_3166-1.svg.png
|
||||
landuse,en,https://wiki.openstreetmap.org/wiki/Key:landuse,2025-03-01,17,2071,446,168,39.41,https://upload.wikimedia.org/wikipedia/commons/thumb/d/d3/Changing_landuse_-_geograph.org.uk_-_1137810.jpg/200px-Changing_landuse_-_geograph.org.uk_-_1137810.jpg
|
||||
landuse,fr,https://wiki.openstreetmap.org/wiki/FR:Key:landuse,2024-08-20,19,2053,418,182,39.41,https://upload.wikimedia.org/wikipedia/commons/thumb/d/d3/Changing_landuse_-_geograph.org.uk_-_1137810.jpg/200px-Changing_landuse_-_geograph.org.uk_-_1137810.jpg
|
||||
power,en,https://wiki.openstreetmap.org/wiki/Key:power,2025-02-28,20,641,127,21,124.89,https://wiki.openstreetmap.org/w/images/thumb/0/01/Power-tower.JPG/200px-Power-tower.JPG
|
||||
power,fr,https://wiki.openstreetmap.org/wiki/FR:Key:power,2023-06-27,14,390,105,25,124.89,https://wiki.openstreetmap.org/w/images/thumb/0/01/Power-tower.JPG/200px-Power-tower.JPG
|
||||
waterway,en,https://wiki.openstreetmap.org/wiki/Key:waterway,2025-03-10,21,1830,365,118,77.94,https://wiki.openstreetmap.org/w/images/thumb/f/fe/450px-Marshall-county-indiana-yellow-river.jpg/200px-450px-Marshall-county-indiana-yellow-river.jpg
|
||||
waterway,fr,https://wiki.openstreetmap.org/wiki/FR:Key:waterway,2024-03-08,18,1291,272,113,77.94,https://wiki.openstreetmap.org/w/images/thumb/f/fe/450px-Marshall-county-indiana-yellow-river.jpg/200px-450px-Marshall-county-indiana-yellow-river.jpg
|
||||
building:levels,en,https://wiki.openstreetmap.org/wiki/Key:building:levels,2025-08-13,16,1351,204,25,76.11,https://wiki.openstreetmap.org/w/images/thumb/4/47/Building-levels.png/200px-Building-levels.png
|
||||
building:levels,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building:levels,2024-08-01,15,1457,202,26,76.11,https://wiki.openstreetmap.org/w/images/thumb/4/47/Building-levels.png/200px-Building-levels.png
|
||||
amenity,en,https://wiki.openstreetmap.org/wiki/Key:amenity,2025-08-24,29,3066,915,504,160.78,https://wiki.openstreetmap.org/w/images/thumb/a/a5/Mapping-Features-Parking-Lot.png/200px-Mapping-Features-Parking-Lot.png
|
||||
amenity,fr,https://wiki.openstreetmap.org/wiki/FR:Key:amenity,2023-07-19,22,2146,800,487,160.78,https://wiki.openstreetmap.org/w/images/thumb/a/a5/Mapping-Features-Parking-Lot.png/200px-Mapping-Features-Parking-Lot.png
|
||||
barrier,en,https://wiki.openstreetmap.org/wiki/Key:barrier,2025-04-15,17,2137,443,173,207.98,https://upload.wikimedia.org/wikipedia/commons/thumb/4/4c/2014_Bystrzyca_K%C5%82odzka%2C_mury_obronne_05.jpg/200px-2014_Bystrzyca_K%C5%82odzka%2C_mury_obronne_05.jpg
|
||||
barrier,fr,https://wiki.openstreetmap.org/wiki/FR:Key:barrier,2022-08-16,15,542,103,18,207.98,https://upload.wikimedia.org/wikipedia/commons/thumb/4/4c/2014_Bystrzyca_K%C5%82odzka%2C_mury_obronne_05.jpg/200px-2014_Bystrzyca_K%C5%82odzka%2C_mury_obronne_05.jpg
|
||||
source:date,en,https://wiki.openstreetmap.org/wiki/Key:source:date,2023-04-01,11,395,75,10,22.47,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
source:date,fr,https://wiki.openstreetmap.org/wiki/FR:Key:source:date,2023-07-21,10,419,75,11,22.47,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
service,en,https://wiki.openstreetmap.org/wiki/Key:service,2025-03-16,22,1436,218,17,83.79,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
service,fr,https://wiki.openstreetmap.org/wiki/FR:Key:service,2024-03-04,11,443,100,10,83.79,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
addr:state,en,https://wiki.openstreetmap.org/wiki/Key:addr:state,2023-06-23,12,289,74,11,100,https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/WVaCent.jpg/200px-WVaCent.jpg
|
||||
|
|
|
Loading…
Add table
Add a link
Reference in a new issue