osm-labo/src/Command/LinkDemandesPlacesCommand.php
2025-07-16 23:01:13 +02:00

203 lines
6.6 KiB
PHP

<?php
namespace App\Command;
use App\Entity\Demande;
use App\Entity\Place;
use App\Repository\DemandeRepository;
use App\Repository\PlaceRepository;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
/**
 * Console command that links each Demande without a Place to the Place
 * whose name is most similar to the Demande's query string.
 *
 * Candidates are pre-filtered in the database with a LIKE clause (and by
 * Stats zone when the Demande carries an INSEE code), then ranked in PHP by
 * a normalized Levenshtein similarity expressed as a percentage (0-100).
 */
#[AsCommand(
    name: 'app:link-demandes-places',
    description: 'Link Demandes to Places based on name similarity',
)]
class LinkDemandesPlacesCommand extends Command
{
    /**
     * @param EntityManagerInterface $entityManager     Used to persist and flush linked Demandes.
     * @param DemandeRepository      $demandeRepository Source of Demandes that have no Place yet.
     * @param PlaceRepository        $placeRepository   Source of candidate Places.
     */
    public function __construct(
        private EntityManagerInterface $entityManager,
        private DemandeRepository $demandeRepository,
        private PlaceRepository $placeRepository
    ) {
        parent::__construct();
    }

    /**
     * Declare the --threshold and --dry-run options.
     */
    protected function configure(): void
    {
        $this
            ->addOption('threshold', null, InputOption::VALUE_REQUIRED, 'Similarity threshold (0-100)', 70)
            ->addOption('dry-run', null, InputOption::VALUE_NONE, 'Show matches without linking');
    }

    /**
     * Find unlinked Demandes, pick the best-matching Place for each one and,
     * unless --dry-run is set, link them and flush once at the end.
     *
     * @return int Command::SUCCESS, or Command::FAILURE when the threshold option is invalid.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $io->title('Linking Demandes to Places based on name similarity');

        $rawThreshold = $input->getOption('threshold');
        $dryRun = (bool) $input->getOption('dry-run');

        // Reject non-numeric input explicitly: a bare (int) cast would turn
        // e.g. "abc" into 0 and silently accept almost every candidate.
        if (!is_numeric($rawThreshold) || (int) $rawThreshold < 0 || (int) $rawThreshold > 100) {
            $io->error('Threshold must be between 0 and 100');

            return Command::FAILURE;
        }
        $threshold = (int) $rawThreshold;

        // Find all Demandes without linked Places
        $demandesWithoutPlace = $this->demandeRepository->createQueryBuilder('d')
            ->where('d.place IS NULL')
            ->getQuery()
            ->getResult();

        if (count($demandesWithoutPlace) === 0) {
            $io->warning('No Demandes without linked Places found.');

            return Command::SUCCESS;
        }

        $io->info(sprintf('Found %d Demandes without linked Places.', count($demandesWithoutPlace)));

        $linkedCount = 0;

        /** @var Demande $demande */
        foreach ($demandesWithoutPlace as $demande) {
            $query = $demande->getQuery();

            // Nothing to compare against when the Demande has no usable query.
            if ($query === null || trim($query) === '') {
                continue;
            }

            $places = $this->findSimilarPlaces($query, $demande->getInsee());
            if (count($places) === 0) {
                continue;
            }

            [$bestMatch, $bestSimilarity] = $this->findBestMatch($query, $places);

            // Only candidates at or above the threshold are reported/linked.
            if ($bestMatch === null || $bestSimilarity < $threshold) {
                continue;
            }

            $io->text(sprintf(
                'Match found: "%s" (Demande) -> "%s" (Place) with similarity %d%%',
                $query,
                $bestMatch->getName(),
                $bestSimilarity
            ));

            if ($dryRun) {
                continue;
            }

            $demande->setPlace($bestMatch);
            $demande->setPlaceUuid($bestMatch->getUuidForUrl());
            $demande->setStatus('linked_to_place');
            $this->entityManager->persist($demande);
            $linkedCount++;
        }

        if (!$dryRun && $linkedCount > 0) {
            // Single flush for the whole batch.
            $this->entityManager->flush();
            $io->success(sprintf('Linked %d Demandes to Places.', $linkedCount));
        } elseif ($dryRun) {
            $io->info('Dry run completed. No changes were made.');
        } else {
            $io->info('No Demandes were linked to Places.');
        }

        return Command::SUCCESS;
    }

    /**
     * Pick the candidate whose name is most similar to the query.
     *
     * The first candidate wins on ties, and a candidate with similarity 0 is
     * never selected.
     *
     * @param string  $query  Demande query string.
     * @param Place[] $places Candidate Places.
     *
     * @return array{0: ?Place, 1: int} Best Place (or null) and its similarity (0-100).
     */
    private function findBestMatch(string $query, array $places): array
    {
        $bestMatch = null;
        $bestSimilarity = 0;

        foreach ($places as $place) {
            $similarity = $this->calculateSimilarity($query, $place->getName() ?? '');
            if ($similarity > $bestSimilarity) {
                $bestSimilarity = $similarity;
                $bestMatch = $place;
            }
        }

        return [$bestMatch, $bestSimilarity];
    }

    /**
     * Find Places whose name contains the query, optionally restricted to
     * the Stats zone matching the given INSEE code.
     *
     * @param string   $query Text that the Place name must contain.
     * @param int|null $insee INSEE code used to filter by Stats zone, or null for no zone filter.
     *
     * @return Place[]
     */
    private function findSimilarPlaces(string $query, ?int $insee): array
    {
        $queryBuilder = $this->placeRepository->createQueryBuilder('p')
            ->where('p.name IS NOT NULL');

        // If INSEE code is available, filter by Stats zone
        if ($insee !== null) {
            // NOTE(review): an int INSEE code cast to string has no leading
            // zero (e.g. 01001 becomes "1001"); confirm this matches how
            // Stats zones are stored.
            $queryBuilder
                ->join('p.stats', 's')
                ->andWhere('s.zone = :insee')
                ->setParameter('insee', (string) $insee);
        }

        // Use LIKE for initial filtering to reduce the number of results
        $queryBuilder
            ->andWhere('p.name LIKE :query')
            ->setParameter('query', '%' . $this->sanitizeForLike($query) . '%');

        return $queryBuilder->getQuery()->getResult();
    }

    /**
     * Calculate similarity between two strings as a percentage (0-100).
     *
     * Both strings are normalized first; the score is
     * floor((maxLength - levenshtein) * 100 / maxLength).
     *
     * @return int 0 when either normalized string is empty, 100 when they are equal.
     */
    private function calculateSimilarity(string $str1, string $str2): int
    {
        $str1 = $this->normalizeString($str1);
        $str2 = $this->normalizeString($str2);

        // If either string is empty after normalization, return 0
        if ($str1 === '' || $str2 === '') {
            return 0;
        }

        $distance = levenshtein($str1, $str2);
        // Byte length is used because normalization strips every byte
        // outside [a-z0-9] and whitespace.
        $maxLength = max(strlen($str1), strlen($str2));

        // Integer arithmetic avoids float representation error, which could
        // otherwise truncate an exact score one point too low.
        return intdiv(($maxLength - $distance) * 100, $maxLength);
    }

    /**
     * Normalize a string for comparison: lowercase, transliterate to ASCII,
     * drop everything except a-z, 0-9 and whitespace, collapse whitespace
     * runs and trim.
     */
    private function normalizeString(string $str): string
    {
        $lowered = mb_strtolower($str);

        // Remove accents; keep the lowercased text if transliteration fails.
        $ascii = transliterator_transliterate('Any-Latin; Latin-ASCII', $lowered);
        if ($ascii === false) {
            $ascii = $lowered;
        }

        // preg_replace() returns null on error; treat that as an empty string.
        $ascii = preg_replace('/[^a-z0-9\s]/', '', $ascii) ?? '';
        $ascii = preg_replace('/\s+/', ' ', $ascii) ?? '';

        return trim($ascii);
    }

    /**
     * Escape LIKE wildcards (% and _) and the backslash escape character
     * itself so the string is matched literally.
     *
     * NOTE(review): this relies on the database using backslash as the
     * default LIKE escape character, since the query has no ESCAPE clause.
     */
    private function sanitizeForLike(string $str): string
    {
        // strtr() with an array replaces in a single pass, so backslashes
        // added for % and _ are not escaped a second time.
        return strtr($str, [
            '\\' => '\\\\',
            '%' => '\\%',
            '_' => '\\_',
        ]);
    }
}