203 lines
6.6 KiB
PHP
203 lines
6.6 KiB
PHP
<?php
|
|
|
|
namespace App\Command;
|
|
|
|
use App\Entity\Demande;
|
|
use App\Entity\Place;
|
|
use App\Repository\DemandeRepository;
|
|
use App\Repository\PlaceRepository;
|
|
use Doctrine\ORM\EntityManagerInterface;
|
|
use Symfony\Component\Console\Attribute\AsCommand;
|
|
use Symfony\Component\Console\Command\Command;
|
|
use Symfony\Component\Console\Input\InputInterface;
|
|
use Symfony\Component\Console\Input\InputOption;
|
|
use Symfony\Component\Console\Output\OutputInterface;
|
|
use Symfony\Component\Console\Style\SymfonyStyle;
|
|
|
|
/**
 * Console command that attaches a Place to every Demande that has none yet.
 *
 * For each unlinked Demande, candidate Places are fetched with a SQL LIKE on
 * the Demande's query text (optionally restricted to the Demande's INSEE
 * zone). Candidates are then scored with a Levenshtein-based similarity on
 * normalized names, and the best candidate is linked when its score reaches
 * the configured threshold.
 *
 * Options:
 *  - --threshold=N  minimum similarity (0-100) required to link, default 70
 *  - --dry-run      print the matches without modifying any entity
 */
#[AsCommand(
    name: 'app:link-demandes-places',
    description: 'Link Demandes to Places based on name similarity',
)]
class LinkDemandesPlacesCommand extends Command
{
    public function __construct(
        private EntityManagerInterface $entityManager,
        private DemandeRepository $demandeRepository,
        private PlaceRepository $placeRepository
    ) {
        parent::__construct();
    }

    /**
     * Declare the command-line options accepted by this command.
     */
    protected function configure(): void
    {
        $this
            ->addOption('threshold', null, InputOption::VALUE_REQUIRED, 'Similarity threshold (0-100)', 70)
            ->addOption('dry-run', null, InputOption::VALUE_NONE, 'Show matches without linking');
    }

    /**
     * Link every Demande without a Place to its most similar Place.
     *
     * @return int Command::FAILURE when the threshold option is invalid,
     *             Command::SUCCESS otherwise (including when nothing was linked)
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $io->title('Linking Demandes to Places based on name similarity');

        $rawThreshold = $input->getOption('threshold');
        $dryRun = (bool) $input->getOption('dry-run');

        // A bare (int) cast would silently turn non-numeric input such as
        // "abc" into 0 and accept it, so check that the value is numeric first.
        $threshold = is_numeric($rawThreshold) ? (int) $rawThreshold : null;
        if ($threshold === null || $threshold < 0 || $threshold > 100) {
            $io->error('Threshold must be between 0 and 100');

            return Command::FAILURE;
        }

        // Find all Demandes without linked Places
        $demandesWithoutPlace = $this->demandeRepository->createQueryBuilder('d')
            ->where('d.place IS NULL')
            ->getQuery()
            ->getResult();

        if (empty($demandesWithoutPlace)) {
            $io->warning('No Demandes without linked Places found.');

            return Command::SUCCESS;
        }

        $io->info(sprintf('Found %d Demandes without linked Places.', count($demandesWithoutPlace)));

        $linkedCount = 0;
        /** @var Demande $demande */
        foreach ($demandesWithoutPlace as $demande) {
            $query = $demande->getQuery();

            // Explicit comparison instead of empty(): the string "0" is a
            // legitimate query and must not be skipped.
            if ($query === null || $query === '') {
                continue;
            }

            $places = $this->findSimilarPlaces($query, $demande->getInsee());
            if ($places === []) {
                continue;
            }

            [$bestMatch, $bestSimilarity] = $this->findBestMatch($query, $places);
            if ($bestMatch === null || $bestSimilarity < $threshold) {
                continue;
            }

            $io->text(sprintf(
                'Match found: "%s" (Demande) -> "%s" (Place) with similarity %d%%',
                $query,
                $bestMatch->getName(),
                $bestSimilarity
            ));

            if ($dryRun) {
                continue;
            }

            $demande->setPlace($bestMatch);
            $demande->setPlaceUuid($bestMatch->getUuidForUrl());
            $demande->setStatus('linked_to_place');
            $this->entityManager->persist($demande);
            $linkedCount++;
        }

        if ($dryRun) {
            $io->info('Dry run completed. No changes were made.');
        } elseif ($linkedCount > 0) {
            // All links are written in a single flush at the end of the run.
            $this->entityManager->flush();
            $io->success(sprintf('Linked %d Demandes to Places.', $linkedCount));
        } else {
            $io->info('No Demandes were linked to Places.');
        }

        return Command::SUCCESS;
    }

    /**
     * Pick the candidate Place whose name is most similar to the query.
     *
     * On equal scores the first candidate encountered wins. A candidate
     * scoring 0 is never selected.
     *
     * @param Place[] $places candidates returned by findSimilarPlaces()
     *
     * @return array{0: ?Place, 1: int} best candidate (or null) and its score
     */
    private function findBestMatch(string $query, array $places): array
    {
        $bestMatch = null;
        $bestSimilarity = 0;

        foreach ($places as $place) {
            $similarity = $this->calculateSimilarity($query, $place->getName());
            if ($similarity > $bestSimilarity) {
                $bestSimilarity = $similarity;
                $bestMatch = $place;
            }
        }

        return [$bestMatch, $bestSimilarity];
    }

    /**
     * Find Places with names similar to the query
     *
     * Only Places whose name is not null and matches LIKE '%query%' are
     * returned; when an INSEE code is given, the Place must also have a
     * joined stats row whose zone equals that code.
     *
     * @return Place[]
     */
    private function findSimilarPlaces(string $query, ?int $insee): array
    {
        $queryBuilder = $this->placeRepository->createQueryBuilder('p')
            ->where('p.name IS NOT NULL');

        // If INSEE code is available, filter by Stats zone
        // NOTE(review): the code arrives as ?int, so any leading zero is
        // already lost before the string cast below — confirm that the zone
        // column stores codes in the same form.
        if ($insee !== null) {
            $queryBuilder
                ->join('p.stats', 's')
                ->andWhere('s.zone = :insee')
                ->setParameter('insee', (string) $insee);
        }

        // Use LIKE for initial filtering to reduce the number of results
        $queryBuilder
            ->andWhere('p.name LIKE :query')
            ->setParameter('query', '%' . $this->sanitizeForLike($query) . '%');

        return $queryBuilder->getQuery()->getResult();
    }

    /**
     * Calculate similarity between two strings (0-100)
     *
     * Both strings are normalized first; the score is
     * (1 - levenshtein / longest length) * 100, truncated to an integer.
     * Returns 0 when either string is empty after normalization.
     */
    private function calculateSimilarity(string $str1, string $str2): int
    {
        $str1 = $this->normalizeString($str1);
        $str2 = $this->normalizeString($str2);

        // Explicit comparison instead of empty(): a name that normalizes to
        // "0" is a real value and must still be scored.
        if ($str1 === '' || $str2 === '') {
            return 0;
        }

        // normalizeString() leaves only a-z, 0-9 and single spaces, so the
        // byte-based levenshtein()/strlen() count characters correctly here.
        $distance = levenshtein($str1, $str2);
        $maxLength = max(strlen($str1), strlen($str2));

        return (int) ((1 - $distance / $maxLength) * 100);
    }

    /**
     * Normalize a string for comparison
     *
     * Lowercases, transliterates to ASCII, drops every character other than
     * a-z, 0-9 and whitespace, collapses whitespace runs to one space and
     * trims the result.
     */
    private function normalizeString(string $str): string
    {
        $lowered = mb_strtolower($str);

        // Lower() runs after the ASCII step so that capitals introduced by
        // transliteration are kept instead of being stripped by the filter below.
        $ascii = transliterator_transliterate('Any-Latin; Latin-ASCII; Lower()', $lowered);
        if ($ascii === false) {
            // Transliteration failed: fall back to the lowercased input; the
            // filter below then simply drops whatever is not plain ASCII.
            $ascii = $lowered;
        }

        // preg_replace() returns null on error; treat that as an empty string.
        $ascii = preg_replace('/[^a-z0-9\s]/', '', $ascii) ?? '';
        $ascii = preg_replace('/\s+/', ' ', $ascii) ?? '';

        return trim($ascii);
    }

    /**
     * Sanitize a string for use in LIKE queries
     *
     * Escapes the LIKE wildcards % and _ as well as the backslash itself, so
     * the value is matched literally.
     *
     * NOTE(review): this assumes the database uses backslash as the LIKE
     * escape character (the query sets no ESCAPE clause) — confirm for the
     * platform in use.
     */
    private function sanitizeForLike(string $str): string
    {
        // The backslash must be replaced first, otherwise the backslashes
        // added for % and _ would themselves be doubled.
        return str_replace(['\\', '%', '_'], ['\\\\', '\%', '\_'], $str);
    }
}
|