entityManager = $entityManager;
        $this->demandeRepository = $demandeRepository;
        $this->placeRepository = $placeRepository;
    }

    /**
     * Declare the command-line options.
     *
     * - --threshold: minimum similarity (0-100) required to link; defaults to 70.
     * - --dry-run:   print the matches without modifying any Demande.
     */
    protected function configure(): void
    {
        $this
            ->addOption('threshold', null, InputOption::VALUE_REQUIRED, 'Similarity threshold (0-100)', 70)
            ->addOption('dry-run', null, InputOption::VALUE_NONE, 'Show matches without linking');
    }

    /**
     * Link every Demande that has no Place to the most similar Place, when
     * that Place's name similarity reaches the configured threshold.
     *
     * In dry-run mode the matches are only printed; nothing is modified or flushed.
     *
     * @return int Command::FAILURE when the threshold option is invalid, Command::SUCCESS otherwise
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $io->title('Linking Demandes to Places based on name similarity');

        $rawThreshold = $input->getOption('threshold');
        $dryRun = (bool) $input->getOption('dry-run');

        // Check that the value is numeric BEFORE casting: (int) 'abc' is 0, which
        // would pass the range check and silently run with a threshold of 0.
        $threshold = is_numeric($rawThreshold) ? (int) $rawThreshold : null;

        if ($threshold === null || $threshold < 0 || $threshold > 100) {
            $io->error('Threshold must be between 0 and 100');

            return Command::FAILURE;
        }

        // Find all Demandes without linked Places
        $demandesWithoutPlace = $this->demandeRepository->createQueryBuilder('d')
            ->where('d.place IS NULL')
            ->getQuery()
            ->getResult();

        if (empty($demandesWithoutPlace)) {
            $io->warning('No Demandes without linked Places found.');

            return Command::SUCCESS;
        }

        $io->info(sprintf('Found %d Demandes without linked Places.', count($demandesWithoutPlace)));

        // Process each Demande
        $linkedCount = 0;

        /** @var Demande $demande */
        foreach ($demandesWithoutPlace as $demande) {
            $query = $demande->getQuery();
            $insee = $demande->getInsee();

            // Explicit checks instead of empty(): empty('0') is true and would
            // skip a Demande whose query is the literal string "0".
            if ($query === null || $query === '') {
                continue;
            }

            // Find Places with similar names
            $places = $this->findSimilarPlaces($query, $insee);

            if (empty($places)) {
                continue;
            }

            [$bestMatch, $bestSimilarity] = $this->findBestMatch($query, $places);

            // Only link when a candidate exists and reaches the threshold
            if ($bestMatch === null || $bestSimilarity < $threshold) {
                continue;
            }

            $io->text(sprintf(
                'Match found: "%s" (Demande) -> "%s" (Place) with similarity %d%%',
                $query,
                $bestMatch->getName(),
                $bestSimilarity
            ));

            if (!$dryRun) {
                $demande->setPlace($bestMatch);
                $demande->setPlaceUuid($bestMatch->getUuidForUrl());
                $demande->setStatus('linked_to_place');

                $this->entityManager->persist($demande);
                $linkedCount++;
            }
        }

        if (!$dryRun && $linkedCount > 0) {
            // Single flush at the end writes all links in one go
            $this->entityManager->flush();
            $io->success(sprintf('Linked %d Demandes to Places.', $linkedCount));
        } elseif ($dryRun) {
            $io->info('Dry run completed. No changes were made.');
        } else {
            $io->info('No Demandes were linked to Places.');
        }

        return Command::SUCCESS;
    }

    /**
     * Pick the candidate Place whose name is most similar to the query.
     *
     * @param string $query  Text to compare against each candidate name
     * @param array  $places Candidates, as returned by findSimilarPlaces()
     *
     * @return array Two-element list: [best candidate or null, its similarity (0-100)]
     */
    private function findBestMatch(string $query, array $places): array
    {
        $bestMatch = null;
        $bestSimilarity = 0;

        foreach ($places as $place) {
            $similarity = $this->calculateSimilarity($query, $place->getName());

            // Strict ">": the first candidate wins ties, and a candidate scoring 0
            // is never selected.
            if ($similarity > $bestSimilarity) {
                $bestSimilarity = $similarity;
                $bestMatch = $place;
            }
        }

        return [$bestMatch, $bestSimilarity];
    }

    /**
     * Find Places whose name contains the query as a substring, optionally
     * restricted to the Stats zone equal to the given INSEE code.
     *
     * This is only a coarse pre-filter; ranking is done with calculateSimilarity().
     *
     * @param string   $query Text that must appear in the Place name
     * @param int|null $insee INSEE code used to filter on the joined stats zone, or null for no filter
     *
     * @return array Matching Place entities
     */
    private function findSimilarPlaces(string $query, ?int $insee): array
    {
        $queryBuilder = $this->placeRepository->createQueryBuilder('p')
            ->where('p.name IS NOT NULL');

        // If INSEE code is available, filter by Stats zone
        if ($insee !== null) {
            // NOTE(review): the code is an int cast back to string, so a leading
            // zero is lost; confirm that s.zone is stored without zero padding.
            $queryBuilder
                ->join('p.stats', 's')
                ->andWhere('s.zone = :insee')
                ->setParameter('insee', (string) $insee);
        }

        // Use LIKE for initial filtering to reduce the number of results.
        // NOTE(review): the backslash escapes produced by sanitizeForLike() assume
        // the database uses "\" as its default LIKE escape character - confirm
        // for the platform in use.
        $queryBuilder
            ->andWhere('p.name LIKE :query')
            ->setParameter('query', '%' . $this->sanitizeForLike($query) . '%');

        return $queryBuilder->getQuery()->getResult();
    }

    /**
     * Calculate the similarity between two strings as a percentage (0-100).
     *
     * Both strings are normalized first; the score is derived from the
     * Levenshtein distance relative to the longer normalized string, rounded down.
     *
     * @return int 0 when either string is empty after normalization, otherwise 0-100
     */
    private function calculateSimilarity(string $str1, string $str2): int
    {
        // Normalize strings for comparison
        $str1 = $this->normalizeString($str1);
        $str2 = $this->normalizeString($str2);

        // Compare against '' rather than using empty(): empty('0') is true and
        // would wrongly score a name that normalizes to "0" as no match.
        if ($str1 === '' || $str2 === '') {
            return 0;
        }

        $distance = levenshtein($str1, $str2);

        // Before PHP 8.0, levenshtein() returns -1 for inputs longer than 255
        // characters; treat that as "no similarity" rather than scoring above 100.
        if ($distance < 0) {
            return 0;
        }

        // Normalized strings contain only ASCII bytes, so strlen() counts characters.
        $maxLength = max(strlen($str1), strlen($str2));

        // Integer arithmetic keeps the result exact. The float form
        // (1 - 9/10) * 100 evaluates to 9.999999999999998 and truncates to 9.
        return intdiv(($maxLength - $distance) * 100, $maxLength);
    }

    /**
     * Normalize a string for comparison: lowercase, transliterate to ASCII,
     * drop everything except a-z, 0-9 and whitespace, collapse whitespace runs
     * into a single space, and trim.
     */
    private function normalizeString(string $str): string
    {
        // Convert to lowercase
        $str = mb_strtolower($str);

        // Remove accents. transliterator_transliterate() returns false on failure;
        // in that case keep the lowercased text and let the regex below strip
        // whatever is not plain ASCII.
        $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII', $str);
        if ($transliterated !== false) {
            $str = $transliterated;
        }

        // Remove special characters and extra spaces. preg_replace() returns null
        // on error; fall back to an empty string so the return type holds.
        $str = preg_replace('/[^a-z0-9\s]/', '', $str) ?? '';
        $str = preg_replace('/\s+/', ' ', $str) ?? '';

        return trim($str);
    }

    /**
     * Escape the characters that are special inside a LIKE pattern so the
     * input is matched literally.
     *
     * The backslash is escaped along with "%" and "_"; otherwise a backslash in
     * the input would itself act as the escape character and alter the pattern.
     */
    private function sanitizeForLike(string $str): string
    {
        return addcslashes($str, '\\%_');
    }
}