from pathlib import Path
import logging
import json
import re
from co_op_translator.utils.common.file_utils import (
    get_unique_id,
    get_filename_and_extension,
    HASH_PREFIX_LENGTH,
)
from pathlib import PurePosixPath
from co_op_translator.utils.common.metadata_utils import (
    extract_metadata_from_content,
    remove_image_metadata,
    remove_text_metadata_for_source,
)
from co_op_translator.config.constants import (
    SUPPORTED_MARKDOWN_EXTENSIONS,
    SUPPORTED_NOTEBOOK_EXTENSIONS,
    SUPPORTED_IMAGE_EXTENSIONS,
)
from co_op_translator.utils.common.lang_utils import normalize_language_code

logger = logging.getLogger(__name__)


class DirectoryManager:
    """
    Manages directory structure and file cleanup for translation project.
    """

    def __init__(
        self,
        root_dir: Path,
        translations_dir: Path,
        language_codes: list[str],
        excluded_dirs: list[str],
        image_dir: Path | None = None,
    ):
        """Initialize directory manager with project configuration.

        Args:
            root_dir: Root directory containing original files
            translations_dir: Directory for translated text files (markdown/notebooks)
            language_codes: List of target language codes
            excluded_dirs: List of directories to exclude
            image_dir: Directory for translated images (flat tree, language code embedded in filename)
        """
        self.root_dir = root_dir
        self.translations_dir = translations_dir
        self.language_codes = language_codes
        self.excluded_dirs = excluded_dirs
        # Default to root_dir / "translated_images" if not provided
        self.image_dir = (
            image_dir
            if image_dir is not None
            else (self.root_dir / "translated_images")
        )

    def sync_directory_structure(
        self, markdown: bool = True, images: bool = True, notebooks: bool = True
    ) -> tuple[int, int, int]:
        """
        Synchronize the directory structure of translations with the original structure.

        Process:
        1. Scan original directory structure
        2. For each language:
           - Create missing directories that exist in original
           - Remove directories that don't exist in original

        Args:
            markdown: Whether to sync markdown directories
            images: Whether to sync image directories

        Returns:
            Tuple containing counts of created directories, removed directories,
            and number of languages synchronized
        """
        created_count = 0
        removed_count = 0

        # Get original directory structure (excluding files)
        original_dirs = set()
        for path in self.root_dir.rglob("*"):
            if path.is_dir() and not any(
                excluded in str(path) for excluded in self.excluded_dirs
            ):
                # Determine if this dir contains relevant file types per flags
                has_md = any(
                    any(path.glob(f"*{ext}")) for ext in SUPPORTED_MARKDOWN_EXTENSIONS
                )
                has_img = any(path.glob("*.png")) or any(path.glob("*.jpg"))
                has_nb = any(
                    any(path.glob(f"*{ext}")) for ext in SUPPORTED_NOTEBOOK_EXTENSIONS
                )

                if not (
                    (markdown and has_md)
                    or (images and has_img)
                    or (notebooks and has_nb)
                ):
                    continue

                # Store relative path for comparison
                original_dirs.add(path.relative_to(self.root_dir))

        # Sync each language directory
        for lang_code in self.language_codes:
            lang_dir = self.translations_dir / lang_code
            if not lang_dir.exists():
                lang_dir.mkdir(parents=True)
                logger.info(f"Created language directory: {lang_dir}")

            # Get existing translation directories
            translation_dirs = set()
            if lang_dir.exists():
                for path in lang_dir.rglob("*"):
                    if path.is_dir():
                        try:
                            translation_dirs.add(path.relative_to(lang_dir))
                        except ValueError:
                            continue

            # Create missing directories
            for orig_dir in original_dirs:
                target_dir = lang_dir / orig_dir
                if not target_dir.exists():
                    target_dir.mkdir(parents=True, exist_ok=True)
                    created_count += 1
                    logger.info(f"Created directory: {target_dir}")

            # Remove extra directories that don't exist in original
            for trans_dir in sorted(
                translation_dirs, reverse=True
            ):  # Sort reverse to handle deep paths first
                if trans_dir not in original_dirs:
                    target_dir = lang_dir / trans_dir
                    try:
                        # Only remove if empty or contains no relevant files
                        has_relevant_files = False
                        if markdown and any(
                            any(target_dir.rglob(f"*{ext}"))
                            for ext in SUPPORTED_MARKDOWN_EXTENSIONS
                        ):
                            has_relevant_files = True
                        if images and (
                            any(target_dir.rglob("*.png"))
                            or any(target_dir.rglob("*.jpg"))
                        ):
                            has_relevant_files = True
                        if notebooks and any(
                            any(target_dir.rglob(f"*{ext}"))
                            for ext in SUPPORTED_NOTEBOOK_EXTENSIONS
                        ):
                            has_relevant_files = True

                        if not has_relevant_files:
                            target_dir.rmdir()  # This will only remove empty directories
                            removed_count += 1
                            logger.info(f"Removed empty directory: {target_dir}")
                        else:
                            logger.info(f"Skipping non-empty directory: {target_dir}")
                    except OSError as e:
                        logger.warning(f"Could not remove directory {target_dir}: {e}")

        return created_count, removed_count, len(self.language_codes)

    def cleanup_orphaned_translations(
        self, markdown: bool = True, images: bool = True, notebooks: bool = True
    ) -> int:
        """Remove orphaned translation files that no longer have source files.

        A translated markdown file or notebook is orphaned when its source file
        no longer exists under ``root_dir``. The source is taken from the
        ``source_file`` entry of the file's inline metadata when present,
        otherwise from the file's path relative to its language directory.
        A translated image is orphaned when its language is not configured or
        when no source image matches the path hash and base name embedded in
        its file name.

        Directories left empty by a deletion are removed, up to (but excluding)
        the language directory or the image directory. A file that cannot be
        deleted is logged and skipped; it does not abort the cleanup.

        Args:
            markdown: Whether to clean up markdown files
            images: Whether to clean up image files
            notebooks: Whether to clean up notebook files

        Returns:
            Number of removed translation files
        """
        logger.info(
            f"Starting cleanup with markdown={markdown}, images={images}, notebooks={notebooks}"
        )

        removed_count = 0
        if markdown:
            removed_count += self._cleanup_orphaned_markdown()
        if notebooks:
            removed_count += self._cleanup_orphaned_notebooks()
        if images:
            removed_count += self._cleanup_orphaned_images()
        return removed_count

    def _cleanup_orphaned_markdown(self) -> int:
        """Delete translated markdown files whose source is gone; return the count."""
        removed_count = 0
        for lang_code in self.language_codes:
            translation_dir = self.translations_dir / lang_code
            if not translation_dir.exists():
                logger.info(f"Translation directory does not exist: {translation_dir}")
                continue

            logger.info(f"Checking translations in: {translation_dir}")

            try:
                md_files = [
                    found
                    for ext in SUPPORTED_MARKDOWN_EXTENSIONS
                    for found in translation_dir.rglob(f"*{ext}")
                ]
            except Exception as e:
                logger.warning(f"Error scanning for MD files: {e}")
                md_files = []

            for trans_file in md_files:
                if not trans_file.exists():
                    continue

                logger.info(f"Processing translation file: {trans_file}")

                # Prefer legacy inline metadata; fall back to the mirrored path
                source_file = None
                try:
                    content = trans_file.read_text(encoding="utf-8")
                    metadata = extract_metadata_from_content(content)
                    source_file = metadata.get("source_file") if metadata else None
                except Exception as e:
                    logger.debug(f"Could not read metadata from {trans_file}: {e}")

                original_file = self._resolve_original_file(
                    trans_file, translation_dir, source_file
                )
                if original_file is None:
                    logger.warning(f"Unable to determine source for: {trans_file}")
                    continue

                logger.info(f"Checking original file: {original_file}")
                if original_file.exists():
                    logger.info(f"Original file exists, keeping: {trans_file}")
                    continue

                logger.info(f"Original file not found, deleting: {trans_file}")
                if self._delete_orphaned_text_file(
                    trans_file,
                    original_file,
                    translation_dir,
                    success_message=f"Successfully deleted: {trans_file}",
                ):
                    removed_count += 1
        return removed_count

    def _cleanup_orphaned_notebooks(self) -> int:
        """Delete translated notebooks whose source is gone; return the count."""
        removed_count = 0
        for lang_code in self.language_codes:
            translation_dir = self.translations_dir / lang_code
            if not translation_dir.exists():
                logger.info(
                    f"Notebook translation directory does not exist: {translation_dir}"
                )
                continue

            logger.info(f"Checking translated notebooks in: {translation_dir}")

            try:
                notebook_files = [
                    found
                    for ext in SUPPORTED_NOTEBOOK_EXTENSIONS
                    for found in translation_dir.rglob(f"*{ext}")
                ]
            except Exception as e:
                logger.warning(f"Error scanning for notebook files: {e}")
                notebook_files = []

            for nb_file in notebook_files:
                if not nb_file.exists():
                    continue

                # Prefer legacy notebook inline metadata; fall back to the mirrored path
                source_file = None
                try:
                    with open(nb_file, "r", encoding="utf-8") as f:
                        nb_json = json.load(f)
                    coop_meta = nb_json.get("metadata", {}).get("coopTranslator", {})
                    source_file = coop_meta.get("source_file")
                except Exception as e:
                    logger.debug(f"Could not read metadata from {nb_file}: {e}")

                original_file = self._resolve_original_file(
                    nb_file, translation_dir, source_file
                )
                if original_file is None:
                    logger.warning(
                        f"Unable to determine source for notebook: {nb_file}"
                    )
                    continue

                if original_file.exists():
                    logger.info(f"Original notebook exists, keeping: {nb_file}")
                    continue

                logger.info(f"Original notebook not found, deleting: {nb_file}")
                if self._delete_orphaned_text_file(
                    nb_file, original_file, translation_dir
                ):
                    removed_count += 1
        return removed_count

    def _cleanup_orphaned_images(self) -> int:
        """Delete translated images without a matching source; return the count."""
        image_dir = self.image_dir
        if not image_dir.exists():
            logger.info(f"Image directory does not exist: {image_dir}")
            return 0

        original_images = self._index_original_images()
        if original_images is None:
            # A partial index would make valid translations look orphaned
            logger.warning(
                f"Skipping image cleanup in {image_dir}: original image scan failed"
            )
            return 0

        # Group originals by the first 16 hash characters so short-hash file
        # names are resolved by lookup instead of scanning every key.
        short_hash_index: dict[str, list[Path]] = {}
        for path_hash, original_path in original_images.items():
            short_hash_index.setdefault(path_hash[:16], []).append(original_path)

        logger.info(f"Checking translated images in: {image_dir}")

        try:
            image_files = list(image_dir.rglob("*"))
        except Exception as e:
            logger.warning(f"Error scanning for image files: {e}")
            image_files = []

        removed_count = 0
        for image_file in image_files:
            if not image_file.is_file():
                continue
            if image_file.suffix.lower() not in SUPPORTED_IMAGE_EXTENSIONS:
                continue

            try:
                parsed = self._parse_translated_image(image_file, image_dir)
                if parsed is None:
                    # Name does not follow a translated-image layout; leave it alone
                    continue
                lang_code, hash_segment, base_name = parsed

                if lang_code not in self.language_codes:
                    removed_label = "image with unsupported language code"
                    failed_label = "image with unsupported language"
                elif self._image_has_source(
                    hash_segment, base_name, original_images, short_hash_index
                ):
                    continue
                else:
                    removed_label = failed_label = "orphaned image"

                if self._delete_translated_image(
                    image_file, image_dir, removed_label, failed_label
                ):
                    removed_count += 1
                    self._remove_empty_parents(
                        image_file.parent, image_dir, log=logger.debug
                    )
            except Exception as e:
                logger.warning(f"Error processing image {image_file}: {e}")
                continue

        return removed_count

    def _resolve_original_file(
        self, translated_file: Path, translation_dir: Path, source_file
    ) -> Path | None:
        """Map a translated file to the source path it was produced from.

        Uses ``source_file`` (a root-relative path that may contain backslashes)
        when it is truthy; otherwise mirrors the translated file's path relative
        to ``translation_dir``. Returns None when neither can be determined.
        """
        if source_file:
            # Normalize backslashes and construct a proper relative Path
            rel_parts = str(source_file).replace("\\", "/").split("/")
            return self.root_dir / Path(*rel_parts)
        try:
            return self.root_dir / translated_file.relative_to(translation_dir)
        except ValueError:
            return None

    def _delete_orphaned_text_file(
        self,
        translated_file: Path,
        original_file: Path,
        translation_dir: Path,
        success_message: str | None = None,
    ) -> bool:
        """Delete one orphaned markdown/notebook translation and tidy up after it.

        Returns True only when this call deleted the file. The centralized
        metadata entry and empty parent directories are cleaned up when the file
        is gone (deleted here or already missing), never when deletion failed.
        """
        deleted = False
        try:
            translated_file.unlink()
            deleted = True
        except FileNotFoundError:
            # Vanished between scan and delete: nothing to count, still tidy up
            pass
        except OSError as e:
            logger.warning(
                f"Failed to delete orphaned translation {translated_file}: {e}"
            )
            return False

        if deleted and success_message:
            logger.info(success_message)

        # Remove centralized metadata entry for this source (best effort)
        try:
            remove_text_metadata_for_source(translation_dir, original_file)
        except Exception as e:
            logger.debug(f"Could not remove text metadata for {original_file}: {e}")

        self._remove_empty_parents(translated_file.parent, translation_dir)
        return deleted

    def _index_original_images(self) -> dict[str, Path] | None:
        """Map the unique id of every non-excluded source image to its path.

        Returns None when the scan fails, so callers can avoid acting on a
        partial index.
        """
        original_images: dict[str, Path] = {}
        try:
            for candidate in self.root_dir.rglob("*"):
                if not candidate.is_file():
                    continue
                try:
                    rel_path = candidate.relative_to(self.root_dir)
                except ValueError:
                    # Outside root_dir
                    continue
                # Skip any files under excluded directories
                if any(part in self.excluded_dirs for part in rel_path.parts):
                    continue
                if candidate.suffix.lower() not in SUPPORTED_IMAGE_EXTENSIONS:
                    continue
                try:
                    original_images[get_unique_id(candidate, self.root_dir)] = (
                        candidate
                    )
                except ValueError:
                    continue
        except Exception as e:
            logger.warning(f"Error scanning for original images: {e}")
            return None
        return original_images

    @staticmethod
    def _parse_translated_image(image_file: Path, image_dir: Path):
        """Split a translated image name into (language, hash segment, base name).

        Two layouts are recognized:
        - folder-based: ``<image_dir>/<lang>/.../<base>.<hash>.<ext>``
        - legacy flat:  ``<image_dir>/<base>.<hash>.<lang>.<ext>``

        Returns None when the name has too few dot-separated parts for its
        layout. The language is returned normalized but not validated.
        """
        parts = image_file.name.split(".")
        if len(parts) < 3:
            return None

        try:
            rel_parts = image_file.relative_to(image_dir).parts
        except ValueError:
            rel_parts = ()

        if len(rel_parts) >= 2:
            # Language comes from the first folder below image_dir (aliases normalized)
            lang_code = normalize_language_code(rel_parts[0])
            return lang_code, parts[-2], ".".join(parts[:-2])

        # Legacy format: base.hash.lang.ext
        if len(parts) < 4:
            return None
        lang_code = normalize_language_code(parts[-2])
        return lang_code, parts[-3], ".".join(parts[:-3])

    @staticmethod
    def _image_has_source(
        hash_segment: str,
        base_name: str,
        original_images: dict[str, Path],
        short_hash_index: dict[str, list[Path]],
    ) -> bool:
        """Return True when a source image matches both the hash segment and base name.

        Only 16- and 64-character lowercase-hex segments (after lowercasing) are
        accepted; a 16-character segment matches any source id starting with it.
        """
        segment = (hash_segment or "").lower()
        if not re.fullmatch(r"[0-9a-f]{16}|[0-9a-f]{64}", segment):
            return False

        if len(segment) == 16:
            candidates = short_hash_index.get(segment, [])
        elif segment in original_images:
            candidates = [original_images[segment]]
        else:
            candidates = []

        return any(
            get_filename_and_extension(candidate)[0] == base_name
            for candidate in candidates
        )

    @staticmethod
    def _delete_translated_image(
        image_file: Path, image_dir: Path, removed_label: str, failed_label: str
    ) -> bool:
        """Delete a translated image and its central metadata entry.

        Returns True when the file was deleted. A metadata failure after a
        successful delete is logged separately and still counts as deleted.
        """
        try:
            image_file.unlink()
        except OSError as e:
            logger.warning(f"Failed to delete {failed_label} {image_file}: {e}")
            return False

        logger.debug(f"Removed {removed_label}: {image_file}")
        try:
            # Remove from central metadata file
            remove_image_metadata(image_file, image_dir)
        except Exception as e:
            logger.warning(
                f"Deleted {image_file} but could not update image metadata: {e}"
            )
        return True

    @staticmethod
    def _remove_empty_parents(start: Path, stop: Path, log=None) -> None:
        """Remove empty directories from ``start`` upwards, stopping before ``stop``.

        The walk ends at the first directory that is missing, non-empty, or
        cannot be removed. ``log`` receives the per-directory success message
        and defaults to ``logger.info``.
        """
        log = logger.info if log is None else log
        current = start
        while current != stop:
            try:
                if not current.exists() or any(current.iterdir()):
                    break
                current.rmdir()
            except OSError as e:
                logger.warning(f"Could not remove directory {current}: {e}")
                break
            log(f"Removed empty directory: {current}")
            current = current.parent

    def migrate_markdown_image_links(self, rename_map: dict[str, str]) -> int:
        """Update translated markdown files to use new image basenames.

        The rename_map keys and values are basenames (no directory component).
        This helper scans all translated markdown files and replaces any
        occurrences of old basenames with the corresponding new basenames.
        """

        # Proceed even if rename_map is empty to apply regex-based rewrites

        updated_files = 0

        try:
            md_files: list[Path] = []
            for lang_code in self.language_codes:
                lang_dir = self.translations_dir / lang_code
                if not lang_dir.exists():
                    continue
                for path in lang_dir.rglob("*"):
                    if (
                        path.is_file()
                        and path.suffix.lower() in SUPPORTED_MARKDOWN_EXTENSIONS
                    ):
                        md_files.append(path)
        except Exception as e:
            logger.warning(
                f"Error scanning markdown files for migration in {self.translations_dir}: {e}"
            )
            return 0

        for md_file in md_files:
            try:
                original_content = md_file.read_text(encoding="utf-8")
            except Exception as e:
                logger.warning(f"Error reading markdown file {md_file}: {e}")
                continue

            migrated_content = original_content
            for old_name, new_name in rename_map.items():
                if old_name in migrated_content:
                    migrated_content = migrated_content.replace(old_name, new_name)

            # Additionally, rewrite legacy flattened image links to folder-based structure via regex
            # Pattern matches: [../]*/<base_dir>/<basename>.<hash>.<lang>.<ext>
            base_dir_name = self.image_dir.name
            pattern = re.compile(
                rf"(?P<prefix>(?:\.\./)*/?)?"
                rf"(?P<bdir>{re.escape(base_dir_name)}|translated_images|translated_images_fast)"
                rf"/"
                rf"(?P<basename>[^/]+?)\.(?P<hash>[0-9a-fA-F]{{16,64}})\.(?P<lang>[a-z]{{2}})(?P<ext>\.(?:png|jpg|jpeg|gif))"
            )

            def _rewrite_legacy(m: re.Match) -> str:
                prefix = m.group("prefix") or ""
                lang = m.group("lang")
                basename = m.group("basename")
                hashseg = m.group("hash")
                ext = m.group("ext")
                # Normalize to configured base dir name and folder-based path
                return f"{prefix}{base_dir_name}/{lang}/{basename}.{hashseg}{ext}"

            migrated_content = pattern.sub(_rewrite_legacy, migrated_content)

            migrated_content = self._rewrite_existing_webp_links(migrated_content)

            if migrated_content != original_content:
                try:
                    md_file.write_text(migrated_content, encoding="utf-8")
                    updated_files += 1
                except Exception as e:
                    logger.warning(
                        f"Error writing migrated markdown file {md_file}: {e}"
                    )

        return updated_files

    def _rewrite_existing_webp_links(self, text: str) -> str:
        """Replace .png/.jpg/.jpeg translated_images links with .webp when available."""

        if not self.image_dir.exists():
            return text

        base_dir_name = self.image_dir.name
        pattern = re.compile(
            rf"(?P<prefix>(?:\.\./)*/?)?"
            rf"(?P<bdir>{re.escape(base_dir_name)}|translated_images|translated_images_fast)"
            rf"/(?P<lang>[A-Za-z0-9-]+)/"
            rf"(?P<basename>[^/]+?)\.(?P<hash>[0-9a-fA-F]{{16,64}})(?P<ext>\.(?:png|jpg|jpeg))"
        )

        def _rewrite_existing(m: re.Match) -> str:
            ext = m.group("ext").lower()
            if ext == ".webp":
                return m.group(0)

            lang = m.group("lang")
            basename = m.group("basename")
            hashseg = m.group("hash")
            prefix = m.group("prefix") or ""

            candidate_hash = (
                hashseg[:HASH_PREFIX_LENGTH]
                if len(hashseg) > HASH_PREFIX_LENGTH
                else hashseg
            )
            candidate_rel = Path(lang) / f"{basename}.{candidate_hash}.webp"
            candidate_path = self.image_dir / candidate_rel

            if candidate_path.exists():
                return (
                    f"{prefix}{base_dir_name}/{lang}/{basename}.{candidate_hash}.webp"
                )

            return m.group(0)

        return pattern.sub(_rewrite_existing, text)

    def migrate_notebook_image_links(self, rename_map: dict[str, str]) -> int:
        # Proceed even if rename_map is empty to apply regex-based rewrites

        updated_files = 0

        try:
            nb_files: list[Path] = []
            for lang_code in self.language_codes:
                lang_dir = self.translations_dir / lang_code
                if not lang_dir.exists():
                    continue
                for path in lang_dir.rglob("*"):
                    if (
                        path.is_file()
                        and path.suffix.lower() in SUPPORTED_NOTEBOOK_EXTENSIONS
                    ):
                        nb_files.append(path)
        except Exception as e:
            logger.warning(
                f"Error scanning notebook files for migration in {self.translations_dir}: {e}"
            )
            return 0

        for nb_file in nb_files:
            try:
                with nb_file.open("r", encoding="utf-8") as f:
                    notebook = json.load(f)
            except Exception as e:
                logger.warning(f"Error reading notebook file {nb_file}: {e}")
                continue

            changed = False

            for cell in notebook.get("cells", []):
                if cell.get("cell_type") != "markdown":
                    continue

                source = cell.get("source", [])
                if isinstance(source, list):
                    original_text = "".join(source)
                else:
                    original_text = str(source)

                migrated_text = original_text
                for old_name, new_name in rename_map.items():
                    if old_name in migrated_text:
                        migrated_text = migrated_text.replace(old_name, new_name)

                # Regex-based rewrite for legacy flattened image links in notebook markdown cells
                base_dir_name = self.image_dir.name
                pattern = re.compile(
                    rf"(?P<prefix>(?:\.\./)*/?)?"
                    rf"(?P<bdir>{re.escape(base_dir_name)}|translated_images|translated_images_fast)"
                    rf"/"
                    rf"(?P<basename>[^/]+?)\.(?P<hash>[0-9a-fA-F]{{16,64}})\.(?P<lang>[a-z]{{2}})(?P<ext>\.(?:png|jpg|jpeg|gif))"
                )

                def _rewrite_legacy_nb(m: re.Match) -> str:
                    prefix = m.group("prefix") or ""
                    lang = m.group("lang")
                    basename = m.group("basename")
                    hashseg = m.group("hash")
                    ext = m.group("ext")
                    return f"{prefix}{base_dir_name}/{lang}/{basename}.{hashseg}{ext}"

                migrated_text = pattern.sub(_rewrite_legacy_nb, migrated_text)

                migrated_text = self._rewrite_existing_webp_links(migrated_text)

                if migrated_text != original_text:
                    changed = True
                    if isinstance(source, list):
                        lines = migrated_text.splitlines(keepends=True)
                        lines = [
                            line if line.endswith("\n") else line + "\n"
                            for line in lines
                        ]
                        cell["source"] = lines
                    else:
                        cell["source"] = migrated_text

            if changed:
                try:
                    with nb_file.open("w", encoding="utf-8") as f:
                        json.dump(notebook, f, ensure_ascii=False, indent=1)
                    updated_files += 1
                except Exception as e:
                    logger.warning(
                        f"Error writing migrated notebook file {nb_file}: {e}"
                    )

        return updated_files
