import os
import re

# Directory that clean_filenames() lists and whose entries it renames in place.
target_dir = '/home/book/quant_book/gemini_gen/A_quant'

def _clean_name(filename, extension):
    """Return the sanitized form of *filename*, or None if it should be skipped.

    Every character other than CJK ideographs (U+4E00-U+9FA5), ASCII letters,
    ASCII digits, '.', '_' and '-' is removed. The result must then contain
    ``.<extension>`` followed only by dot-free residue; that residue is
    dropped so the name ends exactly with the extension. None is returned
    when the name does not carry the extension or when nothing is left of
    the stem after cleaning.
    """
    cleaned = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9._-]', '', filename)
    suffix = '.' + extension
    # The greedy stem anchors on the LAST ".ext". The residue may not contain
    # a dot, so "x.docx.bak" (a different file type) is left alone while
    # "x.docx0m" (leftover garbage after the extension) is repaired.
    match = re.fullmatch(r'(.*)' + re.escape(suffix) + r'[^.]*', cleaned)
    if match is None or not match.group(1):
        return None
    return match.group(1) + suffix


def clean_filenames(directory=None, extension='docx'):
    """Rename entries of a directory so their names contain only safe characters.

    Only entries whose cleaned name carries the dotted extension are touched;
    a name that merely contains the extension text somewhere (for example
    ``docx_notes.txt``) is ignored. An existing entry is never overwritten:
    if the cleaned name is already taken, the dirty entry is left as is and
    a message is printed.

    Args:
        directory: Directory to process. Defaults to the module-level
            ``target_dir``.
        extension: File extension to look for, with or without the leading
            dot. Matching is case-sensitive.

    Returns:
        The number of entries that were renamed (0 if the directory is
        missing).

    Raises:
        ValueError: If *extension* is empty.
    """
    if directory is None:
        directory = target_dir

    extension = extension.lstrip('.')
    if not extension:
        raise ValueError("extension must not be empty")

    if not os.path.isdir(directory):
        print(f"Error: Directory {directory} does not exist.")
        return 0

    files_renamed = 0

    # Sorted so that the outcome of a name collision does not depend on the
    # arbitrary order in which the OS lists the directory.
    for filename in sorted(os.listdir(directory)):
        new_name = _clean_name(filename, extension)
        if new_name is None or new_name == filename:
            continue

        old_path = os.path.join(directory, filename)
        new_path = os.path.join(directory, new_name)

        # os.rename() silently replaces an existing file on POSIX, so check
        # first; lexists() also catches dangling symlinks.
        if os.path.lexists(new_path):
            print(f"Skipped: '{filename}' -> '{new_name}' (target already exists)")
            continue

        try:
            os.rename(old_path, new_path)
        except OSError as e:
            print(f"Error renaming {filename}: {e}")
        else:
            print(f"Renamed: '{filename}' -> '{new_name}'")
            files_renamed += 1

    print(f"\nTask complete. Total files renamed: {files_renamed}")
    return files_renamed

# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    clean_filenames()
