import hashlib
from typing import Any, Dict, List

def str_to_identifier(x: str) -> str:
    """Map a string to a short hexadecimal identifier.

    The identifier is the MD5 hex digest of the UTF-8 encoded input. It is
    short enough to embed in file names, and the probability that two
    different inputs share an identifier is negligible.

    Unlike python's built-in hash function, the result is deterministic
    between runs and between platforms.

    References:
        https://stackoverflow.com/questions/45015180
        https://stackoverflow.com/questions/5297448
    """
    encoded = x.encode('utf-8')
    digest = hashlib.md5(encoded)
    return digest.hexdigest()

def match_and_remove_first_occurrence(main_string: str, sub_string: str):
    """Find the first occurrence of ``sub_string`` and cut it out.

    Returns:
        A ``(index, string)`` tuple. ``index`` is the position of the first
        occurrence of ``sub_string`` in ``main_string`` and ``string`` is
        ``main_string`` with that occurrence removed. If there is no
        occurrence, ``index`` is -1 and ``string`` is ``main_string``
        unchanged.
    """
    position = main_string.find(sub_string)

    # Guard clause: nothing to remove when the substring is absent.
    if position == -1:
        return position, main_string

    tail_start = position + len(sub_string)
    return position, main_string[:position] + main_string[tail_start:]
    
def similar_action(action: str, action_list: List[str], word2vec_model, lemmatizer) -> str:
    """Resolve ``action`` to the best-matching entry of ``action_list``.

    If the upper-cased ``action`` is itself listed in ``action_list`` it is
    returned (lower-cased) directly. Otherwise each entry of ``action_list``
    is lower-cased, lemmatized as a verb and compared with the lower-cased
    ``action`` via ``word2vec_model.similarity``; the entry with the highest
    similarity wins (the first one on ties).

    Args:
        action: The word to resolve.
        action_list: Candidate action names. The exact-match test compares
            against ``action.upper()``, so entries are presumably upper-case
            (confirm against callers).
        word2vec_model: Object supporting ``word in model`` and
            ``model.similarity(word_a, word_b)`` (presumably gensim
            ``KeyedVectors`` or similar; confirm against callers).
        lemmatizer: Object exposing ``lemmatize(word, pos='v')`` (presumably
            an NLTK ``WordNetLemmatizer``; confirm against callers).

    Returns:
        The lower-cased matching entry of ``action_list``.

    Raises:
        ValueError: If ``action`` is not in the model's vocabulary, or if no
            entry of ``action_list`` can be scored by the model.
    """
    if action.upper() in action_list:
        return action.lower()

    # Check the vocabulary with the same lower-cased key that is passed to
    # ``similarity`` below, so the check and the query cannot disagree.
    query = action.lower()
    if query not in word2vec_model:
        # Raising a bare string is itself a TypeError in Python 3.
        raise ValueError(
            "wrong action! {!r} is not in the word2vec vocabulary".format(action)
        )

    closest_word = None
    max_similarity = None
    for target_word in action_list:
        # Look up the lemma, not the raw entry: the lemma is what gets scored.
        candidate = lemmatizer.lemmatize(target_word.lower(), pos='v')
        if candidate not in word2vec_model:
            continue
        similarity = word2vec_model.similarity(query, candidate)
        if max_similarity is None or similarity > max_similarity:
            max_similarity = similarity
            closest_word = target_word

    if closest_word is None:
        raise ValueError(
            "no entry of action_list could be compared with {!r}".format(action)
        )
    return closest_word.lower()