如果行和列的位置同时出现在以下三个列表中的同一个列表里,则矩阵对应位置为 1,其余位置全为 0。
代码如下:
import numpy as np


def _parse_domain_ranges(text):
    """Parse domain text into one list of inclusive (start, end) ranges per domain.

    Every ``[...]`` group in *text* is one domain, whether the groups share a
    line or are separated by newlines. Inside a group, comma-separated tokens
    are either ``start-end`` or a single residue number ``n`` (treated as
    ``n-n``). Empty tokens (e.g. from ``[]``) are skipped.
    """
    domains = []
    # Everything before the first "[" is not part of any group, hence [1:].
    for chunk in text.split("[")[1:]:
        group_body = chunk.split("]", 1)[0]
        ranges = []
        for token in group_body.split(","):
            token = token.strip()
            if not token:
                continue
            parts = token.split("-")
            start = int(parts[0])
            end = int(parts[1]) if len(parts) > 1 else start
            ranges.append((start, end))
        domains.append(ranges)
    return domains


def generateMaskBasedOnDom(dom_path: str, length: int) -> np.ndarray:
    """Build a same-domain mask matrix from a domain definition file.

    The file at *dom_path* lists domains as bracketed groups of inclusive
    residue ranges, for example::

        [7-56,239-327,438-454,522-556,574-586]
        [57-85,96-112,221-238]
        [113-220,328-437,455-521,557-573]

    Each ``[...]`` is one domain. Groups may be on separate lines or on the
    same line, and any number of domains is accepted. Blank lines are ignored.

    Residue numbers are used directly as row/column indices of the result
    (residue ``k`` maps to index ``k``). Residues that fall outside
    ``0 .. length - 1`` are ignored.

    :param dom_path: path of the domain definition file (read as UTF-8)
    :param length: length of the protein; the mask has shape (length, length)
    :return: float matrix of shape (length, length) in which entry
        ``[i][j]`` is 1 when residues ``i`` and ``j`` belong to the same
        domain and 0 otherwise
    :raises ValueError: if a range token contains a non-integer bound
    """
    with open(dom_path, "r", encoding="utf-8") as file:
        domains = _parse_domain_ranges(file.read())

    mask = np.zeros((length, length))
    for ranges in domains:
        # Boolean membership vector: member[k] is True when residue k is in
        # this domain. This replaces per-cell list scans.
        member = np.zeros(length, dtype=bool)
        for start, end in ranges:
            low = max(start, 0)
            high = min(end, length - 1)
            if low <= high:
                member[low:high + 1] = True
        # A pair (i, j) is intra-domain when both residues are members.
        same_domain = member[:, np.newaxis] & member[np.newaxis, :]
        mask[same_domain] = 1
    return mask


if __name__ == "__main__":
    # If there is no dom file yet, create one first.
    with open("dom.txt", "w", encoding="utf-8") as f:
        f.write(
            "[7-56,239-327,438-454,522-556,574-586]" + "\n"
            + "[57-85,96-112,221-238]" + "\n"
            + "[113-220,328-437,455-521,557-573]"
        )
    file_path = "./dom.txt"
    protein_length = 1000  # mask_matrix size
    mask_matrix = generateMaskBasedOnDom(file_path, protein_length)
    print("*************Generate Mask Matrix Successful!*************")

    # Spot-check a few entries.
    print(mask_matrix[7][56])  # 1
    print(mask_matrix[7][239])  # 1
    print(mask_matrix[8][57])  # 0
    print(mask_matrix[57][95])  # 0
    print(mask_matrix[113][573])  # 1