当前位置:首页 >> 脚本专栏

python实现mask矩阵示例(根据列表所给元素)

行和列的位置都在以下三个列表中的一列中,则对应位置为1,其余位置全为0

"htmlcode">

def generateMaskBasedOnDom(dom_path, length):
  """
  :param dom_path: this is a file path, which contains the following information:
  [7-56,239-327,438-454,522-556,574-586][57-85,96-112,221-238][113-220,328-437,455-521,557-573]
  each [...] means one domain
  :param length: this is the length of this protein
  :return: the mask matrix with size length x length, 1 means inner domain residue pair, otherwise 0
  """
  # 读取文件
  with open(dom_path, "r", encoding="utf-8") as file:
    contents = file.readlines()

  # 获得mask位置数据
  list0 = []
  list1 = []
  list2 = []
  for list_idx, content in enumerate(contents):
    num_range_list = content.strip()[1:-1].split(",")
    for num_range in num_range_list:
      start_num = int(num_range.split("-")[0])
      end_num = int(num_range.split("-")[1])
      for num in range(start_num, end_num+1):
        if list_idx == 0:
          list0.append(num)
        elif list_idx == 1:
          list1.append(num)
        else:
          list2.append(num)

  mask = np.zeros((length, length))
  # 遍历矩阵每个元素
  for row in range(mask.shape[0]):
    for col in range(mask.shape[1]):
      if (row in list0 and col in list0) or (row in list1 and col in list1) or (row in list2 and col in list2):
        mask[row][col] = 1

  return mask

if __name__ == "__main__":

  # if no dom file ,please get dom file first
  with open("dom.txt", "w", encoding="utf-8") as f:
    f.write("[7-56,239-327,438-454,522-556,574-586]" + "\n" + "[57-85,96-112,221-238]" + "\n" + "[113-220,328-437,455-521,557-573]")

  file_path = "./dom.txt"
  protein_length = 1000  # mask_matrix size
  mask_matrix = generateMaskBasedOnDom(file_path, protein_length)
  print("*************Generate Mask Matrix Successful!*************")

  # 随机测试几组
  print(mask_matrix[7][56]) # 1
  print(mask_matrix[7][239]) # 1
  print(mask_matrix[8][57])  # 0
  print(mask_matrix[57][95]) # 0
  print(mask_matrix[113][573]) # 1