一个精准计算中心区域与外周区域行为数据的方法(不用画圈版⭕)

用BA-Analyzer处理得到数据后,想要单独分析中心区域和外周区域的行为差异,但又不想在BA-Explorer中画圈(或者懒得画圈 :shushing_face:)?一个脚本就能解决!

将以下代码复制到Python中,正确下载并安装所需的包,然后准确修改:warning:处的信息(一定要准确 :bangbang:),一键运行即可获得动作占比数据。

该脚本目前仅兼容BA-Explorer V1.2.0注释后的工程,如果需要其他版本的结果请留言告诉我 :wink:

import os, fnmatch
import os.path as osp
import h5py
import numpy as np
import pandas as pd
from tqdm import tqdm

# ⚠ Set the data paths
working_path = r'D:\test_data'  # ⚠ change this to the folder that contains your project
# Folder holding the .h5 result files. The path components are passed
# separately so that no backslash escape (the invalid '\B') appears in a
# string literal and the platform's own path separator is used.
h5_path = os.path.join(working_path, 'results', 'BeAOutputs')

# Collect the .h5 result files. os.listdir() returns names in arbitrary
# order, so sort them to keep the row order of the output tables reproducible.
hdf_names = sorted(fnmatch.filter(os.listdir(h5_path), '*.h5'))

# Map each sample name (the file-name part before the first '_') to its
# group, taken as the third '-'-separated field of the sample name.
group = {}
for filename in hdf_names:
    sample = filename.split('_')[0]
    fields = sample.split('-')
    if len(fields) < 3:
        # Fail with the offending file name instead of a bare IndexError.
        raise ValueError(
            f"Cannot read the group from {filename!r}: the sample name "
            f"{sample!r} needs at least three '-'-separated fields"
        )
    group[sample] = fields[2]

# ⚠ List by hand every movement label used in the annotation
# (spelling and upper/lower case must match the annotation exactly).
# NOTE(review): 'Strething_to_climb' looks like a misspelling of "Stretching";
# keep it only if the annotated project really spells it this way - confirm.
anntation_labels = (
    'Sniffing',
    'Grooming',
    'Running',
    'Strething_to_climb',
    'Rearing',
    'Sniffing_in_situ',
    'Walking_with_sniff_to_ground',
    'Sniffing_while_walking',
    'Jumping',
    'Still',
    'Sniffing_upwards',
    'Twirling',
    'Shaking_heads',
    'Rearing_still',
    'Digging',
)

# ⚠ Fraction of the arena half-extent r used as the radius of the central
# zone; 0.5 reproduces the original "r / 2" circle.
CENTRAL_RADIUS_RATIO = 0.5
# Column of FrameLevel_paras['Paras_data'] that is summed into
# 'total_displacement(mm)'.
# NOTE(review): presumably the per-frame displacement in mm - confirm against
# FrameLevel_paras['Paras_names'].
DISPLACEMENT_COLUMN = 32


def _summarize_region(frame_labels, frame_displacement, sample_name, fps):
    """Summarize the frames of one sample that fall inside one region.

    Args:
        frame_labels: 1-D array with the movement label of each selected frame.
        frame_displacement: 1-D array with the displacement value of each
            selected frame.
        sample_name: Sample identifier; also the key looked up in ``group``.
        fps: Frame rate used to convert the frame count into seconds.

    Returns:
        A dict holding, for every label in ``anntation_labels``, the fraction
        of the selected frames carrying that label (0 when it never occurs),
        plus 'samplename', 'total_frames', 'total_times(s)',
        'total_displacement(mm)' and 'group'. A label that occurs in the data
        but is not listed in ``anntation_labels`` is added as an extra entry.

    Raises:
        KeyError: If ``sample_name`` is not a key of ``group``.
    """
    observed, counts = np.unique(frame_labels, return_counts=True)
    n_frames = int(counts.sum())

    # Every expected label starts at 0 so all samples share the same columns.
    summary = {label: 0 for label in anntation_labels}
    summary['samplename'] = sample_name
    # Both the frame count and the duration are reported so that the central
    # and the peripheral tables have identical, directly comparable columns.
    summary['total_frames'] = n_frames
    summary['total_times(s)'] = n_frames / fps
    summary['total_displacement(mm)'] = frame_displacement.sum()
    summary['group'] = group[sample_name]

    # When no frame was selected, `observed` is empty and no division happens.
    for label, count in zip(observed, counts):
        summary[label] = count / n_frames
    return summary


# Compute the results for the central and the peripheral region separately.
cent_results = []
peri_results = []
for ih in tqdm(hdf_names, desc='Cut'):
    file_name = ih.split('_')[0]

    # Read everything that is needed up front; the context manager closes the
    # file even if a dataset is missing or a later step raises.
    with h5py.File(osp.join(h5_path, ih), 'r') as ihdf_:
        skeleton3D = ihdf_['3Dskeleton']
        fps = int(skeleton3D['FPS'][()])
        data3d = skeleton3D['data3D'][:]
        bodyparts = skeleton3D['Bodyparts'][:]
        paras_data = ihdf_['FrameLevel_paras']['Paras_data']
        frame_dist = paras_data[:, DISPLACEMENT_COLUMN]
        annotation = ihdf_['Movement_annotation']
        raw_labels = annotation['frame_level_movement_annotation'][:]

    # Name the coordinate columns "<bodypart>_x/_y/_z", in body-part order.
    column_names = []
    for part in bodyparts:
        part_name = part.decode('utf-8')
        column_names.extend(
            [f"{part_name}_x", f"{part_name}_y", f"{part_name}_z"])
    data3d_df = pd.DataFrame(data3d, columns=column_names)
    x_values = data3d_df[[col for col in column_names if col.endswith('_x')]]
    y_values = data3d_df[[col for col in column_names if col.endswith('_y')]]

    # The arena extent is taken from the bounding box of every tracked body
    # point over all frames: C is the centre of that box and r is half of its
    # longer side.
    x_min, x_max = x_values.min().min(), x_values.max().max()
    y_min, y_max = y_values.min().min(), y_values.max().max()
    C_x = (x_max + x_min) / 2
    C_y = (y_max + y_min) / 2
    r = max((x_max - x_min) / 2, (y_max - y_min) / 2)

    # Per-frame body centre: the mean of all body-part x / y coordinates.
    center_x = x_values.mean(axis=1)
    center_y = y_values.mean(axis=1)

    # ⚠ A frame is central when its body centre lies within the circle of
    # radius r * CENTRAL_RADIUS_RATIO around C; all other frames are peripheral.
    distance_to_C = np.sqrt((center_x - C_x)**2 + (center_y - C_y)**2)
    is_central = (distance_to_C <= r * CENTRAL_RADIUS_RATIO).to_numpy()
    central_indices = np.flatnonzero(is_central)
    peripheral_indices = np.flatnonzero(~is_central)

    # Only column 2 of the annotation table is used as the movement label, so
    # only that column is decoded (bytes -> str, NUL padding stripped).
    labels = np.array(
        [raw.decode('utf-8').strip('\x00') for raw in raw_labels[:, 2]])

    cent_results.append(_summarize_region(
        labels[central_indices], frame_dist[central_indices], file_name, fps))
    peri_results.append(_summarize_region(
        labels[peripheral_indices], frame_dist[peripheral_indices],
        file_name, fps))
    
# Turn the per-sample dicts into tables with one row per sample, keyed by
# sample name.
cent_results_df = pd.DataFrame(cent_results)
cent_results_df = cent_results_df.set_index('samplename')
peri_results_df = pd.DataFrame(peri_results)
peri_results_df = peri_results_df.set_index('samplename')

# Save both tables as CSV files in the project folder.
central_csv = os.path.join(working_path, 'central_data.csv')
peripheral_csv = os.path.join(working_path, 'peripheral_data.csv')
cent_results_df.to_csv(central_csv)
peri_results_df.to_csv(peripheral_csv)