BA-Analyzer处理得到的数据,想要单独分析中央区域和外周区域行为的差异,但又不想在BA-Explorer中画圈(或者懒得画圈)?一个脚本就能解决!
复制以下代码到Python中,并正确下载安装所需的包后,准确修改代码中带 ⚠ 标记处的信息(一定要准确),一键运行即可获得动作占比数据。
该脚本目前仅兼容BA-Explorer V1.2.0注释后的工程,如果需要其他版本的结果请留言告诉我。
import os, fnmatch
import os.path as osp
import h5py
import numpy as np
import pandas as pd
from tqdm import tqdm
# ⚠ Set the data path
working_path = r'D:\test_data'  # ⚠ change this to the project directory
# Directory that holds the .h5 files. The components are joined separately
# so that no backslash is embedded in a non-raw string literal
# ('results\BeAOutputs' relied on '\B' being an unrecognised escape).
h5_path = os.path.join(working_path, 'results', 'BeAOutputs')
hdf_names = fnmatch.filter(os.listdir(h5_path), '*.h5')

# Map every sample name (file-name text before the first '_') to its group,
# which is the third '-'-separated field of that sample name.
group = {}
for filename in hdf_names:
    sample = filename.split('_')[0]
    sample_fields = sample.split('-')
    if len(sample_fields) < 3:
        raise ValueError(
            f"Cannot read the group from file name {filename!r}: the part "
            "before the first '_' must contain at least three "
            "'-'-separated fields."
        )
    group[sample] = sample_fields[2]

# ⚠ Manually list every annotated movement label (case must match exactly).
# NOTE(review): 'Strething_to_climb' looks like a misspelling of "Stretching";
# it is kept unchanged because it has to match the label text stored in the
# annotation - confirm against the project's labels.
anntation_labels = ('Sniffing', 'Grooming', 'Running', 'Strething_to_climb', 'Rearing',
                    'Sniffing_in_situ', 'Walking_with_sniff_to_ground', 'Sniffing_while_walking',
                    'Jumping', 'Still', 'Sniffing_upwards', 'Twirling', 'Shaking_heads',
                    'Rearing_still', 'Digging')
# Compute the results for the central and the peripheral region separately.

# ⚠ Fraction of the arena radius r that bounds the central zone: a frame is
# "central" when the body centre lies within CENTRAL_RADIUS_RATIO * r of the
# arena centre C.
CENTRAL_RADIUS_RATIO = 0.5
# Column of FrameLevel_paras/Paras_data that is summed as displacement.
# NOTE(review): index 32 comes from the original script - confirm that it is
# the per-frame displacement column for your BA-Explorer version.
DISPLACEMENT_COLUMN = 32


def _central_frame_mask(data3d, bodypart_names, radius_ratio):
    """Return a boolean array that is True for frames inside the central zone.

    Args:
        data3d: 2-D array with one row per frame and three columns
            (x, y, z) per body part, in the order of ``bodypart_names``.
        bodypart_names: body-part names used to label the columns.
        radius_ratio: fraction of the arena radius that bounds the zone.

    Returns:
        A 1-D boolean NumPy array with one entry per row of ``data3d``.
    """
    column_names = []
    for part in bodypart_names:
        column_names.extend([f"{part}_x", f"{part}_y", f"{part}_z"])
    coords = pd.DataFrame(data3d, columns=column_names)

    xs = coords[[col for col in coords.columns if col.endswith('_x')]]
    ys = coords[[col for col in coords.columns if col.endswith('_y')]]

    # The arena is estimated from the extremes reached by any body point:
    # C is the midpoint of the x/y ranges, r the larger half-range.
    x_max, x_min = xs.max().max(), xs.min().min()
    y_max, y_min = ys.max().max(), ys.min().min()
    arena_x = (x_max + x_min) / 2
    arena_y = (y_max + y_min) / 2
    arena_r = max((x_max - x_min) / 2, (y_max - y_min) / 2)

    # Per-frame body centre: mean of all x (resp. y) coordinates in the row.
    body_x = xs.mean(axis=1)
    body_y = ys.mean(axis=1)
    distance = np.sqrt((body_x - arena_x) ** 2 + (body_y - arena_y) ** 2)
    return (distance <= arena_r * radius_ratio).to_numpy()


def _summarize_region(movement_labels, frame_dist, frame_indices, fps,
                      sample_name, group_name, label_names):
    """Build one result row for the frames selected by ``frame_indices``.

    Args:
        movement_labels: 1-D array with one movement label per frame.
        frame_dist: 1-D array with one displacement value per frame.
        frame_indices: integer positions of the frames in this region.
        fps: frame rate used to convert the frame count to seconds.
        sample_name: value stored under 'samplename'.
        group_name: value stored under 'group'.
        label_names: labels that must appear in the row even when absent
            from the region (their proportion stays 0).

    Returns:
        A dict holding the proportion of frames per label plus the
        'samplename', 'total_frames', 'total_times(s)',
        'total_displacement(mm)' and 'group' entries.
    """
    present, counts = np.unique(movement_labels[frame_indices], return_counts=True)
    total_frames = np.sum(counts)

    # Every known label starts at 0 so all rows share the same columns.
    summary = {label: 0 for label in label_names}
    summary['samplename'] = sample_name
    # Both the frame count and the duration are reported for every region so
    # that the central and peripheral tables have matching columns.
    summary['total_frames'] = total_frames
    summary['total_times(s)'] = total_frames / fps
    summary['total_displacement(mm)'] = frame_dist[frame_indices].sum()
    summary['group'] = group_name
    # Overwrite the defaults with the proportions of the labels that occur.
    for label, count in zip(present, counts):
        summary[label] = count / total_frames
    return summary


cent_results = []
peri_results = []
for ih in tqdm(hdf_names, desc='Cut'):
    file_name = ih.split('_')[0]
    # The context manager closes the file even if reading a dataset fails.
    with h5py.File(osp.join(h5_path, ih), 'r') as ihdf_:
        # 3-D coordinates of the body points
        skeleton3D = ihdf_['3Dskeleton']
        fps = int(skeleton3D['FPS'][()])
        data3d = skeleton3D['data3D'][:]
        bodyparts_str = [part.decode('utf-8') for part in skeleton3D['Bodyparts'][:]]
        # Kinematic parameter and frame-level movement annotation
        frame_dist = ihdf_['FrameLevel_paras']['Paras_data'][:, DISPLACEMENT_COLUMN]
        raw_annotation = ihdf_['Movement_annotation']['frame_level_movement_annotation'][:]

    # Only column 2 of the annotation is used, so only that column is
    # converted from bytes to str (stripping NUL padding).
    ann_movement_label = np.array(
        [item.decode('utf-8').strip('\x00') for item in raw_annotation[:, 2]]
    )

    # ⚠ Split the frames by whether the body centre is in the central zone.
    is_central = _central_frame_mask(data3d, bodyparts_str, CENTRAL_RADIUS_RATIO)
    central_indices = np.flatnonzero(is_central)
    peripheral_indices = np.flatnonzero(~is_central)

    cent_results.append(_summarize_region(
        ann_movement_label, frame_dist, central_indices, fps,
        file_name, group[file_name], anntation_labels))
    peri_results.append(_summarize_region(
        ann_movement_label, frame_dist, peripheral_indices, fps,
        file_name, group[file_name], anntation_labels))
# Turn the per-sample result rows into tables indexed by sample name.
# Both tables are built before anything is written to disk.
cent_results_df = pd.DataFrame(cent_results)
cent_results_df = cent_results_df.set_index('samplename')
peri_results_df = pd.DataFrame(peri_results)
peri_results_df = peri_results_df.set_index('samplename')

# Save the results as CSV files in the project directory.
central_csv_path = os.path.join(working_path, 'central_data.csv')
peripheral_csv_path = os.path.join(working_path, 'peripheral_data.csv')
cent_results_df.to_csv(central_csv_path)
peri_results_df.to_csv(peripheral_csv_path)