import pandas as pd
# 读取 Excel 文件
df = pd.read_excel('before_sorted_data.xlsx')
# 将时间列转换为 datetime 类型
df['LG seal Start Time'] = pd.to_datetime(df['LG seal Start Time'])
df['Start Time FCM to HSG LG'] = pd.to_datetime(df['Start Time FCM to HSG LG'])
# 创建一个新的 DataFrame 来存储结果
result_df = pd.DataFrame(columns=['start_time','end_time','Serial Number', 'Time Difference'])
# 遍历 E 列中的每个值
for serial_number1 in df['Serial Number.1']:
# 获取 E 列中等于当前值的所有行
rows_e = df[df['Serial Number.1'] == serial_number1]
# print("XXX")
# print(type(rows_e))
# print(rows_e)
# print("YYY")
# 获取 B 列中等于当前值的所有行
rows_b = df[df['Serial Number'] == serial_number1]
# print("XXX")
# print(type(rows_b))
# if not rows_b.empty:
# print(rows_b)
# print("YYY")
if rows_b.empty:
continue
# 计算时间差
print("AA") #为什么 在 下面 result_df._append 要print一下, 否则会报 FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
# result_df = result_df._append 这个错
time_diff = (rows_e['Start Time FCM to HSG LG'].iloc[0] - rows_b['LG seal Start Time'].iloc[0]).total_seconds() / 3600
start_time = rows_b['LG seal Start Time'].iloc[0]
end_time = rows_e['Start Time FCM to HSG LG'].iloc[0]
# 添加到结果 DataFrame 中,
result_df = result_df._append(
{'start_time': start_time, 'end_time': end_time, 'Serial Number': serial_number1,
'Time Difference': time_diff}, ignore_index=True)
# 按照时间差的倒序排序
result_df.sort_values(by='Time Difference', ascending=False, inplace=True)
# 输出结果
# print(result_df)
file_path = 'after_sorted_data.xlsx'
result_df.to_excel("after_sorted_data.xlsx", index=False)
print(f"DataFrame 已保存到 {file_path}")