"""Plotting helpers for map-matching results.

Produces per-trip trace/route HTML maps, road-usage heatmaps and simple trip
statistics.  Several functions read module-level globals that are only created
by the ``__main__`` section at the bottom of this file (``plot_tool``,
``background``, ``streetmap``, ``edgesDf``, ``coarse2full_edge``,
``full2coarse_edge`` and, for ``match_one_txtfile``, ``all_raw_df``).
"""
import glob
import datetime
import argparse
import random
import math
import sys

import numpy as np
import pandas as pd
import matplotlib.colors as mcolors
from dateutil.relativedelta import relativedelta

# NOTE: os, tqdm, gpd, plt, read_h5, tracetable, colorFader, Plot_html, ... are
# expected to come from this star import; it must stay last so that it keeps
# the same shadowing behaviour as before.
from utils import *


def _lon_lat_columns(points):
    """Split a list of ``[lon, lat]`` pairs into ``(lons, lats)`` arrays.

    ``[None, None]`` separator rows are kept, so the result is an object array.
    """
    arr = np.asarray(points)
    return arr[:, 0], arr[:, 1]


def _endpoint_nodes(row, n_points):
    """Return per-point node labels: source first, target last, None between."""
    return [row["source"]] + [None] * (n_points - 2) + [row["target"]]


def match_one_txtfile(h5_file, txt_folder="mapMatch_result/rlt_/", sample_size=400, group_size=10,
                      selected_tripIDs=None, outputfolder="visualization/map_match/"):
    """Plot a random sample of matched trips from one result file.

    The text file ``<txt_folder><nb>.txt`` (``nb`` is the stem of *h5_file*)
    is read; line 0 is treated as a header and every other line as
    ``tripID,edge edge edge ...``.  Sampled trips are written in pages of
    *group_size* trips to ``<outputfolder><nb>/<page>.html``.

    Args:
        h5_file: Path of the ``.h5`` file holding the matched points.
        txt_folder: Folder containing the matching ``.txt`` result file.
        sample_size: Maximum number of result lines to sample.
        group_size: Number of trips plotted per HTML page.
        selected_tripIDs: Optional container; sampled trips not in it are
            skipped.
        outputfolder: Root folder for the generated HTML pages.

    Relies on the module globals ``all_raw_df``, ``streetmap``, ``edgesDf``,
    ``coarse2full_edge``, ``background`` and ``plot_tool``.
    """
    nb = h5_file.split("/")[-1].split(".h5")[0]
    txt_file = txt_folder + str(nb) + ".txt"
    with open(txt_file, 'r') as file:
        lines = file.readlines()

    out_dir = outputfolder + str(nb) + "/"
    os.makedirs(out_dir, exist_ok=True)

    # Never ask random.sample for more lines than the file has (header excluded).
    n_samples = min(sample_size, max(len(lines) - 1, 0))
    if n_samples == 0:
        print(txt_file + " has no matched trips.")
        return

    # Skip the file only when all expected pages already exist.
    expected_pages = math.ceil(n_samples / group_size)
    if len(glob.glob(out_dir + "*.html")) >= expected_pages:
        print(txt_file + " is visited.")
        return

    all_df = read_h5(h5_file, time_converse=False)
    all_df["speed_"] = all_df['speed_'].round(3)
    all_df["timestamp"] = pd.to_timedelta(all_df.secs, unit='s') + datetime.datetime(2020, 10, 1)
    all_df.drop(["secs"], axis=1, inplace=True)

    multipleTraces, multipleRoutes, multipleProjection = {}, {}, {}
    rd = 0  # number of trips plotted so far
    sampled_lines = random.sample(range(1, len(lines)), n_samples)
    for i in tqdm(sampled_lines, desc="Plot " + txt_file, unit="item"):
        fields = lines[i].strip().split(",")
        tripID = int(fields[0])
        if selected_tripIDs is not None and tripID not in selected_tripIDs:
            continue

        df = all_df[all_df.tripID == tripID]
        _, _, projected_lon, projected_lat = get_accurate_start_end_point(df, streetmap, edgesDf)
        multipleProjection[tripID] = [projected_lon, projected_lat]

        # Observed trace: [lons, lats, colors, hover texts]; each list ends with
        # a separator entry (None / "#000000" / ' ').
        raw_df = all_raw_df[all_raw_df.tripID == tripID]
        obs_lon = raw_df.lon.values.tolist() + [None]
        obs_lat = raw_df.lat.values.tolist() + [None]
        matched_index = df.index.unique()  # hoisted: invariant over the point loop
        obs_info = []
        for index in range(len(raw_df)):
            if index in matched_index:
                row = df.loc[index]
                obs_info.append('tripID: ' + str(row["tripID"]) +
                                "<br>Timestamps: " + str(row["timestamp"]) +
                                "<br>Coarse edge: " + str(row["edge"]) +
                                "<br>Uturn: " + str(row["uturn"]) +
                                "<br>Speed: " + str(row["speed_"]) +
                                "<br>Direction: " + str(row["dir"]) +
                                '<br>Fraction: ' + str(row["frcalong"]))
            else:
                row = raw_df.iloc[index]
                obs_info.append('tripID: ' + str(row["tripID"]) +
                                "<br>Timestamps: " + str(row["timestamp"]))
        obs_info.append(' ')
        obs_color = colorFader(raw_df.timestamp, c2='#FDC9C9', c1='#920808') + ["#000000"]
        multipleTraces[tripID] = [obs_lon, obs_lat, obs_color, obs_info]

        # Matched route: [lons, lats, None, hover texts].
        edges = [int(e) for e in fields[1].split(" ")]
        unique_full_edges = np.concatenate([coarse2full_edge[e] for e in edges])
        selected_edge = streetmap.loc[unique_full_edges]
        geo_list, route_info = [], []
        for _, row in selected_edge.iterrows():
            coords = [list(c) for c in row.geometry.coords]
            geo_list += coords + [[None, None]]
            text = ('Road type:' + str(row["type"]) +
                    '<br>tripID: ' + str(tripID) +
                    '<br>Coarse edge: ' + str(row["c_edge"]))
            route_info += [text] * len(coords) + [' ']
        route_lon, route_lat = _lon_lat_columns(geo_list)
        multipleRoutes[tripID] = [route_lon, route_lat, None, route_info]

        # Flush a full page of trips.
        if (rd + 1) % group_size == 0:
            plot_tool.plot_trace(outputpath=out_dir + str(rd // group_size) + ".html",
                                 background=background, multipleRoutes=multipleRoutes,
                                 multipleTraces=multipleTraces, multipleProjection=multipleProjection)
            multipleTraces, multipleRoutes, multipleProjection = {}, {}, {}
        rd += 1

    # Flush the trailing partial page; it gets the next consecutive page index.
    if rd % group_size != 0:
        plot_tool.plot_trace(outputpath=out_dir + str(rd // group_size) + ".html",
                             background=background, multipleRoutes=multipleRoutes,
                             multipleTraces=multipleTraces, multipleProjection=multipleProjection)


def selected_roads_plot(input_folder="mapMatch_result/rlt_/", outputfolder="visualization/map_match/",
                        all_tripID=None):
    """Plot the roads used by matched trips and a heatmap of projected points.

    Writes ``heatmap_selected_roads.html`` (edges coloured by traversal count
    divided by the number of distinct days the edge was seen),
    ``selected_roads.html`` and ``projection_heatmap.html`` into
    *outputfolder*.

    Args:
        input_folder: Folder with ``*.txt`` result files and a ``viterbi/``
            sub-folder holding the matching ``.h5`` files.
        outputfolder: Destination folder for the HTML files.
        all_tripID: Kept for backward compatibility; no longer needed because
            the per-edge day lists are created on demand.

    Relies on the module globals ``streetmap``, ``edgesDf``,
    ``coarse2full_edge``, ``full2coarse_edge``, ``background`` and
    ``plot_tool``.
    """
    # Parse the map-matching results to collect the traversed coarse edges.
    edges = []
    print("Read text files.")
    edge_day_dict = {}  # coarse edge -> list of day offsets on which it was traversed
    for txt_file in tqdm(glob.glob(input_folder + "*.txt")):
        file_name = txt_file.split("/")[-1]
        if file_name == "visited.txt":
            continue
        h5_file = input_folder + "viterbi/" + file_name.split(".txt")[0] + ".h5"
        all_df = read_h5(h5_file, time_converse=False)
        all_df["timestamp"] = pd.to_timedelta(all_df.secs, unit='s') + datetime.datetime(2020, 10, 1)
        all_df["day"] = (all_df['timestamp'] - pd.Timestamp('2020-10-01')).dt.days
        # Don't consider the same tripID in different days: keep its first row.
        all_df = all_df.drop_duplicates(subset=["tripID"], keep="first")
        tripID_time_dict = dict(zip(all_df.tripID, all_df.day))
        with open(txt_file, 'r') as file:
            next(file, None)  # skip the header line
            for line in file:
                l = line.strip()
                if not l:
                    continue
                fields = l.split(",")
                day_ = tripID_time_dict[int(fields[0])]
                edges_to_add = [int(e) for e in fields[1].split(" ")]
                for edge in edges_to_add:
                    edge_day_dict.setdefault(edge, []).append(day_)
                edges += edges_to_add

    edge_nunique_day = {edge: len(np.unique(days)) for edge, days in edge_day_dict.items()}
    unique_coarse_edges, raw_counts = np.unique(edges, return_counts=True)
    counts = np.asarray([n / edge_nunique_day[edge] for edge, n in zip(unique_coarse_edges, raw_counts)])
    coarse_count = dict(zip(unique_coarse_edges, counts))

    # Min-max normalise; a constant count vector maps to 0 instead of NaN.
    span = np.max(counts) - np.min(counts)
    if span == 0:
        normalized_counts = np.zeros(len(counts))
    else:
        normalized_counts = (counts - np.min(counts)) / span
    cmap = plt.get_cmap('cool')
    hex_colors = [mcolors.to_hex(cmap(norm)) for norm in normalized_counts]
    coarse_color = dict(zip(unique_coarse_edges, hex_colors))

    full_edges = np.concatenate([coarse2full_edge[e] for e in unique_coarse_edges])
    unique_full_edges = np.unique(full_edges)
    selected_edge = streetmap.loc[unique_full_edges]

    print("Plotting heatmap for selected roads.")
    geo_list, info, color = [], [], []
    for index, row in tqdm(selected_edge.iterrows()):
        lst = [np.asarray(c) for c in row.geometry.coords]
        lst = add_intermediate_coords(lst)
        geo_list += lst + [[None, None]]
        nodes = _endpoint_nodes(row, len(lst))
        coarse_edge = full2coarse_edge[index]
        count_txt = str(coarse_count[coarse_edge])
        info += ['Road type:' + str(row["type"]) +
                 '<br>Node: ' + str(node) +
                 '<br>Average Daily Count: ' + count_txt +
                 '<br>Source: ' + str(row['source']) +
                 '<br>Target: ' + str(row['target'])
                 for node in nodes[:len(lst)]] + [' ']
        color += [coarse_color[coarse_edge]] * len(lst) + ["#000000"]
    lons, lats = _lon_lat_columns(geo_list)
    roads = [lons, lats, color, info]
    plot_tool.plot_trace(outputpath=outputfolder + "heatmap_selected_roads.html", marker_size=5,
                         line_marker_size=5, background=background, routes=roads)

    print("Plotting selected roads.")
    geo_list, info = [], []
    for index, row in tqdm(selected_edge.iterrows()):
        lst = [np.asarray(c) for c in row.geometry.coords]
        geo_list += lst + [[None, None]]
        # Pad with None so geometries with more than two points do not overflow.
        nodes = _endpoint_nodes(row, len(lst))
        count_txt = str(coarse_count[full2coarse_edge[index]])
        info += ['Road type:' + str(row["type"]) +
                 '<br>Node: ' + str(node) +
                 '<br>Count: ' + count_txt +
                 '<br>Source: ' + str(row['source']) +
                 '<br>Target: ' + str(row['target'])
                 for node in nodes[:len(lst)]] + [' ']
    lons, lats = _lon_lat_columns(geo_list)
    roads = [lons, lats, None, info]
    plot_tool.plot_trace(outputpath=outputfolder + "selected_roads.html", marker_size=5,
                         line_marker_size=5, background=background, routes=roads)

    print("Plotting projected location.")
    lon, lat = [], []
    for h5_file in glob.glob(input_folder + "viterbi/*.h5"):
        df = read_h5(h5_file, time_converse=False)
        _, _, projected_lon, projected_lat = get_accurate_start_end_point(df, streetmap, edgesDf)
        lon.append(projected_lon)
        lat.append(projected_lat)
    lon, lat = np.concatenate(lon), np.concatenate(lat)
    plot_tool.heatmap_plot(data=pd.DataFrame({"lon": lon, "lat": lat}),
                           outputpath=outputfolder + "projection_heatmap.html")
    return


def get_background(street_map, coarse_street_map,):
    """Load the road network and build the background layer and edge mappings.

    Args:
        street_map: Path of the full-road file read with ``gpd.read_file``;
            it must provide ``edge``, ``c_edge``, ``type``, ``source``,
            ``target``, ``oneway`` and a geometry column.
        coarse_street_map: Path of the coarse-road CSV with an ``edge`` column.

    Returns:
        ``(background, coarse2full_edge, full2coarse_edge, streetmap, edgesDf)``
        where ``background`` is ``[lons, lats, None, info]`` covering every
        full edge, and the two dicts map coarse edge -> list of full edges and
        full edge -> coarse edge.
    """
    streetmap = gpd.read_file(street_map)
    streetmap.set_index("edge", inplace=True)
    edgesDf = pd.read_csv(coarse_street_map)
    edgesDf.set_index("edge", inplace=True)

    coarse2full_edge = {edge: [] for edge in edgesDf.index}
    full2coarse_edge = dict(streetmap.c_edge)
    for full_edge, coarse_edge in full2coarse_edge.items():
        coarse2full_edge[coarse_edge].append(full_edge)

    geo_list, info = [], []
    for index, row in streetmap.iterrows():
        coords = [list(c) for c in row.geometry.coords]
        geo_list += coords + [[None, None]]
        # Pad with None so geometries with more than two points do not overflow.
        nodes = _endpoint_nodes(row, len(coords))
        info += ['Road type:' + str(row["type"]) +
                 '<br>Node: ' + str(node) +
                 '<br>Full edge: ' + str(index) +
                 '<br>Coarse edge: ' + str(row["c_edge"]) +
                 '<br>Oneway:' + str(row['oneway']) +
                 '<br>Source: ' + str(row['source']) +
                 '<br>Target: ' + str(row['target'])
                 for node in nodes[:len(coords)]] + [' ']
    lons, lats = _lon_lat_columns(geo_list)
    background = [lons, lats, None, info]
    return background, coarse2full_edge, full2coarse_edge, streetmap, edgesDf


def penality_roads_plot(input_str, outputpath):
    """Plot the full edges belonging to a comma-separated list of coarse edges.

    Args:
        input_str: Coarse edge ids separated by commas, e.g. ``"3,17,42"``.
        outputpath: Path of the HTML file to write.

    Relies on the module globals ``coarse2full_edge``, ``streetmap``,
    ``background`` and ``plot_tool``.
    """
    unique_coarse_edges = [int(e) for e in input_str.split(",")]
    full_edges = np.concatenate([coarse2full_edge[e] for e in unique_coarse_edges])
    unique_full_edges = np.unique(full_edges)
    selected_edge = streetmap.loc[unique_full_edges]

    geo_list, info = [], []
    for index, row in selected_edge.iterrows():
        lst = [np.asarray(c) for c in row.geometry.coords]
        geo_list += lst + [[None, None]]
        text = ('Road type:' + str(row["type"]) +
                '<br>Coarse edge: ' + str(row["c_edge"]) +
                '<br>Full edge: ' + str(index) +
                '<br>Source: ' + str(row["source"]) +
                '<br>Target: ' + str(row["target"]))
        info += [text] * len(lst) + [' ']
    lons, lats = _lon_lat_columns(geo_list)
    roads = [lons, lats, None, info]
    plot_tool.plot_trace(outputpath=outputpath, background=background, routes=roads)


def count_trajectory(data: pd.DataFrame):
    """Return the number of rows per ``tripID``, capped at 300."""
    points_per_trip = data.groupby("tripID").size().to_numpy()
    return np.minimum(points_per_trip, 300)


def count_time_interval(data: pd.DataFrame):
    """Return consecutive ``timestamp`` gaps within each trip, divided by 60.

    Gaps above 10 are capped at 10.  The first point of every trip has no
    predecessor, so its entry is NaN (kept, as before).
    """
    df = data.sort_values(by=['tripID', 'timestamp'])
    gaps = df.groupby('tripID')['timestamp'].diff().to_numpy() / 60
    gaps[gaps > 10] = 10
    return gaps


def count_duration(data: pd.DataFrame):
    """Return ``(max - min)`` of ``timestamp`` per trip, divided by 60, capped at 300."""
    stamps = data.groupby("tripID")["timestamp"]
    durations = ((stamps.max() - stamps.min()) / 60).to_numpy()
    return np.minimum(durations, 300)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str)
    parser.add_argument("--function_name", type=str)
    parser.add_argument("--ptsDf_group_size", type=int, default=2000)
    parser.add_argument("--output_path", type=str, default="visualization/map_match/punish_roads.html")
    parser.add_argument("--outputfolder", type=str, default="visualization/map_match/")
    parser.add_argument("--raw_file", type=str, default="data/stepII.h5")
    parser.add_argument("--street_map", type=str, default="mapMatch_result/full_roads.shp")
    parser.add_argument("--coarse_street_map", type=str, default="mapMatch_result/coarse_roads.csv")
    args = parser.parse_args()

    outputfolder = args.outputfolder
    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(outputfolder, exist_ok=True)

    plot_tool = Plot_html()
    plot_plt = Plot_plt()

    if args.function_name == "data_plot":
        data = tracetable(args.input_file)
        traj_duration = count_duration(data=data)
        plot_plt.hist_density_plot(data=traj_duration, x_label="Duration of routes (min)",
                                   y_label="distribution", title=None, bin=100,
                                   outputpath=args.outputfolder + "duration.png")
        traj_count = count_trajectory(data=data)
        plot_plt.hist_density_plot(data=traj_count, x_label="Nb of data points per route",
                                   y_label="distribution", title=None, bin=100,
                                   outputpath=args.outputfolder + "nb_datapoint.png")
        interval_count = count_time_interval(data=data)
        plot_plt.hist_density_plot(data=interval_count, x_label="Data point interval (min)",
                                   y_label="distribution", title=None, bin=100,
                                   outputpath=args.outputfolder + "time_interval.png")

    if args.function_name == "shapefile_plot":
        shapefile = gpd.read_file(args.input_file)
        print(shapefile)
        box = plot_tool.shp_plot_box(shapefile=shapefile, colorby="Layer")
        plot_tool.plot_map_objs(outputpath=args.outputfolder + args.input_file.split("/")[-1][:-4] + ".html",
                                line_box=box)
        # The shapefile plot does not need the road network loaded below.
        sys.exit()

    # Get full roads and coarse roads; these become the globals the plotting
    # functions above read.
    background, coarse2full_edge, full2coarse_edge, streetmap, edgesDf = get_background(
        args.street_map, args.coarse_street_map)

    if args.function_name == "penality_roads_plot":
        penality_roads_plot(input_str=args.input_file, outputpath=args.output_path)

    if args.function_name == "selected_roads_plot":
        all_raw_df = read_h5(args.raw_file)
        all_tripID = all_raw_df.tripID.unique()
        selected_roads_plot(input_folder=args.input_file, outputfolder=args.outputfolder,
                            all_tripID=all_tripID)