Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
28e6237
Update requirements.txt
golphy-yy Oct 31, 2024
423a6db
training weight porto-20200_TrajCL_best.pt
golphy-yy Nov 6, 2024
4472083
Update train_trajsimi.py
golphy-yy Nov 12, 2024
2aab2c5
Update trajsimi.py
golphy-yy Nov 12, 2024
ae00407
Update train_trajsimi.py
golphy-yy Nov 12, 2024
350ef6d
Update train_trajsimi.py
golphy-yy Nov 12, 2024
2b25d83
Update train_trajsimi.py
golphy-yy Nov 12, 2024
7fadf04
Update preprocessing_porto.py
golphy-yy Jan 10, 2025
f041495
Update preprocessing_porto.py
golphy-yy Jan 10, 2025
ceeaf60
Update preprocessing_porto.py
golphy-yy Jan 10, 2025
edae4d5
Update train.py
golphy-yy Jan 11, 2025
7fb9e37
Preprocessing_Bangkok
golphy-yy Jan 11, 2025
9ea3ea7
Add files via upload
golphy-yy Jan 11, 2025
c620e23
Update preprocessing_porto_edited.py
golphy-yy Jan 11, 2025
934cbe3
Add files via upload
golphy-yy Jan 12, 2025
8f5e9dc
Update preprocessing_porto_edited.py
golphy-yy Jan 12, 2025
92509fc
Update preprocessing_porto_edited.py
golphy-yy Jan 13, 2025
7044a74
Update preprocessing_porto_edited.py
golphy-yy Jan 13, 2025
9aebc84
Update preprocessing_porto_edited.py
golphy-yy Jan 13, 2025
3bfee54
Update preprocessing_porto_edited.py
golphy-yy Jan 13, 2025
9a83b7f
Update data_loader.py
golphy-yy Jan 17, 2025
df19694
Update preprocessing_porto_edited.py
golphy-yy Jan 17, 2025
b0cc180
Update preprocessing_porto_edited.py
golphy-yy Jan 17, 2025
bfb8a45
Update preprocessing_porto_edited.py
golphy-yy Jan 17, 2025
4970212
Update preprocessing_porto_edited.py
golphy-yy Jan 17, 2025
cff8a24
Update preprocessing_porto_edited.py
golphy-yy Jan 17, 2025
8a2e9e5
Update preprocessing_porto_edited.py
golphy-yy Jan 17, 2025
1d71096
Update config.py
golphy-yy Jan 20, 2025
1017480
Update train_trajsimi.py
golphy-yy Jan 24, 2025
6d3c3d0
Update config.py
golphy-yy Jan 31, 2025
473d584
Update preprocessing_porto_edited.py
golphy-yy Feb 10, 2025
7c95c00
Update and rename preprocessing_porto_edited.py to preprocessing_bang…
golphy-yy Feb 12, 2025
e3b97b3
Update data_loader.py
golphy-yy Feb 12, 2025
03a4d2c
Update preprocessing_bangkok_edited.py
golphy-yy Feb 12, 2025
b06660a
Update preprocessing_bangkok_edited.py
golphy-yy Feb 12, 2025
84c4bfd
Update data_loader.py
golphy-yy Feb 25, 2025
b60a14a
edit for Bangkok by golphy 2/28/25
golphy-yy Feb 28, 2025
ad65c0a
Update trajsimi.py
golphy-yy Mar 15, 2025
bf41cc0
Update train_trajsimi.py
golphy-yy Mar 15, 2025
059099f
makkasan Update config.py
golphy-yy Apr 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,25 @@ def post_value_updates(cls):
cls.min_lat = 41.1001
cls.max_lon = -8.5192
cls.max_lat = 41.2086

elif 'Bangkok' == cls.dataset: #min_lon , max_lon : 100.56030256583969 100.56129743416032 , min_lat,max_lat 13.73621679334274 13.737183205661077
cls.dataset_prefix = 'Bangkok_100'
cls.min_lon = -8.7005
cls.min_lat = 41.1001
cls.max_lon = -8.5192
cls.max_lat = 41.2086
#cls.min_lon = 100.56030256583969
#cls.min_lat = 13.73621679334274
#cls.max_lon = 100.56129743416032
#cls.max_lat = 13.737183205661077

elif 'Makkasan' == cls.dataset:
cls.dataset_prefix = 'Makkasan_100'
cls.min_lon = -8.7005
cls.min_lat = 41.1001
cls.max_lon = -8.5192
cls.max_lat = 41.2086

else:
pass

Expand Down
Binary file added porto_20200_TrajCL_best.pt
Binary file not shown.
335 changes: 335 additions & 0 deletions preprocessing_bangkok_edited.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
import sys
sys.path.append('..')
sys.path.append('/content/TrajCL')
sys.path.append('/content/traj-dist')
sys.path.append('/content/traj-dist/traj_dist')
sys.path.append('/content/traj-dist/traj_dist/cydist')
sys.path.append('/content/traj-dist/traj_dist/pydist')
import os
import math
import time
import random
import logging
import torch
import pickle
import pandas as pd
from ast import literal_eval
import numpy as np
import traj_dist.distance as tdist
import multiprocessing as mp
from functools import partial

from config import Config
from utils import tool_funcs
from utils.cellspace import CellSpace
from utils.tool_funcs import lonlat2meters
from model.node2vec_ import train_node2vec
from utils.edwp import edwp
from utils.data_loader import read_trajsimi_traj_dataset

# Check whether a (lon, lat) point lies strictly inside the configured bounding box.
def inrange(lon, lat):
    """Return True iff the point is strictly within Config's lon/lat bounds."""
    within_lon = Config.min_lon < lon < Config.max_lon
    within_lat = Config.min_lat < lat < Config.max_lat
    return within_lon and within_lat


def clean_and_output_data():
    """Load the raw Bangkok CSV, filter trajectories, convert to Mercator, and pickle.

    Steps:
      1. Read <root>/data/Bangkok.csv and rename POLYLINE -> wgs_seq.
      2. Drop rows flagged MISSING_DATA and rows outside the length bounds.
      3. Report the observed lon/lat extent of the remaining trajectories.
      4. Keep only trajectories whose every point is inside the Config bounding box.
      5. Convert WGS84 coordinates to Mercator meters and dump to Config.dataset_file.
    """
    _time = time.time()
    # Raw data layout follows the Porto taxi dataset:
    # https://archive.ics.uci.edu/ml/machine-learning-databases/00339/
    dfraw = pd.read_csv(Config.root_dir + '/data/Bangkok.csv')
    dfraw = dfraw.rename(columns = {"POLYLINE": "wgs_seq"})

    dfraw = dfraw[dfraw.MISSING_DATA == False]

    # length requirement (POLYLINE is stored as a string; parse it to a list first)
    dfraw.wgs_seq = dfraw.wgs_seq.apply(literal_eval)
    dfraw['trajlen'] = dfraw.wgs_seq.apply(len)
    # NOTE(review): upper bound is max_traj_len*100, not max_traj_len — confirm intended
    dfraw = dfraw[(dfraw.trajlen >= Config.min_traj_len) & (dfraw.trajlen <= Config.max_traj_len*100)]
    logging.info('Preprocessed-rm length. #traj={}'.format(dfraw.shape[0]))

    # Report the observed lon/lat extent (min/max scans instead of full sorts).
    min_lon = min(min(p[0] for p in traj) for traj in dfraw['wgs_seq'])
    max_lon = max(max(p[0] for p in traj) for traj in dfraw['wgs_seq'])
    min_lat = min(min(p[1] for p in traj) for traj in dfraw['wgs_seq'])
    max_lat = max(max(p[1] for p in traj) for traj in dfraw['wgs_seq'])
    print('min_lon , max_lon :', min_lon, max_lon)
    print('min_lat , max_lat :', min_lat, max_lat)  # was mislabelled 'min_lon,max_lon'

    # range requirement: keep a trajectory only if every point is inside the bbox
    dfraw['inrange'] = dfraw.wgs_seq.map(lambda traj: all(inrange(p[0], p[1]) for p in traj))
    dfraw = dfraw[dfraw.inrange == True]
    logging.info('Preprocessed-rm range. #traj={}'.format(dfraw.shape[0]))

    # convert WGS84 coordinates to Mercator meters
    dfraw['merc_seq'] = dfraw.wgs_seq.apply(lambda traj: [list(lonlat2meters(p[0], p[1])) for p in traj])

    logging.info('Preprocessed-output. #traj={}'.format(dfraw.shape[0]))
    dfraw = dfraw[['trajlen', 'wgs_seq', 'merc_seq']].reset_index(drop = True)

    dfraw.to_pickle(Config.dataset_file, protocol = 4)
    print('Exported')
    logging.info('Preprocess end. @={:.0f}'.format(time.time() - _time))
    return


def init_cellspace():
    """Build the cell space over the configured bounding box, pickle it,
    and train node2vec cell embeddings on the cell-adjacency graph."""
    # Bounding box in Mercator meters, padded by the configured buffer.
    x_lo, y_lo = lonlat2meters(Config.min_lon, Config.min_lat)
    x_hi, y_hi = lonlat2meters(Config.max_lon, Config.max_lat)
    buf = Config.cellspace_buffer
    x_lo, y_lo = x_lo - buf, y_lo - buf
    x_hi, y_hi = x_hi + buf, y_hi + buf

    side = int(Config.cell_size)
    cs = CellSpace(side, side, x_lo, y_lo, x_hi, y_hi)
    with open(Config.dataset_cell_file, 'wb') as fh:
        pickle.dump(cs, fh, protocol = 4)

    # Train cell embeddings on the permuted neighbour-cell edge list.
    _, edge_index = cs.all_neighbour_cell_pairs_permutated_optmized()
    edge_index = torch.tensor(edge_index, dtype = torch.long, device = Config.device).T
    train_node2vec(edge_index)
    return


def generate_newsimi_test_dataset():
    """Build query/db trajectory sets for the most-similar-search experiment.

    Takes up to n_db trajectories starting at the 80% mark of the dataset.
    From each trajectory, even-indexed points form the query variant and
    odd-indexed points form the db variant. Only the first n_query
    trajectories contribute queries; every test trajectory contributes to
    the db. Dumps one raw pickle plus downsampled and distorted variants
    at rates 0.1-0.5.
    """
    trajs = pd.read_pickle(Config.dataset_file) # using test part only
    l = trajs.shape[0]
    n_query = 1000
    n_db = 100000
    # Test slice: [80% of dataset, 80% + n_db)
    test_idx = (int(l*0.8), int(l*0.8)+n_db)
    test_trajs = trajs[test_idx[0]: test_idx[1]]
    logging.info("Test trajs loaded.")

    # for varying db size
    def _raw_dataset():
        query_lst = [] # [N, len, 2]
        db_lst = []
        i = 0
        for _, v in test_trajs.merc_seq.items():
            # queries: even-indexed points of the first n_query trajectories only
            if i < n_query:
                query_lst.append(np.array(v)[::2].tolist())
            # db: odd-indexed points of every test trajectory
            db_lst.append(np.array(v)[1::2].tolist())
            i += 1

        output_file_name = Config.dataset_file + '_newsimi_raw.pkl'
        with open(output_file_name, 'wb') as fh:
            pickle.dump( (query_lst, db_lst) , fh, protocol = 4)
        logging.info("_raw_dataset done.")
        return

    # for varying downsampling rate
    def _downsample_dataset(rate):
        unrate = 1-rate # preserved rate
        query_lst = [] # [N, len, 2]
        db_lst = []
        i = 0
        for _, v in test_trajs.merc_seq.items():
            if i < n_query:
                _q = np.array(v)[::2]
                _q_len = _q.shape[0]
                # keep a random, index-sorted subset of ceil(len * (1-rate)) points
                _idx = np.sort(np.random.choice(_q_len, math.ceil(_q_len*unrate), replace = False))
                query_lst.append( _q[_idx].tolist() )
            _db = np.array(v)[1::2]
            _db_len = _db.shape[0]
            _idx = np.sort(np.random.choice(_db_len, math.ceil(_db_len*unrate), replace = False))
            db_lst.append( _db[_idx].tolist() )
            i += 1

        output_file_name = Config.dataset_file + '_newsimi_downsampling_' + str(rate) + '.pkl'
        with open(output_file_name, 'wb') as fh:
            pickle.dump( (query_lst, db_lst) , fh, protocol = 4)
        logging.info("_downsample_dataset done. rate={}".format(rate))
        return

    # for varying distort rate
    def _distort_dataset(rate):
        query_lst = [] # [N, len, 2]
        db_lst = []
        i = 0
        for _, v in test_trajs.merc_seq.items():
            if i < n_query:
                _q = np.array(v)[::2]
                # jitter each point with probability `rate` by a truncated random offset
                for _row in range(_q.shape[0]):
                    if random.random() < rate:
                        _q[_row] = _q[_row] + [tool_funcs.truncated_rand(), tool_funcs.truncated_rand()]
                query_lst.append( _q.tolist() )

            _db = np.array(v)[1::2]
            for _row in range(_db.shape[0]):
                if random.random() < rate:
                    _db[_row] = _db[_row] + [tool_funcs.truncated_rand(), tool_funcs.truncated_rand()]
            db_lst.append( _db.tolist() )
            i += 1

        output_file_name = Config.dataset_file + '_newsimi_distort_' + str(rate) + '.pkl'
        with open(output_file_name, 'wb') as fh:
            pickle.dump( (query_lst, db_lst) , fh, protocol = 4)
        logging.info("_distort_dataset done. rate={}".format(rate))
        return

    _raw_dataset()

    for rate in [0.1, 0.2, 0.3, 0.4, 0.5]:
        _downsample_dataset(rate)

    for rate in [0.1, 0.2, 0.3, 0.4, 0.5]:
        _distort_dataset(rate)

    return


# ===calculate trajsimi distance matrix for trajsimi learning===
def traj_simi_computation(fn_name = 'hausdorff'):
    """Compute pairwise trajectory-similarity matrices for trajsimi learning.

    Reads the preprocessed trajectories, splits them into train/eval/test,
    z-normalizes the Mercator coordinates, computes the upper-triangular
    similarity matrix of each split with the chosen distance function, and
    pickles (trains_simi, evals_simi, tests_simi, max_distance).
    """
    logging.info("traj_simi_computation starts. fn={}".format(fn_name))
    _time = time.time()

    # 1. load, split, normalize
    trains, evals, tests = read_trajsimi_traj_dataset(Config.dataset_file)
    trains, evals, tests = _normalization([trains, evals, tests])
    logging.info("traj dataset sizes. traj: trains/evals/tests={}/{}/{}" \
                    .format(trains.shape[0], evals.shape[0], tests.shape[0]))

    # 2. similarity matrix per split
    fn = _get_simi_fn(fn_name)
    tests_simi = _simi_matrix(fn, tests)
    evals_simi = _simi_matrix(fn, evals)
    trains_simi = _simi_matrix(fn, trains)  # [ [simi, simi, ... ], ... ]

    max_distance = max(max(map(max, trains_simi)),
                       max(map(max, evals_simi)),
                       max(map(max, tests_simi)))

    # 3. dump all three splits plus the global maximum distance
    _output_file = '{}_traj_simi_dict_{}.pkl'.format(Config.dataset_file, fn_name)
    tup = (trains_simi, evals_simi, tests_simi, max_distance)
    with open(_output_file, 'wb') as fh:
        pickle.dump(tup, fh, protocol = 4)

    logging.info("traj_simi_computation ends. @={:.3f}".format(time.time() - _time))
    return tup


def _normalization(lst_df):
# lst_df: [df, df, df]
xs = []
ys = []
for df in lst_df:
for _, v in df.merc_seq.items():
arr = np.array(v)
xs.append(arr[:,0])
ys.append(arr[:,1])

xs = np.concatenate(xs)
ys = np.concatenate(ys)
mean = np.array([xs.mean(), ys.mean()])
std = np.array([xs.std(), ys.std()])

for i in range(len(lst_df)):
lst_df[i].merc_seq = lst_df[i].merc_seq.apply(lambda lst: ( (np.array(lst)-mean)/std ).tolist())

return lst_df


def _get_simi_fn(fn_name):
    """Map a distance-function name to its callable; returns None if unknown.

    'lcss' and 'edr' are partially applied with the configured epsilon.
    """
    table = {'lcss': tdist.lcss, 'edr': tdist.edr, 'frechet': tdist.frechet,
             'discret_frechet': tdist.discret_frechet,
             'hausdorff': tdist.hausdorff, 'edwp': edwp}
    fn = table.get(fn_name, None)
    if fn_name in ('lcss', 'edr'):
        fn = partial(fn, eps = Config.test_exp1_lcss_edr_epsilon)
    return fn


def _simi_matrix(fn, df):
    """Compute the pairwise similarity of df.merc_seq rows in parallel.

    Returns a list of l rows; row i is padded with i+1 leading zeros
    (diagonal and lower triangle) followed by fn(traj_i, traj_j) for j > i,
    so every row has length l.
    """
    _time = time.time()

    l = df.shape[0]
    batch_size = 50

    # Split row indices into batches; the last task picks up any remainder,
    # so l need not be a multiple of batch_size (the old assert was removed).
    tasks = []
    n_batches = math.ceil(l / batch_size)
    for i in range(n_batches):
        start = batch_size * i
        end = l if i == n_batches - 1 else batch_size * (i + 1)
        tasks.append((fn, df, list(range(start, end))))

    num_cores = int(mp.cpu_count())  # was assigned twice; duplicate removed
    assert num_cores > 0
    logging.info("pool.size={}".format(num_cores))
    with mp.Pool(num_cores) as pool:
        lst_simi = pool.starmap(_simi_comp_operator, tasks)

    # Flatten the batch results and left-pad row i with i+1 zeros.
    lst_simi = sum(lst_simi, [])
    for i, row_simi in enumerate(lst_simi):
        lst_simi[i] = [0]*(i+1) + row_simi
    assert sum(map(len, lst_simi)) == l ** 2
    logging.info('simi_matrix computation done., @={}, #={}'.format(time.time() - _time, len(lst_simi)))

    return lst_simi


# async operator
def _simi_comp_operator(fn, df_trajs, sub_idx):
simi = []
l = df_trajs.shape[0]
for _i in sub_idx:
t_i = np.array(df_trajs.iloc[_i].merc_seq)
simi_row = []
for _j in range(_i + 1, l):
t_j = np.array(df_trajs.iloc[_j].merc_seq)
simi_row.append( float(fn(t_i, t_j)) )
simi.append(simi_row)
logging.debug('simi_comp_operator ends. sub_idx=[{}:{}], pid={}' \
.format(sub_idx[0], sub_idx[-1], os.getpid()))
return simi



# nohup python ./preprocessing_porto.py &> ../result &
if __name__ == '__main__':
    # Log DEBUG and above to both a file under exp/log/ and stdout.
    logging.basicConfig(level = logging.DEBUG,
            format = "[%(filename)s:%(lineno)s %(funcName)s()] -> %(message)s",
            handlers = [logging.FileHandler(Config.root_dir+'/exp/log/'+tool_funcs.log_file_name(), mode = 'w'),
                        logging.StreamHandler()]
            )
    # NOTE(review): dataset is set to 'porto' although this script reads
    # data/Bangkok.csv in clean_and_output_data — confirm 'Bangkok' was intended.
    Config.dataset = 'porto'
    Config.post_value_updates()
    print('HELLO WORLD')
    clean_and_output_data()
    print('Clean and output done')
    init_cellspace()
    print('init_cellspace done')
    generate_newsimi_test_dataset()
    print('generate_newsimi done')
    traj_simi_computation('edwp') # edr edwp discret_frechet hausdorff
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ torch_geometric==1.7.0
torch_scatter==2.0.7
torch_sparse==0.6.9
torch_spline_conv==1.2.1
traj_dist==1.15
traj_dist==1.1
Loading