-
Notifications
You must be signed in to change notification settings - Fork 23
Merge users (additional task) #28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f0bd3c8
1d846ff
d3602f6
22ffad7
3709d12
e3e497c
75d57fa
fa8ba68
f13ca9f
2964511
5ae7967
bbf95ff
9c125dd
f336d1a
c8bafd0
ec36842
6a07b97
40382cc
f4641dd
594122f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Users merge | ||
|
||
There are n users, each associated with a list of emails (m emails in total). For example: | ||
user1 -> [email protected], [email protected], [email protected] | ||
user2 -> [email protected], [email protected] | ||
user3 -> [email protected], [email protected] | ||
user4 -> [email protected], [email protected] | ||
user5 -> [email protected] | ||
|
||
It is assumed that if two users have a common email, then they are the same user. | ||
|
||
We need to design and implement an algorithm that merges such users. The output should be a list of users with their email addresses, in the same format as the input. | ||
|
||
In the above example, the answer to the task would be as follows: | ||
user1 -> [email protected], [email protected], [email protected], [email protected], [email protected] | ||
user3 -> [email protected], [email protected] | ||
|
||
|
||
Translated with DeepL.com (free version) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#include "merge_users.hpp" | ||
|
||
int main() { | ||
std::vector<User> users = { | ||
User("user1", {"[email protected]", "[email protected]", "[email protected]"}), | ||
User("user2", {"[email protected]", "[email protected]"}), | ||
User("user3", {"[email protected]", "[email protected]"}), | ||
User("user4", {"[email protected]", "[email protected]"}), | ||
User("user5", {"[email protected]"})}; | ||
|
||
std::vector<User> merged_users = MergeUsers(users); | ||
|
||
// Print the merged users and their emails | ||
for (const auto& user : merged_users) std::cout << user << std::endl; | ||
|
||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#include "merge_users.hpp" | ||
|
||
std::vector<User> DFSMergeUtil(const Graph<Vertex<UserData>, UserData>& graph, | ||
std::map<std::string, std::size_t>& node_index) { | ||
std::vector<bool> visited(graph.Size(), false); | ||
std::vector<User> merged_users; | ||
|
||
for (std::size_t i = 0; i < graph.Size(); ++i) | ||
if (!visited[i]) { | ||
std::set<std::string> names; | ||
std::set<std::string> emails; | ||
std::stack<std::size_t> stack; | ||
stack.push(i); | ||
visited[i] = true; | ||
|
||
while (!stack.empty()) { | ||
std::size_t u = stack.top(); | ||
stack.pop(); | ||
UserData node = graph[u]->data; | ||
|
||
if (node.type == Type::Name) | ||
names.insert(node.data); | ||
else | ||
emails.insert(node.data); | ||
|
||
for (const auto& neighbor : graph[u]->adjacent) { | ||
int v = node_index[neighbor->data.data]; | ||
if (!visited[v]) { | ||
visited[v] = true; | ||
stack.push(v); | ||
} | ||
} | ||
} | ||
// Take the first user as representative | ||
if (!names.empty()) merged_users.push_back(User(*names.cbegin(), emails)); | ||
} | ||
return merged_users; | ||
} | ||
|
||
std::vector<User> MergeUsers(std::vector<User>& users) { | ||
// Create a graph | ||
Graph<Vertex<UserData>, UserData> graph; | ||
|
||
// Create a map to store the index of each node (string to integer) | ||
std::map<std::string, std::size_t> node_index; | ||
|
||
// Add vertices to the graph | ||
for (const auto& user : users) { | ||
if (node_index.find(user.name) == node_index.end()) { | ||
node_index[user.name] = graph.Size(); | ||
graph.AddVertex(UserData(user.name, Type::Name)); | ||
} | ||
for (const std::string& email : user.emails) { | ||
if (node_index.find(email) == node_index.end()) { | ||
node_index[email] = graph.Size(); | ||
graph.AddVertex(UserData(email, Type::Email)); | ||
} | ||
graph.AddEdge(node_index[user.name], node_index[email]); | ||
} | ||
} | ||
std::vector<User> merged_users = DFSMergeUtil(graph, node_index); | ||
return merged_users; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#pragma once | ||
|
||
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <stack>
#include <string>
#include <utility>
#include <vector>
|
||
#include "graph.hpp" | ||
|
||
/// @brief Type of user data: tells whether a UserData string is a user name
///        or an email. One byte is enough for the two enumerators.
enum Type : std::uint8_t { Name, Email };
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. warning: enum 'Type' uses a larger base type ('unsigned int', size: 4 bytes) than necessary for its value set, consider using 'std::uint8_t' (1 byte) as the base type to reduce its size [performance-enum-size] enum Type { Name, Email };
^ |
||
|
||
/// @brief User data | ||
struct UserData { | ||
UserData(const std::string& data, Type type) : data(data), type(type) {} | ||
std::string data; | ||
Type type; | ||
}; | ||
|
||
/// @brief Stream a UserData as its bare data string (the type tag is not
///        printed)
inline std::ostream& operator<<(std::ostream& os, const UserData& u_data) {
  return os << u_data.data;
}
|
||
/// @brief User with a name and a set of emails
struct User {
  /// @param name : The user name
  /// @param emails : The user's emails
  /// Both arguments are taken by value and moved into the members, so a
  /// temporary set built from a braced list is not copied a second time.
  User(std::string name, std::set<std::string> emails)
      : name(std::move(name)), emails(std::move(emails)) {}
  std::string name{};
  std::set<std::string> emails{};  // set to avoid duplicates
};
|
||
inline std::ostream& operator<<(std::ostream& os, const User& u) { | ||
os << u.name << " -> "; | ||
for (const std::string& email : u.emails) os << email << " "; | ||
return os; | ||
} | ||
|
||
/// @brief Two users are equal when both their names and their email sets
///        match
inline bool operator==(const User& fu, const User& su) {
  if (fu.name != su.name) return false;
  return fu.emails == su.emails;
}
|
||
/// @brief Perform Depth-First Search (DFS) to find connected components | ||
/// @param graph : The graph to search | ||
/// @param node_index : Quick access from data to index | ||
/// @return Container of merged users | ||
std::vector<User> DFSMergeUtil(const Graph<Vertex<UserData>, UserData>& graph, | ||
std::map<std::string, std::size_t>& node_index); | ||
|
||
/// @brief Merge users with matching emails | ||
/// @param users : The list of users to merge | ||
/// @return Merged users | ||
std::vector<User> MergeUsers(std::vector<User>& users); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
#include <gtest/gtest.h> | ||
|
||
#include "merge_users.hpp" | ||
|
||
// Test case to check merging of users with overlapping emails
TEST(MergeUsersTest, OverlappingEmails) {
  std::vector<User> users = {
      User("user1", {"[email protected]", "[email protected]"}),
      User("user2", {"[email protected]", "[email protected]"}),
      User("user3", {"[email protected]"})};

  const std::vector<User> expected_merge = {
      User("user1",
           {"[email protected]", "[email protected]", "[email protected]"}),
      User("user3", {"[email protected]"})};

  const std::vector<User> merged_users = MergeUsers(users);

  // ASSERT (not EXPECT): on a size mismatch the test must stop here, otherwise
  // the loop below could index past the end of expected_merge.
  ASSERT_EQ(merged_users.size(), expected_merge.size());
  for (size_t i = 0; i < merged_users.size(); ++i)
    EXPECT_EQ(merged_users[i], expected_merge[i]);
}
|
||
// Test case to check merging of users with no overlapping emails
TEST(MergeUsersTest, NoOverlappingEmails) {
  std::vector<User> users = {User("user1", {"[email protected]"}),
                             User("user2", {"[email protected]"}),
                             User("user3", {"[email protected]"})};

  // The expected result is a copy of the input, taken before the call.
  const std::vector<User> expected_merge = users;

  const std::vector<User> merged_users = MergeUsers(users);

  // ASSERT (not EXPECT): on a size mismatch the test must stop here, otherwise
  // the loop below could index past the end of expected_merge.
  ASSERT_EQ(merged_users.size(), expected_merge.size());
  for (size_t i = 0; i < merged_users.size(); ++i)
    EXPECT_EQ(merged_users[i], expected_merge[i]);
}
|
||
// Test case to check merging of users with multiple names and emails
TEST(MergeUsersTest, MultipleNamesAndEmails) {
  std::vector<User> users = {
      User("user1", {"[email protected]"}),
      User("user2", {"[email protected]", "[email protected]"}),
      User("user3", {"[email protected]"})};

  const std::vector<User> expected_merge = {
      User("user1", {"[email protected]", "[email protected]"}),
      User("user3", {"[email protected]"})};

  const std::vector<User> merged_users = MergeUsers(users);

  // ASSERT (not EXPECT): on a size mismatch the test must stop here, otherwise
  // the loop below could index past the end of expected_merge.
  ASSERT_EQ(merged_users.size(), expected_merge.size());
  for (size_t i = 0; i < merged_users.size(); ++i)
    EXPECT_EQ(merged_users[i], expected_merge[i]);
}
|
||
// Test case to check merging when there are no users
TEST(MergeUsersTest, NoUsers) {
  std::vector<User> users = {};

  const std::vector<User> expected_merge = {};

  const std::vector<User> merged_users = MergeUsers(users);

  // ASSERT (not EXPECT): on a size mismatch the test must stop here, otherwise
  // the loop below could index past the end of expected_merge.
  ASSERT_EQ(merged_users.size(), expected_merge.size());
  for (size_t i = 0; i < merged_users.size(); ++i)
    EXPECT_EQ(merged_users[i], expected_merge[i]);
}
|
||
// Test case to check merging of users with multiple overlapping emails and
// names
TEST(MergeUsersTest, ComplexOverlappingNamesAndEmails) {
  std::vector<User> users = {
      User("user1", {"[email protected]", "[email protected]"}),
      User("user2", {"[email protected]", "[email protected]"}),
      User("user3", {"[email protected]"}),
      User("user4", {"[email protected]", "[email protected]"}),
      User("user5", {"[email protected]"})};

  const std::vector<User> expected_merge = {
      User("user1", {"[email protected]", "[email protected]",
                     "[email protected]", "[email protected]"}),
      User("user3", {"[email protected]"})};

  const std::vector<User> merged_users = MergeUsers(users);

  // ASSERT (not EXPECT): on a size mismatch the test must stop here, otherwise
  // the loop below could index past the end of expected_merge.
  ASSERT_EQ(merged_users.size(), expected_merge.size());
  for (size_t i = 0; i < merged_users.size(); ++i)
    EXPECT_EQ(merged_users[i], expected_merge[i]);
}
|
||
// Test case to check merging with users having nested relationships
TEST(MergeUsersTest, NestedRelationships) {
  std::vector<User> users = {
      User("alice", {"[email protected]", "[email protected]"}),
      User("bob", {"[email protected]", "[email protected]"}),
      User("charlie", {"[email protected]", "[email protected]"}),
      User("dave", {"[email protected]"}), User("eve", {"[email protected]"})};

  const std::vector<User> expected_merge = {
      User("alice", {"[email protected]", "[email protected]",
                     "[email protected]", "[email protected]"}),
      User("eve", {"[email protected]"})};

  const std::vector<User> merged_users = MergeUsers(users);

  // ASSERT (not EXPECT): on a size mismatch the test must stop here, otherwise
  // the loop below could index past the end of expected_merge.
  ASSERT_EQ(merged_users.size(), expected_merge.size());
  for (size_t i = 0; i < merged_users.size(); ++i)
    EXPECT_EQ(merged_users[i], expected_merge[i]);
}
|
||
// Test case to check merging when users have circular references
TEST(MergeUsersTest, CircularReferences) {
  std::vector<User> users = {
      User("userA", {"[email protected]", "[email protected]"}),
      User("userB", {"[email protected]", "[email protected]"}),
      User("userC", {"[email protected]", "[email protected]"})};

  const std::vector<User> expected_merge = {
      User("userA",
           {"[email protected]", "[email protected]", "[email protected]"})};

  const std::vector<User> merged_users = MergeUsers(users);

  // ASSERT (not EXPECT): on a size mismatch the test must stop here, otherwise
  // the loop below could index past the end of expected_merge.
  ASSERT_EQ(merged_users.size(), expected_merge.size());
  for (size_t i = 0; i < merged_users.size(); ++i)
    EXPECT_EQ(merged_users[i], expected_merge[i]);
}
|
||
// Test case to check merging when users have different names but share all
// emails
TEST(MergeUsersTest, DifferentNamesSameEmails) {
  std::vector<User> users = {User("user1", {"[email protected]"}),
                             User("user2", {"[email protected]"}),
                             User("user3", {"[email protected]"})};

  const std::vector<User> expected_merge = {User("user1", {"[email protected]"})};

  const std::vector<User> merged_users = MergeUsers(users);

  // ASSERT (not EXPECT): on a size mismatch the test must stop here, otherwise
  // the loop below could index past the end of expected_merge.
  ASSERT_EQ(merged_users.size(), expected_merge.size());
  for (size_t i = 0; i < merged_users.size(); ++i)
    EXPECT_EQ(merged_users[i], expected_merge[i]);
}
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: 'graph.hpp' file not found [clang-diagnostic-error]