Hash Set
A hash set is a data structure that implements the set abstract data type, storing unique elements. It uses a hash function to compute an index into an array of buckets or slots, from which the desired element can be found. Because an insertion first checks its target bucket, duplicates are rejected automatically.
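The index computation at the heart of this design can be sketched in two lines. The sketch below uses Python's built-in hash() purely for illustration; the implementation itself uses pluggable hash functions, covered later:
# Illustrative only: reduce an element's hash to a bucket index
def bucket_index(element, num_buckets: int) -> int:
    return hash(element) % num_buckets
print(bucket_index("apple", 16))  # an index in range(16); only that bucket is searched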
Features
- Unique element storage - automatically prevents duplicate elements
- Fast operations - O(1) average time for insert, delete, and lookup
- Automatic resizing - maintains performance by resizing when load factor is exceeded
- Collision resolution - uses separate chaining to handle hash collisions
- Pluggable hash functions - supports custom hash functions for different use cases
- Set operations - supports union, intersection, difference, and symmetric difference
- Generic type support - type-safe elements
Time Complexities
| Operation | Average Case | Worst Case |
|---|---|---|
| Add | O(1) | O(n) |
| Remove | O(1) | O(n) |
| Contains | O(1) | O(n) |
| Union | O(m + n) | O(m + n) |
| Intersection | O(min(m, n)) | O(m * n) |
| Difference | O(m) | O(m * n) |
| Symmetric Difference | O(m + n) | O(m * n) |
| Iteration | O(n) | O(n) |
Note: The worst case occurs when all elements hash to the same bucket; m and n are the sizes of the two sets.
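The worst case can be provoked deliberately with a degenerate hash function. A minimal sketch, assuming (as the hash-function examples later suggest) that any object exposing a hash(item) -> int method can be passed as hash_function:
class ConstantHash:
    # Degenerate hash: every element lands in bucket 0
    def hash(self, item) -> int:
        return 0
worst = HashSet(initial_capacity=8, hash_function=ConstantHash())
for i in range(100):
    worst.add(f"item{i}")
stats = worst.get_hash_statistics()
print(stats['max_chain_length'])  # ~100: one long chain, so lookups degrade to O(n)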
Basic Usage
Creating and Initializing
from algo.data_structs.hash_tables.hash_set import HashSet
# Create empty hash set with default settings
hs = HashSet()
# Check if empty
print(hs.is_empty()) # True
print(len(hs)) # 0
print(hs.size()) # 0
print(hs.capacity()) # 16 (default)
print(hs.load_factor()) # 0.0
# Create with custom parameters
hs = HashSet(initial_capacity=32, load_factor=0.5)
print(hs.capacity()) # 32
Adding Elements
# Add individual elements
result1 = hs.add("apple")
print(result1) # True (new element added)
result2 = hs.add("banana")
print(result2) # True (new element added)
result3 = hs.add("apple") # Duplicate
print(result3) # False (element already exists)
print(hs.size()) # 2 (duplicates not counted)
# Add multiple elements
elements = ["cherry", "date", "elderberry"]
for element in elements:
hs.add(element)
print(hs.size()) # 5
Checking for Elements
# Check if elements exist
print(hs.contains("apple")) # True
print(hs.contains("grape")) # False
# Using 'in' operator
print("banana" in hs) # True
print("grape" in hs) # False
# Check set properties
print(f"Size: {hs.size()}")
print(f"Is empty: {hs.is_empty()}")
print(f"Capacity: {hs.capacity()}")
print(f"Load Factor: {hs.load_factor():.2f}")
Removing Elements
# Remove specific element
removed = hs.remove("banana")
print(removed) # True (element was removed)
print(hs.size()) # 4
# Try to remove non-existent element
removed = hs.remove("grape")
print(removed) # False (element not found)
# Discard element (no error if not present)
hs.discard("grape") # No error, even if element doesn't exist
hs.discard("apple") # Removes apple if present
print(hs.size()) # 3
# Pop arbitrary element
if not hs.is_empty():
popped = hs.pop()
print(f"Popped: {popped}")
print(f"Size after pop: {hs.size()}")
# Clear all elements
hs.clear()
print(hs.size()) # 0
print(hs.is_empty()) # True
Set Operations
Union (Combining Sets)
# Create two sets
fruits = HashSet()
fruits.add("apple")
fruits.add("banana")
fruits.add("cherry")
citrus = HashSet()
citrus.add("orange")
citrus.add("lemon")
citrus.add("lime")
# Union - elements from both sets
all_fruits = fruits.union(citrus)
print(all_fruits.size()) # 6
print("apple" in all_fruits) # True
print("orange" in all_fruits) # True
# Using | operator
all_fruits2 = fruits | citrus
print(all_fruits == all_fruits2) # True
# In-place union
fruits |= citrus
print(fruits.size()) # 6 (fruits now contains all elements)
Intersection (Common Elements)
# Create overlapping sets
set1 = HashSet()
set1.add("red")
set1.add("green")
set1.add("blue")
set2 = HashSet()
set2.add("blue")
set2.add("yellow")
set2.add("green")
# Intersection - common elements only
common = set1.intersection(set2)
print(common.size()) # 2
print("green" in common) # True
print("blue" in common) # True
print("red" in common) # False
# Using & operator
common2 = set1 & set2
print(common == common2) # True
# In-place intersection
set1 &= set2
print(set1.size()) # 2 (set1 now contains only common elements)
Difference (Elements in One Set but Not Another)
# Create sets with some overlap
animals = HashSet()
animals.add("cat")
animals.add("dog")
animals.add("bird")
animals.add("fish")
pets = HashSet()
pets.add("cat")
pets.add("dog")
pets.add("hamster")
# Difference - elements in animals but not in pets
wild_animals = animals.difference(pets)
print(wild_animals.size()) # 2
print("bird" in wild_animals) # True
print("fish" in wild_animals) # True
print("cat" in wild_animals) # False
# Using - operator
wild_animals2 = animals - pets
print(wild_animals == wild_animals2) # True
# In-place difference
animals -= pets
print(animals.size()) # 2 (animals now excludes pets)
Symmetric Difference (Elements in Either Set but Not Both)
# Create two sets with some overlap
tech_skills = HashSet()
tech_skills.add("Python")
tech_skills.add("Java")
tech_skills.add("SQL")
job_requirements = HashSet()
job_requirements.add("Java")
job_requirements.add("JavaScript")
job_requirements.add("Docker")
# Symmetric difference - elements in either set but not both
unique_skills = tech_skills.symmetric_difference(job_requirements)
print(unique_skills.size()) # 4
print("Python" in unique_skills) # True (only in tech_skills)
print("JavaScript" in unique_skills) # True (only in job_requirements)
print("Docker" in unique_skills) # True (only in job_requirements)
print("SQL" in unique_skills) # True (only in tech_skills)
print("Java" in unique_skills) # False (in both sets)
# Using ^ operator
unique_skills2 = tech_skills ^ job_requirements
print(unique_skills == unique_skills2) # True
# In-place symmetric difference
tech_skills ^= job_requirements
print(tech_skills.size()) # 4
Set Relationship Testing
Subset and Superset
# Create nested sets
small_set = HashSet()
small_set.add("a")
small_set.add("b")
large_set = HashSet()
large_set.add("a")
large_set.add("b")
large_set.add("c")
large_set.add("d")
# Subset testing
print(small_set.is_subset(large_set)) # True
print(large_set.is_subset(small_set)) # False
# Using <= operator
print(small_set <= large_set) # True
print(small_set < large_set) # True (proper subset)
# Superset testing
print(large_set.is_superset(small_set)) # True
print(small_set.is_superset(large_set)) # False
# Using >= operator
print(large_set >= small_set) # True
print(large_set > small_set) # True (proper superset)
Disjoint Sets
# Create non-overlapping sets
vowels = HashSet()
vowels.add("a")
vowels.add("e")
vowels.add("i")
consonants = HashSet()
consonants.add("b")
consonants.add("c")
consonants.add("d")
# Test if sets have no common elements
print(vowels.is_disjoint(consonants)) # True
# Add overlapping element
consonants.add("a")
print(vowels.is_disjoint(consonants)) # False
Iteration and Traversal
colors = HashSet()
colors.add("red")
colors.add("green")
colors.add("blue")
colors.add("yellow")
# Iterate over elements
print("Colors:")
for color in colors:
print(color) # red, green, blue, yellow (order not guaranteed)
# Convert to list
colors_list = list(colors)
print(colors_list) # e.g. ['red', 'green', 'blue', 'yellow'] (order not guaranteed)
# Check if iteration works on empty set
empty_set = HashSet()
empty_list = list(empty_set)
print(empty_list) # []
Working with Different Types
String Elements
words = HashSet()
words.add("hello")
words.add("world")
words.add("python")
words.add("hello") # Duplicate ignored
print(words.size()) # 3
print("python" in words) # True
Numeric Elements
numbers = HashSet()
numbers.add(1)
numbers.add(2)
numbers.add(42)
numbers.add(3.14)
print(numbers.contains(1)) # True
print(numbers.contains(42)) # True
print(numbers.size()) # 4
Mixed Types
mixed = HashSet()
mixed.add("string")
mixed.add(42)
mixed.add((1, 2)) # Tuples are hashable
mixed.add(True)
print(mixed.contains("string")) # True
print(mixed.contains(42)) # True
print(mixed.contains((1, 2))) # True
print(mixed.size()) # 4
Tuple Elements
coordinates = HashSet()
coordinates.add((0, 0))
coordinates.add((1, 1))
coordinates.add((2, 3))
coordinates.add((0, 0)) # Duplicate ignored
print(coordinates.size()) # 3
print((1, 1) in coordinates) # True
print((5, 5) in coordinates) # False
Custom Objects as Elements
class Person:
def __init__(self, name: str, age: int):
self.name = name
self.age = age
def __eq__(self, other):
return isinstance(other, Person) and self.name == other.name
def __hash__(self):
return hash(self.name)
def __str__(self):
return f"Person({self.name}, {self.age})"
people = HashSet()
alice = Person("Alice", 30)
bob = Person("Bob", 25)
alice2 = Person("Alice", 31) # Same name, different age
people.add(alice)
people.add(bob)
people.add(alice2) # Won't be added (same name as alice)
print(people.size()) # 2
print(alice in people) # True
print(alice2 in people) # True (considered equal to alice)
Hash Set Resizing and Performance
Automatic Resizing
# Create small hash set to demonstrate resizing
hs = HashSet(initial_capacity=4, load_factor=0.5)
print(f"Initial capacity: {hs.capacity()}") # 4
# Add elements up to threshold (4 * 0.5 = 2)
hs.add("element1")
hs.add("element2")
print(f"Before resize - Capacity: {hs.capacity()}, Load Factor: {hs.load_factor():.2f}")
# Adding one more triggers resize
hs.add("element3")
print(f"After resize - Capacity: {hs.capacity()}, Load Factor: {hs.load_factor():.2f}")
# Verify all data is still accessible
for i in range(1, 4):
    key = f"element{i}"
    print(f"{key} in set: {key in hs}")
Load Factor Management
hs = HashSet(initial_capacity=8, load_factor=0.75)
print(f"Empty - Load Factor: {hs.load_factor()}") # 0.0
for i in range(6):
hs.add(f"item{i}")
print(f"After {i+1} items - Load Factor: {hs.load_factor():.2f}")
# Load factor stays below 0.75 due to automatic resizing
Hash Functions and Collision Handling
Using Custom Hash Functions
from algo.algorithms.hashing.hash_functions import FNVHash, DJB2Hash, MurmurHash3
# Create hash set with FNV hash function
fnv_hs = HashSet(hash_function=FNVHash())
fnv_hs.add("test")
print(f"Hash function: {type(fnv_hs.get_hash_function()).__name__}")
# Switch hash functions (rehashes all data)
djb2_hash = DJB2Hash()
fnv_hs.set_hash_function(djb2_hash)
print(f"New hash function: {type(fnv_hs.get_hash_function()).__name__}")
print(f"Data still accessible: {'test' in fnv_hs}") # True
# Create with MurmurHash3
murmur_hs = HashSet(hash_function=MurmurHash3(seed=42))
Analyzing Hash Distribution
hs = HashSet(initial_capacity=8, load_factor=0.9)
# Add data to potentially cause collisions
for i in range(15):
hs.add(f"element_{i:02d}")
# Get bucket distribution
distribution = hs.get_bucket_distribution()
print(f"Bucket distribution: {distribution}")
print(f"Total items: {sum(distribution)}")
# Get detailed statistics
stats = hs.get_hash_statistics()
print(f"Collisions: {stats['collisions']}")
print(f"Max chain length: {stats['max_chain_length']}")
print(f"Average chain length: {stats['average_chain_length']:.2f}")
print(f"Load factor: {stats['load_factor']:.2f}")
Error Handling
hs = HashSet()
# Safe operations that don't raise errors
hs.discard("nonexistent") # No error
result = hs.remove("nonexistent") # Returns False
print(result) # False
# Operations that can raise errors
try:
hs.pop() # KeyError on empty set
except KeyError:
print("Cannot pop from empty set")
# Invalid initialization parameters
try:
invalid_hs = HashSet(initial_capacity=0)
except ValueError as e:
print(f"Invalid capacity: {e}")
try:
invalid_hs = HashSet(load_factor=1.5)
except ValueError as e:
print(f"Invalid load factor: {e}")
Copying and Equality
Copying Hash Sets
original = HashSet()
original.add("a")
original.add("b")
original.add("c")
# Create a copy
copy_set = original.copy()
# Verify they have same elements
print(copy_set.size() == original.size()) # True
for element in original:
print(element in copy_set) # True for all elements
# Verify they're independent
original.add("d")
print("d" in copy_set) # False (copy is independent)
Equality Comparison
# Hash sets are equal if they contain same elements
hs1 = HashSet()
hs1.add("apple")
hs1.add("banana")
hs2 = HashSet()
hs2.add("banana") # Different insertion order
hs2.add("apple")
print(hs1 == hs2) # True (same content)
# Different content
hs3 = HashSet()
hs3.add("apple")
hs3.add("cherry") # Different element
print(hs1 == hs3) # False
# Different sizes
hs4 = HashSet()
hs4.add("apple")
print(hs1 == hs4) # False (different sizes)
# Comparison with non-HashSet objects
print(hs1 == {"apple", "banana"}) # False (different types)
String Representation
hs = HashSet()
hs.add("apple")
hs.add("banana")
hs.add("cherry")
# String representation (like Python set)
print(str(hs)) # {apple, banana, cherry}
# Detailed representation for debugging
print(repr(hs)) # HashSet(size=3, capacity=16, load_factor=0.19, hash_func=FNVHash)
# Empty hash set representation
empty_hs = HashSet()
print(str(empty_hs)) # {}
Common Patterns
Removing Duplicates from Collections
# Remove duplicates from a list
items = ["apple", "banana", "apple", "cherry", "banana", "date"]
unique_items = HashSet()
for item in items:
unique_items.add(item)
unique_list = list(unique_items)
print(unique_list) # ['apple', 'banana', 'cherry', 'date'] (order may vary)
Finding Common Elements
def find_common_elements(list1, list2):
set1 = HashSet()
set2 = HashSet()
for item in list1:
set1.add(item)
for item in list2:
set2.add(item)
return set1.intersection(set2)
# Usage
users_2023 = ["alice", "bob", "charlie", "diana"]
users_2024 = ["bob", "diana", "eve", "frank"]
common_users = find_common_elements(users_2023, users_2024)
print(list(common_users)) # ['bob', 'diana'] (order may vary)
Set-based Filtering
# Filter items based on membership in another set
all_products = ["laptop", "mouse", "keyboard", "monitor", "printer"]
available_products = HashSet()
available_products.add("laptop")
available_products.add("keyboard")
available_products.add("monitor")
# Get only available products
filtered_products = []
for product in all_products:
if product in available_products:
filtered_products.append(product)
print(filtered_products) # ['laptop', 'keyboard', 'monitor']
Membership Testing for Performance
# Use hash set for fast membership testing
large_dataset = list(range(10000))
search_items = [500, 1500, 2500, 3500, 15000] # Some exist, some don't
# Convert to hash set for O(1) lookup
dataset_set = HashSet()
for item in large_dataset:
dataset_set.add(item)
# Fast membership testing
found_items = []
for item in search_items:
if item in dataset_set:
found_items.append(item)
print(found_items) # [500, 1500, 2500, 3500]
Tag and Category Management
# Manage tags for content
class Article:
def __init__(self, title: str):
self.title = title
self.tags = HashSet()
def add_tag(self, tag: str):
self.tags.add(tag)
def remove_tag(self, tag: str):
self.tags.remove(tag)
def has_tag(self, tag: str) -> bool:
return tag in self.tags
def get_common_tags(self, other: 'Article') -> HashSet:
return self.tags.intersection(other.tags)
# Usage
article1 = Article("Python Basics")
article1.add_tag("python")
article1.add_tag("programming")
article1.add_tag("tutorial")
article2 = Article("Java Programming")
article2.add_tag("java")
article2.add_tag("programming")
article2.add_tag("oop")
common_tags = article1.get_common_tags(article2)
print(list(common_tags)) # ['programming']
User Permissions and Roles
# Role-based access control
class User:
def __init__(self, username: str):
self.username = username
self.permissions = HashSet()
def grant_permission(self, permission: str):
self.permissions.add(permission)
def revoke_permission(self, permission: str):
self.permissions.remove(permission)
def has_permission(self, permission: str) -> bool:
return permission in self.permissions
def has_all_permissions(self, required_permissions: HashSet) -> bool:
return required_permissions.is_subset(self.permissions)
# Usage
user = User("alice")
user.grant_permission("read")
user.grant_permission("write")
user.grant_permission("delete")
required_for_admin = HashSet()
required_for_admin.add("read")
required_for_admin.add("write")
required_for_admin.add("delete")
required_for_admin.add("admin")
print(user.has_all_permissions(required_for_admin)) # False (missing 'admin')
Use Cases
When to Use Hash Sets
Good for:
- Storing unique elements only
- Fast membership testing
- Set operations (union, intersection, difference)
- Removing duplicates from data
- Tag and category systems
- Permission and role management
- Caching unique identifiers
Examples:
# Unique visitor tracking
unique_visitors = HashSet()
unique_visitors.add("user123")
unique_visitors.add("user456")
print(f"Total unique visitors: {unique_visitors.size()}")
# Feature flags
enabled_features = HashSet()
enabled_features.add("dark_mode")
enabled_features.add("notifications")
if "dark_mode" in enabled_features:
print("Dark mode is enabled")
# Email subscription lists
subscribers = HashSet()
subscribers.add("alice@example.com")
subscribers.add("bob@example.com")
# Inventory tracking
in_stock_items = HashSet()
in_stock_items.add("ITEM001")
in_stock_items.add("ITEM002")
When NOT to Use Hash Sets
Avoid for:
- When you need ordered elements (use ordered structures)
- When you need to store duplicates
- When you need indexed access
- When memory usage is critical and you have many small sets
Better alternatives:
# For ordered unique elements, use OrderedDict keys or maintain separate order
from collections import OrderedDict
ordered_unique = list(OrderedDict.fromkeys(items))
# For duplicates allowed, use regular list
duplicates_allowed = ["item1", "item2", "item1"] # Keep all
# For indexed access, use list or array
indexed_data = ["item1", "item2", "item3"]
print(indexed_data[1]) # Direct index access
Performance Characteristics
Time Complexity Analysis
- Average case performance: O(1) for basic operations
- Worst case performance: O(n) when all elements hash to same bucket
- Space complexity: O(n) where n is number of elements
- Resizing cost: O(n) but amortized to O(1) per operation
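A quick sanity check of the amortized claim: with capacity doubling, the total rehashing work over n inserts stays below 2n element moves. The simulation below assumes the default capacity of 16 and load factor of 0.75:
# Count how many elements get rehashed across all resizes for n inserts
def total_rehash_work(n: int, capacity: int = 16, load_factor: float = 0.75) -> int:
    work = 0
    size = 0
    for _ in range(n):
        size += 1
        if size / capacity > load_factor:
            work += size      # every element moves into the new, larger table
            capacity *= 2
    return work
n = 100_000
print(total_rehash_work(n) / n)  # ~1.97: fewer than 2 rehash moves per insert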
Memory Efficiency
# Hash sets use additional memory for:
# - Bucket array
# - Hash table metadata
# - Chain nodes for collision resolution
hs = HashSet(initial_capacity=16)
print(f"Initial capacity: {hs.capacity()}") # 16 buckets allocated
# Memory grows with load factor
for i in range(100):
hs.add(f"element{i}")
print(f"Final capacity: {hs.capacity()}") # Expanded as needed
print(f"Load factor: {hs.load_factor():.2f}") # Maintained below threshold
Collision Resolution Performance
# Separate chaining performance depends on chain length
hs = HashSet(initial_capacity=4, load_factor=0.9) # Force collisions
# Add many items to small hash set
for i in range(20):
hs.add(f"item{i}")
stats = hs.get_hash_statistics()
print(f"Max chain length: {stats['max_chain_length']}")
print(f"Average chain length: {stats['average_chain_length']:.2f}")
print(f"Collision rate: {stats['collisions'] / stats['total_elements']:.2f}")
Advanced Usage
Custom Hash Function Performance Comparison
from algo.algorithms.hashing.hash_functions import (
FNVHash, DJB2Hash, MurmurHash3, PolynomialHash
)
# Test different hash functions with same data
test_elements = [f"element_{i:03d}" for i in range(100)]
hash_functions = [
("FNV", FNVHash()),
("DJB2", DJB2Hash()),
("MurmurHash3", MurmurHash3()),
("Polynomial", PolynomialHash())
]
for name, hash_func in hash_functions:
hs = HashSet(initial_capacity=32, hash_function=hash_func)
# Add test data
for element in test_elements:
hs.add(element)
# Analyze distribution
stats = hs.get_hash_statistics()
print(f"{name} Hash Function:")
print(f" Collisions: {stats['collisions']}")
print(f" Max chain length: {stats['max_chain_length']}")
print(f" Distribution variance: {stats['distribution_variance']:.2f}")
print()
Hash Set as Building Block
# Implement a simple bloom filter using multiple hash sets
class SimpleBloomFilter:
def __init__(self, capacity: int = 1000):
self.sets = [
HashSet(initial_capacity=capacity, hash_function=FNVHash()),
HashSet(initial_capacity=capacity, hash_function=DJB2Hash()),
HashSet(initial_capacity=capacity, hash_function=MurmurHash3())
]
def add(self, item):
for hash_set in self.sets:
# Use hash function to create different "hashes"
hash_val = hash_set.get_hash_function().hash(item)
hash_set.add(hash_val % 1000) # Store hash values
def might_contain(self, item) -> bool:
for hash_set in self.sets:
hash_val = hash_set.get_hash_function().hash(item)
if (hash_val % 1000) not in hash_set:
return False # Definitely not in set
return True # Might be in set
# Usage
bloom = SimpleBloomFilter()
bloom.add("test_item")
print(bloom.might_contain("test_item")) # True (probably)
print(bloom.might_contain("other_item")) # False (definitely not)
Graph Algorithms with Hash Sets
# Represent graph adjacency and implement DFS
class Graph:
def __init__(self):
self.adjacency = {} # vertex -> HashSet of neighbors
def add_vertex(self, vertex):
if vertex not in self.adjacency:
self.adjacency[vertex] = HashSet()
def add_edge(self, v1, v2):
self.add_vertex(v1)
self.add_vertex(v2)
self.adjacency[v1].add(v2)
self.adjacency[v2].add(v1) # Undirected graph
def dfs(self, start):
visited = HashSet()
result = []
def dfs_helper(vertex):
if vertex in visited:
return
visited.add(vertex)
result.append(vertex)
for neighbor in self.adjacency.get(vertex, HashSet()):
dfs_helper(neighbor)
dfs_helper(start)
return result
# Usage
graph = Graph()
graph.add_edge("A", "B")
graph.add_edge("B", "C")
graph.add_edge("C", "D")
graph.add_edge("A", "D")
traversal = graph.dfs("A")
print(traversal) # ['A', 'B', 'C', 'D'] (or similar order)
Performance Comparison
| Operation | HashSet | Python set | List | Binary Search Tree |
|---|---|---|---|---|
| Add | O(1) avg | O(1) avg | O(1) | O(log n) |
| Remove | O(1) avg | O(1) avg | O(n) | O(log n) |
| Contains | O(1) avg | O(1) avg | O(n) | O(log n) |
| Union | O(m + n) | O(m + n) | O(m * n) | O(m + n) |
| Intersection | O(min(m,n)) | O(min(m,n)) | O(m * n) | O(m + n) |
| Ordered Iteration | No | No | Yes | Yes |
| Memory Overhead | Medium | Low | Low | High |
| Duplicates | No | No | Yes | No |
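To make the table concrete, here is a rough membership-test timing sketch; absolute numbers are machine-dependent, and HashSet is assumed importable as in the earlier examples:
import timeit
data = list(range(10_000))
as_set = set(data)
hs = HashSet()
for x in data:
    hs.add(x)
probe = 9_999  # worst position for the list scan
print(timeit.timeit(lambda: probe in data, number=1_000))    # O(n) linear scan
print(timeit.timeit(lambda: probe in as_set, number=1_000))  # O(1) built-in set
print(timeit.timeit(lambda: probe in hs, number=1_000))      # O(1) average, chained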
API Reference
Hash Set Implementation
A hash set is a data structure that implements the set abstract data type, storing unique elements. It uses a hash function to compute an index into an array of buckets or slots, from which the desired element can be found.
Key Features:
- Unique element storage
- Fast insertion, deletion, and lookup operations
- Automatic resizing to maintain load factor
- Collision resolution via separate chaining
- Support for any hashable element type
- Pluggable hash functions for different use cases
Time Complexities (average case):
- Insert: O(1)
- Delete: O(1)
- Search: O(1)
- Worst case: O(n) when all elements hash to same bucket
Space Complexity: O(n)
HashSet(initial_capacity=16, load_factor=0.75, hash_function=None)
Bases: Generic[T]
Hash Set implementation using separate chaining for collision resolution.
This implementation automatically resizes when the load factor exceeds a threshold to maintain good performance characteristics. It supports pluggable hash functions for different use cases and performance requirements.
Initialize hash set.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| initial_capacity | int | Initial number of buckets (should be a power of 2) | 16 |
| load_factor | float | Maximum ratio of size to capacity before resizing | 0.75 |
| hash_function | Optional[HashFunction] | Custom hash function to use (defaults to FNV hash) | None |
__and__(other)
Intersection operator (&).
__contains__(element)
Check if element exists (in operator support).
__eq__(other)
Check equality with another hash set.
__ge__(other)
Superset operator (>=).
__gt__(other)
Proper superset operator (>).
__iand__(other)
In-place intersection operator (&=).
__ior__(other)
In-place union operator (|=).
__isub__(other)
In-place difference operator (-=).
__iter__()
Return iterator over elements.
__ixor__(other)
In-place symmetric difference operator (^=).
__le__(other)
Subset operator (<=).
__len__()
Return number of elements.
__lt__(other)
Proper subset operator (<).
__ne__(other)
Check inequality with another hash set.
__or__(other)
Union operator (|).
__repr__()
Return detailed string representation for debugging.
__str__()
Return string representation.
__sub__(other)
Difference operator (-).
__xor__(other)
Symmetric difference operator (^).
add(element)
Add an element to the hash set.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| element | T | Element to add | required |
Returns:
| Type | Description |
|---|---|
| bool | True if element was added (wasn't already present), False otherwise |
capacity()
Return current capacity (number of buckets).
clear()
Remove all elements from hash set.
contains(element)
Check if element exists in hash set.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| element | T | Element to check | required |
Returns:
| Type | Description |
|---|---|
| bool | True if element exists, False otherwise |
copy()
Return a shallow copy of the hash set.
Returns:
| Type | Description |
|---|---|
| HashSet[T] | New hash set with same elements |
difference(other)
Return a new hash set containing the elements of this set that are not in other.
difference_update(other)
Update hash set, removing elements found in other.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| other | HashSet[T] | Other hash set to subtract | required |
discard(element)
Remove an element from the hash set if present.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| element | T | Element to remove | required |
get_bucket_distribution()
Get distribution of elements across buckets (for debugging/analysis).
Returns:
| Type | Description |
|---|---|
| List[int] | List where index i contains count of elements in bucket i |
get_hash_function()
Get the current hash function.
Returns:
| Type | Description |
|---|---|
| HashFunction | Current hash function instance |
get_hash_statistics()
Get detailed statistics about hash function performance.
Returns:
| Type | Description |
|---|---|
| dict | Dictionary with hash distribution and collision statistics |
intersection(other)
Return a new hash set containing the elements common to this set and other.
intersection_update(other)
Update hash set, keeping only elements found in both sets.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| other | HashSet[T] | Other hash set to intersect with | required |
is_disjoint(other)
Check if this set has no elements in common with other.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| other | HashSet[T] | Other hash set to check against | required |
Returns:
| Type | Description |
|---|---|
| bool | True if sets are disjoint |
is_empty()
Check if hash set is empty.
is_subset(other)
Check if this set is a subset of other.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| other | HashSet[T] | Other hash set to check against | required |
Returns:
| Type | Description |
|---|---|
| bool | True if this set is a subset of other |
is_superset(other)
Check if this set is a superset of other.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| other | HashSet[T] | Other hash set to check against | required |
Returns:
| Type | Description |
|---|---|
| bool | True if this set is a superset of other |
load_factor()
Return current load factor.
pop()
Remove and return an arbitrary element from the set.
Returns:
| Type | Description |
|---|---|
| T | An element from the set |
Raises:
| Type | Description |
|---|---|
| KeyError | If set is empty |
remove(element)
Remove an element from the hash set.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| element | T | Element to remove | required |
Returns:
| Type | Description |
|---|---|
| bool | True if element was removed, False if not found |
set_hash_function(hash_function)
Change the hash function and rehash all existing elements.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| hash_function | HashFunction | New hash function to use | required |
size()
Return number of elements.
symmetric_difference(other)
Return a new hash set containing the elements found in either set but not both.
symmetric_difference_update(other)
Update hash set, keeping only elements found in either set but not both.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| other | HashSet[T] | Other hash set to compute symmetric difference with | required |
union(other)
Return a new hash set containing the elements of both sets.
update(other)
Update hash set with elements from another hash set.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| other | HashSet[T] | Other hash set to update from | required |
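The *_update methods mirror the in-place operators shown earlier (|=, &=, -=, ^=): they mutate the receiver instead of returning a new set. A short sketch:
a = HashSet()
for x in ("a", "b", "c"):
    a.add(x)
b = HashSet()
for x in ("b", "c", "d"):
    b.add(x)
a.update(b)               # same effect as a |= b
print(a.size())           # 4
a.intersection_update(b)  # same effect as a &= b
print(sorted(a))          # ['b', 'c', 'd']
a.difference_update(b)    # same effect as a -= b
print(a.size())           # 0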
HashSetNode(element)
Bases: Generic[T]
Node class for hash set entries using separate chaining.
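A separate-chaining node reduces to the stored element plus a link to the next node in the same bucket. A minimal sketch; the real HashSetNode's exact fields are an assumption here:
from typing import Generic, Optional, TypeVar
T = TypeVar("T")
class ChainNode(Generic[T]):
    def __init__(self, element: T):
        self.element = element                      # the stored element
        self.next: Optional["ChainNode[T]"] = None  # next node in this bucket's chain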