Hash Map

A hash map (also known as hash table or dictionary) is a data structure that implements an associative array abstract data type, mapping keys to values. It uses a hash function to compute an index into an array of buckets or slots, from which the desired value can be found.

Features

Key-value pair storage - maps keys to values efficiently
Fast operations - O(1) average time for insert, delete, and lookup
Automatic resizing - maintains performance by resizing when load factor is exceeded
Collision resolution - uses separate chaining to handle hash collisions
Pluggable hash functions - supports custom hash functions for different use cases
Generic type support - type-safe keys and values

Time Complexities

Operation	Average Case	Worst Case
Insert (put)	O(1)	O(n)
Search (get)	O(1)	O(n)
Delete (remove)	O(1)	O(n)
Contains Key	O(1)	O(n)
Contains Value	O(n)	O(n)
Iteration	O(n)	O(n)

Note: Worst case occurs when all keys hash to the same bucket

Basic Usage

Creating and Initializing

from algo.data_structs.hash_tables.hash_map import HashMap

# Create empty hash map with default settings
hm = HashMap()

# Check if empty
print(hm.is_empty())    # True
print(len(hm))          # 0
print(hm.size())        # 0
print(hm.capacity())    # 16 (default)
print(hm.load_factor()) # 0.0

# Create with custom parameters
hm = HashMap(initial_capacity=32, load_factor=0.5)
print(hm.capacity())    # 32

Adding Key-Value Pairs

# Put key-value pairs
hm.put("name", "Alice")
hm.put("age", 30)
hm.put("city", "New York")
print(hm.size())  # 3

# Dictionary-like assignment
hm["country"] = "USA"
hm["occupation"] = "Engineer"
print(hm.size())  # 5

# Put returns previous value if key existed
old_value = hm.put("age", 31)  # Update existing key
print(old_value)  # 30 (previous value)
print(hm.get("age"))  # 31 (new value)

# Adding multiple pairs
data = {"height": "5'8\"", "weight": "150lbs", "hobby": "reading"}
for key, value in data.items():
    hm.put(key, value)
print(hm.size())  # 8

Accessing Values

# Get values by key
print(hm.get("name"))     # Alice
print(hm.get("age"))      # 31
print(hm.get("city"))     # New York

# Get with default value
print(hm.get("phone"))              # None
print(hm.get("phone", "N/A"))       # N/A

# Dictionary-like access
print(hm["name"])         # Alice
print(hm["country"])      # USA

# Check current stats
print(f"Size: {hm.size()}")
print(f"Capacity: {hm.capacity()}")
print(f"Load Factor: {hm.load_factor():.2f}")

Removing Key-Value Pairs

# Remove by key
removed_value = hm.remove("hobby")
print(removed_value)      # reading
print(hm.size())          # 7

# Remove non-existent key
result = hm.remove("nonexistent")
print(result)             # None

# Dictionary-like deletion
del hm["weight"]
print(hm.size())          # 6

# Clear all elements
hm.clear()
print(hm.size())          # 0
print(hm.is_empty())      # True

Search Operations

hm = HashMap()
hm.put("apple", 1.50)
hm.put("banana", 0.75)
hm.put("cherry", 2.00)
hm.put("date", 3.00)
hm.put("elderberry", 0.75)  # Duplicate value

# Check if key exists
print(hm.contains_key("apple"))     # True
print(hm.contains_key("grape"))     # False

# Check if value exists
print(hm.contains_value(1.50))      # True
print(hm.contains_value(0.75))      # True (multiple keys can have same value)
print(hm.contains_value(5.00))      # False

# Using 'in' operator (checks keys)
print("banana" in hm)               # True
print("grape" in hm)                # False

# Get value with error handling
try:
    price = hm["mango"]  # KeyError for non-existent key
except KeyError:
    print("Mango not found in inventory")

Iteration and Traversal

hm = HashMap()
hm.put("red", "#FF0000")
hm.put("green", "#00FF00")
hm.put("blue", "#0000FF")
hm.put("yellow", "#FFFF00")

# Iterate over keys (default iteration)
print("Keys:")
for key in hm:
    print(key)  # red, green, blue, yellow (order not guaranteed)

# Iterate over keys explicitly
print("Keys (explicit):")
for key in hm.keys():
    print(key)

# Iterate over values
print("Values:")
for value in hm.values():
    print(value)  # #FF0000, #00FF00, #0000FF, #FFFF00

# Iterate over key-value pairs
print("Key-Value pairs:")
for key, value in hm.items():
    print(f"{key}: {value}")

# Convert to lists
keys_list = list(hm.keys())
values_list = list(hm.values())
items_list = list(hm.items())

Working with Different Types

String Keys and Values

str_hm = HashMap()
str_hm.put("greeting", "Hello")
str_hm.put("farewell", "Goodbye")
str_hm.put("thanks", "Thank you")

print(str_hm.get("greeting"))   # Hello
print("thanks" in str_hm)       # True
print(str_hm.size())           # 3

Numeric Keys

num_hm = HashMap()
num_hm.put(1, "one")
num_hm.put(2, "two")
num_hm.put(42, "answer")

print(num_hm.get(1))    # one
print(num_hm.get(42))   # answer
print(num_hm.size())    # 3

Mixed Value Types

mixed_hm = HashMap()
mixed_hm.put("string", "text")
mixed_hm.put("integer", 42)
mixed_hm.put("float", 3.14)
mixed_hm.put("list", [1, 2, 3])
mixed_hm.put("none", None)

print(mixed_hm.get("string"))   # text
print(mixed_hm.get("integer"))  # 42
print(mixed_hm.get("list"))     # [1, 2, 3]
print(mixed_hm.contains_key("none"))  # True (even with None value)

Tuple Keys

tuple_hm = HashMap()
tuple_hm.put((1, 2), "coordinate")
tuple_hm.put(("name", "age"), "person_data")
tuple_hm.put((2023, 12, 25), "Christmas")

print(tuple_hm.get((1, 2)))              # coordinate
print((2023, 12, 25) in tuple_hm)        # True

Custom Objects as Values

class Person:
    def __init__(self, name: str, age: int):
        self.name = name
        self.age = age

    def __str__(self):
        return f"Person({self.name}, {self.age})"

people_hm = HashMap()
people_hm.put("alice", Person("Alice", 30))
people_hm.put("bob", Person("Bob", 25))

alice = people_hm.get("alice")
print(alice)  # Person(Alice, 30)
print(alice.name)  # Alice

Hash Map Resizing and Performance

Automatic Resizing

# Create small hash map to demonstrate resizing
hm = HashMap(initial_capacity=4, load_factor=0.5)
print(f"Initial capacity: {hm.capacity()}")  # 4

# Add elements up to threshold (4 * 0.5 = 2)
hm.put("key1", "value1")
hm.put("key2", "value2")
print(f"Before resize - Capacity: {hm.capacity()}, Load Factor: {hm.load_factor():.2f}")

# Adding one more triggers resize
hm.put("key3", "value3")
print(f"After resize - Capacity: {hm.capacity()}, Load Factor: {hm.load_factor():.2f}")

# Verify all data is still accessible
for i in range(1, 4):
    print(f"key{i}: {hm.get(f'key{i}')}")

Load Factor Management

hm = HashMap(initial_capacity=8, load_factor=0.75)

print(f"Empty - Load Factor: {hm.load_factor()}")  # 0.0

for i in range(6):
    hm.put(f"item{i}", f"value{i}")
    print(f"After {i+1} items - Load Factor: {hm.load_factor():.2f}")

# Load factor stays below 0.75 due to automatic resizing

Hash Functions and Collision Handling

Using Custom Hash Functions

from algo.algorithms.hashing.hash_functions import FNVHash, DJB2Hash, MurmurHash3

# Create hash map with FNV hash function
fnv_hm = HashMap(hash_function=FNVHash())
fnv_hm.put("test", "value")
print(f"Hash function: {type(fnv_hm.get_hash_function()).__name__}")

# Switch hash functions (rehashes all data)
djb2_hash = DJB2Hash()
fnv_hm.set_hash_function(djb2_hash)
print(f"New hash function: {type(fnv_hm.get_hash_function()).__name__}")
print(f"Data still accessible: {fnv_hm.get('test')}")  # value

# Create with MurmurHash3
murmur_hm = HashMap(hash_function=MurmurHash3(seed=42))

Analyzing Hash Distribution

hm = HashMap(initial_capacity=8, load_factor=0.9)

# Add data to potentially cause collisions
for i in range(15):
    hm.put(f"key_{i:02d}", f"value_{i}")

# Get bucket distribution
distribution = hm.get_bucket_distribution()
print(f"Bucket distribution: {distribution}")
print(f"Total items: {sum(distribution)}")

# Get detailed statistics
stats = hm.get_hash_statistics()
print(f"Collisions: {stats['collisions']}")
print(f"Max chain length: {stats['max_chain_length']}")
print(f"Average chain length: {stats['average_chain_length']:.2f}")
print(f"Load factor: {stats['load_factor']:.2f}")

Error Handling

hm = HashMap()

# KeyError for non-existent keys with [] operator
try:
    value = hm["nonexistent"]
except KeyError:
    print("Key not found when using [] operator")

# Safe access with get() method
value = hm.get("nonexistent")  # Returns None
print(value)  # None

value = hm.get("nonexistent", "default")  # Returns default
print(value)  # default

# KeyError when deleting non-existent key
try:
    del hm["nonexistent"]
except KeyError:
    print("Cannot delete non-existent key")

# Safe removal with remove() method
result = hm.remove("nonexistent")  # Returns None
print(result)  # None

# Invalid initialization parameters
try:
    invalid_hm = HashMap(initial_capacity=0)
except ValueError as e:
    print(f"Invalid capacity: {e}")

try:
    invalid_hm = HashMap(load_factor=1.5)
except ValueError as e:
    print(f"Invalid load factor: {e}")

String Representation

hm = HashMap()
hm.put("name", "Alice")
hm.put("age", 30)
hm.put("city", "New York")

# String representation (like dict)
print(str(hm))   # {name: Alice, age: 30, city: New York}

# Detailed representation for debugging
print(repr(hm))  # HashMap(size=3, capacity=16, load_factor=0.19, hash_func=FNVHash)

# Empty hash map representation
empty_hm = HashMap()
print(str(empty_hm))  # {}

Common Patterns

Building from Dictionaries

# From Python dictionary
data = {"a": 1, "b": 2, "c": 3}
hm = HashMap()
for key, value in data.items():
    hm.put(key, value)

# Using update method
hm1 = HashMap()
hm1.put("x", 10)

hm2 = HashMap()
hm2.put("y", 20)
hm2.put("z", 30)

hm1.update(hm2)  # Merge hm2 into hm1
print(hm1.size())  # 3

Caching and Memoization

# Simple cache implementation
cache = HashMap()

def expensive_operation(n):
    if cache.contains_key(n):
        print(f"Cache hit for {n}")
        return cache.get(n)

    # Simulate expensive computation
    result = n * n * n
    cache.put(n, result)
    print(f"Computed and cached {n}")
    return result

print(expensive_operation(5))  # Computed and cached 5 -> 125
print(expensive_operation(5))  # Cache hit for 5 -> 125

Counting Occurrences

def count_words(text):
    word_count = HashMap()
    words = text.lower().split()

    for word in words:
        current_count = word_count.get(word, 0)
        word_count.put(word, current_count + 1)

    return word_count

text = "the quick brown fox jumps over the lazy dog the fox"
counts = count_words(text)

for word, count in counts.items():
    print(f"{word}: {count}")

Grouping Data

# Group people by age
people = [
    ("Alice", 25), ("Bob", 30), ("Charlie", 25),
    ("Diana", 30), ("Eve", 35)
]

age_groups = HashMap()
for name, age in people:
    if not age_groups.contains_key(age):
        age_groups.put(age, [])

    age_groups.get(age).append(name)

for age, names in age_groups.items():
    print(f"Age {age}: {names}")

Configuration Management

# Application configuration
config = HashMap()
config.put("database_url", "postgresql://localhost:5432/mydb")
config.put("debug_mode", True)
config.put("max_connections", 100)
config.put("timeout", 30.0)

def get_config(key, default=None):
    return config.get(key, default)

# Usage
db_url = get_config("database_url")
debug = get_config("debug_mode", False)
port = get_config("port", 8080)  # Uses default

Use Cases

When to Use Hash Maps

Good for:

Fast key-value lookups
Caching and memoization
Counting and frequency analysis
Grouping and categorization
Configuration storage
Database indexing
Symbol tables in compilers

Examples:

# User session management
sessions = HashMap()
sessions.put("session_123", {"user_id": 456, "login_time": "2023-12-01"})

# Inventory management
inventory = HashMap()
inventory.put("SKU001", {"name": "Widget", "quantity": 100, "price": 9.99})

# DNS resolution cache
dns_cache = HashMap()
dns_cache.put("example.com", "93.184.216.34")

# Database record cache
record_cache = HashMap()
record_cache.put("user:123", {"name": "Alice", "email": "alice@example.com"})

When NOT to Use Hash Maps

Avoid for:

Ordered data requirements (use OrderedDict or specialized structures)
Range queries (use trees or sorted structures)
Memory-constrained environments with many small mappings
When iteration order matters

Better alternatives:

# For ordered data, use specialized ordered structures
from collections import OrderedDict
ordered_data = OrderedDict([("first", 1), ("second", 2)])

# For range queries, use sorted structures
import bisect
sorted_keys = ["a", "c", "e", "g"]
# Find keys in range

# For small, fixed mappings, use namedtuple or dataclass
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])

Performance Characteristics

Time Complexity Analysis

Average case performance: O(1) for basic operations
Worst case performance: O(n) when all keys hash to same bucket
Space complexity: O(n) where n is number of key-value pairs
Resizing cost: O(n) but amortized to O(1) per operation

Memory Efficiency

# Hash maps use additional memory for:
# - Bucket array
# - Hash table metadata
# - Chain nodes for collision resolution

hm = HashMap(initial_capacity=16)
print(f"Initial capacity: {hm.capacity()}")  # 16 buckets allocated

# Memory grows with load factor
for i in range(100):
    hm.put(f"key{i}", f"value{i}")

print(f"Final capacity: {hm.capacity()}")  # Expanded as needed
print(f"Load factor: {hm.load_factor():.2f}")  # Maintained below threshold

Collision Resolution Performance

# Separate chaining performance depends on chain length
hm = HashMap(initial_capacity=4, load_factor=0.9)  # Force collisions

# Add many items to small hash map
for i in range(20):
    hm.put(f"item{i}", f"value{i}")

stats = hm.get_hash_statistics()
print(f"Max chain length: {stats['max_chain_length']}")
print(f"Average chain length: {stats['average_chain_length']:.2f}")
print(f"Collision rate: {stats['collisions'] / stats['total_elements']:.2f}")

Hash Map Comparison and Equality

# Hash maps are equal if they contain same key-value pairs
hm1 = HashMap()
hm1.put("a", 1)
hm1.put("b", 2)

hm2 = HashMap()
hm2.put("b", 2)  # Different insertion order
hm2.put("a", 1)

print(hm1 == hm2)  # True (same content)

# Different values
hm3 = HashMap()
hm3.put("a", 1)
hm3.put("b", 3)  # Different value

print(hm1 == hm3)  # False

# Different sizes
hm4 = HashMap()
hm4.put("a", 1)

print(hm1 == hm4)  # False (different sizes)

# Comparison with non-HashMap objects
print(hm1 == {"a": 1, "b": 2})  # False (different types)

Advanced Usage

Custom Hash Function Performance Comparison

from algo.algorithms.hashing.hash_functions import (
    FNVHash, DJB2Hash, MurmurHash3, PolynomialHash
)

# Test different hash functions with same data
test_data = [(f"key_{i:03d}", f"value_{i}") for i in range(100)]

hash_functions = [
    ("FNV", FNVHash()),
    ("DJB2", DJB2Hash()),
    ("MurmurHash3", MurmurHash3()),
    ("Polynomial", PolynomialHash())
]

for name, hash_func in hash_functions:
    hm = HashMap(initial_capacity=32, hash_function=hash_func)

    # Add test data
    for key, value in test_data:
        hm.put(key, value)

    # Analyze distribution
    stats = hm.get_hash_statistics()
    print(f"{name} Hash Function:")
    print(f"  Collisions: {stats['collisions']}")
    print(f"  Max chain length: {stats['max_chain_length']}")
    print(f"  Distribution variance: {stats['distribution_variance']:.2f}")
    print()

Hash Map as Building Block

# Implement a simple LRU cache using HashMap
class LRUCache:
    def __init__(self, capacity: int):
        self.capacity = capacity
        self.cache = HashMap()
        self.access_order = []

    def get(self, key):
        if self.cache.contains_key(key):
            # Move to end (most recently used)
            self.access_order.remove(key)
            self.access_order.append(key)
            return self.cache.get(key)
        return None

    def put(self, key, value):
        if self.cache.contains_key(key):
            # Update existing
            self.cache.put(key, value)
            self.access_order.remove(key)
            self.access_order.append(key)
        else:
            # Add new
            if len(self.access_order) >= self.capacity:
                # Evict least recently used
                lru_key = self.access_order.pop(0)
                self.cache.remove(lru_key)

            self.cache.put(key, value)
            self.access_order.append(key)

# Usage
lru = LRUCache(3)
lru.put("a", 1)
lru.put("b", 2)
lru.put("c", 3)
lru.put("d", 4)  # Evicts "a"
print(lru.get("a"))  # None (evicted)
print(lru.get("b"))  # 2

Performance Comparison

Operation	HashMap	Python dict	Binary Search Tree	Array
Insert	O(1) avg	O(1) avg	O(log n)	O(n)
Search	O(1) avg	O(1) avg	O(log n)	O(n)
Delete	O(1) avg	O(1) avg	O(log n)	O(n)
Ordered Iteration	No	No (Python 3.7+)	Yes	Yes
Memory Overhead	Medium	Low	High	Low
Cache Locality	Poor	Good	Poor	Excellent

API Reference

Hash Map Implementation

A hash map (also known as hash table or dictionary) is a data structure that implements an associative array abstract data type, mapping keys to values. It uses a hash function to compute an index into an array of buckets or slots, from which the desired value can be found.

Key Features:

Key-value pair storage
Fast insertion, deletion, and lookup operations
Automatic resizing to maintain load factor
Collision resolution via separate chaining
Support for any hashable key type
Pluggable hash functions for different use cases

Time Complexities (average case):

Insert: O(1)
Delete: O(1)
Search: O(1)
Worst case: O(n) when all keys hash to same bucket

Space Complexity: O(n)

`HashMap(initial_capacity=16, load_factor=0.75, hash_function=None)`

Bases: Generic[K, V]

Hash Map implementation using separate chaining for collision resolution.

This implementation automatically resizes when the load factor exceeds a threshold to maintain good performance characteristics. It supports pluggable hash functions for different use cases and performance requirements.

Initialize hash map.

Parameters:

Name	Type	Description	Default
`initial_capacity`	`int`	Initial number of buckets (should be power of 2)	`16`
`load_factor`	`float`	Maximum ratio of size to capacity before resizing	`0.75`
`hash_function`	`Optional[HashFunction]`	Custom hash function to use (defaults to FNV hash)	`None`

`contains(key)`

Check if key exists (in operator support).

`delitem(key)`

Delete key-value pair (dict-like access).

`eq(other)`

Check equality with another hash map.

`getitem(key)`

Get value by key (dict-like access).

`iter()`

Return iterator over keys.

`len()`

Return number of key-value pairs.

`repr()`

Return detailed string representation for debugging.

`setitem(key, value)`

Set value by key (dict-like access).

`str()`

Return string representation.

`capacity()`

Return current capacity (number of buckets).

`clear()`

Remove all key-value pairs from hash map.

`contains_key(key)`

Check if key exists in hash map.

Parameters:

Name	Type	Description	Default
`key`	`K`	Key to check	required

Returns:

Type	Description
`bool`	True if key exists, False otherwise

`contains_value(value)`

Check if value exists in hash map.

Parameters:

Name	Type	Description	Default
`value`	`V`	Value to check	required

Returns:

Type	Description
`bool`	True if value exists, False otherwise

`get(key, default=None)`

Get value associated with key.

Parameters:

Name	Type	Description	Default
`key`	`K`	Key to look up	required
`default`	`Optional[V]`	Default value if key not found	`None`

Returns:

Type	Description
`Optional[V]`	Value associated with key, or default if not found

`get_bucket_distribution()`

Get distribution of elements across buckets (for debugging/analysis).

Returns:

Type	Description
`List[int]`	List where index i contains count of elements in bucket i

`get_hash_function()`

Get the current hash function.

Returns:

Type	Description
`HashFunction`	Current hash function instance

`get_hash_statistics()`

Get detailed statistics about hash function performance.

Returns:

Type	Description
`dict`	Dictionary with hash distribution and collision statistics

`is_empty()`

Check if hash map is empty.

`items()`

Return iterator over all key-value pairs.

`keys()`

Return iterator over all keys.

`load_factor()`

Return current load factor.

`put(key, value)`

Insert or update a key-value pair.

Parameters:

Name	Type	Description	Default
`key`	`K`	Key to insert/update	required
`value`	`V`	Value to associate with key	required

Returns:

Type	Description
`Optional[V]`	Previous value if key existed, None otherwise

`remove(key)`

Remove key-value pair from hash map.

Parameters:

Name	Type	Description	Default
`key`	`K`	Key to remove	required

Returns:

Type	Description
`Optional[V]`	Removed value if key existed, None otherwise

`set_hash_function(hash_function)`

Change the hash function and rehash all existing elements.

Parameters:

Name	Type	Description	Default
`hash_function`	`HashFunction`	New hash function to use	required

`size()`

Return number of key-value pairs.

`update(other)`

Update hash map with key-value pairs from another hash map.

`values()`

Return iterator over all values.

`HashMapNode(key, value)`

Bases: Generic[K, V]

Node class for hash map entries using separate chaining.

Hash Map

Features

Time Complexities

Basic Usage

Creating and Initializing

Adding Key-Value Pairs

Accessing Values

Removing Key-Value Pairs

Search Operations

Iteration and Traversal

Working with Different Types

String Keys and Values

Numeric Keys

Mixed Value Types

Tuple Keys

Custom Objects as Values

Hash Map Resizing and Performance

Automatic Resizing

Load Factor Management

Hash Functions and Collision Handling

Using Custom Hash Functions

Analyzing Hash Distribution

Error Handling

String Representation

Common Patterns

Building from Dictionaries

Caching and Memoization

Counting Occurrences

Grouping Data

Configuration Management

Use Cases

When to Use Hash Maps

When NOT to Use Hash Maps

Performance Characteristics

Time Complexity Analysis

Memory Efficiency

Collision Resolution Performance

Hash Map Comparison and Equality

Advanced Usage

Custom Hash Function Performance Comparison

Hash Map as Building Block

Performance Comparison

API Reference

HashMap(initial_capacity=16, load_factor=0.75, hash_function=None)

__contains__(key)

__delitem__(key)

__eq__(other)

__getitem__(key)

__iter__()

__len__()

__repr__()

__setitem__(key, value)

__str__()

capacity()

clear()

contains_key(key)

contains_value(value)

get(key, default=None)

get_bucket_distribution()

get_hash_function()

get_hash_statistics()

is_empty()

items()

keys()

load_factor()

put(key, value)

remove(key)

set_hash_function(hash_function)

size()

update(other)

values()

HashMapNode(key, value)

`HashMap(initial_capacity=16, load_factor=0.75, hash_function=None)`

`contains(key)`

`delitem(key)`

`eq(other)`

`getitem(key)`

`iter()`

`len()`

`repr()`

`setitem(key, value)`

`str()`

`capacity()`

`clear()`

`contains_key(key)`

`contains_value(value)`

`get(key, default=None)`

`get_bucket_distribution()`

`get_hash_function()`

`get_hash_statistics()`

`is_empty()`

`items()`

`keys()`

`load_factor()`

`put(key, value)`

`remove(key)`

`set_hash_function(hash_function)`

`size()`

`update(other)`

`values()`

`HashMapNode(key, value)`