Python Data Structures and Algorithms (DSA)
Table of Contents
- Data Structures
- Arrays
- Linked Lists
- Stacks
- Queues
- Trees
- Graphs
- Hash Tables
- Heaps
- Algorithms
- Sorting
- Searching
- Recursion
- Dynamic Programming
- Greedy Algorithms
- Backtracking
- Graph Algorithms
1. Data Structures
Arrays
- Definition: An array is a collection of items stored at contiguous memory locations.
- Operations: Insertion, Deletion, Traversal
- Code Example:
# 1. Accessing Elements my_list = [10, 20, 30, 40, 50] # Indexing first_element = my_list[0] print("First element:", first_element) # Slicing sublist = my_list[1:4] print("Sublist:", sublist) # 2. Modifying Lists # Assignment my_list[0] = 100 print("After assignment:", my_list) # Append my_list.append(60) print("After append:", my_list) # Insert my_list.insert(2, 'x') print("After insert:", my_list) # Extend my_list.extend([70, 80]) print("After extend:", my_list) # Remove my_list.remove(30) print("After remove:", my_list) # Pop popped_element = my_list.pop(2) print("After pop:", my_list, "Popped element:", popped_element) # Clear my_list.clear() print("After clear:", my_list) # 3. Querying Lists my_list = [10, 20, 20, 30, 40] # Count count_20 = my_list.count(20) print("Count of 20:", count_20) # Index index_30 = my_list.index(30) print("Index of 30:", index_30) # 4. Sorting and Reversing # Sort my_list = [40, 10, 30, 20] my_list.sort() print("Sorted list:", my_list) # Sorted (creates a new list) sorted_list = sorted([50, 10, 30, 20]) print("Sorted list using sorted():", sorted_list) # Reverse my_list.reverse() print("Reversed list:", my_list) # Reversed (creates an iterator) reversed_list = list(reversed(my_list)) print("Reversed list using reversed():", reversed_list) # 5. Aggregations # Length length = len(my_list) print("Length of the list:", length) # Sum total_sum = sum(my_list) print("Sum of elements:", total_sum) # Min min_element = min(my_list) print("Minimum element:", min_element) # Max max_element = max(my_list) print("Maximum element:", max_element) # 6. Copying Lists # Shallow Copy shallow_copy = my_list.copy() print("Shallow copy:", shallow_copy) # Deep Copy (for nested lists) import copy nested_list = [[1, 2], [3, 4]] deep_copy = copy.deepcopy(nested_list) print("Deep copy:", deep_copy) # 7. List Comprehension doubled = [x * 2 for x in my_list] print("Doubled list using comprehension:", doubled) # 8. Membership is_20_in_list = 20 in my_list print("Is 20 in the list?", is_20_in_list) # 9. Iteration print("Elements in the list:") for x in my_list: print(x) # 10. Joining and Splitting # Joining string_list = ['a', 'b', 'c'] joined_string = ''.join(string_list) print("Joined string:", joined_string) # Splitting split_list = "a,b,c".split(',') print("Split list:", split_list)
Linked Lists
- Definition: A linked list is a linear data structure where elements are stored in nodes, with each node pointing to the next.
- Types: Singly Linked List, Doubly Linked List
- Code Example:
class Node: def __init__(self, data): self.data = data self.next = None class LinkedList: def __init__(self): self.head = None def append(self, data): new_node = Node(data) if not self.head: self.head = new_node return last = self.head while last.next: last = last.next last.next = new_node def print_list(self): current = self.head while current: print(current.data) current = current.next ll = LinkedList() ll.append(1) ll.append(2) ll.append(3) ll.print_list()
Stacks
- Definition: A stack is a linear data structure that follows the LIFO (Last In, First Out) principle.
- Operations: Push, Pop, Peek
- Code Example:
stack = [] # Push stack.append(1) stack.append(2) stack.append(3) # Pop stack.pop() # 3 # Peek print(stack[-1]) # 2
Queues
- Definition: A queue is a linear data structure that follows the FIFO (First In, First Out) principle.
- Operations: Enqueue, Dequeue
- Code Example:
from collections import deque queue = deque() # Enqueue queue.append(1) queue.append(2) queue.append(3) # Dequeue queue.popleft() # 1
Trees
- Definition: A tree is a hierarchical data structure with nodes, where each node has zero or more children.
- Types: Binary Tree, Binary Search Tree (BST), AVL Tree
- Code Example (BST):
class TreeNode: def __init__(self, key): self.left = None self.right = None self.val = key def insert(root, key): if root is None: return TreeNode(key) else: if root.val < key: root.right = insert(root.right, key) else: root.left = insert(root.left, key) return root def inorder(root): if root: inorder(root.left) print(root.val) inorder(root.right) root = TreeNode(50) root = insert(root, 30) root = insert(root, 20) root = insert(root, 40) root = insert(root, 70) root = insert(root, 60) root = insert(root, 80) inorder(root)
Graphs
- Definition: A graph is a collection of nodes (vertices) and edges connecting them.
- Representations: Adjacency Matrix, Adjacency List
- Code Example (Adjacency List):
from collections import defaultdict class Graph: def __init__(self): self.graph = defaultdict(list) def add_edge(self, u, v): self.graph[u].append(v) def dfs_util(self, v, visited): visited.add(v) print(v, end=' ') for neighbour in self.graph[v]: if neighbour not in visited: self.dfs_util(neighbour, visited) def dfs(self, v): visited = set() self.dfs_util(v, visited) g = Graph() g.add_edge(0, 1) g.add_edge(0, 2) g.add_edge(1, 2) g.add_edge(2, 0) g.add_edge(2, 3) g.add_edge(3, 3) g.dfs(2)
Hash Tables
- Definition: A hash table is a data structure that maps keys to values using a hash function.
- Operations: Insertion, Deletion, Search
- Code Example:
hash_table = {} # Insertion hash_table['key1'] = 'value1' hash_table['key2'] = 'value2' # Deletion del hash_table['key1'] # Search print(hash_table['key2']) # value2
- Code Example:
# Example dictionary with counts count_dict = {'apple': 3, 'banana': 2, 'orange': 1} for key in count_dict.keys(): print(key) for value in count_dict.values(): print(value) for key, value in count_dict.items(): print(f"{key}: {value}")
- Code Example:
from collections import defaultdict # Sample data: tuples of (key, value) data = [('fruits', 'apple'), ('fruits', 'banana'), ('fruits', 'orange'), ('vegetables', 'carrot'), ('vegetables', 'spinach'), ('grains', 'rice')] # Initialize defaultdict with list as the default_factory grouped_dict = defaultdict(list) # Group elements by key for key, value in data: grouped_dict[key].append(value) # Iterate over the dictionary and print key-value pairs for key, values in grouped_dict.items(): print(f"{key}: {values}") fruits: ['apple', 'banana', 'orange'] vegetables: ['carrot', 'spinach'] grains: ['rice'] from collections import defaultdict # Custom default factory def default_value(): return {'count': 0, 'items': []} custom_dict = defaultdict(default_value) custom_dict['a']['count'] += 1 custom_dict['a']['items'].append('apple') custom_dict['b']['count'] += 2 custom_dict['b']['items'].append('banana') print(custom_dict) # Output: defaultdict(<function default_value at 0x...>, {'a': {'count': 1, 'items': ['apple']}, 'b': {'count': 2, 'items': ['banana']}})
Heaps
- Definition: A heap is a special tree-based data structure that satisfies the heap property.
- Types: Min-Heap, Max-Heap
- Code Example (Min-Heap):
import heapq heap = [] # Insertion heapq.heappush(heap, 1) heapq.heappush(heap, 3) heapq.heappush(heap, 5) heapq.heappush(heap, 2) heapq.heappush(heap, 4) # Deletion print(heapq.heappop(heap)) # 1
2. Algorithms
Sorting
- Bubble Sort:
def bubble_sort(arr): n = len(arr) for i in range(n): for j in range(0, n-i-1): if arr[j] > arr[j+1]: arr[j], arr[j+1] = arr[j+1], arr[j] return arr arr = [64, 34, 25, 12, 22, 11, 90] print(bubble_sort(arr))
- Quick Sort:
def partition(arr, low, high): pivot = arr[high] i = low - 1 for j in range(low, high): if arr[j] < pivot: i = i + 1 arr[i], arr[j] = arr[j], arr[i] arr[i + 1], arr[high] = arr[high], arr[i + 1] return i + 1 def quick_sort(arr, low, high): if low < high: pi = partition(arr, low, high) quick_sort(arr, low, pi - 1) quick_sort(arr, pi + 1, high) return arr arr = [10, 7, 8, 9, 1, 5] print(quick_sort(arr, 0, len(arr) - 1))
Searching
- Binary Search:
def binary_search(arr, x): low = 0 high = len(arr) - 1 mid = 0 while low <= high: mid = (high + low) // 2 if arr[mid] < x: low = mid + 1 elif arr[mid] > x: high = mid - 1 else: return mid return -1 arr = [2, 3, 4, 10, 40] x = 10 print(binary_search(arr, x))
Recursion
- Factorial:
def factorial(n): if n == 0: return 1 else: return n * factorial(n-1) print(factorial(5)) # 120
Dynamic Programming
- Fibonacci Sequence:
def fibonacci(n, memo={}): if n in memo: return memo[n] if n <= 1: return n memo[n] = fibonacci(n-1, memo) + fibonacci(n-2, memo) return memo[n] print(fibonacci(10)) # 55
Greedy Algorithms
Definition: Greedy algorithms build up a solution piece by piece, always choosing the next piece that offers the most immediate benefit.
Coin Change Problem
Given a set of coins and a total amount, find the minimum number of coins needed to make the amount.
def min_coins(coins, amount):
coins.sort(reverse=True)
count = 0
for coin in coins:
while amount >= coin:
amount -= coin
count += 1
return count
coins = [1, 2, 5, 10, 20, 50, 100, 200]
amount = 289
print(min_coins(coins, amount)) # Output: 4 (200 + 50 + 20 + 10 + 5 + 2 + 2)
Activity Selection Problem
Given a set of activities with start and finish times, select the maximum number of activities that don’t overlap.
def activity_selection(activities):
activities.sort(key=lambda x: x[1])
selected = [activities[0]]
for i in range(1, len(activities)):
if activities[i][0] >= selected[-1][1]:
selected.append(activities[i])
return selected
activities = [(1, 3), (2, 5), (4, 6), (6, 7), (5, 8)]
print(activity_selection(activities)) # Output: [(1, 3), (4, 6), (6, 7)]
Backtracking
Definition: Backtracking is a general algorithm for finding all (or some) solutions to computational problems by incrementally building candidates to the solutions and abandoning a candidate as soon as it determines that the candidate cannot possibly be completed to a valid solution.
N-Queens Problem
Place N queens on an N×N chessboard so that no two queens threaten each other.
def is_safe(board, row, col, n):
for i in range(col):
if board[row][i] == 1:
return False
for i, j in zip(range(row, -1, -1), range(col, -1, -1)):
if board[i][j] == 1:
return False
for i, j in zip(range(row, n, 1), range(col, -1, -1)):
if board[i][j] == 1:
return False
return True
def solve_nqueens(board, col, n):
if col >= n:
return True
for i in range(n):
if is_safe(board, i, col, n):
board[i][col] = 1
if solve_nqueens(board, col + 1, n):
return True
board[i][col] = 0
return False
def nqueens(n):
board = [[0 for _ in range(n)] for _ in range(n)]
if not solve_nqueens(board, 0, n):
return "No solution"
return board
n = 4
solution = nqueens(n)
for row in solution:
print(row)
Subset Sum Problem
Find a subset of a given set that sums to a given value.
def subset_sum(arr, target):
result = []
def backtrack(start, current_sum, current_list):
if current_sum == target:
result.append(list(current_list))
return
for i in range(start, len(arr)):
if current_sum + arr[i] > target:
continue
current_list.append(arr[i])
backtrack(i + 1, current_sum + arr[i], current_list)
current_list.pop()
arr.sort()
backtrack(0, 0, [])
return result
arr = [3, 34, 4, 12, 5, 2]
target = 9
print(subset_sum(arr, target)) # Output: [[4, 5]]
Graph Algorithms
Definition: Graph algorithms are a set of instructions that traverse (visit nodes of a) graph.
Depth-First Search (DFS)
DFS is an algorithm for traversing or searching tree or graph data structures. It starts at the root and explores as far as possible along each branch before backtracking.
def dfs(graph, start, visited=None):
if visited is None:
visited = set()
visited.add(start)
print(start, end=' ')
for neighbor in graph[start]:
if neighbor not in visited:
dfs(graph, neighbor, visited)
graph = {
'A': ['B', 'C'],
'B': ['D', 'E'],
'C': ['F'],
'D': [],
'E': ['F'],
'F': []
}
dfs(graph, 'A') # Output: A B D E F C
Breadth-First Search (BFS)
BFS is an algorithm for traversing or searching tree or graph data structures. It starts at the tree root and explores all nodes at the present depth level before moving on to the nodes at the next depth level.
from collections import deque
def bfs(graph, start):
visited = set()
queue = deque([start])
visited.add(start)
while queue:
vertex = queue.popleft()
print(vertex, end=' ')
for neighbor in graph[vertex]:
if neighbor not in visited:
visited.add(neighbor)
queue.append(neighbor)
graph = {
'A': ['B', 'C'],
'B': ['D', 'E'],
'C': ['F'],
'D': [],
'E': ['F'],
'F': []
}
bfs(graph, 'A') # Output: A B C D E F
Dijkstra’s Algorithm
Dijkstra’s algorithm is used for finding the shortest paths between nodes in a graph.
import heapq
def dijkstra(graph, start):
pq = [(0, start)]
distances = {node: float('infinity') for node in graph}
distances[start] = 0
while pq:
current_distance, current_node = heapq.heappop(pq)
if current_distance > distances[current_node]:
continue
for neighbor, weight in graph[current_node].items():
distance = current_distance + weight
if distance < distances[neighbor]:
distances[neighbor] = distance
heapq.heappush(pq, (distance, neighbor))
return distances
graph = {
'A': {'B': 1, 'C': 4},
'B': {'A': 1, 'C': 2, 'D': 5},
'C': {'A': 4, 'B': 2, 'D': 1},
'D': {'B': 5, 'C': 1}
}
print(dijkstra(graph, 'A')) # Output: {'A': 0, 'B': 1, 'C': 3, 'D': 4}