Objective: To implement the Euclidean GCD algorithm using ASM-based Verilog design and verify using simulation
Tool Used: Xilinx Vivado
The Euclidean algorithm is an efficient method to find the Greatest Common Divisor (GCD) of two numbers by repeatedly replacing the larger number with the remainder of their division until the remainder becomes zero. The final non-zero value is the GCD.
In this experiment, the algorithm is implemented using an Arithmetic State Machine (ASM), where each step of the GCD computation is represented as a state. The system uses registers to store input numbers and intermediate results, while comparators and subtractors are used to perform operations like comparison and subtraction.
The circuitry mainly consists of state registers (flip-flops), combinational logic, and arithmetic units. The control unit (FSM) decides the next operation based on conditions such as whether one number is greater than the other or equal.
Data Path
// 1. Parallel In, Parallel Out (PIPO) Register
module PIPO (
input [15:0] din,
input load, clk,
output reg [15:0] dout
);
always @(posedge clk) begin
if (load) dout <= din;
end
endmodule
// 2. Combinational Subtractor
module SUB (
input [15:0] in1, in2,
output reg [15:0] out
);
always @(*) begin
out = in1 - in2;
end
endmodule
// 3. Comparator (Generates Status Flags for the FSM)
module COMP (
input [15:0] data1, data2,
output lt, gt, eq
);
assign lt = (data1 < data2);
assign gt = (data1 > data2);
assign eq = (data1 == data2);
endmodule
// 4. 2-to-1 Multiplexer
module MUX21 (
input [15:0] in0, in1,
input sel,
output reg [15:0] out
);
always @(*) begin
out = sel ? in1 : in0;
end
endmodule
module GCD_datapath (
input [15:0] data_in,
input loadA, loadB, sel1, sel2, sel_in, clk,
output lt, gt, eq,
output [15:0] final_out // To easily view the answer in the testbench
);
wire [15:0] A_out, B_out, X, Y, sub_out, bus;
// Output assignment
assign final_out = A_out;
// Instantiate Multiplexer for the main input bus
// sel_in = 1 selects data_in, sel_in = 0 selects sub_out
MUX21 mux_in (.in0(sub_out), .in1(data_in), .sel(sel_in), .out(bus));
// Instantiate Registers
PIPO regA (.din(bus), .load(loadA), .clk(clk), .dout(A_out));
PIPO regB (.din(bus), .load(loadB), .clk(clk), .dout(B_out));
// Instantiate Multiplexers for Subtractor inputs
// sel = 0 selects A_out, sel = 1 selects B_out
MUX21 mux1 (.in0(A_out), .in1(B_out), .sel(sel1), .out(X));
MUX21 mux2 (.in0(A_out), .in1(B_out), .sel(sel2), .out(Y));
// Instantiate Subtractor and Comparator
SUB subtractor (.in1(X), .in2(Y), .out(sub_out));
COMP comparator (.data1(A_out), .data2(B_out), .lt(lt), .gt(gt), .eq(eq));
endmodule
Control Path
module GCD_controller (
input clk, start, lt, gt, eq,
output reg loadA, loadB, sel1, sel2, sel_in, done
);
reg [2:0] state, next_state;
localparam S0=3'd0, S1=3'd1, S2=3'd2, S3=3'd3, S4=3'd4, S5=3'd5;
// --------------------------------------------------------
// BLOCK 1: Sequential State Memory (Clock Triggered)
// --------------------------------------------------------
always @(posedge clk) begin
state <= next_state;
end
// --------------------------------------------------------
// BLOCK 2: Combinational Next-State & Output Logic
// --------------------------------------------------------
always @(*) begin
// 1. Default assignments to prevent accidental latches
loadA = 0; loadB = 0; sel1 = 0; sel2 = 0; sel_in = 0; done = 0;
next_state = state;
case (state)
S0: begin // Wait for start
if (start) next_state = S1;
end
S1: begin // Load Register A
loadA = 1;
sel_in = 1; // Route data_in to bus
next_state = S2;
end
S2: begin // Load Register B
loadB = 1;
sel_in = 1; // Route data_in to bus
next_state = S3; // Move to evaluation state
end
S3: begin // Evaluate and Branch
if (eq) next_state = S5;
else if (lt) next_state = S4; // A < B
else if (gt) next_state = S3; // A > B (Remain in S3 to subtract A)
// If A > B (gt is true), execute A = A - B
if (gt) begin
sel1 = 0; // X = A
sel2 = 1; // Y = B
sel_in = 0; // Route sub_out to bus
loadA = 1;
end
end
S4: begin // Execute B = B - A
if (eq) next_state = S5;
else if (lt) next_state = S4; // Remain in S4 to subtract B
else if (gt) next_state = S3; // Jump back to S3
// If A < B (lt is true), execute B = B - A
if (lt) begin
sel1 = 1; // X = B
sel2 = 0; // Y = A
sel_in = 0; // Route sub_out to bus
loadB = 1;
end
end
S5: begin // Done State
done = 1;
next_state = S5; // Halt here
end
default: next_state = S0;
endcase
end
endmodule
Test Bench
module tb_GCD();
reg [15:0] data_in;
reg clk, start;
wire lt, gt, eq;
wire loadA, loadB, sel1, sel2, sel_in, done;
wire [15:0] final_out;
// Instantiate Datapath
GCD_datapath DP (
.data_in(data_in), .loadA(loadA), .loadB(loadB),
.sel1(sel1), .sel2(sel2), .sel_in(sel_in), .clk(clk),
.lt(lt), .gt(gt), .eq(eq), .final_out(final_out)
);
// Instantiate Controller
GCD_controller CON (
.clk(clk), .start(start), .lt(lt), .gt(gt), .eq(eq),
.loadA(loadA), .loadB(loadB), .sel1(sel1), .sel2(sel2),
.sel_in(sel_in), .done(done)
);
// Clock Generation
initial begin
clk = 0;
forever #5 clk = ~clk; // 10ns period
end
// Test Sequence
initial begin
// Dump waves for GTKWave or Vivado
$dumpfile("gcd.vcd");
$dumpvars(0, tb_GCD);
start = 0;
#3 start = 1; // Trigger start
// Provide 143 (A) at clock edge 1, and 78 (B) at clock edge 2
#12 data_in = 16'd143;
#10 data_in = 16'd78;
#300 $finish; // Let the repeated subtraction run
end
// Console Monitor
initial begin
$display("Time | State | Data In | Result A | Done");
$display("-------------------------------------------");
$monitor("%0t | %d | %d | %d | %b",
$time, CON.state, data_in, final_out, done);
end
endmodule
Conclusion: The Euclidean GCD algorithm was implemented using ASM and verified successfully. The design correctly computed the GCD through iterative subtraction/comparison steps. Simulation in Xilinx Vivado validated accurate results and state transitions.

