Objective: To implement the Euclidean GCD algorithm using ASM-based Verilog design and verify using simulation

Tool Used: Xilinx Vivado

Theory:

The Euclidean algorithm is an efficient method to find the Greatest Common Divisor (GCD) of two numbers by repeatedly replacing the larger number with the remainder of their division until the remainder becomes zero. The final non-zero value is the GCD.

In this experiment, the algorithm is implemented using an Arithmetic State Machine (ASM), where each step of the GCD computation is represented as a state. The system uses registers to store input numbers and intermediate results, while comparators and subtractors are used to perform operations like comparison and subtraction.

The circuitry mainly consists of state registers (flip-flops), combinational logic, and arithmetic units. The control unit (FSM) decides the next operation based on conditions such as whether one number is greater than the other or equal.

Data Path

// 1. Parallel In, Parallel Out (PIPO) Register
module PIPO (
    input [15:0] din,
    input load, clk,
    output reg [15:0] dout
);
    always @(posedge clk) begin
        if (load) dout <= din;
    end
endmodule

// 2. Combinational Subtractor
module SUB (
    input [15:0] in1, in2,
    output reg [15:0] out
);
    always @(*) begin
        out = in1 - in2;
    end
endmodule

// 3. Comparator (Generates Status Flags for the FSM)
module COMP (
    input [15:0] data1, data2,
    output lt, gt, eq
);
    assign lt = (data1 < data2);
    assign gt = (data1 > data2);
    assign eq = (data1 == data2);
endmodule

// 4. 2-to-1 Multiplexer
module MUX21 (
    input [15:0] in0, in1,
    input sel,
    output reg [15:0] out
);
    always @(*) begin
        out = sel ? in1 : in0;
    end
endmodule

module GCD_datapath (
    input [15:0] data_in,
    input loadA, loadB, sel1, sel2, sel_in, clk,
    output lt, gt, eq,
    output [15:0] final_out // To easily view the answer in the testbench
);
    wire [15:0] A_out, B_out, X, Y, sub_out, bus;

    // Output assignment
    assign final_out = A_out;

    // Instantiate Multiplexer for the main input bus
    // sel_in = 1 selects data_in, sel_in = 0 selects sub_out
    MUX21 mux_in (.in0(sub_out), .in1(data_in), .sel(sel_in), .out(bus));

    // Instantiate Registers
    PIPO regA (.din(bus), .load(loadA), .clk(clk), .dout(A_out));
    PIPO regB (.din(bus), .load(loadB), .clk(clk), .dout(B_out));

    // Instantiate Multiplexers for Subtractor inputs
    // sel = 0 selects A_out, sel = 1 selects B_out
    MUX21 mux1 (.in0(A_out), .in1(B_out), .sel(sel1), .out(X));
    MUX21 mux2 (.in0(A_out), .in1(B_out), .sel(sel2), .out(Y));

    // Instantiate Subtractor and Comparator
    SUB subtractor (.in1(X), .in2(Y), .out(sub_out));
    COMP comparator (.data1(A_out), .data2(B_out), .lt(lt), .gt(gt), .eq(eq));

endmodule

Control Path

module GCD_controller (
    input clk, start, lt, gt, eq,
    output reg loadA, loadB, sel1, sel2, sel_in, done
);
    reg [2:0] state, next_state;
    
    localparam S0=3'd0, S1=3'd1, S2=3'd2, S3=3'd3, S4=3'd4, S5=3'd5;

    // --------------------------------------------------------
    // BLOCK 1: Sequential State Memory (Clock Triggered)
    // --------------------------------------------------------
    always @(posedge clk) begin
        state <= next_state;
    end

    // --------------------------------------------------------
    // BLOCK 2: Combinational Next-State & Output Logic
    // --------------------------------------------------------
    always @(*) begin
        // 1. Default assignments to prevent accidental latches
        loadA = 0; loadB = 0; sel1 = 0; sel2 = 0; sel_in = 0; done = 0;
        next_state = state; 

        case (state)
            S0: begin // Wait for start
                if (start) next_state = S1;
            end
            
            S1: begin // Load Register A
                loadA = 1;
                sel_in = 1; // Route data_in to bus
                next_state = S2;
            end
            
            S2: begin // Load Register B
                loadB = 1;
                sel_in = 1; // Route data_in to bus
                next_state = S3; // Move to evaluation state
            end
            
            S3: begin // Evaluate and Branch
                if (eq) next_state = S5;
                else if (lt) next_state = S4; // A < B
                else if (gt) next_state = S3; // A > B (Remain in S3 to subtract A)
                
                // If A > B (gt is true), execute A = A - B
                if (gt) begin
                    sel1 = 0;   // X = A
                    sel2 = 1;   // Y = B
                    sel_in = 0; // Route sub_out to bus
                    loadA = 1;
                end
            end
            
            S4: begin // Execute B = B - A
                if (eq) next_state = S5;
                else if (lt) next_state = S4; // Remain in S4 to subtract B
                else if (gt) next_state = S3; // Jump back to S3
                
                // If A < B (lt is true), execute B = B - A
                if (lt) begin
                    sel1 = 1;   // X = B
                    sel2 = 0;   // Y = A
                    sel_in = 0; // Route sub_out to bus
                    loadB = 1;
                end
            end
            
            S5: begin // Done State
                done = 1;
                next_state = S5; // Halt here
            end
            
            default: next_state = S0;
        endcase
    end
endmodule

Test Bench

module tb_GCD();
    reg [15:0] data_in;
    reg clk, start;
    
    wire lt, gt, eq;
    wire loadA, loadB, sel1, sel2, sel_in, done;
    wire [15:0] final_out;

    // Instantiate Datapath
    GCD_datapath DP (
        .data_in(data_in), .loadA(loadA), .loadB(loadB), 
        .sel1(sel1), .sel2(sel2), .sel_in(sel_in), .clk(clk),
        .lt(lt), .gt(gt), .eq(eq), .final_out(final_out)
    );

    // Instantiate Controller
    GCD_controller CON (
        .clk(clk), .start(start), .lt(lt), .gt(gt), .eq(eq),
        .loadA(loadA), .loadB(loadB), .sel1(sel1), .sel2(sel2), 
        .sel_in(sel_in), .done(done)
    );

    // Clock Generation
    initial begin
        clk = 0;
        forever #5 clk = ~clk; // 10ns period
    end

    // Test Sequence
    initial begin
        // Dump waves for GTKWave or Vivado
        $dumpfile("gcd.vcd");
        $dumpvars(0, tb_GCD);

        start = 0;
        #3 start = 1; // Trigger start

        // Provide 143 (A) at clock edge 1, and 78 (B) at clock edge 2
        #12 data_in = 16'd143; 
        #10 data_in = 16'd78;  
        
        #300 $finish; // Let the repeated subtraction run
    end

    // Console Monitor
    initial begin
        $display("Time | State | Data In | Result A | Done");
        $display("-------------------------------------------");
        $monitor("%0t   |   %d   |   %d   |    %d    |  %b", 
                 $time, CON.state, data_in, final_out, done);
    end
endmodule

Conclusion: The Euclidean GCD algorithm was implemented using ASM and verified successfully. The design correctly computed the GCD through iterative subtraction/comparison steps. Simulation in Xilinx Vivado validated accurate results and state transitions.

image.png

image.png