A simple JavaScript library that makes it easy to use the KMeans algorithm with TensorFlow.js.

The library was born out of another project whose code depended entirely on TF.JS for everything except KMeans.

As such, moving KMeans to TF.JS as well helped standardise our code base substantially and reduced our dependency on other libraries.


npm install "@tensorflow/tfjs-core" -g


//const KMeans = require("tf-kmeans");
const KMeans = require("../lib/index.js");
const tf = require("@tensorflow/tfjs");
// Synchronous usage: everything runs inside tf.tidy so intermediate tensors
// created during training and prediction are released when the callback returns.
function SyncTest() {
  tf.tidy(() => {
    const kmeans = new KMeans.default({
      k: 2,
      maxIter: 30,
      distanceFunction: KMeans.default.EuclideanDistance
    });
    const dataset = tf.tensor([[2, 2, 2], [5, 5, 5], [3, 3, 3], [4, 4, 4], [7, 8, 7]]);
    // Train returns the cluster index assigned to every row of the dataset.
    const predictions = kmeans.Train(dataset);

    console.log("Assigned To ", predictions.arraySync());
    console.log("Centroids Used are ", kmeans.Centroids().arraySync());
    console.log("Prediction for Given Value is");
    kmeans.Predict(tf.tensor([2, 3, 2])).print();
    console.log("Amount of Memory Used is ", tf.memory());
    // Use this In case kmeans not executed in Tidy Function
    // kmeans.Dispose();
  });
}

// Asynchronous usage: TrainAsync awaits the supplied callback at the end of
// every iteration, which leaves room for logging or plotting between steps.
async function AsyncTest() {
  const kmeans = new KMeans.default({
    k: 3,
    maxIter: 30,
    distanceFunction: KMeans.default.EuclideanDistance
  });
  const dataset = tf.tensor([[2, 2, 2], [5, 5, 5], [3, 3, 3], [4, 4, 4], [7, 8, 7]]);

  console.log("\n\nAsync Test");
  const predictions = await kmeans.TrainAsync(
    dataset,
    // Called At End of Every Iteration
    async (iter, centroid, preds) => {
      console.log("Iteration Count", iter);
      console.log("Centroid ", await centroid.array());
      console.log("Prediction ", await preds.array());
      // You could instead use TFVIS for Plotting Here
    }
  );
  console.log("Assigned To ", await predictions.array());
  console.log("Centroids Used are ", await kmeans.Centroids().array());
  console.log("Prediction for Given Value is");
  kmeans.Predict(tf.tensor([2, 3, 2])).print();
  console.log("Amount of Memory Used is ", tf.memory());
}



AsyncTest().then(() => console.log("Hi"));


[Running] node "d:\tf-kmeans-master\samples\index.js"

Hi there 👋. Looks like you are running TensorFlow.js in Node.js. To speed things up dramatically, install our node backend, which binds to TensorFlow C++, by running npm i @tensorflow/tfjs-node, or npm i @tensorflow/tfjs-node-gpu if you have CUDA. Then call require('@tensorflow/tfjs-node'); (-gpu suffix for CUDA) at the start of your program. Visit https://github.com/tensorflow/tfjs-node for more details.
Assigned To  [ 1, 0, 1, 0, 0 ]
Centroids Used are  [ [ 5.333333492279053, 5.666666507720947, 5.333333492279053 ],
  [ 2.5, 2.5, 2.5 ] ]
Prediction for Given Value is
Amount of Memory Used is  { unreliable: true,
  reasons:
   [ 'The reported memory is an upper bound. Due to automatic garbage collection, the true allocated memory may be less.' ],
  numTensors: 7,
  numDataBuffers: 7,
  numBytes: 160 }

Async Test
Iteration Count 0
Centroid  [ [ 4.5, 4.5, 4.5 ], [ 7, 8, 7 ], [ 2.5, 2.5, 2.5 ] ]
Prediction  [ 2, 0, 2, 0, 1 ]
Assigned To  [ 2, 0, 2, 0, 1 ]
Centroids Used are  [ [ 4.5, 4.5, 4.5 ], [ 7, 8, 7 ], [ 2.5, 2.5, 2.5 ] ]
Prediction for Given Value is
Amount of Memory Used is  { unreliable: true,
  reasons:
   [ 'The reported memory is an upper bound. Due to automatic garbage collection, the true allocated memory may be less.' ],
  numTensors: 6,
  numDataBuffers: 6,
  numBytes: 152 }

[Done] exited with code=0 in 0.304 seconds



  1. Constructor Takes 3 Optional parameters
    1. k:- Number of Clusters
    2. maxIter:- Max Iterations
    3. distanceFunction:- The distance function to use. Currently only Euclidean distance is provided
  2. Train takes a dataset as its parameter and performs training on it. The sync callback function is optional
  3. TrainAsync takes a dataset as its parameter and performs training on it. It also takes an async callback function that is called at the end of every iteration
  4. Centroids Returns the Centroids found for the dataset on which KMeans was Trained
  5. Predict Performs Predictions on the data Provided as Input


import * as tf from "@tensorflow/tfjs-core";

export default class KMeans {
    public k: number = 2;
    public maxIter: number = 200;
    public distanceFunction = KMeans.EuclideanDistance;
    public centroids!: tf.Tensor;

    /**
     * Creates a KMeans model.
     *
     * All options are optional. The values assigned here always overwrite the
     * field initialisers, so the effective defaults are the ones in this signature.
     *
     * @param options.k Number of clusters (default 2).
     * @param options.maxIter Iteration cap checked by the training loops (default 10).
     * @param options.distanceFunction Function used to measure the distance between a
     *   value and the centroids (default `KMeans.EuclideanDistance`).
     */
    public constructor({ k = 2, maxIter = 10, distanceFunction = KMeans.EuclideanDistance } = {}) {
        this.k = k;
        this.maxIter = maxIter;
        this.distanceFunction = distanceFunction;
    }

    /**
     * Euclidean distance between `values` and `centroids`: the squared differences
     * are summed along axis 1 and the square root of each sum is returned.
     *
     * @param values Tensor to measure from; it is combined element-wise with `centroids`.
     * @param centroids Tensor of centroids to measure against.
     * @returns A tensor of distances; intermediates are released by `tf.tidy`.
     */
    public static EuclideanDistance(values: tf.Tensor, centroids: tf.Tensor) {
        return tf.tidy(() => values.squaredDifference(centroids).sum(1).sqrt());
    }
    /**
     * Builds the list of row indices `[0, 1, ..., rows - 1]`.
     *
     * @param rows Number of indices to generate.
     * @returns An array holding each index once, in ascending order.
     */
    private GenerateIndices(rows: number) {
        return Array.from({ length: rows }, (_, i) => i);
    }
    /**
     * Computes the centroid of one cluster as the mean of the rows assigned to it.
     *
     * @param values Data rows (assumed 2-D `[rows, features]` — TODO confirm).
     * @param assignments Cluster index assigned to each row of `values`.
     * @param cluster Index of the cluster whose centroid is computed.
     * @param rows Number of rows in `values`.
     * @returns The column-wise sum of the rows assigned to `cluster`, divided by their count.
     */
    private NewCentroidSingle(values: tf.Tensor, assignments: tf.Tensor, cluster: number, rows: number) {
        return tf.tidy(() => {
            // One copy of the cluster index per row, so it lines up element-wise with `assignments`.
            const clusterIds = tf.tensor(new Array<number>(rows).fill(cluster));

            // 1 where a row belongs to `cluster`, 0 elsewhere; reshaped to a column so the
            // multiplication below applies each row's flag to every value in that row.
            const matches = tf.equal(assignments, clusterIds).asType("int32");
            const mask = matches.reshape([matches.shape[0], 1]);
            const count = mask.sum();

            // NOTE(review): when no row is assigned to `cluster`, `count` is 0 and this divides by zero.
            return values.mul(mask).sum(0).div(count);
        });
    }
    /**
     * Recomputes every centroid from the current assignments.
     *
     * @param values Data rows.
     * @param assignments Cluster index assigned to each row of `values`.
     * @returns The `k` per-cluster centroids stacked into one tensor, in cluster order.
     */
    private NewCentroids(values: tf.Tensor, assignments: tf.Tensor) {
        return tf.tidy(() => {
            const rows = values.shape[0];
            const centroids: tf.Tensor[] = [];
            for (let cluster = 0; cluster < this.k; ++cluster) {
                centroids.push(this.NewCentroidSingle(values, assignments, cluster, rows));
            }
            return tf.stack(centroids);
        });
    }
    /**
     * Picks the cluster for a single value: the index along axis 0 with the smallest
     * distance, as measured by the configured `distanceFunction`.
     *
     * @param value The value to classify.
     * @param centroids Current centroids.
     * @returns A tensor holding the index of the minimum distance.
     */
    private AssignCluster(value: tf.Tensor, centroids: tf.Tensor) {
        return tf.tidy(() => this.distanceFunction(value, centroids).argMin(0));
    }
    /**
     * Assigns a cluster to every row of `values`.
     *
     * @param values Data rows; each row is gathered by index and classified on its own.
     * @param centroids Current centroids.
     * @returns The per-row cluster indices stacked into one tensor, in row order.
     */
    private AssignClusters(values: tf.Tensor, centroids: tf.Tensor) {
        return tf.tidy(() => {
            const rows = values.shape[0];
            const minIndexes: tf.Tensor[] = [];
            for (const index of this.GenerateIndices(rows)) {
                const value = values.gather(index);
                minIndexes.push(this.AssignCluster(value, centroids));
            }
            return tf.stack(minIndexes);
        });
    }
    /**
     * Picks `k` distinct rows of `vals` at random; `Train` and `TrainAsync` use the
     * result as the initial centroids.
     *
     * @param vals Data rows to sample from.
     * @returns A tensor holding the `k` selected rows.
     * @throws Error when `vals` has fewer than `k` rows.
     */
    private RandomSample(vals: tf.Tensor) {
        return tf.tidy(() => {
            const rows = vals.shape[0];
            if (rows < this.k)
                throw new Error("Rows are Less than K");

            // Shuffle all row indices and keep the first k, copied into a plain number array.
            const indices = Array.from(tf.util.createShuffledIndices(rows).slice(0, this.k));
            // Extract Random Indices
            return tf.gatherND(vals, tf.tensor(indices, [this.k, 1], "int32"));
        });
    }
    /**
     * Presumably reports whether the newly computed centroids match the previous ones
     * (inferred from the name and from how `Train`/`TrainAsync` use the result — confirm).
     *
     * NOTE(review): this copy of the method is cut off after `newCentroids`. The rest of
     * the expression and the closing braces are not visible here, so the comparison and
     * the return type cannot be documented. Callers in this file only test the result
     * for truthiness.
     */
    private CheckCentroidSimmilarity(newCentroids: tf.Tensor, centroids: tf.Tensor, vals: tf.Tensor) {
        return tf.tidy(() => newCentroids
    /**
     * Runs one KMeans iteration: assigns every row to a cluster using the current
     * centroids, then recomputes the centroids from those assignments.
     *
     * @param values Data rows to train on.
     * @returns `[newCentroids, predictions]`; both tensors are returned from `tf.tidy`,
     *   so they outlive it.
     */
    private TrainSingleStep(values: tf.Tensor) {
        return tf.tidy(() => {
            const predictions = this.Predict(values);
            const newCentroids = this.NewCentroids(values, predictions);
            return [newCentroids, predictions];
        });
    }
    /**
     * Trains the model synchronously on `values`.
     *
     * Starts from `k` randomly sampled rows and repeats single training steps until the
     * centroid check reports no change or `maxIter` iterations have completed.
     *
     * @param values Data rows to cluster.
     * @param callback Optional; called after each iteration that updated the centroids,
     *   with the new centroids and that iteration's assignments.
     * @returns The cluster assignments computed against the final `this.centroids`.
     */
    public Train(values: tf.Tensor, callback = (_centroid: tf.Tensor, _predictions: tf.Tensor) => { }) {
        this.centroids = this.RandomSample(values);
        let iter = 0;
        while (true) {
            const [newCentroids, predictions] = this.TrainSingleStep(values);
            const same = this.CheckCentroidSimmilarity(newCentroids, this.centroids, values);
            if (same || iter >= this.maxIter) {
                // The candidate centroids are discarded and were never handed to the
                // caller, so release them instead of leaking them outside tf.tidy.
                newCentroids.dispose();
                return predictions;
            }
            this.centroids = newCentroids;
            callback(this.centroids, predictions);
            // Without this increment the maxIter cap above could never trigger.
            ++iter;
        }
    }
    /**
     * Trains the model on `values`, awaiting `callback` between iterations.
     *
     * Starts from `k` randomly sampled rows and repeats single training steps until the
     * centroid check reports no change or `maxIter` iterations have completed.
     *
     * @param values Data rows to cluster.
     * @param callback Optional async function awaited after each iteration that updated
     *   the centroids; receives the zero-based iteration count, the new centroids and
     *   that iteration's assignments.
     * @returns A promise of the cluster assignments computed against the final `this.centroids`.
     */
    public async TrainAsync(values: tf.Tensor, callback = async (_iter: number, _centroid: tf.Tensor, _predictions: tf.Tensor) => { }) {
        this.centroids = this.RandomSample(values);
        let iter = 0;
        while (true) {
            const [newCentroids, predictions] = this.TrainSingleStep(values);
            const same = this.CheckCentroidSimmilarity(newCentroids, this.centroids, values);
            if (same || iter >= this.maxIter) {
                // The candidate centroids are discarded and were never handed to the
                // caller, so release them instead of leaking them.
                newCentroids.dispose();
                return predictions;
            }
            this.centroids = newCentroids;
            await callback(iter, this.centroids, predictions);
            // Incremented after the callback so the first callback reports iteration 0.
            ++iter;
        }
    }
    /**
     * Assigns each row of `y` to a cluster using the current centroids.
     *
     * @param y Data to classify. An input without a second dimension is treated as a
     *   single row and reshaped to `[1, length]` first.
     * @returns A tensor with one cluster index per row.
     */
    public Predict(y: tf.Tensor) {
        return tf.tidy(() => {
            const batch = y.shape[1] == null ? y.reshape([1, y.shape[0]]) : y;
            return this.AssignClusters(batch, this.centroids);
        });
    }
    /**
     * Returns the centroids tensor currently held by this instance (set by `Train` / `TrainAsync`).
     * The tensor is returned as-is, not copied.
     */
    public Centroids() {
        return this.centroids;
    }
    // NOTE(review): the body of Dispose is not visible in this copy of the file.
    // Presumably it releases the tensors held by this instance (e.g. `this.centroids`) —
    // confirm against the original source before relying on it.
    public Dispose() {


