Tuesday, October 26, 2010

A generic HashMap, my crazy implementation...

Last but not least, my crazy implementation of a generic HashMap (aka dictionary), is crazy because I use TStrings for storing buckets, which give very fast access in any case...

The way HashMap works is like contacts in your movile device, they are stored as Name/Number (key/value) and the dispersion factor is the First letter (A...Z), in a hashmap you define the large your contact list is and how you store each entry, the "how" is the most  important, for instance, take 1000 entries, if your dispersion ratio is something like five, all your entries will be only inserted into five buckets...

bucket = Hash(aKey) mod SIZE;

Source can be found in BB.Collection.Hash


THashList<T: class> = class(TPooledObject, IIterator<T>)
  private
    FData: THashData;
    FCount: integer;
    FOwned: boolean;
    FBucket: integer;
    FIndex: integer;
    FSize: integer;

    function GetItem(aIndex: integer): T;
    procedure GetOffset(const aElement: integer; out aBucket, aIndex: integer);
    function GetKey(aIndex: integer): string;
    procedure SetSize(const aValue: integer);
    procedure Grow(aFrom: integer; aTo: integer);
  protected
    function FirstObject: T;
    function NextObject: T;
    function LastObject: T;
    function PriorObject: T;
    function GetCount: integer;
    function IsEmpty: boolean;
  public
    constructor Create(aSize: integer = DEFAULT_HASHMAP_SIZE); reintroduce;
    destructor Destroy; override;
    function Add(const aKey: string; aObject: T): integer; virtual;
    procedure Clear; override;
    procedure Rename(const aOldKey, aNewKey: string; aObject: T); overload;
    function Modify(const aKey: string; aObject: T): integer; overload; virtual;
    procedure Delete(const aKey: string);
    function IndexOf(const aKey: string): T; overload;
    function Find(const aPartialKey: string): T;
    function Sort: TStrings;
    function GetStatistics: TArrayOfInt;
    // IIterator
    function First: boolean;
    function Last: boolean;
    function Next: boolean;
    function GetCurrent: T;

    property Items[index: integer]: T read GetItem; default;
    property Keys[index: integer]: string read GetKey;
    property Owned: boolean read FOwned write FOwned;
    property Size: integer read FSize write SetSize;
    property Count: integer read GetCount;
  end;


Implementation:


 {THashList }

function THashList<T>.Add(const aKey: string; aObject: T): integer;
begin
  result := FData[TVar.GetHash(aKey) mod FSize].AddObject(aKey, aObject);

  Inc(FCount);
end;

procedure THashList<T>.Clear;
var
  i, j: integer;
  obj: T;
  l: TStringList;

begin
  inherited;

  for i := Low(FData) to High(FData) do
  begin
    l := FData[i];
    for j := FData[i].Count - 1 DownTo 0 do
    begin
      obj := l.Objects[j];
      if (obj <> nil) and (FOwned) then
        obj.Free;
    end;

    l.Clear;
  end;

  FCount := 0;
end;

function THashList<T>.GetCount: integer;
begin
  result := FCount;
end;

constructor THashList<T>.Create(aSize: integer = DEFAULT_HASHMAP_SIZE);
begin
  inherited Create;

  FOwned := False;
  FCount := 0;
  FBucket := -1;
  FIndex := -1;

  Grow( Low(FData), aSize);
end;

procedure THashList<T>.Delete(const aKey: string);
var
  bucket, i: integer;
  obj: T;
  l: TStringList;

begin
  bucket := TVar.GetHash(aKey) mod FSize;
  l := FData[bucket];
  i := l.IndexOf(aKey);
  obj := l.Objects[i];
  l.Delete(i);

  Dec(FCount);
  if FOwned then
    obj.Free;
end;

destructor THashList<T>.Destroy;
var
  i: integer;

begin
  Clear;
  for i := Low(FData) to High(FData) do
    FData[i].Free;

  inherited Destroy;
end;

function THashList<T>.Find(const aPartialKey: string): T;
var
  obj: T;
  key: string;

begin
  result := nil;
  obj := FirstObject;
  while obj <> nil do
  begin
    key := FData[FBucket].Strings[FIndex];
    if Pos(aPartialKey, key) > 0 then
    begin
      result := FData[FBucket].Objects[FIndex];
      Break;
    end;

    obj := NextObject;
  end;
end;

function THashList<T>.First: boolean;
begin
  result := FirstObject <> nil;
end;

function THashList<T>.FirstObject: T;
begin
  if IsEmpty then
  begin
    result := nil;
    Exit;
  end;

  FBucket := 0;
  while FData[FBucket].Count = 0 do
    Inc(FBucket);
  FIndex := 0;

  result := GetCurrent;
end;

function THashList<T>.GetCurrent: T;
begin
  if (FBucket > -1) and (FIndex < FData[FBucket].Count) then
    result := FData[FBucket].Objects[FIndex]
  else
    result := nil;
end;

function THashList<T>.GetItem(aIndex: integer): T;
var
  bucket, i: integer;

begin
  GetOffset(aIndex, bucket, i);
  result := FData[bucket].Objects[i];
end;

function THashList<T>.GetKey(aIndex: integer): string;
var
  bucket, i: integer;

begin
  GetOffset(aIndex, bucket, i);
  result := FData[bucket][i];
end;

procedure THashList<T>.GetOffset(const aElement: integer;
  out aBucket, aIndex: integer);
var
  total: integer;

begin
  total := 0;
  aBucket := 0;
  aIndex := aElement;
  while (total <= aElement) or (total = 0) do
  begin
    Inc(total, FData[aBucket].Count);
    Inc(aBucket);
  end;
  Dec(aBucket);

  Dec(total, FData[aBucket].Count);
  Dec(aIndex, total);
end;

function THashList<T>.IndexOf(const aKey: string): T;
var
  bucket, i: integer;
  l: TStringList;

begin
  bucket := TVar.GetHash(aKey) mod FSize;
  l := FData[bucket];
  i := l.IndexOf(aKey);
  if i > -1 then
    result := l.Objects[i]
  else
    result := nil;
end;

function THashList<T>.IsEmpty: boolean;
begin
  result := FCount = 0;
end;

function THashList<T>.Last: boolean;
begin
  result := LastObject <> nil;
end;

function THashList<T>.LastObject: T;
begin
  if IsEmpty then
  begin
    result := nil;
    Exit;
  end;

  FBucket := High(THashData);
  while FData[FBucket].Count = 0 do
    Dec(FBucket);
  FIndex := FData[FBucket].Count - 1;

  result := GetCurrent;
end;

function THashList<T>.Modify(const aKey: string; aObject: T): integer;
var
  bucket: integer;
  l: TStringList;

begin
  bucket := TVar.GetHash(aKey) mod FSize;
  l := FData[bucket];
  result := l.IndexOf(aKey);
  if result > -1 then
  begin
    if FOwned then
      l.Objects[result].Free;
    l.Objects[result] := aObject;
  end
  else
    result := Add(aKey, aObject);
end;

function THashList<T>.Next: boolean;
begin
  result := NextObject <> nil;
end;

function THashList<T>.NextObject: T;
begin
  if IsEmpty then
  begin
    result := nil;
    Exit;
  end;

  Inc(FIndex);
  if FIndex > FData[FBucket].Count - 1 then
  begin
    Inc(FBucket);
    while (FData[FBucket].Count = 0) and (FBucket < High(FData)) do
      Inc(FBucket);
    FIndex := 0;
  end;

  result := GetCurrent;
end;

function THashList<T>.PriorObject: T;
begin
  if IsEmpty then
  begin
    result := nil;
    Exit;
  end;

  Dec(FIndex);
  if FIndex < 0 then
  begin
    Inc(FBucket);
    while (FData[FBucket].Count = 0) and (FBucket > 0) do
      Dec(FBucket);
    FIndex := FData[FBucket].Count - 1;
  end;

  result := GetCurrent;
end;

procedure THashList<T>.SetSize(const aValue: integer);
begin
  if aValue < FSize then
    raise Exception.Create('Hash size can only grow');

  Grow(FSize + 1, aValue);
end;

function THashList<T>.Sort: TStrings;
var
  i, index, bucket: integer;

begin
  result := TStringList.Create;
  TStringList(result).Sorted := True;
  TStringList(result).Duplicates := dupError;

  for i := 0 to GetCount - 1 do
  begin
    GetOffset(i, bucket, index);
    result.AddObject(FData[bucket].Strings[index],
      FData[bucket].Objects[index]);
  end;
end;

function THashList<T>.GetStatistics: TArrayOfInt;
var
  i: integer;

begin
  SetLength(result, Length(FData));
  for i := Low(FData) to High(FData) do
    result[i] := FData[i].Count;
end;

procedure THashList<T>.Grow(aFrom: integer; aTo: integer);
var
  n: integer;

begin
  if aTo = 0 then
    raise Exception.Create('HashList size cannot be zero');

  FSize := aTo;
  SetLength(FData, FSize + 1);

  for n := Low(FData) to High(FData) do
  begin
    FData[n] := TStringList.Create;
    FData[n].Sorted := True;
    FData[n].Duplicates := dupError;
    FData[n].Capacity := 100;
  end;
end;

procedure THashList<T>.Rename(const aOldKey, aNewKey: string; aObject: T);
begin
  Delete(aOldKey);
  Add(aNewKey, aObject);
end;
The hash function looks like this:
class function TVar.GetHash(const aText: string): integer;
var
  i: Integer;

begin
  result := 5381;
  for i := 1 to Length(aText) do
    result := ((result shl 5) + result) + Ord(aText[i]);
//    result := result * 2 + n xor Ord(aText[n]);
end;
The comments means that I'm not happy with the dispersion ratio.

No comments:

Post a Comment