The way HashMap works is like contacts in your movile device, they are stored as Name/Number (key/value) and the dispersion factor is the First letter (A...Z), in a hashmap you define the large your contact list is and how you store each entry, the "how" is the most important, for instance, take 1000 entries, if your dispersion ratio is something like five, all your entries will be only inserted into five buckets...
bucket = Hash(aKey) mod SIZE;
Source can be found in BB.Collection.Hash
THashList<T: class> = class(TPooledObject, IIterator<T>) private FData: THashData; FCount: integer; FOwned: boolean; FBucket: integer; FIndex: integer; FSize: integer; function GetItem(aIndex: integer): T; procedure GetOffset(const aElement: integer; out aBucket, aIndex: integer); function GetKey(aIndex: integer): string; procedure SetSize(const aValue: integer); procedure Grow(aFrom: integer; aTo: integer); protected function FirstObject: T; function NextObject: T; function LastObject: T; function PriorObject: T; function GetCount: integer; function IsEmpty: boolean; public constructor Create(aSize: integer = DEFAULT_HASHMAP_SIZE); reintroduce; destructor Destroy; override; function Add(const aKey: string; aObject: T): integer; virtual; procedure Clear; override; procedure Rename(const aOldKey, aNewKey: string; aObject: T); overload; function Modify(const aKey: string; aObject: T): integer; overload; virtual; procedure Delete(const aKey: string); function IndexOf(const aKey: string): T; overload; function Find(const aPartialKey: string): T; function Sort: TStrings; function GetStatistics: TArrayOfInt; // IIterator function First: boolean; function Last: boolean; function Next: boolean; function GetCurrent: T; property Items[index: integer]: T read GetItem; default; property Keys[index: integer]: string read GetKey; property Owned: boolean read FOwned write FOwned; property Size: integer read FSize write SetSize; property Count: integer read GetCount; end;
Implementation:
{THashList } function THashList<T>.Add(const aKey: string; aObject: T): integer; begin result := FData[TVar.GetHash(aKey) mod FSize].AddObject(aKey, aObject); Inc(FCount); end; procedure THashList<T>.Clear; var i, j: integer; obj: T; l: TStringList; begin inherited; for i := Low(FData) to High(FData) do begin l := FData[i]; for j := FData[i].Count - 1 DownTo 0 do begin obj := l.Objects[j]; if (obj <> nil) and (FOwned) then obj.Free; end; l.Clear; end; FCount := 0; end; function THashList<T>.GetCount: integer; begin result := FCount; end; constructor THashList<T>.Create(aSize: integer = DEFAULT_HASHMAP_SIZE); begin inherited Create; FOwned := False; FCount := 0; FBucket := -1; FIndex := -1; Grow( Low(FData), aSize); end; procedure THashList<T>.Delete(const aKey: string); var bucket, i: integer; obj: T; l: TStringList; begin bucket := TVar.GetHash(aKey) mod FSize; l := FData[bucket]; i := l.IndexOf(aKey); obj := l.Objects[i]; l.Delete(i); Dec(FCount); if FOwned then obj.Free; end; destructor THashList<T>.Destroy; var i: integer; begin Clear; for i := Low(FData) to High(FData) do FData[i].Free; inherited Destroy; end; function THashList<T>.Find(const aPartialKey: string): T; var obj: T; key: string; begin result := nil; obj := FirstObject; while obj <> nil do begin key := FData[FBucket].Strings[FIndex]; if Pos(aPartialKey, key) > 0 then begin result := FData[FBucket].Objects[FIndex]; Break; end; obj := NextObject; end; end; function THashList<T>.First: boolean; begin result := FirstObject <> nil; end; function THashList<T>.FirstObject: T; begin if IsEmpty then begin result := nil; Exit; end; FBucket := 0; while FData[FBucket].Count = 0 do Inc(FBucket); FIndex := 0; result := GetCurrent; end; function THashList<T>.GetCurrent: T; begin if (FBucket > -1) and (FIndex < FData[FBucket].Count) then result := FData[FBucket].Objects[FIndex] else result := nil; end; function THashList<T>.GetItem(aIndex: integer): T; var bucket, i: integer; begin GetOffset(aIndex, bucket, i); result := FData[bucket].Objects[i]; end; function THashList<T>.GetKey(aIndex: integer): string; var bucket, i: integer; begin GetOffset(aIndex, bucket, i); result := FData[bucket][i]; end; procedure THashList<T>.GetOffset(const aElement: integer; out aBucket, aIndex: integer); var total: integer; begin total := 0; aBucket := 0; aIndex := aElement; while (total <= aElement) or (total = 0) do begin Inc(total, FData[aBucket].Count); Inc(aBucket); end; Dec(aBucket); Dec(total, FData[aBucket].Count); Dec(aIndex, total); end; function THashList<T>.IndexOf(const aKey: string): T; var bucket, i: integer; l: TStringList; begin bucket := TVar.GetHash(aKey) mod FSize; l := FData[bucket]; i := l.IndexOf(aKey); if i > -1 then result := l.Objects[i] else result := nil; end; function THashList<T>.IsEmpty: boolean; begin result := FCount = 0; end; function THashList<T>.Last: boolean; begin result := LastObject <> nil; end; function THashList<T>.LastObject: T; begin if IsEmpty then begin result := nil; Exit; end; FBucket := High(THashData); while FData[FBucket].Count = 0 do Dec(FBucket); FIndex := FData[FBucket].Count - 1; result := GetCurrent; end; function THashList<T>.Modify(const aKey: string; aObject: T): integer; var bucket: integer; l: TStringList; begin bucket := TVar.GetHash(aKey) mod FSize; l := FData[bucket]; result := l.IndexOf(aKey); if result > -1 then begin if FOwned then l.Objects[result].Free; l.Objects[result] := aObject; end else result := Add(aKey, aObject); end; function THashList<T>.Next: boolean; begin result := NextObject <> nil; end; function THashList<T>.NextObject: T; begin if IsEmpty then begin result := nil; Exit; end; Inc(FIndex); if FIndex > FData[FBucket].Count - 1 then begin Inc(FBucket); while (FData[FBucket].Count = 0) and (FBucket < High(FData)) do Inc(FBucket); FIndex := 0; end; result := GetCurrent; end; function THashList<T>.PriorObject: T; begin if IsEmpty then begin result := nil; Exit; end; Dec(FIndex); if FIndex < 0 then begin Inc(FBucket); while (FData[FBucket].Count = 0) and (FBucket > 0) do Dec(FBucket); FIndex := FData[FBucket].Count - 1; end; result := GetCurrent; end; procedure THashList<T>.SetSize(const aValue: integer); begin if aValue < FSize then raise Exception.Create('Hash size can only grow'); Grow(FSize + 1, aValue); end; function THashList<T>.Sort: TStrings; var i, index, bucket: integer; begin result := TStringList.Create; TStringList(result).Sorted := True; TStringList(result).Duplicates := dupError; for i := 0 to GetCount - 1 do begin GetOffset(i, bucket, index); result.AddObject(FData[bucket].Strings[index], FData[bucket].Objects[index]); end; end; function THashList<T>.GetStatistics: TArrayOfInt; var i: integer; begin SetLength(result, Length(FData)); for i := Low(FData) to High(FData) do result[i] := FData[i].Count; end; procedure THashList<T>.Grow(aFrom: integer; aTo: integer); var n: integer; begin if aTo = 0 then raise Exception.Create('HashList size cannot be zero'); FSize := aTo; SetLength(FData, FSize + 1); for n := Low(FData) to High(FData) do begin FData[n] := TStringList.Create; FData[n].Sorted := True; FData[n].Duplicates := dupError; FData[n].Capacity := 100; end; end; procedure THashList<T>.Rename(const aOldKey, aNewKey: string; aObject: T); begin Delete(aOldKey); Add(aNewKey, aObject); end;
The hash function looks like this:
class function TVar.GetHash(const aText: string): integer;
var
i: Integer;
begin
result := 5381;
for i := 1 to Length(aText) do
result := ((result shl 5) + result) + Ord(aText[i]);
// result := result * 2 + n xor Ord(aText[n]);
end;
The comments means that I'm not happy with the dispersion ratio.
No comments:
Post a Comment