On Fri, 15 Feb 2008 20:29:30 GMT, Roedy Green
<> wrote, quoted or indirectly quoted
someone who said :
>I wrote this code to experiment. Turns out binary search is the pits.
>HashSet starts to get better around 11 items.
The sample keys are nicely scrambled to make HashSet happier than it
deserves to be.
package com.mindprod.example;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
/*
* Test relative speed of 3 methods of looking up a key to see if it is
is the list of legal keys.
* HashSet, binary search, linear search for different numbers of keys
n.
* <p/>
* composed with IntelliJ IDEA
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0, 2008-02-15
*/
public class TestFinding
{
// ------------------------------ FIELDS
------------------------------
/**
* lookup the random strings
*/
private static HashSet<String> h;
/**
* random number generator
*/
private final static Random wheel = new Random();
/**
* list of strings we want to look up
*/
private static String[] keys;
// -------------------------- STATIC METHODS
--------------------------
/**
* build lookup structure
*/
private static void build()
{
// build HashSet
h = new HashSet<String>( Arrays.asList( keys ) );
// build binary search
Arrays.sort( keys );
}
/**
* find all the keys by BinarySearch
*/
private static void findAllViaBinarySearch()
{
for ( String lookFor : keys )
{
int whereFound = Arrays.binarySearch( keys, lookFor );
// -ve means not found +ve tells where found.
}
}
/**
* find all the keys by HashSet
*/
private static void findAllViaHashSet()
{
for ( String lookFor : keys )
{
boolean found = h.contains( lookFor );
}
}
/**
* find all the keys by linear search
*/
private static void findAllViaLinearSearch()
{
for ( String lookFor : keys )
{
boolean found = false;
for ( String candidate : keys )
{
if ( lookFor.equals( candidate ) )
{
found = true;
break;
}
}
}
}
/**
* generate N random keys and their lookup structure
*/
private static void generate( int n )
{
keys = new String[n];
for ( int i = 0; i < n; i++ )
{
keys[ i ] = Long.toString( wheel.nextLong() );
}
}
// --------------------------- main() method
---------------------------
/**
* main method
*
* @param args not used
*/
public static void main( String[] args )
{
for ( int n = 1; n <= 10000; n *= 10 )
{
generate( n );
build();
final long t1 = System.nanoTime();
findAllViaBinarySearch();
final long t2 = System.nanoTime();
findAllViaHashSet();
final long t3 = System.nanoTime();
findAllViaLinearSearch();
final long t4 = System.nanoTime();
System.out
.println( "n:" +
n +
" binary search: " +
( t2 - t1 ) +
" HashSet: " +
( t3 - t2 ) +
" Linear: " +
( t4 - t3 ) );
}
// Here in output on my machine With JDK 1.6.0_04
//Linear is best for 10 or fewer, then HashSet.
// Binary search is never optimal.
// n:1 binary search: 21600 HashSet: 9720 Linear: 6400*
// n:10 binary search: 58720 HashSet: 11360 Linear: 11120*
// n:100 binary search: 707320 HashSet: 97520* Linear: 695960
// n:1000 binary search: 1294360 HashSet: 1255480* Linear:
10911040
// n:10000 binary search: 9823600 HashSet: 3920200* Linear:
1513846600
// Here in output on my machine with Jet
// n:1 binary search: 5520 HashSet: 4760 Linear: 1240*
// n:10 binary search: 3560 HashSet: 2480 Linear: 2400*
// n:100 binary search: 28160 HashSet: 5480* Linear: 44840
// n:1000 binary search: 408840 HashSet: 43560* Linear:
7862960
// n:10000 binary search: 9142440 HashSet: 2295320* Linear:
1852690520
}
}
--
Roedy Green Canadian Mind Products
The Java Glossary
http://mindprod.com