Velocity Reviews - Computer Hardware Reviews

Velocity Reviews > Newsgroups > Programming > Python > Re: Best way to make a list unique?

Reply
Thread Tools

Re: Best way to make a list unique?

 
 
Michael Spencer
Guest
Posts: n/a
 
      03-09-2005
Delaney, Timothy C (Timothy) wrote:
> Michael Hoffman wrote:
>
>
>>>>For those who don't know, these implement a hash set/map which
>>>>iterates in the order that the keys were first added to the set/map.

>>
>>I would love to see such a thing.

>
>
> I've proposed this on python-dev, but the general feeling so far is
> against it. So far the only use case is to remove duplicates without
> changing order, and there are iterator-based solutions which would
> normally be preferable.
>
> It's pretty simple to roll your own, and I'll probably put together a
> Cookbook recipe for it.
>
> Tim Delaney


Here's something to work with:

class OrdSet(object):
def __init__(self, iterable):
"""Build an ordered, unique collection of hashable items"""
self._data = {None:[None, None]} # None is the pointer to the first
# element. This is unsatisfactory
# because it cannot then be a member
# of the collection
self._last = None
self.update(iterable)

def add(self, obj):
"""Add an element to the collection"""
data = self._data
if not obj in data:
last = self._last
data[last][1] = obj
data[obj] = [last, None]
self._last = obj

def update(self, iterable):
"""Update the collection with the union of itself and another"""
obj = self._last
data = self._data
last = data[obj][0]
for item in iterable:
if item not in data:
data[obj] = [last, item]
last, obj = obj, item
data[obj] = [last, None]
self._last = obj

def remove(self, item):
"""Remove an element from a set; it must be a member.

If the element is not a member, raise a KeyError."""
data = self._data
prev, next = data[item]
data[prev][1] = next
data[next][0] = prev

def discard(self, item):
"""Remove an element from a set if it is a member.
If the element is not a member, do nothing."""
try:
self.remove(item)
except KeyError:
pass

def __contains__(self, item):
return item in self._data

def pop(self):
"""Remove and the return the oldest element"""
data = self._data
prev, first = data[None]
data[None] = [None,data[first][1]]
return first

def clear(self):
self.__init__([])

def __iter__(self):
"""Iterate over the collection in order"""
data = self._data
prev, next = data[None]
while next is not None:
yield next
prev, next = data[next]

def __len__(self):
return len(self._data)-1

def __repr__(self):
return "%s(%s)" % (self.__class__.__name__,list(self))

>>> a= OrdSet(range(10))
>>> a

OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> a.update(range(5,15))
>>> a

OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
>>> a.discard(
>>> a

OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14])
>>>



Michael

 
Reply With Quote
 
 
 
 
Marc Christiansen
Guest
Posts: n/a
 
      03-09-2005
Michael Spencer <(E-Mail Removed)> wrote:

Nice. When you replace None by an object(), you have no restriction on
the elements any more:

> Here's something to work with:
>
> class OrdSet(object):
> def __init__(self, iterable):
> """Build an ordered, unique collection of hashable items"""
> #self._data = {None:[None, None]} # None is the pointer to the first
> # # element. This is unsatisfactory
> # # because it cannot then be a
> # # member of the collection
> #self._last = None

self._last = self._root = root = object()
self._data = {root:[root, root]}
> self.update(iterable)
>
> def add(self, obj):
> """Add an element to the collection"""
> data = self._data
> if not obj in data:
> last = self._last
> data[last][1] = obj
> #data[obj] = [last, None]

data[obj] = [last, self._root]
> self._last = obj
>
> def update(self, iterable):
> """Update the collection with the union of itself and another"""
> obj = self._last
> data = self._data
> last = data[obj][0]
> for item in iterable:
> if item not in data:
> data[obj] = [last, item]
> last, obj = obj, item
> #data[obj] = [last, None]

data[obj] = [last, self._root]
> self._last = obj
>
> def remove(self, item):
> """Remove an element from a set; it must be a member.
>
> If the element is not a member, raise a KeyError."""
> data = self._data
> prev, next = data[item]
> data[prev][1] = next
> data[next][0] = prev
>
> def discard(self, item):
> """Remove an element from a set if it is a member.
> If the element is not a member, do nothing."""
> try:
> self.remove(item)
> except KeyError:
> pass
>
> def __contains__(self, item):
> return item in self._data
>
> def pop(self):
> """Remove and the return the oldest element"""
> data = self._data
> #prev, first = data[None]
> #data[None] = [None,data[first][1]]

root = self._root
prev, first = data[root]
data[root] = [root,data[first][1]]
> return first
>
> def clear(self):
> self.__init__([])
>
> def __iter__(self):
> """Iterate over the collection in order"""
> data = self._data
> #prev, next = data[None]
> #while next is not None:

root = self._root
prev, next = data[root]
while next is not root:
> yield next
> prev, next = data[next]
>
> def __len__(self):
> return len(self._data)-1
>
> def __repr__(self):
> return "%s(%s)" % (self.__class__.__name__,list(self))


>>> a=OrdSet([None,1,None,3])
>>> a

OrdSet([None, 1, 3])

Marc
 
Reply With Quote
 
 
 
 
Michael Spencer
Guest
Posts: n/a
 
      03-09-2005
Marc Christiansen wrote:
> Michael Spencer <(E-Mail Removed)> wrote:
>
> Nice. When you replace None by an object(), you have no restriction on
> the elements any more:
>
>

Thanks for the suggestion, Marc.

Note that if there is no need to access the middle of the collection, then the
implementation is simpler, and less resource-intensive, since the items can be
singly-linked

class UniqueQueue(object):
def __init__(self, iterable):
self._data = _data = {}
self._last = self._root = object() # An object the user is unlikely to
# reference - thanks Marc
self.update(iterable)

def push(self, obj):
if not obj in self._data:
self._data[self._last] = obj
self._last = obj

def pop(self):
data = self._data
first = data.pop(self._root)
self._root = first
return first

def update(self, iterable):
last = self._last
data = self._data
for item in iterable:
if item not in data:
data[last] = item
last = item
self._last = last

def __iter__(self):
data = self._data
next = self._root
try:
while 1:
next = data[next]
yield next
except KeyError:
raise StopIteration

def __repr__(self):
return "%s(%s)" % (self.__class__.__name__,list(self))


>>> q = UniqueQueue(range(5))
>>> q.update(range(3,)
>>> q

UniqueQueue([0, 1, 2, 3, 4, 5, 6, 7])
>>> q.pop()

0
>>> q

UniqueQueue([1, 2, 3, 4, 5, 6, 7])
>>>
>>> q.push(None)
>>> q

UniqueQueue([1, 2, 3, 4, 5, 6, 7, None])
>>>



Michael



 
Reply With Quote
 
 
 
Reply

Thread Tools

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off
Trackbacks are On
Pingbacks are On
Refbacks are Off


Similar Threads
Thread Thread Starter Forum Replies Last Post
Best way to delete contents of a list list of (pointers) Angus C++ 6 01-23-2010 01:05 PM
What is the best way to make dual-way xml databinding? Stan ASP .Net 3 05-05-2005 02:07 PM
RE: Best way to make a list unique? Delaney, Timothy C (Timothy) Python 2 03-12-2005 01:41 AM
RE: Best way to make a list unique? Delaney, Timothy C (Timothy) Python 3 03-09-2005 11:47 AM
The best way to make a generic report page using a web service indicated in a xml file. Pablo Gutierrez ASP .Net 0 10-27-2003 11:03 PM



Advertisments