From 7f00bbb69ea2902fbb1bead8e75df4971366ee79 Mon Sep 17 00:00:00 2001 From: Steve Linton Date: Sun, 10 Jan 2016 19:41:03 +0000 Subject: [PATCH 1/2] IsPlistRep implies HasLength. --- lib/list.gd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/list.gd b/lib/list.gd index c038e0c56e..0a6f4bfcf2 100644 --- a/lib/list.gd +++ b/lib/list.gd @@ -134,6 +134,8 @@ BIND_GLOBAL( "MAX_SIZE_LIST_INTERNAL", 2^(8*GAPInfo.BytesPerVariable-4) - 1 ); ## <#/GAPDoc> ## DeclareAttributeKernel( "Length", IsList, LENGTH ); +InstallTrueMethod(HasLength,IsPlistRep); + ############################################################################# From 5deeb112fd5650ea111cae4d23744e8eb18098f4 Mon Sep 17 00:00:00 2001 From: Steve Linton Date: Sun, 10 Jan 2016 20:06:33 +0000 Subject: [PATCH 2/2] New Union implementation. Output format change to one existing test, new test. Improve test file to get 100% coverage of new code. Fix some bugs. Document change. Add a sweep phase to deal with the matching stride case faster. --- dev/Updates/union | 24 ++ lib/coll.gd | 5 +- lib/coll.gi | 571 +++++++++++++++++++++++-------------- tst/testinstall/oprt.tst | 2 +- tst/teststandard/union.tst | 30 ++ 5 files changed, 421 insertions(+), 211 deletions(-) create mode 100644 dev/Updates/union diff --git a/dev/Updates/union b/dev/Updates/union new file mode 100644 index 0000000000..04e8ecd55e --- /dev/null +++ b/dev/Updates/union @@ -0,0 +1,24 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Format 'yyyy/mm/dd' +!! Date +2016/01/11 +!! Changed by +SL +! Reported by +Nick Loughlin +!! Type of Change +Fix: wrong result + +!! Description +Under some circumstances the union of a combination of lists of small integers +and ranges could be computed incorrectly. This change fixes this and also +avoids expanding ranges unnecessarily in some cases. + +! Test Code +# included in union.tst + +! Prefetch + +!! Changeset + +!! End diff --git a/lib/coll.gd b/lib/coll.gd index a3973eb8b3..e10eb50d5f 100644 --- a/lib/coll.gd +++ b/lib/coll.gd @@ -3092,7 +3092,10 @@ DeclareOperation( "Intersection2", ## [ 1, 2, 3, 4 ] ## gap> Union( [ ] ); ## [ ] -## ]]> +## ]]>

+## When computing the Union of lists or sets of small integers and ranges, +## every attempt is made to return the result as a range and to avoid expanding +## ranges provided as input. ## ## ## <#/GAPDoc> diff --git a/lib/coll.gi b/lib/coll.gi index ee3087ba3d..76939d0a3f 100644 --- a/lib/coll.gi +++ b/lib/coll.gi @@ -2678,223 +2678,377 @@ end); Unbind(AbsInt); -# Test routine for joining random ranges. -# Test:=function() -# local a0,b0,da,db,a1,b1,ra,rb,r,u; -# a0:=Random([1..50]); -# da:=Random([0..6]); -# a1:=a0+Random([1..5])*da; -# if da=0 then -# ra:=[a0]; -# else -# ra:=[a0,a0+da..a1]; -# fi; -# b0:=Random([1..50]); -# db:=Random([0..6]); -# b1:=b0+Random([1..5])*db; -# if db=0 then -# rb:=[b0]; -# else -# rb:=[b0,b0+db..b1]; -# fi; -# -# u:=Union(ra,rb); -# IsRange(u); -# r:=JoinRanges(a0,da,a1,b0,db,b1); -# if r=fail then -# Print("Join ",ra," ",rb," to ",r,"\n"); -# if IsRangeRep(u) then -# Error("did not recognize range"); -# fi; -# elif r<>fail then -# if r[2]=0 then -# r:=[r[1]]; -# else -# r:=[r[1],r[1]+r[2]..r[3]]; -# fi; -# Print("Join ",ra," ",rb," to ",r,"\n"); -# if u<>r then -# Error("wrong union"); -# fi; -# fi; -# end; - - ############################################################################# ## #F Union( ) #F Union( , ... ) ## -InstallGlobalFunction( Union, function ( arg ) - local lists, # concatenation of arguments that are lists - ranges, # those arguments that are proper ranges - other, # those arguments that are not lists - D, # domain or list, running over the arguments - U, # union, result - start, # start position in `other' - better, # did pairwise join of ranges give improvement? - i,j, # loop variable - passes, # attempts to merge passes - progress; # inner loop finding matches - - # unravel the argument list if necessary - if Length(arg) = 1 then - arg := arg[1]; - fi; - - # empty case first - if Length( arg ) = 0 then - return [ ]; - fi; - - # Separate ranges, lists and domains. - lists:= []; - other:= []; - ranges:=[]; - for D in arg do - if (IsPlistRep(D) and Length(D)=1 and IsSmallIntRep(D[1])) then - # detect lists that could be ranges - Add(ranges,[D[1],0,D[1]]); - elif IS_RANGE_REP(D) then - Add(ranges,[D[1],D[2]-D[1],D[Length(D)]]); - elif IsList( D ) then - Append( lists, D ); - else - Add( other, D ); +## This apparently simple function (given that the work is usually done in +## Union2, UniteSet or Set) is complicated by the presence of ranges, something +## which comes up in a lot of permutation group and combinatorial applications +## We want to avoid unpacking long ranges if we possibly can, and return the result +## as a range if it can be. +## +## This code uses IS_RANGE and IS_RANGE_REP in place of ConvertToRangeRep and +## IsRangeRep because it is loaded early. +## +InstallGlobalFunction(Union, function(arg) + local tounite, handles, x, useUnion2, rangeSeen, distinct, + lasthandle, i, h, u, allDense, smallest, secondsmallest, + largest, ranges, sets, singletons, rd, singleton, min, max, + smin, s, stride, goal, data, sizebound, needed, minNeeded, + rstart, r, rmax, split, r2, newneeded; + # + # Union of one list is assumed to run over the list + # + if Length(arg) = 1 then + arg := arg[1]; fi; - od; - - # if lists is long processing would take long - if Length(ranges)>0 and Length(lists)<50 and ForAll(lists,IsSmallIntRep) then - # is lists also a range? - lists:=Set(lists); - if Length(lists)>0 and IS_RANGE(lists) then - if Length(lists)=1 then - Add(ranges,[lists[1],0,lists[1]]); - else - Add(ranges,[lists[1],lists[2]-lists[1],lists[Length(lists)]]); - fi; - lists:=[]; - fi; - - # try to merge smaller by considering all pairs of ranges - better:=true; # in case of length 1 - passes:=3; # only going to try 3 passes - while Length(ranges)>1 and passes > 0 do - passes:=passes-1; - ranges:=Set(ranges); - better:=false; - i:=1; - while ifail then - # worked, replace one and overwrite other - ranges[i]:=U; - Unbind(ranges[j]); - better:=true; - progress:=true; - fi; - j:=j+1; - od; - fi; - i:=i+1; - od; - - if better=false then - # no join was possible -- need to go list way - for i in ranges do - if i[2]= 0 then - j:=[i[1]]; - else - j:=[i[1],i[1]+i[2]..i[3]]; - fi; - Append(lists,j); - od; - ranges:=[]; - fi; - + if Length(arg) = 0 then + return []; + fi; + # + # First scan. O(1) time per list + # + tounite := []; + handles := []; + for x in arg do + if not IsListOrCollection(x) then + Error("Union: arguments must be lists or collections"); + fi; + if (HasLength(x) and Length(x) = 0) or (HasSize(x) and Size(x) = 0) then + continue; + fi; + Add(tounite,x); + Add(handles, HANDLE_OBJ(x)); od; - - if better then - # we were able to join to a single range - ranges:=ranges[1]; - - i:=1; - better:=true; - while i<=Length(lists) and better do - U:=JoinRanges(ranges[1],ranges[2],ranges[3],lists[i],0,lists[i]); - if U<>fail then - ranges:=U; - else - better:=false; - fi; - i:=i+1; - od; - - if ranges[2]=0 then - ranges:=[ranges[1]]; - else - ranges:=[ranges[1],ranges[1]+ranges[2]..ranges[3]]; - fi; - - if better then - # all one range - lists:=ranges; - else - Append(lists,ranges); - fi; - # now all ranges are merged in lists, but lists might be in range - # rep. - + if Length(tounite) = 0 then + return []; + elif Length(tounite) = 1 then + return Set(tounite[1]); + fi; + # + # Now eliminate identical lists + # and scan for anything except plain lists and ranges + # + useUnion2 := false; + rangeSeen := false; + SortParallel(handles,tounite); + distinct := []; + lasthandle := fail; + for i in [1..Length(tounite)] do + h := handles[i]; + if h <> lasthandle then + x := tounite[i]; + Add(distinct,x); + lasthandle := h; + if IS_RANGE_REP(x) then + rangeSeen := true; + elif not IsPlistRep(x) then + useUnion2 := true; + fi; + fi; + od; + tounite := distinct; + + if Length(tounite) = 1 then + return Set(tounite[1]); + fi; + # + # if we spotted anything except a plain list or range then we use + # Union2 and UniteSet since the objects might have good methods installed + #T could be cleverer if the first "non-plain" object is late in a long list + #T but it's not clear whether the clever thing is to unite all the rest, or + #T to start with the external object. + # + if useUnion2 then + u := Union2(tounite[1],tounite[2]); + for i in [3..Length(tounite)] do + UniteSet(u,tounite[i]); + od; fi; - - else - # joining nonintegers or a lot of lists -- forget the ranges. - for i in ranges do - if i[2]= 0 then - Add(lists,i[1]); - else - Append(lists,[i[1],i[1]+i[2]..i[3]]); - fi; + # + # if we have nothing but plain lists then it is at most linear in space and time + # to concatenate and sort and then check for range + # + if not rangeSeen then + u := Set(Concatenation(tounite)); + IS_RANGE(u); + return u; + fi; + # + # Next pass looks at all entries of lists and the defining data of ranges + # linear in the total memory occupied by the (remaining) input. + # in this pass we will notice any elements that are not small integers and also + # work out what range the union is, if in fact it is a range. + # + + smallest := infinity; + secondsmallest := infinity; + largest := -infinity; + ranges := []; + sets := []; + singletons := []; + + + + for x in tounite do + rd := fail; + singleton := false; + + if Length(x) = 1 then + singleton := true; + min := x[1]; + max := x[1]; + elif IS_RANGE_REP(x) then + if x[2] < x[1] then + x := Reversed(x); + fi; + rd := x; + min := x[1]; + smin := x[2]; + max := x[Length(x)]; + else + s := Set(x); + if Length(s) = 1 then + singleton := true; + min := s[1]; + max := s[1]; + else + if IS_RANGE(s) then + rd := s; + else + rd := fail; + if not ForAll(s, IsSmallIntRep) then + # + # result cannot be a range, so fall back + # + return Set(Concatenation(tounite)); + fi; + fi; + min := s[1]; + smin := s[2]; + max := s[Length(s)]; + fi; + fi; + # + # At this point either x was a singleton whose value is now in max and min + # or x was a range of length 2 or more and rd contains it + # or x was NOT a range rd is fail and s contains x sorted and with duplicates removed + # but x does consist entirely of small integers + # + # Furthermore min, smin and max contain the smallest, second smallest and largest + # entries of x (except if singleton is true) + # + + if singleton then + if not IsSmallIntRep(min) then + return Set(Concatenation(tounite)); + fi; + Add(singletons, min); + elif rd = fail then + Add(sets,s); + else + Add(ranges,rd); + fi; + + if min < smallest then + secondsmallest := smallest; + smallest := min; + elif min > smallest and min < secondsmallest then + secondsmallest := min; + fi; + if not singleton and smin < secondsmallest then + secondsmallest := smin; + fi; + if max > largest then + largest := max; + fi; od; - - fi; - - # Then unite the lists. - # (This can be regarded as the most usual case. - # For efficiency reasons, we use one `Set' call instead of - # repeated `UniteSet' calls.) -#T However, this causes a lot of space loss -#T if many long and redundant lists occur; -#T using `UniteSet' would be much slower but ``conservative''. - if Length( lists ) = 0 then - if Length(other)=0 then - return lists; - fi; - U:= other[1]; - start:= 2; - else - U:= Set( lists ); - start:= 1; - fi; - - # Now loop over the domains. - for i in [ start .. Length( other ) ] do - U:= Union2( U, other[i] ); - od; - - # return the union - if IsList( U ) and not IsSSortedList( U ) then - U := Set( U ); - fi; - return U; + + singletons := Set(singletons); + Add(sets, singletons); + + # So, if we get to here we know that everything is small integers and that if the result is a range + # then we know which range it is. Now we somehow have to work out if it actually is that range or not + # it's not too hard to check if it is a subset of that range (indeed if the stride is 1 we know it is). + # but we're trying hard to avoid time or space proportional to the size of that range. + + # Since we know by this point that we started with at least one range, we know we had + # at least two values, so if the result is a range it will have a defined stride + + stride := secondsmallest - smallest; + if (largest - smallest) mod stride <> 0 then + return Set(Concatenation(tounite)); + fi; + + goal := [smallest, secondsmallest .. largest]; + + + # + # We want the stride 1 ranges in front, ordered by starting position + # + + data := List(ranges, r-> [r[2]-r[1],r[1],-Length(r)]); + SortParallel(data,ranges); + + # + # Check for inclusion + # + + if stride > 1 and + (ForAny(ranges, r->not r[1] in goal or (r[2]-r[1]) mod stride <> 0) or + ForAny(sets, s-> ForAny(s, a -> not a in goal))) then + return Set(Concatenation(tounite)); + fi; + + + # + # So now we have the hard part. We need to check that the whole range is actually covered + # + + + # + # Start with an easy size bound + # + + sizebound := Sum(sets,Size) + Sum(ranges, Size); + + if sizebound < Size(goal) then + # + # Even if everything is disjoint there are not enough points to cover the range + # + return Set(Concatenation(tounite)); + fi; + + # + # We can deal with the ranges with matching stride super-quickly by + # sweeping through them in order + # + needed := []; + minNeeded := goal[1]; + rstart := Length(ranges)+1; + for i in [1..Length(ranges)] do + r := ranges[i]; + if r[2] -r[1] > stride then + # + # passed all the stuff with matching stride, leave the rest to the more complex + # code + # + rstart := i; + break; + fi; + # if we were out of phase we'd have failed the inclusion check above + if r[1] > minNeeded then + # + # leaves a hole + # + Add(needed,[minNeeded, minNeeded+stride..r[1]-stride]); + fi; + rmax := r[Length(r)]; + if rmax >= minNeeded then + # + # Progress with sweep + # + minNeeded := rmax+stride; + fi; + od; + # + # Don't forget the last bit. + # + if minNeeded <= largest then + Add(needed,[minNeeded, minNeeded+stride.. largest]); + fi; + + if needed = [] then + return goal; + fi; + + + # Finally then, we are in a case where we really need to be clever, so + # We keep track of the points in goal we haven't seen yet as we run through the ranges + # of non-matching stride + # But we represent them as a union of ranges. + + split := function(r, r2) + local outs, max2, max, stride, stride2, i; + # + # This function essentially computes the difference between r and r2 + # but represents it as a union of ranges + # + outs := []; + max2 := r2[Length(r2)]; + if Length(r) = 1 then + # + # This case is simpler + if not r[1] in r2 then + Add(outs,r); + fi; + return outs; + fi; + max := r[Length(r)]; + + # + # There is a good kernel intersection for two ranges + # replacing r2 by the intersection (which is always a range) + # makes the next part simpler + # + r2 := Intersection(r2,r); + + # + # We might miss completely + # + + if Length(r2) = 0 then + Add(outs,r); + return outs; + fi; + + max2 := r2[Length(r2)]; + + # + # In general we have the bit before r2, the bit after and + # the stuff within r2 but which it misses + # + stride := r[2]-r[1]; + if r2[1] > r[1] then + Add(outs, [r[1],r[2]..r2[1]-stride]); + fi; + if max > max2 then + Add(outs, [max2+stride,max2+2*stride..max]); + fi; + if Length(r2) > 1 then + stride2 := r2[2]-r2[1]; + if stride2 > stride then + for i in [stride,stride*2..stride2-stride] do + Add(outs,[r2[1]+i,r2[1]+stride2+i..max2-stride2+i]); + od; + fi; + fi; + + return outs; + end; + + # + # Now we subtract the ranges we have from the goal + # + for r2 in ranges{[rstart..Length(ranges)]} do + newneeded := []; + for r in needed do + Append(newneeded,split(r,r2)); + od; + needed := newneeded; + od; + + # + # and then any remaining points must be in the sets + # + + if ForAny(needed, r-> ForAny(r, x-> ForAll(sets, s -> not x in s))) then + return Set(Concatenation(tounite)); + else + return goal; + fi; end); @@ -3070,4 +3224,3 @@ InstallMethod( CanComputeIsSubset,"default: no, unless identical", ############################################################################# ## #E - diff --git a/tst/testinstall/oprt.tst b/tst/testinstall/oprt.tst index 1ad9ba1411..d57de682a1 100644 --- a/tst/testinstall/oprt.tst +++ b/tst/testinstall/oprt.tst @@ -39,7 +39,7 @@ gap> Blocks(eo); gap> RepresentativesMinimalBlocks(eo); [ [ 1, 5, 9 ], [ 1, 7 ] ] gap> MaximalBlocks(eo); -[ [ 1, 3, 5, 7, 9, 11 ], [ 2, 4, 6, 8, 10, 12 ] ] +[ [ 1, 3 .. 11 ], [ 2, 4 .. 12 ] ] gap> eo:=ExternalOrbit(G,[1..12],1,OnPoints); 1^G gap> IsTransitive(eo); diff --git a/tst/teststandard/union.tst b/tst/teststandard/union.tst index 57c0ea78ac..859cc4a7c6 100644 --- a/tst/teststandard/union.tst +++ b/tst/teststandard/union.tst @@ -35,6 +35,36 @@ gap> for i in [-4..4] do > od; > od; > od; +gap> mylist := [ [ 1 ], [ -1 ], [ 2 ], [ -5, -2 ], [ 3 ], [ -3 ], [ 4 ], [ -4 ], [ 5 ], [ 6 ], [ -6 ] ];; +gap> Set( Flat( mylist ) ) = Union( mylist ); +true +gap> Union([]); +[ ] +gap> Union([Z(5)]); +Error, Union: arguments must be lists or collections +gap> l := [1,4,2];; +gap> Union([l,l]); +[ 1, 2, 4 ] +gap> Union([1..5],[1/2]); +[ 1/2, 1, 2, 3, 4, 5 ] +gap> Union([1..5],[1,3,4]); +[ 1 .. 5 ] +gap> Union([1..5],[1/2,7]); +[ 1/2, 1, 2, 3, 4, 5, 7 ] +gap> Union([[1],[1],[1]]); +[ 1 ] +gap> Union([1,2],"a"); +[ 1, 2, 'a' ] +gap> Union([1,2],"a",[3,4]); +[ 1, 2, 3, 4, 'a' ] +gap> Union([1,5..19997],[3],[7,11,19999],[15,23..19991],[19,27..19995]); +[ 1, 3 .. 19999 ] +gap> IsRangeRep(Union([1,5..19997],[3],[7,15,19999],[15,23..19991],[19,27..19995])); +false +gap> f := x -> List([1..x], y -> [y*5..(y+1)*5]);; Union(f(10000)); +[ 5 .. 50005 ] +gap> f := x -> List([1..x], y -> [y*15,(y+1)*15..(y+5)*15]);; Union(f(10000)); +[ 15, 30 .. 150075 ] gap> STOP_TEST( "union.tst", 35140000); #############################################################################