forked from I2P_Developers/i2p.i2p
i2psnark: Better comment deduping, fixes rating average
This commit is contained in:
@ -10,7 +10,11 @@ import net.i2p.I2PAppContext;
|
|||||||
import net.i2p.data.DataHelper;
|
import net.i2p.data.DataHelper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Store comments
|
* Store a single comment and/or rating.
|
||||||
|
* Unmodifiable except for marking as hidden.
|
||||||
|
* Stores a one-second timestamp but is designed so that identical
|
||||||
|
* comments within a certain time frame (bucket) are equal.
|
||||||
|
* Don't store in a plain set - see equals().
|
||||||
*
|
*
|
||||||
* @since 0.9.31
|
* @since 0.9.31
|
||||||
*/
|
*/
|
||||||
@ -30,6 +34,7 @@ public class Comment implements Comparable<Comment> {
|
|||||||
private static final int MAX_TEXT_LEN = 512;
|
private static final int MAX_TEXT_LEN = 512;
|
||||||
private static final int BUCKET_SIZE = 10*60*1000;
|
private static final int BUCKET_SIZE = 10*60*1000;
|
||||||
private static final long TIME_SHRINK = 1000L;
|
private static final long TIME_SHRINK = 1000L;
|
||||||
|
private static final int MAX_SKEW = (int) (BUCKET_SIZE / TIME_SHRINK);
|
||||||
// 1/1/2005
|
// 1/1/2005
|
||||||
private static final long TIME_OFFSET = 1104537600000L;
|
private static final long TIME_OFFSET = 1104537600000L;
|
||||||
|
|
||||||
@ -181,26 +186,29 @@ public class Comment implements Comparable<Comment> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return bucket number
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return time / (BUCKET_SIZE / (int) TIME_SHRINK);
|
return time / MAX_SKEW;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Comments in the same 10-minute bucket and otherwise equal
|
* Comments within 10 minutes (not necessarily in same bucket)
|
||||||
* are considered equal. This will result in duplicates
|
* and otherwise equal are considered equal.
|
||||||
* near the border.
|
* Violates the equals()/hashCode() contract, as equal objects may have different hashcodes and
|
||||||
|
* be in adjacent buckets.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o) {
|
public boolean equals(Object o) {
|
||||||
if (o == null) return false;
|
if (o == null) return false;
|
||||||
if (!(o instanceof Comment)) return false;
|
if (!(o instanceof Comment)) return false;
|
||||||
Comment c = (Comment) o;
|
Comment c = (Comment) o;
|
||||||
return rating == c.rating &&
|
int tdiff = time - c.time;
|
||||||
eq(text, c.text) &&
|
if (tdiff > MAX_SKEW || tdiff < 0 - MAX_SKEW)
|
||||||
eq(name, c.name) &&
|
return false;
|
||||||
hashCode() == c.hashCode();
|
return equalsIgnoreTimestamp(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -58,7 +58,7 @@ public class CommentSet extends AbstractSet<Comment> {
|
|||||||
// Assume most comments are short or null.
|
// Assume most comments are short or null.
|
||||||
private static final int MAX_TOTAL_TEXT_LEN = MAX_SIZE * 16;
|
private static final int MAX_TOTAL_TEXT_LEN = MAX_SIZE * 16;
|
||||||
|
|
||||||
public CommentSet() {
|
private CommentSet() {
|
||||||
super();
|
super();
|
||||||
map = new HashMap<Integer, List<Comment>>(4);
|
map = new HashMap<Integer, List<Comment>>(4);
|
||||||
}
|
}
|
||||||
@ -80,7 +80,7 @@ public class CommentSet extends AbstractSet<Comment> {
|
|||||||
try {
|
try {
|
||||||
br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF-8"));
|
br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF-8"));
|
||||||
String line = null;
|
String line = null;
|
||||||
while ( (line = br.readLine()) != null) {
|
while ((line = br.readLine()) != null) {
|
||||||
Comment c = Comment.fromPersistentString(line);
|
Comment c = Comment.fromPersistentString(line);
|
||||||
if (c != null)
|
if (c != null)
|
||||||
add(c);
|
add(c);
|
||||||
@ -129,7 +129,18 @@ public class CommentSet extends AbstractSet<Comment> {
|
|||||||
// If isMine and no text and rating changed, don't bother
|
// If isMine and no text and rating changed, don't bother
|
||||||
if (c.isMine() && c.getText() == null && c.getRating() == myRating)
|
if (c.isMine() && c.getText() == null && c.getRating() == myRating)
|
||||||
return false;
|
return false;
|
||||||
Integer hc = Integer.valueOf(c.hashCode());
|
int hCode = c.hashCode();
|
||||||
|
// check previous and next buckets
|
||||||
|
Integer phc = Integer.valueOf(hCode - 1);
|
||||||
|
List<Comment> plist = map.get(phc);
|
||||||
|
if (plist != null && plist.contains(c))
|
||||||
|
return false;
|
||||||
|
Integer nhc = Integer.valueOf(hCode + 1);
|
||||||
|
List<Comment> nxlist = map.get(nhc);
|
||||||
|
if (nxlist != null && nxlist.contains(c))
|
||||||
|
return false;
|
||||||
|
// check this bucket
|
||||||
|
Integer hc = Integer.valueOf(hCode);
|
||||||
List<Comment> list = map.get(hc);
|
List<Comment> list = map.get(hc);
|
||||||
if (list == null) {
|
if (list == null) {
|
||||||
list = Collections.singletonList(c);
|
list = Collections.singletonList(c);
|
||||||
|
Reference in New Issue
Block a user