diff --git a/apps/syndie/java/src/net/i2p/syndie/Sucker.java b/apps/syndie/java/src/net/i2p/syndie/Sucker.java index 59a7c16e8..a62175b47 100644 --- a/apps/syndie/java/src/net/i2p/syndie/Sucker.java +++ b/apps/syndie/java/src/net/i2p/syndie/Sucker.java @@ -46,9 +46,6 @@ public class Sucker { public Sucker() {} - /** - * Constructor for BlogManager. - */ public Sucker(String[] strings) throws IllegalArgumentException { SuckerState state = new SuckerState(); state.pushToSyndie=true; @@ -75,6 +72,7 @@ public class Sucker { state.user = state.bm.getUser(blogHash); if(state.user==null) throw new IllegalArgumentException("wtf, user==null? hash:"+blogHash); + state.history = new ArrayList(); _state = state; } @@ -162,11 +160,6 @@ public class Sucker { _log.debug("message number: " + _state.messageNumber); - // Create historyFile if missing - _state.historyFile = new File(_state.historyPath); - if (!_state.historyFile.exists()) - _state.historyFile.createNewFile(); - _state.shouldProxy = false; _state.proxyPortNum = -1; if ( (_state.proxyHost != null) && (_state.proxyPort != null) ) { @@ -194,7 +187,7 @@ public class Sucker { boolean ok = lsnr.waitForSuccess(); if (!ok) { _log.debug("success? " + ok); - System.err.println("Unable to retrieve the url after " + numRetries + " tries."); + System.err.println("Unable to retrieve the url [" + _state.urlToLoad + "] after " + numRetries + " tries."); fetched.delete(); return _state.entriesPosted; } @@ -213,31 +206,56 @@ public class Sucker { _log.debug("entries: " + entries.size()); - FileOutputStream hos = null; + loadHistory(); + + // Process list backwards to get syndie to display the + // entries in the right order. 
(most recent at top) + List feedMessageIds = new ArrayList(); + for (int i = entries.size()-1; i >= 0; i--) { + SyndEntry e = (SyndEntry) entries.get(i); - try { - hos = new FileOutputStream(_state.historyFile, true); + _state.attachmentCounter=0; + + if (_log.shouldLog(Log.DEBUG)) + _log.debug("Syndicate entry: " + e.getLink()); + + // Calculate messageId, and check if we have got the message already + String feedHash = sha1(_state.urlToLoad); + String itemHash = sha1(e.getTitle() + e.getDescription()); + Date d = e.getPublishedDate(); + String time; + if(d!=null) + time = "" + d.getTime(); + else + time = "" + new Date().getTime(); + String outputFileName = _state.outputDir + "/" + _state.messageNumber; + String messageId = feedHash + ":" + itemHash + ":" + time + ":" + outputFileName; - // Process list backwards to get syndie to display the - // entries in the right order. (most recent at top) - for (int i = entries.size()-1; i >= 0; i--) { - SyndEntry e = (SyndEntry) entries.get(i); - - _state.attachmentCounter=0; - - if (_log.shouldLog(Log.DEBUG)) - _log.debug("Syndicate entry: " + e.getLink()); - - String messageId = convertToSml(_state, e); - if (messageId!=null) { - hos.write(messageId.getBytes()); - hos.write("\n".getBytes()); - } - } - } finally { - if (hos != null) try { hos.close(); } catch (IOException ioe) {} + // Make sure these messageIds get into the history file + feedMessageIds.add(messageId); + + // Check if we already have this + if (existsInHistory(_state, messageId)) + continue; + + infoLog("new: " + messageId); + + // process the new entry + processEntry(_state, e, time); } + // update history + pruneHistory(_state.urlToLoad, 42*10); // could use 0 if we were sure old entries never re-appear + Iterator iter = feedMessageIds.iterator(); + while(iter.hasNext()) + { + String newMessageId = (String)iter.next(); + if(!existsInHistory(_state, newMessageId)) + addHistory(newMessageId); // add new message ids from current feed to history + } + 
storeHistory(); + + // call script if we don't just feed syndie if(!_state.pushToSyndie) { FileOutputStream fos = null; try { @@ -264,6 +282,111 @@ public class Sucker { return _state.entriesPosted; } + private void loadHistory() { + try { + // Create historyFile if missing + _state.historyFile = new File(_state.historyPath); + if (!_state.historyFile.exists()) + _state.historyFile.createNewFile(); + + FileInputStream is = new FileInputStream(_state.historyFile); + String s; + while((s=readLine(is))!=null) + { + addHistory(s); + } + } catch (FileNotFoundException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private boolean existsInHistory(SuckerState state, String messageId) { + int idx; + idx = messageId.lastIndexOf(":"); + String lineToCompare = messageId.substring(0, idx-1); + idx = lineToCompare.lastIndexOf(":"); + lineToCompare = lineToCompare.substring(0, idx-1); + Iterator iter = _state.history.iterator(); + while(iter.hasNext()) + { + String line = (String)iter.next(); + idx = line.lastIndexOf(":"); + if (idx < 0) + return false; + line = line.substring(0, idx-1); + idx = line.lastIndexOf(":"); + if (idx < 0) + return false; + line = line.substring(0, idx-1); + if (line.equals(lineToCompare)) + return true; + } + return false; + } + + private void addHistory(String messageId) { + _state.history.add(messageId); + } + + private void pruneHistory(String url, int nrToKeep) { + int i=0; + String urlHash=sha1(url); + + // Count nr of entries starting with the url hash + Iterator iter = _state.history.iterator(); + while(iter.hasNext()) + { + String historyLine = (String) iter.next(); + if(historyLine.startsWith(urlHash)) + { + i++; + } + } + + // remove entries for this url from the front of the list until only the last nrToKeep remain + i = i - nrToKeep; + if(i>0) + { + iter = _state.history.iterator(); + while(i>0 && iter.hasNext()) + { + String historyLine = (String) iter.next(); + 
if(historyLine.startsWith(urlHash)) + { + iter.remove(); + i--; + } + } + } + } + + private void storeHistory() { + FileOutputStream hos = null; + try { + hos = new FileOutputStream(_state.historyFile, false); + Iterator iter = _state.history.iterator(); + while(iter.hasNext()) + { + String historyLine = (String) iter.next(); + hos.write(historyLine.getBytes()); + hos.write("\n".getBytes()); + } + } catch (FileNotFoundException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } finally { + if (hos != null) try { hos.close(); } catch (IOException ioe) {} + } + } + public static void main(String[] args) { Sucker sucker = new Sucker(); boolean ok = sucker.parseArgs(args); @@ -288,8 +411,6 @@ public class Sucker { */ private static boolean execPushScript(SuckerState state, String id, String time) { try { - String ls_str; - String cli = state.pushScript + " " + state.outputDir + " " + id + " " + time; Process pushScript_proc = Runtime.getRuntime().exec(cli); @@ -327,28 +448,11 @@ public class Sucker { /** * Converts the SyndEntry e to sml and fetches any images as attachments */ - private static String convertToSml(SuckerState state, SyndEntry e) { + private static boolean processEntry(SuckerState state, SyndEntry e, String time) { String subject; state.stripNewlines=false; - // Calculate messageId, and check if we have got the message already - String feedHash = sha1(state.urlToLoad); - String itemHash = sha1(e.getTitle() + e.getDescription()); - Date d = e.getPublishedDate(); - String time; - if(d!=null) - time = "" + d.getTime(); - else - time = "" + new Date().getTime(); - String outputFileName = state.outputDir + "/" + state.messageNumber; - String messageId = feedHash + ":" + itemHash + ":" + time + ":" + outputFileName; - // Check if we already have this - if (existsInHistory(state, messageId)) - return null; - - infoLog("new: " + messageId); - try { String 
sml=""; @@ -370,7 +474,6 @@ public class Sucker { List l = e.getContents(); if(l!=null) { - debugLog("There is content"); iter = l.iterator(); while(iter.hasNext()) { @@ -402,8 +505,8 @@ public class Sucker { } String source=e.getLink(); //Uri(); - if(source.indexOf("http")<0) - source=state.baseUrl+source; + if(!source.startsWith("http://")) + source=state.baseUrl+source; sml += "[link schema=\"web\" location=\""+source+"\"]source[/link]\n"; if(state.pushToSyndie) { @@ -426,7 +529,7 @@ public class Sucker { if(uri==null) { errorLog("pushToSyndie failure."); - return null; + return false; } else { state.entriesPosted.add(uri); infoLog("pushToSyndie success, uri: "+uri.toString()); @@ -448,14 +551,14 @@ public class Sucker { } state.messageNumber++; deleteTempFiles(state); - return messageId; + return true; } catch (FileNotFoundException e1) { e1.printStackTrace(); } catch (IOException e2) { e2.printStackTrace(); } deleteTempFiles(state); - return null; + return false; } private static void deleteTempFiles(SuckerState state) { @@ -570,7 +673,7 @@ public class Sucker { ret+="[/img]"; - if(imageLink.indexOf("http")<0) + if(!imageLink.startsWith("http://")) imageLink=state.baseUrl+"/"+imageLink; fetchAttachment(state, imageLink); @@ -592,7 +695,7 @@ public class Sucker { if (b >= htmlTagLowerCase.length()) return null; // abort the b0rked tag String link=htmlTag.substring(a,b); - if(link.indexOf("http")<0) + if(!link.startsWith("http://")) link=state.baseUrl+"/"+link; String schema="web"; @@ -613,6 +716,7 @@ public class Sucker { state.pendingEndLink=false; return "[/link]"; } + return ""; } if("".equals(htmlTagLowerCase)) @@ -645,8 +749,21 @@ public class Sucker { return ""; if("
".equals(htmlTagLowerCase)) return ""; - if(htmlTagLowerCase.startsWith("".equals(htmlTagLowerCase)) // emulate table with hr + if(htmlTagLowerCase.startsWith("".equals(htmlTagLowerCase)) + return ""; + if("".equals(htmlTagLowerCase)) + return ""; + if("
".equals(htmlTagLowerCase)) + return "[quote]"; + if("
".equals(htmlTagLowerCase)) + return "[/quote]"; + if(htmlTagLowerCase.startsWith("".equals(htmlTagLowerCase)) // emulate table with hr :) return "[hr][/hr]"; + if(htmlTagLowerCase.startsWith("

todo:

- *

caching (eepget should do it) - *

enclosures support (requires cvs rome) - *

syndie.sucker.minHistory/maxHistory used to roll over the history file? - *

configurable update period - * */ public class ImportFeedServlet extends BaseServlet { protected String getTitle() { return "Syndie :: Import feed"; } @@ -80,7 +73,6 @@ public class ImportFeedServlet extends BaseServlet { } } } else if ( (action != null) && ("Delete".equals(action)) ) { - out.write("Delete some thing
"); if (url == null || blog == null || tagPrefix == null) { out.write("error, some fields were empty.
"); } else { diff --git a/history.txt b/history.txt index de7b9009c..b3296f20a 100644 --- a/history.txt +++ b/history.txt @@ -1,4 +1,7 @@ -$Id: history.txt,v 1.406 2006/02/18 22:22:33 jrandom Exp $ +$Id: history.txt,v 1.407 2006/02/19 07:29:57 jrandom Exp $ + +2006-02-19 dust + * Added pruning of suckers history (it used to grow indefinitely). 2006-02-19 jrandom * Moved the current net's reseed URL to a different location than where