# Hello, this script is written in Python - http://www.python.org
#
# newsarchiver 1.1p - Newsgroup archiver
#
# Purpose:
# This script will download all available messages from the desired Usenet group
# and save them as plain text files. Useful for bulk group archiving.
#
# Usage:
#
# Syntax : python newsarchiver.py <groupname> [firstArticle]
#
# where groupname is the group name (eg. comp.lang.python)
# firstArticle is the article number to fetch from (optional)
#
# Example 1 : python newsarchiver.py comp.lang.python
# (fetch all articles available from comp.lang.python)
#
# Example 2 : python newsarchiver.py comp.lang.python 108224
# (fetch all articles available from comp.lang.python
# starting with article number 108224)
#
# Password will be asked when the script is run.
#
# Server name, login and destination directory are hardcoded.
# Tweak the lines below the 'import' statement to suit your needs.
# Variable names should be self-explanatory.
#
# Then run this script and enter the password for the corresponding login.
# The script will then connect to the news server and start fetching messages.
#
# You can stop this script at any time (break with CTRL+C)
# and re-run it later to continue fetching messages.
# This script will not fetch messages it has already fetched.
#
# All messages will be saved as individual files in the form:
# groupname_messageNumber
# (with dots replaced by underscores)
# ( eg : comp_lang_python_104208 )
#
# Keep in mind that 'messageNumber' is server-dependent.
# (If you change news server, the message numbers will be different: you will
# have to erase all files and fetch them all again to have a coherent fileset)
# The messageNumber matches the Xref reference number in each message.
#
# Group must exist on server.
#
# Changes:
# 1.0p : - first version
# 1.1p : - added group name and first article number as command-line parameters.
# - added help screen
#
# Author's comments:
# Oh my, I wouldn't have believed this would be so easy to program... thanks to Python !
#
# Credits:
# I created this script for 2 purposes:
# - practice Python programming (this is one of my first Python scripts)
# - archive comp.lang.python and other interesting newsgroups.
#
# The author of this script is Sebastien SAUVAGE <sebsauvage at sebsauvage dot net>
# http://sebsauvage.net
# Other quick & dirty Python stuff is likely to be available at http://sebsauvage.net/python/
#
# Legal:
# This script is public domain. Feel free to re-use and tweak the code.
#
import getpass
import nntplib
import os.path
import socket
import string
import sys
# Hardcoded settings -- edit these three values before running the script.
#
# Directory the fetched articles are written to. The file name is appended
# directly to this string, so it must end with a path separator.
destination = 'c:\\ngarchive\\' # do not forget the trailing [back]slash !
# Address of the news (NNTP) server to connect to; port 119 is used.
newsserver = '127.0.0.1'
# Login name sent to the news server; the matching password is asked at run time.
loginname = 'sebsauvage'
# Without a group name on the command line there is nothing to fetch:
# show the help screen (including the hardcoded settings) and stop.
if len(sys.argv) <= 1:
    helpScreen = [
        '>>> newsArchiver 1.1p\n',
        ' *** IMPORTANT ***',
        ' See comments in code for more information before running this script !',
        ' (News server address and login name are hardcoded :',
        ' you need to tailor them before using this script.)',
        # Items are joined with single spaces, as a comma-separated print would.
        ' '.join([' News server', newsserver, "will be used with login '"+loginname+"'"]),
        ' '.join([' Destination path is', destination, '\n']),
        ' Syntax : python newsarchiver.py <groupname> [firstArticle]\n',
        ' Example 1 : python newsarchiver.py comp.lang.python',
        ' (fetch all article available from comp.lang.python)\n',
        ' Example 2 : python newsarchiver.py comp.lang.python 108224',
        ' (fetch all article available from comp.lang.python',
        ' starting with article number 108224)\n',
    ]
    for helpLine in helpScreen:
        print(helpLine)
    sys.exit()
# Command-line parameters.
# groupName    : name of the newsgroup to archive (first argument, mandatory).
# firstArticle : article number to start fetching from (second argument,
#                optional); 0 means "no starting article requested".
groupName = sys.argv[1]
firstArticle = 0
if len(sys.argv) > 2:
    try:
        firstArticle = int(sys.argv[2])
    except ValueError:
        # Only a malformed number is expected here; anything else should
        # surface normally instead of being reported as a bad parameter.
        print('Error : firstArticle parameters must be numeric.')
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)
# Errors that mean "the server or the connection refused/failed". Only these
# are reported and survived; KeyboardInterrupt is deliberately not included
# so that CTRL+C really stops the script.
_NNTP_ERRORS = (nntplib.NNTPError, socket.error, EOFError)


def _save_article(lines, fileName):
    """Write the article `lines` to `fileName`, one entry per text line.

    The text is first written to `fileName + '.part'` and renamed to
    `fileName` only once it is complete. The fetch loop skips every file
    that already exists, so an interrupted write must never leave a
    truncated article under the final name.
    """
    partName = fileName + '.part'
    f = open(partName, 'w')
    try:
        for line in lines:
            f.write(line + '\n')
    finally:
        # Release the handle even when a write fails or is interrupted.
        f.close()
    os.rename(partName, fileName)


def _fetch_group(ns, groupName, firstArticle):
    """Save every article of `groupName` that is not on disk yet.

    ns           -- connected nntplib.NNTP object
    groupName    -- newsgroup to open on the server
    firstArticle -- requested starting article number; used only when it
                    lies after the first and up to the last article the
                    server reports, otherwise the server's first is used

    Articles are stored in `destination` as <group>_<number>, with the dots
    of the group name replaced by underscores. Numbers whose file already
    exists are skipped; articles the server cannot deliver are reported
    and skipped.
    """
    print('>>> Accessing group %s' % groupName)
    try:
        group = ns.group(groupName)
    except _NNTP_ERRORS:
        print('>>> Could not open group %s' % groupName)
        return
    count = group[1] # nb of articles available on server
    first = group[2] # ID of first available article
    last = group[3] # ID of last available article
    print('>>> Article count : %s' % count)
    print('>>> First : %s' % first)
    print('>>> Last : %s' % last)
    if (firstArticle > int(first)) and (firstArticle <= int(last)):
        first = str(firstArticle)
    print('>>> Fetching from article %s' % first)
    # The part of the file name that does not depend on the article number.
    filePrefix = destination + groupName.replace('.', '_') + '_'
    for articleNumber in range(int(first), int(last) + 1):
        fileName = filePrefix + str(articleNumber)
        if os.path.isfile(fileName):
            continue # fetched by a previous run
        print('>>> Fetching article %s out of %s from %s' % (articleNumber, last, groupName))
        try:
            article = ns.article(str(articleNumber))
        except _NNTP_ERRORS:
            print('>>> Could not fetch article %s' % articleNumber)
        else:
            # The fourth item of the reply is the list of article lines.
            _save_article(article[3], fileName)


# Ask for the password, connect, archive the group, then always say goodbye
# to the server -- even when the run is interrupted or a file cannot be written.
loginpassword = getpass.getpass('>>> Please enter password for login '+loginname+'@'+newsserver+' : ')
print('>>> Connecting to news server %s ...' % newsserver)
try:
    ns = nntplib.NNTP(newsserver, 119, loginname, loginpassword)
except _NNTP_ERRORS:
    print('>>> Could not connect to news server.')
else:
    try:
        print('>>> News server welcomes us:')
        print(ns.getwelcome())
        _fetch_group(ns, groupName, firstArticle)
    finally:
        print('>>> Closing connection with news server...')
        try:
            ns.quit()
        except _NNTP_ERRORS:
            # The link may already be dead or mid-reply after an interrupt;
            # report it rather than hide the error that got us here.
            print('>>> Could not close the connection cleanly.')
#//python/5173